Coverage for src/wiktextract/extractor/ru/models.py: 100%

93 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-12 08:27 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

# Common base class for the models in this file; every model below inherits
# this configuration.
class BaseModelWrap(BaseModel):
    model_config = ConfigDict(
        # Reject fields that are not declared on the model.
        extra="forbid",
        # Strict mode: no implicit type coercion of input values.
        strict=True,
        # Re-run validation when an attribute is assigned after construction.
        validate_assignment=True,
        # Validate the declared default values as well.
        validate_default=True,
    )

11 

12 

# A single translation term.  `word` and `lang` are required; every other
# field has an empty default.
class Translation(BaseModelWrap):
    # --- required term and its language ---------------------------------
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        default="",
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(
        description="Localized language name of the translation term",
    )

    # --- optional details -----------------------------------------------
    sense: str = Field(
        default="",
        description="An optional gloss describing the sense translated",
    )
    roman: str = Field(
        default="",
        description="Romanization of the word",
    )
    tags: list[str] = []
    raw_tags: list[str] = []
    other: str = ""

30 

31 

# A term linked to an entry.  All fields are optional and default to empty
# values.
class Linkage(BaseModelWrap):
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    sense: str = ""
    # Must be >= 0.  The description says numbering starts from 1, so the
    # default of 0 presumably means "no definition number" -- TODO confirm.
    sense_index: int = Field(
        default=0,
        ge=0,
        description="Number of the definition, start from 1",
    )

40 

41 

# Pronunciation record.  Every field is optional and defaults to an empty
# string or an empty list.
class Sound(BaseModelWrap):
    ipa: str = Field(
        default="",
        description="International Phonetic Alphabet",
    )
    audio: str = Field(
        default="",
        description="Audio file name",
    )

    # URL fields, one per audio format named in the field.
    wav_url: str = ""
    ogg_url: str = ""
    oga_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = Field(
        default=[],
        description="Specifying the variant of the pronunciation",
    )
    raw_tags: list[str] = []
    homophones: list[str] = Field(
        default=[],
        description="Words with same pronunciation",
    )
    rhymes: str = ""

59 

60 

# Usage example attached to a sense, together with its translation and
# reference metadata.  All fields are optional.
class Example(BaseModelWrap):
    # --- example text and its translation -------------------------------
    text: str = Field(
        default="",
        description="Example usage sentence",
    )
    # List of integer pairs; presumably (start, end) offsets of bold spans
    # in `text` -- TODO confirm against the code that fills it.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="",
        description="Russian translation of the example sentence",
    )
    # Same shape as `bold_text_offsets`, presumably for `translation`.
    bold_translation_offsets: list[tuple[int, int]] = []

    # --- reference metadata ---------------------------------------------
    ref: str = Field(
        default="",
        description="Example reference, combine data like author and title",
    )
    author: str = Field(
        default="",
        description="Author's name",
    )
    title: str = Field(
        default="",
        description="Title of the reference",
    )
    date: str = Field(
        default="",
        description="Original date",
    )
    date_published: str = Field(
        default="",
        description="Date of publication",
    )
    collection: str = Field(
        default="",
        description="Name of the collection the example was taken from",
    )
    editor: str = Field(
        default="",
        description="Editor",
    )
    translator: str = Field(
        default="",
        description="Translator",
    )
    source: str = Field(
        default="",
        description=(
            "Source of reference, corresponds to template "
            "parameter 'источник'"
        ),
    )

87 

88 

# Holder for a single required word; used as the element type of
# `Sense.form_of`.
class AltForm(BaseModelWrap):
    word: str

91 

92 

# One word sense: glosses, tags, categories, examples and form-of targets.
# Every field is a list that defaults to empty.
class Sense(BaseModelWrap):
    glosses: list[str] = Field(
        default=[],
        description=(
            "Gloss string for the word sense. This has been cleaned, "
            "and should be straightforward text with no tags."
        ),
    )
    tags: list[str] = Field(
        default=[],
        description="List of tags affecting the word sense.",
    )
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []
    examples: list[Example] = Field(
        default=[],
        description="List of examples",
    )
    form_of: list[AltForm] = []

108 

109 

# A word form: the required form string plus optional tag lists.
class Form(BaseModelWrap):
    form: str
    tags: list[str] = []
    raw_tags: list[str] = []

114 

115 

# A hyphenation: a list of string parts plus optional tag lists.  All
# fields default to empty lists.
class Hyphenation(BaseModelWrap):
    parts: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []

120 

121 

class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the title is set here; the remaining options come from the
    # configuration declared on BaseModelWrap.
    model_config = ConfigDict(title="Russian Wiktionary")

    # --- identification: `word`, `lang_code` and `lang` are required ----
    word: str = Field(description="word string")
    pos: str = Field(
        default="",
        description="Part of speech type",
    )
    pos_title: str = Field(
        default="",
        description="Original POS title",
    )
    lang_code: str = Field(
        description="Wiktionary language code",
        examples=["ru"],
    )
    lang: str = Field(
        description="Localized language name of the word",
        examples=["Русский"],
    )

    # --- core content ---------------------------------------------------
    categories: list[str] = Field(
        default=[],
        description="list of non-disambiguated categories for the word",
    )
    sounds: list[Sound] = []
    senses: list[Sense] = []
    translations: list[Translation] = []
    forms: list[Form] = []
    tags: list[str] = []
    raw_tags: list[str] = []

    # --- linkage lists, each defaulting to empty ------------------------
    antonyms: list[Linkage] = Field(
        default=[],
        description="List of antonyms",
    )
    anagrams: list[Linkage] = Field(
        default=[],
        description="List of anagrams",
    )
    variants: list[Linkage] = Field(
        default=[],
        description="List of variants",
    )
    hypernyms: list[Linkage] = Field(
        default=[],
        description="List of hypernyms",
    )
    hyponyms: list[Linkage] = Field(
        default=[],
        description="List of hyponyms",
    )
    derived: list[Linkage] = Field(
        default=[],
        description="List of derived terms",
    )
    meronyms: list[Linkage] = Field(
        default=[],
        description="List of meronyms",
    )
    synonyms: list[Linkage] = Field(
        default=[],
        description="List of synonyms",
    )
    coordinate_terms: list[Linkage] = Field(
        default=[],
        description="List of coordinate terms",
    )
    holonyms: list[Linkage] = Field(
        default=[],
        description="List of holonyms",
    )

    # --- remaining optional fields --------------------------------------
    etymology_text: str = ""
    related: list[Linkage] = []
    metagrams: list[Linkage] = []
    proverbs: list[Linkage] = []
    literal_meaning: str = ""
    hyphenations: list[Hyphenation] = []