Coverage for src/wiktextract/extractor/ru/models.py: 100%

92 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

# Shared base class for every model in this module; it only carries the
# common pydantic configuration.
# NOTE: documented with comments rather than a docstring because pydantic
# copies a model's docstring into the generated JSON schema description.
class BaseModelWrap(BaseModel):
    model_config = ConfigDict(
        extra="forbid",  # unknown field names are a validation error
        strict=True,  # no implicit type coercion (e.g. "1" is not an int)
        validate_assignment=True,  # re-validate when an attribute is set
        validate_default=True,  # default values are validated as well
    )

11 

12 

# One translation of the entry word into another language.
# `word` and `lang` have no default and must be supplied; every other field
# falls back to an empty string or an empty list.
class Translation(BaseModelWrap):
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        default="",
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(
        description="Localized language name of the translation term"
    )
    sense: str = Field(
        default="",
        description="An optional gloss describing the sense translated",
    )
    roman: str = Field(default="", description="Romanization of the word")
    # NOTE(review): presumably `tags` holds normalized tags and `raw_tags`
    # the labels as found on the page - confirm against the extractor.
    tags: list[str] = []
    raw_tags: list[str] = []

29 

30 

# A word linked to the entry; used as the element type of the list[Linkage]
# fields on WordEntry (synonyms, antonyms, derived, ...).
# All fields are optional.
class Linkage(BaseModelWrap):
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    sense: str = ""
    # The description says numbering starts from 1, while the default (and
    # the `ge` lower bound) is 0.
    # NOTE(review): presumably 0 means "no sense number given" - confirm.
    sense_index: int = Field(
        default=0, ge=0, description="Number of the definition, start from 1"
    )

39 

40 

# Pronunciation data attached to a word entry. Every field is optional.
class Sound(BaseModelWrap):
    ipa: str = Field(default="", description="International Phonetic Alphabet")
    audio: str = Field(default="", description="Audio file name")
    # NOTE(review): presumably links to `audio` in each respective file
    # format - confirm where these are filled in.
    wav_url: str = ""
    ogg_url: str = ""
    oga_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""
    tags: list[str] = Field(
        default=[], description="Specifying the variant of the pronunciation"
    )
    raw_tags: list[str] = []
    homophones: list[str] = Field(
        default=[], description="Words with same pronunciation"
    )
    # A single string, unlike the list-valued `homophones`.
    rhymes: str = ""

58 

59 

# A usage example for a sense together with its bibliographic metadata.
# Every field is optional and defaults to an empty string or empty list.
class Example(BaseModelWrap):
    text: str = Field(default="", description="Example usage sentence")
    # NOTE(review): presumably (start, end) character offsets of the bold
    # spans inside `text` - confirm the exact convention with the producer.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="Russian translation of the example sentence"
    )
    # NOTE(review): presumably the same kind of offsets, but into
    # `translation` - confirm.
    bold_translation_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        default="",
        description="Example reference, combine data like author and title",
    )
    author: str = Field(default="", description="Author's name")
    title: str = Field(default="", description="Title of the reference")
    date: str = Field(default="", description="Original date")
    date_published: str = Field(default="", description="Date of publication")
    collection: str = Field(
        default="",
        description="Name of the collection the example was taken from",
    )
    editor: str = Field(default="", description="Editor")
    translator: str = Field(default="", description="Translator")
    source: str = Field(
        default="",
        description="Source of reference, corresponds to template "
        "parameter 'источник'",
    )

86 

87 

# Element type of Sense.form_of; holds a single required word string.
class AltForm(BaseModelWrap):
    word: str

90 

91 

# One sense (definition) of a word entry: its glosses, tags, categories and
# usage examples. Every field is optional and defaults to an empty list.
class Sense(BaseModelWrap):
    glosses: list[str] = Field(
        default=[],
        description="Gloss string for the word sense. This has been cleaned, "
        "and should be straightforward text with no tags.",
    )
    tags: list[str] = Field(
        default=[],
        description="List of tags affecting the word sense.",
    )
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []
    examples: list[Example] = Field(default=[], description="List of examples")
    # NOTE(review): presumably the word(s) this sense is an inflected or
    # alternative form of - confirm against the extractor.
    form_of: list[AltForm] = []

107 

108 

# Element type of WordEntry.forms: a required form string plus optional
# tag lists.
class Form(BaseModelWrap):
    form: str
    tags: list[str] = []
    raw_tags: list[str] = []

113 

114 

# Element type of WordEntry.hyphenations. All fields default to empty lists.
class Hyphenation(BaseModelWrap):
    # NOTE(review): presumably the word split at its hyphenation points, in
    # order - confirm.
    parts: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []

119 

120 

class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the title is set here.
    # NOTE(review): pydantic is expected to merge this with the config
    # inherited from BaseModelWrap rather than replace it - confirm.
    model_config = ConfigDict(title="Russian Wiktionary")

    # `word`, `lang_code` and `lang` are the only required fields.
    word: str = Field(description="word string")
    pos: str = Field(default="", description="Part of speech type")
    pos_title: str = Field(default="", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code", examples=["ru"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["Русский"]
    )
    categories: list[str] = Field(
        default=[],
        description="list of non-disambiguated categories for the word",
    )
    sounds: list[Sound] = []
    senses: list[Sense] = []
    translations: list[Translation] = []
    forms: list[Form] = []
    tags: list[str] = []
    raw_tags: list[str] = []

    # Linkage lists: words related to this entry, one list per relation.
    antonyms: list[Linkage] = Field(default=[], description="List of antonyms")
    anagrams: list[Linkage] = Field(default=[], description="List of anagrams")
    variants: list[Linkage] = Field(default=[], description="List of variants")
    hypernyms: list[Linkage] = Field(
        default=[], description="List of hypernyms"
    )
    hyponyms: list[Linkage] = Field(default=[], description="List of hyponyms")
    derived: list[Linkage] = Field(
        default=[], description="List of derived terms"
    )
    meronyms: list[Linkage] = Field(default=[], description="List of meronyms")
    synonyms: list[Linkage] = Field(default=[], description="List of synonyms")
    coordinate_terms: list[Linkage] = Field(
        default=[], description="List of coordinate terms"
    )
    holonyms: list[Linkage] = Field(default=[], description="List of holonyms")
    etymology_text: str = ""
    related: list[Linkage] = []
    metagrams: list[Linkage] = []
    proverbs: list[Linkage] = []
    literal_meaning: str = ""
    hyphenations: list[Hyphenation] = []