Coverage for src/wiktextract/extractor/ru/models.py: 100%

88 statements  

« prev     ^ index     » next       coverage.py v7.9.0, created at 2025-06-13 07:43 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

class BaseModelWrap(BaseModel):
    # Common base for every model in this module; the configuration below is
    # inherited by all subclasses.
    model_config = ConfigDict(
        extra="forbid",  # unknown keys are rejected rather than ignored
        strict=True,  # no implicit type coercion during validation
        validate_assignment=True,  # re-validate when attributes are set
        validate_default=True,  # default values go through validation too
    )

11 

12 

class Translation(BaseModelWrap):
    # One translation of the entry word; `word` and `lang` are required.
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field(
        description="Localized language name of the translation term",
    )
    sense: str = Field(
        "", description="An optional gloss describing the sense translated"
    )
    roman: str = Field("", description="Romanization of the word")
    tags: list[str] = Field([])
    raw_tags: list[str] = Field([])

29 

30 

class Linkage(BaseModelWrap):
    # A linked term; every field is optional and defaults to an empty value.
    word: str = Field("")
    tags: list[str] = Field([])
    raw_tags: list[str] = Field([])
    sense: str = Field("")
    # Must be non-negative (ge=0); the default is 0.
    sense_index: int = Field(
        0,
        ge=0,
        description="Number of the definition, start from 1",
    )

39 

40 

class Sound(BaseModelWrap):
    # Pronunciation data; every field is optional.
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL field per audio format; empty string when not set.
    wav_url: str = Field("")
    ogg_url: str = Field("")
    oga_url: str = Field("")
    mp3_url: str = Field("")
    opus_url: str = Field("")
    flac_url: str = Field("")
    tags: list[str] = Field(
        [],
        description="Specifying the variant of the pronunciation",
    )
    raw_tags: list[str] = Field([])
    homophones: list[str] = Field(
        [],
        description="Words with same pronunciation",
    )
    rhymes: str = Field("")

58 

59 

class Example(BaseModelWrap):
    # A usage example plus its bibliographic details; all fields optional.
    text: str = Field("", description="Example usage sentence")
    # List of (int, int) pairs associated with `text`.
    bold_text_offsets: list[tuple[int, int]] = Field([])
    translation: str = Field(
        "",
        description="Russian translation of the example sentence",
    )
    # List of (int, int) pairs associated with `translation`.
    bold_translation_offsets: list[tuple[int, int]] = Field([])
    ref: str = Field(
        "", description="Example reference, combine data like author and title"
    )
    author: str = Field("", description="Author's name")
    title: str = Field("", description="Title of the reference")
    date: str = Field("", description="Original date")
    date_published: str = Field("", description="Date of publication")
    collection: str = Field(
        "", description="Name of the collection the example was taken from"
    )
    editor: str = Field("", description="Editor")
    translator: str = Field("", description="Translator")
    source: str = Field(
        "",
        description="Source of reference, corresponds to template "
        "parameter 'источник'",
    )

86 

87 

class AltForm(BaseModelWrap):
    # Single required string field; `...` marks it as having no default.
    word: str = Field(...)

90 

91 

class Sense(BaseModelWrap):
    # One word sense; every field defaults to an empty list.
    glosses: list[str] = Field(
        [],
        description="Gloss string for the word sense. This has been cleaned, "
        "and should be straightforward text with no tags.",
    )
    tags: list[str] = Field(
        [], description="List of tags affecting the word sense."
    )
    raw_tags: list[str] = Field([])
    topics: list[str] = Field([])
    categories: list[str] = Field([])
    examples: list[Example] = Field([], description="List of examples")
    form_of: list[AltForm] = Field([])

107 

108 

class Form(BaseModelWrap):
    # `form` is required (no default); both tag lists default to empty.
    form: str = Field(...)
    tags: list[str] = Field([])
    raw_tags: list[str] = Field([])

113 

114 

class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Sets the model title on top of the configuration from BaseModelWrap.
    model_config = ConfigDict(title="Russian Wiktionary")

    # Required fields are `word`, `lang_code` and `lang`; the rest default to
    # an empty string or an empty list.
    word: str = Field(description="word string")
    pos: str = Field("", description="Part of speech type")
    pos_title: str = Field("", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code",
        examples=["ru"],
    )
    lang: str = Field(
        description="Localized language name of the word",
        examples=["Русский"],
    )
    categories: list[str] = Field(
        [], description="list of non-disambiguated categories for the word"
    )
    sounds: list[Sound] = Field([])
    senses: list[Sense] = Field([])
    translations: list[Translation] = Field([])
    forms: list[Form] = Field([])
    tags: list[str] = Field([])
    raw_tags: list[str] = Field([])

    # Linkage lists, one per relation type.
    antonyms: list[Linkage] = Field([], description="List of antonyms")
    anagrams: list[Linkage] = Field([], description="List of anagrams")
    variants: list[Linkage] = Field([], description="List of variants")
    hypernyms: list[Linkage] = Field([], description="List of hypernyms")
    hyponyms: list[Linkage] = Field([], description="List of hyponyms")
    derived: list[Linkage] = Field([], description="List of derived terms")
    meronyms: list[Linkage] = Field([], description="List of meronyms")
    synonyms: list[Linkage] = Field([], description="List of synonyms")
    coordinate_terms: list[Linkage] = Field(
        [],
        description="List of coordinate terms",
    )
    holonyms: list[Linkage] = Field([], description="List of holonyms")
    etymology_text: str = Field("")
    related: list[Linkage] = Field([])
    metagrams: list[Linkage] = Field([])
    proverbs: list[Linkage] = Field([])
    literal_meaning: str = Field("")
    hyphenation: str = Field("")