Coverage for src/wiktextract/extractor/ru/models.py: 100%

85 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

# Common base class for the data models in this module. It adds no fields
# of its own; it only fixes the pydantic configuration the subclasses share.
class BaseModelWrap(BaseModel):
    model_config = ConfigDict(
        extra="forbid",  # keys that are not declared fields are rejected
        strict=True,  # strict validation mode: no implicit type coercion
        validate_assignment=True,  # validate again on attribute assignment
        validate_default=True,  # default values go through validation too
    )

11 

12 

# A single translation of a word into another language.
# `word` and `lang` are required; every other field has a default.
class Translation(BaseModelWrap):
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field(
        description="Localized language name of the translation term"
    )
    sense: str = Field(
        "", description="An optional gloss describing the sense translated"
    )
    roman: str = Field("", description="Romanization of the word")
    tags: list[str] = Field(default=[])
    # NOTE(review): presumably the tag strings as found in the page, before
    # normalization into `tags` -- confirm against the extractor code.
    raw_tags: list[str] = Field(default=[])

29 

30 

# A word linked to an entry. WordEntry uses this model for its antonyms,
# synonyms, hypernyms, derived terms and similar lists.
# Every field has a default, so an empty Linkage() is valid.
class Linkage(BaseModelWrap):
    word: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    sense: str = Field(default="")
    # The default is 0 and `ge=0` accepts 0, while the description says
    # numbering starts from 1.
    # NOTE(review): presumably 0 means "no definition number" -- confirm.
    sense_index: int = Field(
        0, ge=0, description="Number of the definition, start from 1"
    )

39 

40 

# Pronunciation information: IPA text, an audio file name, URL fields named
# after audio formats, tags and homophones. All fields are optional.
class Sound(BaseModelWrap):
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")

    # One string field per audio format, each defaulting to "".
    wav_url: str = Field(default="")
    ogg_url: str = Field(default="")
    oga_url: str = Field(default="")
    mp3_url: str = Field(default="")
    flac_url: str = Field(default="")

    tags: list[str] = Field(
        [], description="Specifying the variant of the pronunciation"
    )
    raw_tags: list[str] = Field(default=[])
    homophones: list[str] = Field(
        [], description="Words with same pronunciation"
    )

56 

57 

# A usage example attached to a sense, together with bibliographic details
# of where it was taken from. Every field is a string defaulting to "".
class Example(BaseModelWrap):
    text: str = Field("", description="Example usage sentence")
    translation: str = Field(
        "", description="Russian translation of the example sentence"
    )

    # Bibliographic details of the quotation.
    ref: str = Field(
        "", description="Example reference, combine data like author and title"
    )
    author: str = Field("", description="Author's name")
    title: str = Field("", description="Title of the reference")
    date: str = Field("", description="Original date")
    date_published: str = Field("", description="Date of publication")
    collection: str = Field(
        "", description="Name of the collection the example was taken from"
    )
    editor: str = Field("", description="Editor")
    translator: str = Field("", description="Translator")
    source: str = Field(
        "",
        description=(
            "Source of reference, corresponds to template parameter 'источник'"
        ),
    )

82 

83 

# Holds a single required word. Sense.form_of is a list of these.
class AltForm(BaseModelWrap):
    # Required: there is no default value.
    word: str

86 

87 

# One sense (definition) of a word: its glosses plus tags, topics, notes,
# categories, examples and form-of links. Every field defaults to an
# empty list.
class Sense(BaseModelWrap):
    raw_glosses: list[str] = Field(
        [],
        description=(
            "Raw gloss string for the word sense. "
            "This might contain tags and other markup."
        ),
    )
    glosses: list[str] = Field(
        [],
        description=(
            "Gloss string for the word sense. This has been cleaned, "
            "and should be straightforward text with no tags."
        ),
    )
    tags: list[str] = Field(
        [], description="List of tags affecting the word sense."
    )
    raw_tags: list[str] = Field(default=[])
    topics: list[str] = Field(default=[])
    notes: list[str] = Field([], description="Usually describing usage.")
    categories: list[str] = Field(default=[])
    examples: list[Example] = Field([], description="List of examples")
    form_of: list[AltForm] = Field(default=[])

112 

113 

# One form of a word with its tags. WordEntry.forms is a list of these.
class Form(BaseModelWrap):
    # Required: the form string itself.
    form: str
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])

118 

119 

class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the title is set here; the remaining options come from the
    # configuration declared on BaseModelWrap.
    model_config = ConfigDict(title="Russian Wiktionary")

    # --- identification: `word`, `lang_code` and `lang` are required ---
    word: str = Field(description="word string")
    pos: str = Field("", description="Part of speech type")
    pos_title: str = Field("", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code", examples=["ru"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["Русский"]
    )

    # --- content extracted for the entry ---
    categories: list[str] = Field(
        [], description="list of non-disambiguated categories for the word"
    )
    sounds: list[Sound] = Field(default=[])
    senses: list[Sense] = Field(default=[])
    translations: list[Translation] = Field(default=[])
    forms: list[Form] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])

    # --- linked words, one list of Linkage items per relation ---
    antonyms: list[Linkage] = Field([], description="List of antonyms")
    anagrams: list[Linkage] = Field([], description="List of anagrams")
    variants: list[Linkage] = Field([], description="List of variants")
    hypernyms: list[Linkage] = Field([], description="List of hypernyms")
    hyponyms: list[Linkage] = Field([], description="List of hyponyms")
    derived: list[Linkage] = Field([], description="List of derived terms")
    meronyms: list[Linkage] = Field([], description="List of meronyms")
    synonyms: list[Linkage] = Field([], description="List of synonyms")
    coordinate_terms: list[Linkage] = Field(
        [], description="List of coordinate terms"
    )
    holonyms: list[Linkage] = Field([], description="List of holonyms")
    etymology_text: str = Field(default="")
    related: list[Linkage] = Field(default=[])
    metagrams: list[Linkage] = Field(default=[])
    proverbs: list[Linkage] = Field(default=[])
    literal_meaning: str = Field(default="")