Coverage for src/wiktextract/extractor/ru/models.py: 100%

1from pydantic import BaseModel, ConfigDict, Field

class BaseModelWrap(BaseModel):
    # Base class carrying the validation settings for the models that
    # subclass it.  Documented with comments rather than a docstring because
    # pydantic copies a class docstring into the generated JSON schema.
    model_config = ConfigDict(
        validate_default=True,  # default values are validated as well
        validate_assignment=True,  # attribute assignment is re-validated
        strict=True,  # strict mode: no implicit type coercion
        extra="forbid",  # unknown field names are rejected
    )

class Translation(BaseModelWrap):
    # A translation term with its language and optional sense/romanization.
    # `word` and `lang` are required; every other field has a default.
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field(
        description="Localized language name of the translation term"
    )
    sense: str = Field(
        "", description="An optional gloss describing the sense translated"
    )
    roman: str = Field("", description="Romanization of the word")
    tags: list[str] = []
    raw_tags: list[str] = []

class Linkage(BaseModelWrap):
    # A linked word with its tags and the sense it is attached to.
    # All fields are optional and fall back to empty values.
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    sense: str = ""
    # ge=0 rejects negative values.  The description says numbering starts
    # at 1, so the default 0 presumably means "not set" -- TODO confirm.
    sense_index: int = Field(
        0, ge=0, description="Number of the definition, start from 1"
    )

class Sound(BaseModelWrap):
    # Pronunciation data; every field is optional and defaults to empty.
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")

    # One URL field per audio format.
    wav_url: str = ""
    ogg_url: str = ""
    oga_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = Field(
        [], description="Specifying the variant of the pronunciation"
    )
    raw_tags: list[str] = []
    homophones: list[str] = Field(
        [], description="Words with same pronunciation"
    )
    # Unlike `homophones`, this is a single string rather than a list.
    rhymes: str = ""

class Example(BaseModelWrap):
    # A usage example with optional translation and bibliographic details.
    # Every field is optional and defaults to an empty string or list.
    text: str = Field("", description="Example usage sentence")
    # Pairs of ints; presumably (start, end) offsets of bold spans in `text`
    # -- TODO confirm against the code that fills this in.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        "", description="Russian translation of the example sentence"
    )
    # Same shape as `bold_text_offsets`; presumably refers to `translation`.
    bold_translation_offsets: list[tuple[int, int]] = []

    # Reference / citation metadata.
    ref: str = Field(
        "",
        description="Example reference, combine data like author and title",
    )
    author: str = Field("", description="Author's name")
    title: str = Field("", description="Title of the reference")
    date: str = Field("", description="Original date")
    date_published: str = Field("", description="Date of publication")
    collection: str = Field(
        "",
        description="Name of the collection the example was taken from",
    )
    editor: str = Field("", description="Editor")
    translator: str = Field("", description="Translator")
    source: str = Field(
        "",
        description=(
            "Source of reference, corresponds to template "
            "parameter 'источник'"
        ),
    )

class AltForm(BaseModelWrap):
    # Wrapper around a single required word string.
    word: str

class Sense(BaseModelWrap):
    # One word sense: glosses, tags, categories, examples and form-of links.
    # Every field is a list that defaults to empty.
    glosses: list[str] = Field(
        [],
        description=(
            "Gloss string for the word sense. This has been cleaned, "
            "and should be straightforward text with no tags."
        ),
    )
    tags: list[str] = Field(
        [], description="List of tags affecting the word sense."
    )
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []
    examples: list[Example] = Field([], description="List of examples")
    form_of: list[AltForm] = []

107

108

class Form(BaseModelWrap):
    # A form string with its tags.  `form` is required; both tag lists
    # default to empty.
    form: str
    tags: list[str] = []
    raw_tags: list[str] = []

113

114

class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the title is set here; pydantic merges this with the configuration
    # inherited from BaseModelWrap rather than replacing it.
    model_config = ConfigDict(title="Russian Wiktionary")

    # Required identification fields: word, lang_code and lang.
    word: str = Field(description="word string")
    pos: str = Field("", description="Part of speech type")
    pos_title: str = Field("", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code", examples=["ru"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["Русский"]
    )

    # Everything below is optional and defaults to an empty list or string.
    categories: list[str] = Field(
        [], description="list of non-disambiguated categories for the word"
    )
    sounds: list[Sound] = []
    senses: list[Sense] = []
    translations: list[Translation] = []
    forms: list[Form] = []
    tags: list[str] = []
    raw_tags: list[str] = []

    # Linkage lists, one field per relation type.
    antonyms: list[Linkage] = Field([], description="List of antonyms")
    anagrams: list[Linkage] = Field([], description="List of anagrams")
    variants: list[Linkage] = Field([], description="List of variants")
    hypernyms: list[Linkage] = Field([], description="List of hypernyms")
    hyponyms: list[Linkage] = Field([], description="List of hyponyms")
    derived: list[Linkage] = Field([], description="List of derived terms")
    meronyms: list[Linkage] = Field([], description="List of meronyms")
    synonyms: list[Linkage] = Field([], description="List of synonyms")
    coordinate_terms: list[Linkage] = Field(
        [], description="List of coordinate terms"
    )
    holonyms: list[Linkage] = Field([], description="List of holonyms")
    etymology_text: str = ""
    related: list[Linkage] = []
    metagrams: list[Linkage] = []
    proverbs: list[Linkage] = []
    literal_meaning: str = ""
    hyphenation: str = ""