Coverage for src / wiktextract / extractor / ru / models.py: 100%
85 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-09 02:20 +0000
1from pydantic import BaseModel, ConfigDict, Field
class BaseModelWrap(BaseModel):
    # Shared pydantic configuration; the other models in this module
    # subclass BaseModelWrap instead of BaseModel to pick it up.
    model_config = ConfigDict(
        # Run validation on default values as well as on supplied ones.
        validate_default=True,
        # Re-validate whenever an attribute is assigned after construction.
        validate_assignment=True,
        # Strict mode: no implicit type coercion (e.g. "1" is not an int).
        strict=True,
        # Reject any field name that the model does not declare.
        extra="forbid",
    )
class Translation(BaseModelWrap):
    # Translation record; WordEntry.translations holds a list of these.
    # `word` and `lang` are required, every other field has a default.
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
        default="",
    )
    lang: str = Field(
        description="Localized language name of the translation term",
    )
    sense: str = Field(
        description="An optional gloss describing the sense translated",
        default="",
    )
    roman: str = Field(description="Romanization of the word", default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    # NOTE(review): the meaning of `other` is not documented in this module.
    other: str = Field(default="")
class Linkage(BaseModelWrap):
    # A linked term; WordEntry uses this type for its synonym, antonym,
    # derived-term, etc. lists. Every field is optional.
    word: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    sense: str = Field(default="")
    # Definition numbers start from 1 per the description, so the default
    # of 0 presumably means "no sense index" -- TODO confirm with callers.
    sense_index: int = Field(
        description="Number of the definition, start from 1",
        default=0,
        ge=0,
    )
class Sound(BaseModelWrap):
    # Pronunciation record; WordEntry.sounds holds a list of these.
    # Every field is optional.
    ipa: str = Field(description="International Phonetic Alphabet", default="")
    audio: str = Field(description="Audio file name", default="")
    # One URL field per audio file format.
    wav_url: str = Field(default="")
    ogg_url: str = Field(default="")
    oga_url: str = Field(default="")
    mp3_url: str = Field(default="")
    opus_url: str = Field(default="")
    flac_url: str = Field(default="")
    tags: list[str] = Field(
        description="Specifying the variant of the pronunciation",
        default=[],
    )
    raw_tags: list[str] = Field(default=[])
    homophones: list[str] = Field(
        description="Words with same pronunciation",
        default=[],
    )
    rhymes: str = Field(default="")
class Example(BaseModelWrap):
    # Usage example attached to a sense (see Sense.examples).
    # Only `text` is required.
    text: str = Field(description="Example usage sentence")
    # Presumably (start, end) offsets of bold spans inside `text`
    # -- TODO confirm against the code that fills this in.
    bold_text_offsets: list[tuple[int, int]] = Field(default=[])
    translation: str = Field(
        description="Russian translation of the example sentence",
        default="",
    )
    # Presumably the same kind of offsets, but for `translation`
    # -- TODO confirm.
    bold_translation_offsets: list[tuple[int, int]] = Field(default=[])
    ref: str = Field(
        description="Example reference, combine data like author and title",
        default="",
    )
class AltForm(BaseModelWrap):
    # Element type of Sense.form_of; carries a single required word.
    word: str = Field()
class Sense(BaseModelWrap):
    # One word sense; WordEntry.senses holds a list of these.
    # Every field defaults to an empty list.
    glosses: list[str] = Field(
        description=(
            "Gloss string for the word sense. "
            "This has been cleaned, and should be "
            "straightforward text with no tags."
        ),
        default=[],
    )
    tags: list[str] = Field(
        description="List of tags affecting the word sense.",
        default=[],
    )
    raw_tags: list[str] = Field(default=[])
    topics: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    examples: list[Example] = Field(description="List of examples", default=[])
    form_of: list[AltForm] = Field(default=[])
class Form(BaseModelWrap):
    # Element type of WordEntry.forms; `form` is the only required field.
    form: str = Field()
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
class Hyphenation(BaseModelWrap):
    # Element type of WordEntry.hyphenations; every field is optional.
    # `parts` presumably lists the word's pieces in order -- TODO confirm.
    parts: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Sets the model title; the settings on BaseModelWrap still apply
    # because pydantic merges model_config with the parent's.
    model_config = ConfigDict(title="Russian Wiktionary")

    # Required identification fields plus optional part-of-speech data.
    word: str = Field(description="word string")
    pos: str = Field(description="Part of speech type", default="")
    pos_title: str = Field(description="Original POS title", default="")
    lang_code: str = Field(
        examples=["ru"],
        description="Wiktionary language code",
    )
    lang: str = Field(
        examples=["Русский"],
        description="Localized language name of the word",
    )
    categories: list[str] = Field(
        description="list of non-disambiguated categories for the word",
        default=[],
    )

    # Nested records and tags.
    sounds: list[Sound] = Field(default=[])
    senses: list[Sense] = Field(default=[])
    translations: list[Translation] = Field(default=[])
    forms: list[Form] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])

    # Linked terms grouped by relation type; each list defaults to empty.
    antonyms: list[Linkage] = Field(description="List of antonyms", default=[])
    anagrams: list[Linkage] = Field(description="List of anagrams", default=[])
    variants: list[Linkage] = Field(description="List of variants", default=[])
    hypernyms: list[Linkage] = Field(
        description="List of hypernyms",
        default=[],
    )
    hyponyms: list[Linkage] = Field(description="List of hyponyms", default=[])
    derived: list[Linkage] = Field(
        description="List of derived terms",
        default=[],
    )
    meronyms: list[Linkage] = Field(description="List of meronyms", default=[])
    synonyms: list[Linkage] = Field(description="List of synonyms", default=[])
    coordinate_terms: list[Linkage] = Field(
        description="List of coordinate terms",
        default=[],
    )
    holonyms: list[Linkage] = Field(description="List of holonyms", default=[])
    etymology_texts: list[str] = Field(default=[])
    related: list[Linkage] = Field(default=[])
    metagrams: list[Linkage] = Field(default=[])
    proverbs: list[Linkage] = Field(default=[])
    literal_meaning: str = Field(default="")
    hyphenations: list[Hyphenation] = Field(default=[])