Coverage for src/wiktextract/extractor/ru/models.py: 100%
93 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1from pydantic import BaseModel, ConfigDict, Field
class BaseModelWrap(BaseModel):
    # Common base for the models in this module. The configuration rejects
    # unknown fields, turns on strict type checking, and validates both
    # attribute assignments and declared default values.
    model_config = ConfigDict(
        strict=True,
        extra="forbid",
        validate_default=True,
        validate_assignment=True,
    )
class Translation(BaseModelWrap):
    # A translation term plus its language and optional details.
    # `word` and `lang` have no default and are therefore required.
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
        default="",
    )
    lang: str = Field(
        description="Localized language name of the translation term",
    )
    sense: str = Field(
        description="An optional gloss describing the sense translated",
        default="",
    )
    roman: str = Field(description="Romanization of the word", default="")
    tags: list[str] = []
    raw_tags: list[str] = []
    other: str = ""
class Linkage(BaseModelWrap):
    # A linked term; every field is optional.
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    sense: str = ""
    # The constraint is ge=0 (not ge=1) so that the default of 0 is itself a
    # valid value, even though the description says numbering starts from 1.
    sense_index: int = Field(
        description="Number of the definition, start from 1",
        default=0,
        ge=0,
    )
class Sound(BaseModelWrap):
    # Pronunciation data; every field is optional.
    ipa: str = Field(description="International Phonetic Alphabet", default="")
    audio: str = Field(description="Audio file name", default="")
    # One URL field per audio format.
    wav_url: str = ""
    ogg_url: str = ""
    oga_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""
    tags: list[str] = Field(
        description="Specifying the variant of the pronunciation",
        default=[],
    )
    raw_tags: list[str] = []
    homophones: list[str] = Field(
        description="Words with same pronunciation",
        default=[],
    )
    rhymes: str = ""
class Example(BaseModelWrap):
    # A usage example with optional translation and bibliographic details;
    # every field is optional.
    text: str = Field(description="Example usage sentence", default="")
    # Pairs of ints; presumably (start, end) offsets of bold spans inside
    # `text` -- confirm against the code that fills this field.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        description="Russian translation of the example sentence",
        default="",
    )
    # Same shape as `bold_text_offsets`; presumably relative to `translation`.
    bold_translation_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        description="Example reference, combine data like author and title",
        default="",
    )
    author: str = Field(description="Author's name", default="")
    title: str = Field(description="Title of the reference", default="")
    date: str = Field(description="Original date", default="")
    date_published: str = Field(description="Date of publication", default="")
    collection: str = Field(
        description="Name of the collection the example was taken from",
        default="",
    )
    editor: str = Field(description="Editor", default="")
    translator: str = Field(description="Translator", default="")
    source: str = Field(
        description="Source of reference, corresponds to "
        "template parameter 'источник'",
        default="",
    )
class AltForm(BaseModelWrap):
    # Holds a single required string, `word`.
    word: str
class Sense(BaseModelWrap):
    # One word sense: glosses, tags, categories, examples and form-of links.
    # Every field defaults to an empty list.
    glosses: list[str] = Field(
        description="Gloss string for the word sense. "
        "This has been cleaned, and should be "
        "straightforward text with no tags.",
        default=[],
    )
    tags: list[str] = Field(
        description="List of tags affecting the word sense.",
        default=[],
    )
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []
    examples: list[Example] = Field(description="List of examples", default=[])
    form_of: list[AltForm] = []
class Form(BaseModelWrap):
    # A form string (required) with optional tag lists.
    form: str
    tags: list[str] = []
    raw_tags: list[str] = []
class Hyphenation(BaseModelWrap):
    # A list of string parts with optional tag lists; all default to empty.
    parts: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the title is set here; the remaining settings are expected to be
    # inherited from BaseModelWrap's model_config.
    model_config = ConfigDict(title="Russian Wiktionary")

    # Required fields are `word`, `lang_code` and `lang`; every other field
    # has an empty default.
    word: str = Field(description="word string")
    pos: str = Field(description="Part of speech type", default="")
    pos_title: str = Field(description="Original POS title", default="")
    lang_code: str = Field(
        examples=["ru"],
        description="Wiktionary language code",
    )
    lang: str = Field(
        examples=["Русский"],
        description="Localized language name of the word",
    )
    categories: list[str] = Field(
        description="list of non-disambiguated categories for the word",
        default=[],
    )
    sounds: list[Sound] = []
    senses: list[Sense] = []
    translations: list[Translation] = []
    forms: list[Form] = []
    tags: list[str] = []
    raw_tags: list[str] = []

    # Linkage lists, one field per relation type.
    antonyms: list[Linkage] = Field(description="List of antonyms", default=[])
    anagrams: list[Linkage] = Field(description="List of anagrams", default=[])
    variants: list[Linkage] = Field(description="List of variants", default=[])
    hypernyms: list[Linkage] = Field(
        description="List of hypernyms",
        default=[],
    )
    hyponyms: list[Linkage] = Field(description="List of hyponyms", default=[])
    derived: list[Linkage] = Field(
        description="List of derived terms",
        default=[],
    )
    meronyms: list[Linkage] = Field(description="List of meronyms", default=[])
    synonyms: list[Linkage] = Field(description="List of synonyms", default=[])
    coordinate_terms: list[Linkage] = Field(
        description="List of coordinate terms",
        default=[],
    )
    holonyms: list[Linkage] = Field(description="List of holonyms", default=[])
    etymology_text: str = ""
    related: list[Linkage] = []
    metagrams: list[Linkage] = []
    proverbs: list[Linkage] = []
    literal_meaning: str = ""
    hyphenations: list[Hyphenation] = []