Coverage for src/wiktextract/extractor/ru/models.py: 100%
88 statements
« prev ^ index » next · coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
1from pydantic import BaseModel, ConfigDict, Field
# Common base class for the models in this module. It only carries the shared
# pydantic configuration, so every subclass is validated the same way.
class BaseModelWrap(BaseModel):
    model_config = ConfigDict(
        strict=True,  # no implicit type coercion of input values
        extra="forbid",  # unknown field names are rejected
        validate_default=True,  # default values go through validation too
        validate_assignment=True,  # re-validate when an attribute is set
    )
# A single translation of a word. Only `word` and `lang` are required; every
# other field falls back to an empty string or an empty list.
class Translation(BaseModelWrap):
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field(
        description="Localized language name of the translation term"
    )
    sense: str = Field(
        "", description="An optional gloss describing the sense translated"
    )
    roman: str = Field("", description="Romanization of the word")
    # `tags` / `raw_tags` are presumably normalized vs. as-extracted labels,
    # as the names suggest -- TODO confirm against the extractor code.
    tags: list[str] = []
    raw_tags: list[str] = []
# A term linked to a word entry. All fields are optional.
class Linkage(BaseModelWrap):
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    sense: str = ""
    # Must be >= 0. Numbering starts from 1 per the description, so the
    # default 0 presumably means "no definition number" -- TODO confirm.
    sense_index: int = Field(
        0, ge=0, description="Number of the definition, start from 1"
    )
# Pronunciation data for a word. Every field is optional and defaults to an
# empty string or an empty list.
class Sound(BaseModelWrap):
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL field per audio format; presumably they point at renditions of
    # the `audio` file -- TODO confirm where these are filled in.
    wav_url: str = ""
    ogg_url: str = ""
    oga_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""
    tags: list[str] = Field(
        [], description="Specifying the variant of the pronunciation"
    )
    raw_tags: list[str] = []
    homophones: list[str] = Field(
        [], description="Words with same pronunciation"
    )
    rhymes: str = ""
# A usage example together with its bibliographic reference data. All fields
# are optional.
class Example(BaseModelWrap):
    text: str = Field("", description="Example usage sentence")
    # Pairs of ints; presumably (start, end) offsets of bold spans in `text`
    # -- TODO confirm the exact offset convention with the producer.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        "", description="Russian translation of the example sentence"
    )
    # Same shape as `bold_text_offsets`; presumably refers to `translation`.
    bold_translation_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        "",
        description="Example reference, combine data like author and title",
    )
    author: str = Field("", description="Author's name")
    title: str = Field("", description="Title of the reference")
    date: str = Field("", description="Original date")
    date_published: str = Field("", description="Date of publication")
    collection: str = Field(
        "",
        description="Name of the collection the example was taken from",
    )
    editor: str = Field("", description="Editor")
    translator: str = Field("", description="Translator")
    source: str = Field(
        "",
        description="Source of reference, corresponds to template "
        "parameter 'источник'",
    )
# Minimal model holding a single required `word` string.
class AltForm(BaseModelWrap):
    word: str
# One sense (meaning) of a word: its glosses plus the labels, categories,
# examples and form-of targets attached to it. All fields default to empty
# lists.
class Sense(BaseModelWrap):
    glosses: list[str] = Field(
        [],
        description="Gloss string for the word sense. This has been cleaned, "
        "and should be straightforward text with no tags.",
    )
    tags: list[str] = Field(
        [], description="List of tags affecting the word sense."
    )
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []
    examples: list[Example] = Field([], description="List of examples")
    form_of: list[AltForm] = []
# A word form: the required `form` string plus optional tag lists.
class Form(BaseModelWrap):
    form: str
    tags: list[str] = []
    raw_tags: list[str] = []
# Top-level record for one word. `word`, `lang_code` and `lang` are required;
# all remaining fields default to an empty string or an empty list.
class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the title is set here; the validation settings are declared on
    # BaseModelWrap.
    model_config = ConfigDict(title="Russian Wiktionary")

    # --- identification ---
    word: str = Field(description="word string")
    pos: str = Field("", description="Part of speech type")
    pos_title: str = Field("", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code", examples=["ru"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["Русский"]
    )
    categories: list[str] = Field(
        [], description="list of non-disambiguated categories for the word"
    )

    # --- nested models and labels ---
    sounds: list[Sound] = []
    senses: list[Sense] = []
    translations: list[Translation] = []
    forms: list[Form] = []
    tags: list[str] = []
    raw_tags: list[str] = []

    # --- linked terms, one list per relation type ---
    antonyms: list[Linkage] = Field([], description="List of antonyms")
    anagrams: list[Linkage] = Field([], description="List of anagrams")
    variants: list[Linkage] = Field([], description="List of variants")
    hypernyms: list[Linkage] = Field([], description="List of hypernyms")
    hyponyms: list[Linkage] = Field([], description="List of hyponyms")
    derived: list[Linkage] = Field([], description="List of derived terms")
    meronyms: list[Linkage] = Field([], description="List of meronyms")
    synonyms: list[Linkage] = Field([], description="List of synonyms")
    coordinate_terms: list[Linkage] = Field(
        [], description="List of coordinate terms"
    )
    holonyms: list[Linkage] = Field([], description="List of holonyms")

    # --- remaining fields, kept in their original order ---
    etymology_text: str = ""
    related: list[Linkage] = []
    metagrams: list[Linkage] = []
    proverbs: list[Linkage] = []
    literal_meaning: str = ""
    hyphenation: str = ""