# Coverage report for src/wiktextract/extractor/ru/models.py: 100% (85 statements)
# Generated by coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
from pydantic import BaseModel, ConfigDict, Field
class BaseModelWrap(BaseModel):
    # Shared base class for the models in this file; it only sets the
    # pydantic configuration they inherit.
    # (Kept as comments rather than a docstring: pydantic publishes a model's
    # docstring as its JSON-schema description.)
    model_config = ConfigDict(
        # Reject fields that are not declared on the model.
        extra="forbid",
        # Disable pydantic's implicit type coercion.
        strict=True,
        # Re-run validation when an attribute is assigned after construction.
        validate_assignment=True,
        # Validate declared default values as well.
        validate_default=True,
    )
class Translation(BaseModelWrap):
    # One translation of a word. `word` and `lang` are required; every other
    # field falls back to an empty default.
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        default="",
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(
        description="Localized language name of the translation term"
    )
    sense: str = Field(
        default="",
        description="An optional gloss describing the sense translated",
    )
    roman: str = Field(default="", description="Romanization of the word")
    # NOTE(review): presumably `tags` holds normalized tags and `raw_tags`
    # the unprocessed label text - confirm against the extractor code.
    tags: list[str] = []
    raw_tags: list[str] = []
class Linkage(BaseModelWrap):
    # Entry type shared by the linkage lists on WordEntry (synonyms,
    # antonyms, derived, ...). All fields are optional.
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    sense: str = ""
    # Must be >= 0. The description counts definitions from 1, so the
    # default 0 presumably means "no specific definition" - TODO confirm.
    sense_index: int = Field(
        default=0, ge=0, description="Number of the definition, start from 1"
    )
class Sound(BaseModelWrap):
    # Pronunciation data for a word. Every field is optional and defaults to
    # an empty string or an empty list.
    ipa: str = Field(default="", description="International Phonetic Alphabet")
    audio: str = Field(default="", description="Audio file name")
    # One URL field per audio format.
    # NOTE(review): presumably derived from `audio` - confirm where these
    # are populated.
    wav_url: str = ""
    ogg_url: str = ""
    oga_url: str = ""
    mp3_url: str = ""
    flac_url: str = ""
    tags: list[str] = Field(
        default=[], description="Specifying the variant of the pronunciation"
    )
    raw_tags: list[str] = []
    homophones: list[str] = Field(
        default=[], description="Words with same pronunciation"
    )
class Example(BaseModelWrap):
    # A usage example attached to a sense, together with its bibliographic
    # details. Every field is an optional string defaulting to "".
    text: str = Field(default="", description="Example usage sentence")
    translation: str = Field(
        default="", description="Russian translation of the example sentence"
    )
    ref: str = Field(
        default="",
        description="Example reference, combine data like author and title",
    )
    author: str = Field(default="", description="Author's name")
    title: str = Field(default="", description="Title of the reference")
    date: str = Field(default="", description="Original date")
    date_published: str = Field(default="", description="Date of publication")
    collection: str = Field(
        default="",
        description="Name of the collection the example was taken from",
    )
    editor: str = Field(default="", description="Editor")
    translator: str = Field(default="", description="Translator")
    source: str = Field(
        default="",
        description="Source of reference, corresponds to template "
        "parameter 'источник'",
    )
class AltForm(BaseModelWrap):
    # Element type of Sense.form_of; carries a single required word.
    word: str
class Sense(BaseModelWrap):
    # One sense (definition) of a word. Every field is a list that defaults
    # to empty.
    raw_glosses: list[str] = Field(
        default=[],
        description="Raw gloss string for the word sense. "
        "This might contain tags and other markup.",
    )
    glosses: list[str] = Field(
        default=[],
        description="Gloss string for the word sense. This has been cleaned, "
        "and should be straightforward text with no tags.",
    )
    tags: list[str] = Field(
        default=[],
        description="List of tags affecting the word sense.",
    )
    raw_tags: list[str] = []
    topics: list[str] = []
    notes: list[str] = Field(
        default=[],
        description="Usually describing usage.",
    )
    categories: list[str] = []
    examples: list[Example] = Field(default=[], description="List of examples")
    # NOTE(review): presumably the word(s) this sense is an inflected or
    # alternative form of - confirm against the extractor code.
    form_of: list[AltForm] = []
class Form(BaseModelWrap):
    # Element type of WordEntry.forms: a required form string plus optional
    # tag lists.
    form: str
    tags: list[str] = []
    raw_tags: list[str] = []
class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the title is set here; pydantic merges a subclass's model_config
    # with the one inherited from BaseModelWrap.
    model_config = ConfigDict(title="Russian Wiktionary")

    # Required: word, lang_code and lang. Everything else is optional.
    word: str = Field(description="word string")
    pos: str = Field(default="", description="Part of speech type")
    pos_title: str = Field(default="", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code", examples=["ru"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["Русский"]
    )
    categories: list[str] = Field(
        default=[],
        description="list of non-disambiguated categories for the word",
    )
    sounds: list[Sound] = []
    senses: list[Sense] = []
    translations: list[Translation] = []
    forms: list[Form] = []
    tags: list[str] = []
    raw_tags: list[str] = []

    # Linkage lists; each holds Linkage items and defaults to empty.
    antonyms: list[Linkage] = Field(default=[], description="List of antonyms")
    anagrams: list[Linkage] = Field(default=[], description="List of anagrams")
    variants: list[Linkage] = Field(default=[], description="List of variants")
    hypernyms: list[Linkage] = Field(
        default=[], description="List of hypernyms"
    )
    hyponyms: list[Linkage] = Field(default=[], description="List of hyponyms")
    derived: list[Linkage] = Field(
        default=[], description="List of derived terms"
    )
    meronyms: list[Linkage] = Field(default=[], description="List of meronyms")
    synonyms: list[Linkage] = Field(default=[], description="List of synonyms")
    coordinate_terms: list[Linkage] = Field(
        default=[], description="List of coordinate terms"
    )
    holonyms: list[Linkage] = Field(default=[], description="List of holonyms")
    etymology_text: str = ""
    related: list[Linkage] = []
    metagrams: list[Linkage] = []
    proverbs: list[Linkage] = []
    literal_meaning: str = ""