Coverage for src/wiktextract/extractor/es/models.py: 100%
87 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from pydantic import BaseModel, ConfigDict, Field
# Common base class for every model in this module. It exists only to carry
# the shared pydantic configuration, so all subclasses validate identically.
# Documented with comments rather than a docstring on purpose: pydantic
# publishes a model's class docstring as its JSON-schema description.
class BaseModelWrap(BaseModel):
    model_config = ConfigDict(
        # Reject fields that are not declared on the model.
        extra="forbid",
        # Disable pydantic's implicit type coercion.
        strict=True,
        # Re-run validation when an attribute is assigned after construction.
        validate_assignment=True,
        # Validate declared default values as well as supplied ones.
        validate_default=True,
    )
# A word linked to an entry. WordEntry stores lists of these under its
# relation fields (synonyms, antonyms, derived, ...).
class Linkage(BaseModelWrap):
    # The only required field: the linked word itself.
    word: str
    note: str = Field(default="")
    alternative_spelling: str = Field(
        description="Alternative spelling of the word",
        default="",
    )
    # Presumably identifies the sense of the entry this link belongs to
    # (compare Sense.sense_index) -- TODO confirm against the extractor.
    sense_index: str = Field(default="")
    sense: str = Field(default="")
# One translation of an entry's word into another language.
class Translation(BaseModelWrap):
    # --- required fields -------------------------------------------------
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(description="Name of the language of translation")

    # --- optional fields -------------------------------------------------
    sense_index: str = Field(default="")
    raw_tags: list[str] = Field(
        description="Tags specifying the translated term, usually gender",
        default=[],
    )
    tags: list[str] = Field(default=[])
    notes: list[str] = Field(description="A list of notes", default=[])
    roman: str = Field(
        description="Transliteration in roman characters",
        default="",
    )
# Record of a single wiki template: its name, the arguments it was given and
# the text it expanded to. Every field is optional.
class TemplateData(BaseModelWrap):
    name: str = Field(description="Template's name.", default="")
    args: dict[str, str] = Field(
        description="Arguments given to the template, if any.",
        default={},
    )
    expansion: str = Field(
        description="The result of expanding the template.",
        default="",
    )
# A usage example attached to a sense.
class Example(BaseModelWrap):
    # Required: the example sentence itself.
    text: str = Field(description="Example usage sentence")
    translation: str = Field(
        description="Spanish translation of the example sentence",
        default="",
    )
    ref: str = Field(default="")
    # Templates recorded for this example (see TemplateData).
    example_templates: list[TemplateData] = Field(default=[])
# Minimal wrapper around a single word; used as the element type of
# Sense.form_of.
class AltForm(BaseModelWrap):
    # Required: presumably the word the sense is a form of -- TODO confirm.
    word: str
# One sense (meaning) of a word entry. Every field is optional.
class Sense(BaseModelWrap):
    glosses: list[str] = Field(
        default=[],
        # The two literals are concatenated; the trailing space on the first
        # one keeps the sentences apart ("...word sense. This has been...").
        description="list of gloss strings for the word sense. "
        "This has been cleaned, and should be no tagging.",
    )
    raw_tags: list[str] = []
    tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = Field(
        default=[], description="Category links on the page"
    )
    examples: list[Example] = Field(default=[], description="List of examples")
    # NOTE: the recursive subsenses field is currently disabled:
    # subsenses: list["Sense"] = Field(
    #     default=[], description="List of subsenses"
    # )
    sense_index: str = Field(
        default="", description="Sense number used in Wiktionary"
    )
    form_of: list[AltForm] = []
# Pronunciation data for a word entry. Every field is optional.
class Sound(BaseModelWrap):
    ipa: str = Field(default="", description="International Phonetic Alphabet")
    audio: str = Field(default="", description="Audio file name")
    # URLs of the audio file, one field per format.
    wav_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    flac_url: str = ""
    # Description typo fixed ("Translitaration" -> "Transliteration"); this
    # text is published in the generated JSON schema.
    roman: str = Field(
        default="", description="Transliteration to Roman characters"
    )
    syllabic: str = Field(default="", description="Syllabic transcription")
    raw_tags: list[str] = Field(
        default=[], description="Specifying the variant of the pronunciation"
    )
    tags: list[str] = []
    alternative: str = Field(
        default="", description="Alternative spelling with same pronunciation"
    )
    note: str = ""
    not_same_pronunciation: bool = Field(
        default=False, description="This is `True` for the 'Variantes' row"
    )
    rhymes: str = ""
    homophone: str = ""
# A single word form together with its tags.
class Form(BaseModelWrap):
    form: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    # exclude=True keeps this field out of the serialised output.
    # Presumably a table-parsing helper value -- TODO confirm.
    row_span: int = Field(default=1, exclude=True)
class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract.
    """  # noqa:E501

    # Only the title is set here; pydantic merges it with the configuration
    # inherited from BaseModelWrap.
    model_config = ConfigDict(title="Spanish Wiktionary")

    # --- identity ---------------------------------------------------------
    word: str = Field(description="word string")
    pos: str = Field(description="Part of speech type", default="")
    pos_title: str = Field(description="Original POS title", default="")
    lang_code: str = Field(
        examples=["es"],
        description="Wiktionary language code",
    )
    lang: str = Field(
        examples=["español"],
        description="Localized language name of the word",
    )

    # --- senses, categories, sounds, translations ---------------------------
    senses: list[Sense] = Field(default=[])
    categories: list[str] = Field(
        description="list of non-disambiguated categories for the word",
        default=[],
    )
    sounds: list[Sound] = Field(default=[])
    translations: list[Translation] = Field(default=[])

    # --- etymology ----------------------------------------------------------
    etymology_text: str = Field(
        description="Etymology section as cleaned text.",
        default="",
    )
    etymology_templates: list[TemplateData] = Field(
        description="Templates and their arguments and expansions "
        "from the etymology section.",
        default=[],
    )
    etymology_number: int = Field(
        description="For words with multiple numbered etymologies, "
        "this contains the number of the etymology under "
        "which this entry appeared.",
        default=0,
    )

    # --- linked words, one list per relation type ----------------------------
    antonyms: list[Linkage] = Field(default=[])
    compounds: list[Linkage] = Field(default=[])
    derived: list[Linkage] = Field(default=[])
    hyponyms: list[Linkage] = Field(default=[])
    hypernyms: list[Linkage] = Field(default=[])
    idioms: list[Linkage] = Field(default=[])
    meronyms: list[Linkage] = Field(default=[])
    related: list[Linkage] = Field(default=[])
    synonyms: list[Linkage] = Field(default=[])
    proverbs: list[Linkage] = Field(default=[])

    # --- miscellaneous ------------------------------------------------------
    tags: list[str] = Field(default=[])
    extra_sounds: dict[str, str] = Field(default={})
    forms: list[Form] = Field(default=[])
    hyphenation: str = Field(default="")