Coverage for src/wiktextract/extractor/es/models.py: 100%
96 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from pydantic import BaseModel, ConfigDict, Field
# Common base class for every model in this module; it only carries the
# shared pydantic configuration.
class BaseModelWrap(BaseModel):
    model_config = ConfigDict(
        # Default values are validated as well, not only supplied values.
        validate_default=True,
        # Assigning to an attribute after construction is validated too.
        validate_assignment=True,
        # Strict mode: values are not coerced to the annotated type.
        strict=True,
        # Fields that are not declared on the model are rejected.
        extra="forbid",
    )
# A term linked to an entry. WordEntry uses this model for its synonyms,
# antonyms, derived terms and the other linkage lists.
class Linkage(BaseModelWrap):
    # The linked term itself; the only required field.
    word: str
    note: str = ""
    alternative_spelling: str = Field(
        "", description="Alternative spelling of the word"
    )
    # NOTE(review): presumably identifies the sense of the entry that this
    # linkage belongs to (by index / by text) -- confirm against the extractor.
    sense_index: str = ""
    sense: str = ""
# A single translation of an entry. WordEntry uses this model for both its
# `translations` and its `descendants` lists.
class Translation(BaseModelWrap):
    # Required fields: the term plus the code and name of its language.
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        description="Wiktionary language code of the translation term"
    )
    lang: str = Field(description="Name of the language of translation")

    # Optional fields; all default to an empty string or an empty list.
    sense_index: str = ""
    raw_tags: list[str] = Field(
        [],
        description="Tags specifying the translated term, usually gender",
    )
    tags: list[str] = []
    notes: list[str] = Field([], description="A list of notes")
    roman: str = Field(
        "", description="Transliteration in roman characters"
    )
    sense: str = ""
# A usage example attached to a sense (see Sense.examples).
class Example(BaseModelWrap):
    # The example sentence; the only required field.
    text: str = Field(description="Example usage sentence")
    # Pairs of integers.
    # NOTE(review): presumably (start, end) offsets of bold spans inside
    # `text` -- confirm against the code that fills this in.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        "", description="Spanish translation of the example sentence"
    )
    # Same shape as `bold_text_offsets`; presumably refers to `translation`.
    bold_translation_offsets: list[tuple[int, int]] = []
    ref: str = ""
# A reference to another word; used as the item type of Sense.form_of.
class AltForm(BaseModelWrap):
    # Required: the referenced word.
    word: str
# One sense (meaning) of a word entry; WordEntry.senses is a list of these.
# Every field is optional and defaults to an empty string or an empty list.
class Sense(BaseModelWrap):
    glosses: list[str] = Field(
        default=[],
        # The two literals are joined by implicit concatenation, so the first
        # one must end with a space to keep the sentences apart in the
        # generated schema description.
        description="list of gloss strings for the word sense. "
        "This has been cleaned, and should be no tagging.",
    )
    raw_tags: list[str] = []
    tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = Field(
        default=[], description="Category links on the page"
    )
    examples: list[Example] = Field(default=[], description="List of examples")
    sense_index: str = Field(
        default="", description="Sense number used in Wiktionary"
    )
    # Words this sense is a form of, as AltForm items.
    form_of: list[AltForm] = []
# Pronunciation data for an entry; WordEntry.sounds is a list of these.
# Every field is optional and defaults to an empty value.
class Sound(BaseModelWrap):
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL field per audio format.
    # NOTE(review): presumably derived from `audio` by the extractor -- confirm.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""
    # Description spelling corrected (was "Translitaration").
    roman: str = Field("", description="Transliteration to Roman characters")
    syllabic: str = Field("", description="Syllabic transcription")
    raw_tags: list[str] = Field(
        [], description="Specifying the variant of the pronunciation"
    )
    tags: list[str] = []
    alternative: str = Field(
        "", description="Alternative spelling with same pronunciation"
    )
    note: str = ""
    not_same_pronunciation: bool = Field(
        False, description="This is `True` for the 'Variantes' row"
    )
    rhymes: str = ""
    homophone: str = ""
# A word form with its tags; WordEntry.forms is a list of these.
class Form(BaseModelWrap):
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Declared with exclude=True, so it is left out of serialized output.
    # NOTE(review): presumably bookkeeping while parsing form tables -- confirm.
    row_span: int = Field(default=1, exclude=True)
# One hyphenation of a word; WordEntry.hyphenations is a list of these.
class Hyphenation(BaseModelWrap):
    # NOTE(review): presumably the pieces the word is split into -- confirm.
    parts: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
# Item type of WordEntry.attestations.
class Attestation(BaseModelWrap):
    # Required; kept as a plain string (no date parsing is done here).
    date: str
class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract.
    """  # noqa:E501

    # Sets the model title on top of the configuration from BaseModelWrap.
    model_config = ConfigDict(title="Spanish Wiktionary")

    # --- Identification: `word`, `lang_code` and `lang` are required. ---
    word: str = Field(description="word string")
    pos: str = Field("", description="Part of speech type")
    pos_title: str = Field("", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code", examples=["es"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["español"]
    )

    # --- Content of the entry. ---
    senses: list[Sense] = []
    categories: list[str] = Field(
        [],
        description="list of non-disambiguated categories for the word",
    )
    sounds: list[Sound] = []
    translations: list[Translation] = []
    etymology_text: str = Field(
        "", description="Etymology section as cleaned text."
    )

    # --- Linked terms, one list of Linkage items per relation. ---
    antonyms: list[Linkage] = []
    compounds: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    idioms: list[Linkage] = []
    meronyms: list[Linkage] = []
    related: list[Linkage] = []
    synonyms: list[Linkage] = []
    proverbs: list[Linkage] = []

    # --- Entry-level tags and remaining data. ---
    tags: list[str] = []
    raw_tags: list[str] = []
    extra_sounds: dict[str, str] = {}
    forms: list[Form] = []
    hyphenations: list[Hyphenation] = []
    cognates: list[Linkage] = []
    morphologies: list[Linkage] = []
    # Descendants reuse the Translation model.
    descendants: list[Translation] = []
    attestations: list[Attestation] = []