Coverage for src / wiktextract / extractor / es / models.py: 100%
96 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
1from pydantic import BaseModel, ConfigDict, Field
class BaseModelWrap(BaseModel):
    # Common base for the models in this module; it only carries the shared
    # pydantic configuration.
    # NOTE: deliberately a comment rather than a docstring, since pydantic
    # publishes class docstrings as the schema description.
    model_config = ConfigDict(
        strict=True,  # no implicit type coercion
        extra="forbid",  # unknown fields are rejected
        validate_default=True,  # default values are validated too
        validate_assignment=True,  # re-validate when attributes are assigned
    )
class Linkage(BaseModelWrap):
    # A linked term. Only `word` is required; every other field is a string
    # that defaults to "".
    word: str
    note: str = Field(default="")
    alternative_spelling: str = Field(
        "", description="Alternative spelling of the word"
    )
    sense_index: str = Field(default="")
    sense: str = Field(default="")
class Translation(BaseModelWrap):
    # One translation term. `word`, `lang_code` and `lang` are required; the
    # remaining fields default to an empty string or an empty list.
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        description="Wiktionary language code of the translation term"
    )
    lang: str = Field(description="Name of the language of translation")
    sense_index: str = Field(default="")
    raw_tags: list[str] = Field(
        [], description="Tags specifying the translated term, usually gender"
    )
    tags: list[str] = Field(default=[])
    notes: list[str] = Field([], description="A list of notes")
    roman: str = Field("", description="Transliteration in roman characters")
    sense: str = Field(default="")
class Example(BaseModelWrap):
    # An example usage sentence. Only `text` is required.
    text: str = Field(description="Example usage sentence")
    # List of (int, int) pairs; presumably start/end offsets of bold spans in
    # `text` -- confirm against the extractor that fills it.
    bold_text_offsets: list[tuple[int, int]] = Field(default=[])
    translation: str = Field(
        "", description="Spanish translation of the example sentence"
    )
    # Same shape as `bold_text_offsets`; presumably refers to `translation`.
    bold_translation_offsets: list[tuple[int, int]] = Field(default=[])
    ref: str = Field(default="")
class AltForm(BaseModelWrap):
    # Holds a single required string field, `word`.
    word: str
class Sense(BaseModelWrap):
    # One sense of a word: its glosses plus tags, topics, categories, examples
    # and `form_of` references. Every field is optional and defaults to an
    # empty list (or "" for `sense_index`).
    glosses: list[str] = Field(
        default=[],
        # The two literals below are implicitly concatenated, so the first one
        # must end with a space to keep the sentences apart in the schema.
        description="list of gloss strings for the word sense. "
        "This has been cleaned, and should be no tagging.",
    )
    raw_tags: list[str] = []
    tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = Field(
        default=[], description="Category links on the page"
    )
    examples: list[Example] = Field(default=[], description="List of examples")
    sense_index: str = Field(
        default="", description="Sense number used in Wiktionary"
    )
    form_of: list[AltForm] = []
class Sound(BaseModelWrap):
    # Pronunciation data for a word. Every field is optional: strings default
    # to "", lists to [], and `not_same_pronunciation` to False.
    ipa: str = Field(default="", description="International Phonetic Alphabet")
    audio: str = Field(default="", description="Audio file name")
    # One URL field per audio format.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""
    # Spelling fixed: the description previously read "Translitaration".
    roman: str = Field(
        default="", description="Transliteration to Roman characters"
    )
    syllabic: str = Field(default="", description="Syllabic transcription")
    raw_tags: list[str] = Field(
        default=[], description="Specifying the variant of the pronunciation"
    )
    tags: list[str] = []
    alternative: str = Field(
        default="", description="Alternative spelling with same pronunciation"
    )
    note: str = ""
    not_same_pronunciation: bool = Field(
        default=False, description="This is `True` for the 'Variantes' row"
    )
    rhymes: str = ""
    homophone: str = ""
    other: str = ""
class Form(BaseModelWrap):
    # A word form with its tags. All fields are optional.
    form: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    # `exclude=True` keeps this field out of serialized output.
    row_span: int = Field(default=1, exclude=True)
class Hyphenation(BaseModelWrap):
    # Hyphenation parts plus tags; each field defaults to an empty list.
    parts: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
class Attestation(BaseModelWrap):
    # Holds a single required string field, `date`.
    date: str
class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract.
    """  # noqa:E501

    # Sets the model title for this class.
    model_config = ConfigDict(title="Spanish Wiktionary")

    # Required identification fields, plus optional part-of-speech info.
    word: str = Field(description="word string")
    pos: str = Field("", description="Part of speech type")
    pos_title: str = Field("", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code", examples=["es"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["español"]
    )

    # Everything below is optional and defaults to an empty list.
    senses: list[Sense] = Field(default=[])
    categories: list[str] = Field(
        [], description="list of non-disambiguated categories for the word"
    )
    sounds: list[Sound] = Field(default=[])
    translations: list[Translation] = Field(default=[])
    etymology_texts: list[str] = Field(
        [], description="Etymology section as cleaned text."
    )
    antonyms: list[Linkage] = Field(default=[])
    compounds: list[Linkage] = Field(default=[])
    derived: list[Linkage] = Field(default=[])
    hyponyms: list[Linkage] = Field(default=[])
    hypernyms: list[Linkage] = Field(default=[])
    idioms: list[Linkage] = Field(default=[])
    meronyms: list[Linkage] = Field(default=[])
    related: list[Linkage] = Field(default=[])
    synonyms: list[Linkage] = Field(default=[])
    proverbs: list[Linkage] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    forms: list[Form] = Field(default=[])
    hyphenations: list[Hyphenation] = Field(default=[])
    cognates: list[Linkage] = Field(default=[])
    morphologies: list[Linkage] = Field(default=[])
    # Descendants reuse the Translation model.
    descendants: list[Translation] = Field(default=[])
    attestations: list[Attestation] = Field(default=[])