Coverage for src/wiktextract/extractor/de/models.py: 100%
103 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
1from pydantic import BaseModel, ConfigDict, Field
class BaseModelWrap(BaseModel):
    # Common base class for the models in this file: it carries a single
    # pydantic configuration so that subclasses do not have to repeat it.
    model_config = ConfigDict(
        strict=True,  # strict mode: no implicit type coercion of inputs
        extra="forbid",  # field names not declared on the model are errors
        validate_default=True,  # declared default values are validated too
        validate_assignment=True,  # attribute assignment is re-validated
    )
class Linkage(BaseModelWrap):
    # Item type of the WordEntry relation lists (synonyms, antonyms,
    # derived, ...). Only `word` is required; the rest default to empty.
    word: str
    sense_index: str = Field(default="")
    note: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
class Translation(BaseModelWrap):
    # Item type of WordEntry.translations. Every field has a default, so an
    # instance can be created empty and filled in by assignment.
    sense: str = Field(
        "", description="A gloss of the sense being translated"
    )
    word: str = Field("", description="Translation term")
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field("", description="Localized language name")
    uncertain: bool = Field(
        False, description="Translation marked as uncertain"
    )
    roman: str = Field(
        "", description="Transliteration to Roman characters"
    )
    sense_index: str = Field("")
    raw_tags: list[str] = Field([])
    tags: list[str] = Field([])
    notes: list[str] = Field([], description="A list of notes")
class Example(BaseModelWrap):
    # Item type of Sense.examples: the example sentence itself plus the
    # fields of its reference. All fields are optional strings (or string
    # lists) that default to empty.

    # --- the example itself ---
    text: str = Field("", description="Example usage sentence")
    translation: str = Field(
        "", description="German translation of the example sentence"
    )
    raw_tags: list[str] = Field([])
    tags: list[str] = Field([])

    # --- reference details ---
    ref: str = Field("", description="Raw reference string")
    url: str = Field(
        "", description="A web link. Not necessarily well-formated."
    )
    author: str = Field("", description="Author's name")
    title: str = Field("", description="Title of the reference")
    title_complement: str = Field("", description="Complement to the title")
    pages: str = Field("", description="Page numbers")
    year: str = Field("", description="Year of publication")
    publisher: str = Field("", description="Published by")
    editor: str = Field("", description="Editor")
    translator: str = Field("", description="Translator")
    collection: str = Field(
        "", description="Name of collection that reference was published in"
    )
    volume: str = Field("", description="Volume number")
    comment: str = Field("", description="Comment on the reference")
    day: str = Field("", description="Day of publication")
    month: str = Field("", description="Month of publication")
    accessdate: str = Field(
        "", description="Date of access of online reference"
    )
    date: str = Field("", description="Date of publication")
    number: str = Field("", description="Issue number")
    # Currently disabled field:
    # chapter: Optional[str] = Field(default=None, description="Chapter name")
    place: str = Field("", description="Place of publication")
    edition: str = Field("", description="Edition number")
    isbn: str = Field("", description="ISBN number")
class AltForm(BaseModelWrap):
    # Item type of Sense.form_of; holds a single required word string.
    word: str = Field(...)
class Sense(BaseModelWrap):
    # Item type of WordEntry.senses. Every field defaults to empty.
    glosses: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    # Example is defined earlier in this module, so it is referenced
    # directly instead of by a quoted forward reference.
    examples: list[Example] = Field([], description="List of examples")
    # Currently disabled field:
    # subsenses: list["Sense"] = Field(
    #     default=[], description="List of subsenses"
    # )
    sense_index: str = Field(
        "", description="Sense number used in Wiktionary"
    )
    topics: list[str] = Field(default=[])
    form_of: list[AltForm] = Field(default=[])
class Sound(BaseModelWrap):
    # Item type of WordEntry.sounds. Every field defaults to empty.
    ipa: str = Field("", description="International Phonetic Alphabet")
    # Currently disabled field:
    # phonetic_transcription: list[str] = Field(
    #     default=[], description="Phonetic transcription, less exact than IPA."
    # )
    audio: str = Field("", description="Audio file name")
    # One URL field per audio file format.
    wav_url: str = Field("")
    ogg_url: str = Field("")
    mp3_url: str = Field("")
    oga_url: str = Field("")
    flac_url: str = Field("")
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Localized language name")
    # Currently disabled fields:
    # roman: list[str] = Field(
    #     default=[], description="Transliteration to Roman characters"
    # )
    # syllabic: list[str] = Field(
    #     default=[], description="Syllabic transcription"
    # )
    raw_tags: list[str] = Field(
        [], description="Specifying the variant of the pronunciation"
    )
    tags: list[str] = Field([])
    rhymes: str = Field("")
class Form(BaseModelWrap):
    # Item type of WordEntry.forms. Only `form` is required; the remaining
    # fields default to empty.
    form: str
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    source: str = Field(default="")
    sense_index: str = Field(default="")
class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the schema title is set here. Pydantic merges a subclass's
    # model_config with the one inherited from BaseModelWrap, so the
    # validation settings declared there still apply.
    model_config = ConfigDict(title="German Wiktionary")

    # Required fields: word, lang_code and lang have no default.
    word: str = Field(description="word string")
    pos: str = Field(default="", description="Part of speech type")
    other_pos: list[str] = []
    # pos_title: str = Field(default=None, description="Original POS title")
    # The schema examples below use German values; the previous "es" /
    # "español" did not fit a German Wiktionary model whose `lang` is a
    # localized language name. They are schema metadata only and do not
    # affect validation.
    lang_code: str = Field(
        description="Wiktionary language code", examples=["de"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["Deutsch"]
    )
    senses: list[Sense] = []
    # categories: list[str] = Field(
    #     default=[],
    #     description="list of non-disambiguated categories for the word",
    # )
    translations: list[Translation] = []
    sounds: list[Sound] = []
    # Relation lists; each item is a Linkage.
    antonyms: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    expressions: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    proverbs: list[Linkage] = []
    synonyms: list[Linkage] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    categories: list[str] = []
    redirects: list[str] = []
    etymology_text: str = ""
    forms: list[Form] = []
    meronyms: list[Linkage] = []
    hyphenation: str = ""