Coverage for src/wiktextract/extractor/de/models.py: 100%
121 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1from pydantic import BaseModel, ConfigDict, Field
class GermanBaseModel(BaseModel):
    # Common base class for the models visible in this module; it only
    # carries the shared pydantic configuration. A `#` comment is used rather
    # than a docstring so the generated JSON schema is left unchanged.
    model_config = ConfigDict(
        validate_default=True,  # default values go through validation too
        validate_assignment=True,  # re-validate when an attribute is set
        strict=True,  # strict mode: no implicit type coercion
        extra="forbid",  # fields not declared on the model are rejected
    )
class Linkage(GermanBaseModel):
    # A linked term: the only required field is `word`; the remaining fields
    # default to an empty string or an empty list.
    word: str
    sense_index: str = Field(default="")
    note: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
class Translation(GermanBaseModel):
    # A single translation entry. Every field is optional and falls back to
    # an empty string, an empty list or False.
    sense: str = Field(
        "", description="A gloss of the sense being translated"
    )
    word: str = Field("", description="Translation term")
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field("", description="Localized language name")
    uncertain: bool = Field(
        False, description="Translation marked as uncertain"
    )
    roman: str = Field("", description="Transliteration to Roman characters")
    sense_index: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    notes: list[str] = Field([], description="A list of notes")
    other: str = Field(default="")
class Example(GermanBaseModel):
    # An example sentence together with the bibliographic details of the
    # reference it was taken from. Every field is optional.

    # --- Example text -----------------------------------------------------
    text: str = Field(default="", description="Example usage sentence")
    # (start, end) integer pairs relating to `text`.
    italic_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="German translation of the example sentence"
    )
    # (start, end) integer pairs relating to `translation`.
    italic_translation_offsets: list[tuple[int, int]] = []
    raw_tags: list[str] = []
    tags: list[str] = []

    # --- Reference details --------------------------------------------------
    ref: str = Field(default="", description="Raw reference string")
    url: str = Field(
        default="", description="A web link. Not necessarily well-formatted."
    )
    author: str = Field(default="", description="Author's name")
    title: str = Field(default="", description="Title of the reference")
    title_complement: str = Field(
        default="", description="Complement to the title"
    )
    pages: str = Field(default="", description="Page numbers")
    year: str = Field(default="", description="Year of publication")
    publisher: str = Field(default="", description="Published by")
    editor: str = Field(default="", description="Editor")
    translator: str = Field(default="", description="Translator")
    collection: str = Field(
        default="",
        description="Name of collection that reference was published in",
    )
    volume: str = Field(default="", description="Volume number")
    comment: str = Field(default="", description="Comment on the reference")
    day: str = Field(default="", description="Day of publication")
    month: str = Field(default="", description="Month of publication")
    accessdate: str = Field(
        default="", description="Date of access of online reference"
    )
    date: str = Field(default="", description="Date of publication")
    number: str = Field(default="", description="Issue number")
    # Currently disabled field, kept for reference:
    # chapter: Optional[str] = Field(default=None, description="Chapter name")
    place: str = Field(default="", description="Place of publication")
    edition: str = Field(default="", description="Edition number")
    isbn: str = Field(default="", description="ISBN number")
class AltForm(GermanBaseModel):
    # Holds a single required string, `word`.
    word: str = Field(...)
class Sense(GermanBaseModel):
    # One sense of a word: glosses, tags, categories, examples and the
    # form-of / alt-of targets. Every field has an empty default.
    glosses: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    examples: list[Example] = Field([], description="List of examples")
    sense_index: str = Field(
        "", description="Sense number used in Wiktionary"
    )
    topics: list[str] = Field(default=[])
    form_of: list[AltForm] = Field(default=[])
    alt_of: list[AltForm] = Field(default=[])
class Sound(GermanBaseModel):
    # Pronunciation data: IPA string, audio file name and one URL field per
    # audio format. Every field has an empty default.
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    wav_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    oga_url: str = ""
    flac_url: str = ""
    opus_url: str = ""
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    rhymes: str = Field(default="")
    # exclude=True: this field is left out when the model is serialized.
    categories: list[str] = Field([], exclude=True)
class Form(GermanBaseModel):
    # A word form: `form` is required, the other fields default to empty.
    form: str
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    source: str = Field(default="")
    sense_index: str = Field(default="")
class Descendant(GermanBaseModel):
    # A descendant term with its language; all fields default to "".
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = Field(default="")
    roman: str = Field(default="")
    sense_index: str = Field(default="")
class Hyphenation(GermanBaseModel):
    # Hyphenation parts plus tag lists; all three lists default to empty.
    parts: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
class WordEntry(GermanBaseModel):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the schema title is set here; pydantic merges a subclass
    # `model_config` with the one inherited from the base class.
    model_config = ConfigDict(title="German Wiktionary")

    # --- Required identification fields -------------------------------------
    word: str = Field(description="word string")
    pos: str = Field(default="", description="Part of speech type")
    other_pos: list[str] = []
    pos_title: str = Field(default="", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code", examples=["es"]
    )
    # The example is the German name matching lang_code "es", since this
    # model describes the German edition and `lang` is the localized name.
    lang: str = Field(
        description="Localized language name of the word",
        examples=["Spanisch"],
    )

    # --- Nested data and linkage lists (all default to empty) ---------------
    senses: list[Sense] = []
    translations: list[Translation] = []
    sounds: list[Sound] = []
    antonyms: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    expressions: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    proverbs: list[Linkage] = []
    synonyms: list[Linkage] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    categories: list[str] = []
    redirects: list[str] = []
    etymology_texts: list[str] = []
    forms: list[Form] = []
    meronyms: list[Linkage] = []
    hyphenations: list[Hyphenation] = []
    notes: list[str] = []
    related: list[Linkage] = []
    descendants: list[Descendant] = []