Coverage for src/wiktextract/extractor/de/models.py: 100%
124 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-03 05:44 +0000
1from pydantic import BaseModel, ConfigDict, Field
class GermanBaseModel(BaseModel):
    # Common base for the models in this module; it only carries the shared
    # pydantic configuration.
    model_config = ConfigDict(
        # Declared default values are validated as well.
        validate_default=True,
        # Assigning to an attribute of an existing instance is validated.
        validate_assignment=True,
        # Strict mode: values are not coerced to the annotated type.
        strict=True,
        # Field names that are not declared on the model are rejected.
        extra="forbid",
    )
class Linkage(GermanBaseModel):
    # A linked term. `word` is the only required field; the remaining fields
    # default to an empty string or an empty list.
    word: str
    sense_index: str = Field(default="")
    note: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    topics: list[str] = Field(default=[])
class Translation(GermanBaseModel):
    # A single translation record. No field is required: strings default to
    # "", lists to [] and `uncertain` to False.
    sense: str = Field(
        description="A gloss of the sense being translated", default=""
    )
    word: str = Field(description="Translation term", default="")
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
        default="",
    )
    lang: str = Field(description="Localized language name", default="")
    uncertain: bool = Field(
        description="Translation marked as uncertain", default=False
    )
    roman: str = Field(
        description="Transliteration to Roman characters", default=""
    )
    sense_index: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    notes: list[str] = Field(description="A list of notes", default=[])
    other: str = Field(default="")
class Example(GermanBaseModel):
    # An example sentence together with the bibliographic details of its
    # reference. Every field is optional and defaults to "" or [].
    text: str = Field(default="", description="Example usage sentence")
    # Pairs of integers; presumably (start, end) positions of italic spans
    # inside `text` -- confirm against the extractor code.
    italic_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="German translation of the example sentence"
    )
    # Same shape as `italic_text_offsets`, but for `translation`.
    italic_translation_offsets: list[tuple[int, int]] = []
    raw_tags: list[str] = []
    tags: list[str] = []
    ref: str = Field(default="", description="Raw reference string")
    url: str = Field(
        default="", description="A web link. Not necessarily well-formatted."
    )
    author: str = Field(default="", description="Author's name")
    title: str = Field(default="", description="Title of the reference")
    title_complement: str = Field(
        default="", description="Complement to the title"
    )
    pages: str = Field(default="", description="Page numbers")
    year: str = Field(default="", description="Year of publication")
    publisher: str = Field(default="", description="Published by")
    editor: str = Field(default="", description="Editor")
    translator: str = Field(default="", description="Translator")
    collection: str = Field(
        default="",
        description="Name of collection that reference was published in",
    )
    volume: str = Field(default="", description="Volume number")
    comment: str = Field(default="", description="Comment on the reference")
    day: str = Field(default="", description="Day of publication")
    month: str = Field(default="", description="Month of publication")
    accessdate: str = Field(
        default="", description="Date of access of online reference"
    )
    date: str = Field(default="", description="Date of publication")
    number: str = Field(default="", description="Issue number")
    # chapter: Optional[str] = Field(default=None, description="Chapter name")
    place: str = Field(default="", description="Place of publication")
    edition: str = Field(default="", description="Edition number")
    isbn: str = Field(default="", description="ISBN number")
class AltForm(GermanBaseModel):
    # Holder for a single word; Sense uses it for `form_of` and `alt_of`.
    word: str = Field(...)  # required: the ellipsis means "no default"
class Sense(GermanBaseModel):
    # One sense of an entry. Nothing is required; every list defaults to []
    # and `sense_index` to "".
    glosses: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    examples: list[Example] = Field(
        description="List of examples", default=[]
    )
    sense_index: str = Field(
        description="Sense number used in Wiktionary", default=""
    )
    topics: list[str] = Field(default=[])
    form_of: list[AltForm] = Field(default=[])
    alt_of: list[AltForm] = Field(default=[])
class Sound(GermanBaseModel):
    # Pronunciation data: IPA string, audio file name and one URL field per
    # audio format. All fields are optional.
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    wav_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    oga_url: str = ""
    flac_url: str = ""
    opus_url: str = ""
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    rhymes: str = Field(default="")
    # exclude=True keeps this field out of the serialized output.
    categories: list[str] = Field(exclude=True, default=[])
class Form(GermanBaseModel):
    # A word form. `form` is required; everything else defaults to an empty
    # string or an empty list.
    form: str
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    source: str = Field(default="")
    sense_index: str = Field(default="")
    topics: list[str] = Field(default=[])
    pronouns: list[str] = Field(default=[])
class Descendant(GermanBaseModel):
    # A descendant term; all fields are optional strings defaulting to "".
    lang_code: str = Field(description="Wiktionary language code", default="")
    lang: str = Field(description="Language name", default="")
    word: str = Field(default="")
    roman: str = Field(default="")
    sense_index: str = Field(default="")
class Hyphenation(GermanBaseModel):
    # A hyphenation given as a list of parts, plus its tags; all lists
    # default to [].
    parts: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
class WordEntry(GermanBaseModel):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the title is set here. Per pydantic's config inheritance rules
    # this is merged with the model_config inherited from GermanBaseModel.
    model_config = ConfigDict(title="German Wiktionary")

    # Required fields: word, lang_code and lang (they declare no default).
    word: str = Field(description="word string")
    pos: str = Field(default="", description="Part of speech type")
    other_pos: list[str] = []
    pos_title: str = Field(default="", description="Original POS title")
    # The schema examples use German values, matching the model title; the
    # language name is the one localized for the German edition.
    lang_code: str = Field(
        description="Wiktionary language code", examples=["de"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["Deutsch"]
    )
    senses: list[Sense] = []
    translations: list[Translation] = []
    sounds: list[Sound] = []
    # Linkage lists, one per relation type.
    antonyms: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    expressions: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    proverbs: list[Linkage] = []
    synonyms: list[Linkage] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    categories: list[str] = []
    redirects: list[str] = []
    etymology_texts: list[str] = []
    forms: list[Form] = []
    meronyms: list[Linkage] = []
    hyphenations: list[Hyphenation] = []
    notes: list[str] = []
    related: list[Linkage] = []
    descendants: list[Descendant] = []