Coverage for src / wiktextract / extractor / de / models.py: 100%
103 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-05 07:46 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-05 07:46 +0000
1from pydantic import BaseModel, ConfigDict, Field
class GermanBaseModel(BaseModel):
    # Common base for every model in this module: all of them inherit the
    # pydantic configuration below.
    model_config = ConfigDict(
        strict=True,  # no implicit type coercion during validation
        extra="forbid",  # unknown fields are a validation error
        validate_default=True,  # default values are validated as well
        validate_assignment=True,  # attribute assignment is re-validated
    )
class Linkage(GermanBaseModel):
    # A term linked from an entry. WordEntry uses this model for its
    # synonyms, antonyms, derived terms and the other linkage lists.
    word: str  # the only required field
    sense_index: str = Field(default="")
    note: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    topics: list[str] = Field(default=[])
class Translation(GermanBaseModel):
    # One translation of an entry; every field is optional and defaults to
    # an empty value.
    sense: str = Field(
        "", description="A gloss of the sense being translated"
    )
    word: str = Field("", description="Translation term")
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field("", description="Localized language name")
    uncertain: bool = Field(
        False, description="Translation marked as uncertain"
    )
    roman: str = Field(
        "", description="Transliteration to Roman characters"
    )
    sense_index: str = ""
    raw_tags: list[str] = []
    tags: list[str] = []
    notes: list[str] = Field([], description="A list of notes")
    other: str = ""
class Example(GermanBaseModel):
    # A usage example attached to a sense, with an optional translation and
    # reference string.
    text: str = Field(description="Example usage sentence")
    # Each item is an (int, int) pair. The description previously read
    # "Italic words", which contradicted the field name; it now matches the
    # key that consumers of the data actually see.
    # NOTE(review): presumably (start, end) offsets into `text` - confirm
    # against the extractor code that fills this field.
    bold_text_offsets: list[tuple[int, int]] = Field(
        default=[], description="Bold words"
    )
    translation: str = Field(
        default="", description="German translation of the example sentence"
    )
    # Same pair structure as `bold_text_offsets`, for `translation`.
    bold_translation_offsets: list[tuple[int, int]] = Field(
        default=[], description="Bold words"
    )
    raw_tags: list[str] = []
    tags: list[str] = []
    ref: str = Field(default="", description="Raw reference string")
class AltForm(GermanBaseModel):
    # Single-word reference; Sense uses it for its `form_of` and `alt_of`
    # lists.
    word: str = Field()
class Sense(GermanBaseModel):
    # One sense of a word entry: glosses plus the examples, tags and
    # form references that belong to it.
    glosses: list[str] = []
    raw_tags: list[str] = []
    tags: list[str] = []
    categories: list[str] = []
    # Example is defined earlier in this module, so no forward reference
    # string is needed.
    examples: list[Example] = Field([], description="List of examples")
    sense_index: str = Field(
        "", description="Sense number used in Wiktionary"
    )
    topics: list[str] = []
    form_of: list[AltForm] = []
    alt_of: list[AltForm] = []
class Sound(GermanBaseModel):
    # Pronunciation data of an entry: IPA, audio file name and the URLs of
    # the audio in several formats.
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL field per audio format.
    wav_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    oga_url: str = ""
    flac_url: str = ""
    opus_url: str = ""
    raw_tags: list[str] = []
    tags: list[str] = []
    rhymes: str = ""
    # exclude=True keeps this field out of the serialized output.
    categories: list[str] = Field([], exclude=True)
class Form(GermanBaseModel):
    # One word form listed for an entry (see WordEntry.forms); only `form`
    # itself is required.
    form: str
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    source: str = Field(default="")
    sense_index: str = Field(default="")
    topics: list[str] = Field(default=[])
    pronouns: list[str] = Field(default=[])
class Descendant(GermanBaseModel):
    # One item of WordEntry.descendants; all fields default to "".
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = Field(default="")
    roman: str = Field(default="")
    sense_index: str = Field(default="")
class Hyphenation(GermanBaseModel):
    # One item of WordEntry.hyphenations.
    # NOTE(review): `parts` presumably holds the segments the word is split
    # into - confirm against the extractor that fills it.
    parts: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
class WordEntry(GermanBaseModel):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the title is set here; the remaining settings are inherited from
    # GermanBaseModel's model_config.
    model_config = ConfigDict(title="German Wiktionary")

    # Required identification of the entry.
    word: str = Field(description="word string")
    pos: str = Field(default="", description="Part of speech type")
    pos_title: str = Field(default="", description="Original POS title")
    # The schema examples previously read "es" / "español" (apparently
    # copied from the Spanish extractor); "español" is not a German-localized
    # language name, so they now use values matching this edition.
    lang_code: str = Field(
        description="Wiktionary language code", examples=["de"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["Deutsch"]
    )

    # Optional content; every list below defaults to empty.
    senses: list[Sense] = []
    translations: list[Translation] = []
    sounds: list[Sound] = []
    antonyms: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    expressions: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    proverbs: list[Linkage] = []
    synonyms: list[Linkage] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    categories: list[str] = []
    redirects: list[str] = []
    etymology_texts: list[str] = []
    forms: list[Form] = []
    meronyms: list[Linkage] = []
    hyphenations: list[Hyphenation] = []
    notes: list[str] = []
    related: list[Linkage] = []
    descendants: list[Descendant] = []