Coverage for src/wiktextract/extractor/de/models.py: 100%
103 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from pydantic import BaseModel, ConfigDict, Field
class BaseModelWrap(BaseModel):
    # Common base for every model in this module: it only carries the shared
    # pydantic configuration and declares no fields of its own.
    model_config = ConfigDict(
        strict=True,  # no implicit type coercion during validation
        extra="forbid",  # reject keys that are not declared as fields
        validate_default=True,  # run validation on default values as well
        validate_assignment=True,  # re-validate whenever an attribute is set
    )
class Linkage(BaseModelWrap):
    # One linked term; WordEntry uses this model for its synonym, antonym,
    # hypernym, etc. lists. Only `word` is required.
    word: str
    sense_index: str = Field(default="")
    note: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
class Translation(BaseModelWrap):
    # A single translation entry. Every field has a default, so an empty
    # Translation() is valid.
    sense: str = Field("", description="A gloss of the sense being translated")
    word: str = Field("", description="Translation term")
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field("", description="Localized language name")
    uncertain: bool = Field(
        False, description="Translation marked as uncertain"
    )
    roman: str = Field("", description="Transliteration to Roman characters")
    sense_index: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    notes: list[str] = Field([], description="A list of notes")
class Example(BaseModelWrap):
    # An example usage sentence together with optional bibliographic details
    # of its reference. All fields are optional and default to empty values.
    text: str = Field("", description="Example usage sentence")
    translation: str = Field(
        "", description="German translation of the example sentence"
    )
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])

    # --- reference metadata (all plain strings) ---
    ref: str = Field("", description="Raw reference string")
    url: str = Field(
        "", description="A web link. Not necessarily well-formated."
    )
    author: str = Field("", description="Author's name")
    title: str = Field("", description="Title of the reference")
    title_complement: str = Field("", description="Complement to the title")
    pages: str = Field("", description="Page numbers")
    year: str = Field("", description="Year of publication")
    publisher: str = Field("", description="Published by")
    editor: str = Field("", description="Editor")
    translator: str = Field("", description="Translator")
    collection: str = Field(
        "", description="Name of collection that reference was published in"
    )
    volume: str = Field("", description="Volume number")
    comment: str = Field("", description="Comment on the reference")
    day: str = Field("", description="Day of publication")
    month: str = Field("", description="Month of publication")
    accessdate: str = Field(
        "", description="Date of access of online reference"
    )
    date: str = Field("", description="Date of publication")
    number: str = Field("", description="Issue number")
    # chapter: Optional[str] = Field(default=None, description="Chapter name")
    place: str = Field("", description="Place of publication")
    # editor: Optional[str] = Field(default=None, description="Editor")
    edition: str = Field("", description="Edition number")
    isbn: str = Field("", description="ISBN number")
class AltForm(BaseModelWrap):
    # Wrapper around a single required word string; used as the element type
    # of Sense.form_of.
    word: str
class Sense(BaseModelWrap):
    # One sense of a word: its glosses plus tags, categories, examples and
    # topics. Every field defaults to an empty value.
    glosses: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    # Example is defined earlier in this module, so no forward-reference
    # string is needed for the annotation.
    examples: list[Example] = Field([], description="List of examples")
    # subsenses: list["Sense"] = Field(
    #     default=[], description="List of subsenses"
    # )
    sense_index: str = Field(
        "", description="Sense number used in Wiktionary"
    )
    topics: list[str] = Field(default=[])
    form_of: list[AltForm] = Field(default=[])
class Sound(BaseModelWrap):
    # Pronunciation data: IPA text, an audio file name and per-format URL
    # fields for that audio, plus tags and rhymes.
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL field per audio format; all default to the empty string.
    wav_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    oga_url: str = ""
    flac_url: str = ""
    opus_url: str = ""
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    rhymes: str = Field(default="")
    # exclude=True keeps this field out of the serialized output.
    categories: list[str] = Field([], exclude=True)
class Form(BaseModelWrap):
    # A single word form; `form` is required, everything else is optional.
    form: str
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    source: str = Field(default="")
    sense_index: str = Field(default="")
class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the schema title is set here; pydantic merges it with the
    # configuration inherited from BaseModelWrap.
    model_config = ConfigDict(title="German Wiktionary")

    # Required identification fields: `word`, `lang_code` and `lang` have no
    # default and must be supplied when constructing an entry.
    word: str = Field(description="word string")
    pos: str = Field(default="", description="Part of speech type")
    other_pos: list[str] = []
    # pos_title: str = Field(default=None, description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code", examples=["es"]
    )
    # The example is the German-localized name matching the "es" code above,
    # since this model describes the German Wiktionary edition.
    lang: str = Field(
        description="Localized language name of the word",
        examples=["Spanisch"],
    )
    senses: list[Sense] = []
    # categories: list[str] = Field(
    #     default=[],
    #     description="list of non-disambiguated categories for the word",
    # )
    translations: list[Translation] = []
    sounds: list[Sound] = []

    # Related-term lists, each holding Linkage items.
    antonyms: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    expressions: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    proverbs: list[Linkage] = []
    synonyms: list[Linkage] = []

    tags: list[str] = []
    raw_tags: list[str] = []
    categories: list[str] = []
    redirects: list[str] = []
    etymology_text: str = ""
    forms: list[Form] = []
    meronyms: list[Linkage] = []
    hyphenation: str = ""