Coverage for src / wiktextract / extractor / zh / models.py: 100%
144 statements
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-29 01:50 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-29 01:50 +0000
1from pydantic import BaseModel, ConfigDict, Field
2from wikitextprocessor import NodeKind
class ChineseBaseModel(BaseModel):
    # Common base class for the models in this module: it declares no fields
    # and only fixes the pydantic validation settings.
    model_config = ConfigDict(
        # Values must already have the declared type (no lax coercion).
        strict=True,
        # Unknown field names are rejected instead of being ignored.
        extra="forbid",
        # Declared default values go through validation as well.
        validate_default=True,
        # Assigning to an attribute after construction is validated too.
        validate_assignment=True,
    )
class Example(ChineseBaseModel):
    # One example sentence together with its optional translation, literal
    # meaning, romanization and source reference.
    #
    # NOTE(review): each ``bold_*_offsets`` list presumably holds
    # (start, end) character offsets of the bold spans inside the matching
    # text field -- confirm against the extractor that fills them.

    text: str = Field(
        # The original description read "might have have both"; the
        # duplicated word is removed because this string is published in the
        # generated JSON schema.
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="",
        description="Chinese translation of the example sentence",
    )
    bold_translation_offsets: list[tuple[int, int]] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    roman: str = Field(
        default="",
        description="Romanization of the example sentence",
    )
    bold_roman_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    tags: list[str] = []
    raw_tags: list[str] = []
class AltForm(ChineseBaseModel):
    # Reference to another word; used in this module as the item type of
    # ``Sense.alt_of`` and ``Sense.form_of``.

    word: str  # required, no default
    tags: list[str] = []
    roman: str = ""
class Classifier(ChineseBaseModel):
    # A classifier string plus its tags; every field is optional.
    # NOTE(review): presumably a measure word / counter -- confirm.

    classifier: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
class ReferenceData(ChineseBaseModel):
    # A single reference; item type of ``AttestationData.references``.

    text: str  # required reference text
    refn: str = ""  # NOTE(review): presumably a reference name/id -- confirm
class AttestationData(ChineseBaseModel):
    # An attestation: a required date string and the references backing it.

    date: str
    references: list[ReferenceData] = []
class Sense(ChineseBaseModel):
    # One sense of a word entry. Every field has an empty default, so a
    # ``Sense()`` can be created first and filled in afterwards.

    # Gloss texts and their classification.
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []

    # Usage examples attached to this sense.
    examples: list[Example] = []
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )

    # Links to other words and further sense-level data.
    alt_of: list[AltForm] = []
    form_of: list[AltForm] = []
    classifiers: list[Classifier] = []
    attestations: list[AttestationData] = []
class Form(ChineseBaseModel):
    # One form of a word; item type of ``WordEntry.forms``. All fields are
    # optional.

    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    source: str = ""
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    hiragana: str = ""
    roman: str = ""
    sense: str = ""
    attestations: list[AttestationData] = []
class Sound(ChineseBaseModel):
    # Pronunciation data for an entry; every field is optional.

    zh_pron: str = Field(
        default="",
        description="Chinese word pronunciation",
    )
    ipa: str = Field(
        default="",
        description="International Phonetic Alphabet",
    )

    # Audio file name followed by one URL field per audio format.
    audio: str = Field(
        default="",
        description="Audio file name",
    )
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []
    homophone: str = ""
    enpr: str = Field(
        default="",
        description="English pronunciation",
    )
    other: str = ""
    roman: str = ""
    rhymes: str = ""
    hangeul: str = ""
class Translation(ChineseBaseModel):
    # A translation of the entry into another language. ``word`` is the only
    # required field.

    lang_code: str = Field(
        default="",
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(
        default="",
        description="Translation language name",
    )
    word: str = Field(
        description="Translation term",
    )
    sense: str = Field(
        default="",
        description="Translation gloss",
    )
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field(
        default="",
        description="Roman script",
    )
    alt: str = Field(
        default="",
        description="Alternative form",
    )
    lit: str = Field(
        default="",
        description="Literal translation for the term",
    )
    source: str = ""
class Linkage(ChineseBaseModel):
    # A linked word; item type of the linkage lists on ``WordEntry``
    # (synonyms, antonyms, derived, ...). All fields are optional.

    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    sense: str = ""
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    attestations: list[AttestationData] = []
class Descendant(ChineseBaseModel):
    # A descendant term. The model is recursive: ``descendants`` holds
    # further ``Descendant`` items, hence the quoted forward reference.

    lang_code: str = Field(
        default="",
        description="Wiktionary language code",
    )
    lang: str = Field(
        default="",
        description="Language name",
    )
    word: str = ""
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list["Descendant"] = []
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    sense: str = ""
class Hyphenation(ChineseBaseModel):
    # One hyphenation of a word, stored as a list of string parts.

    parts: list[str] = []
class WordEntry(ChineseBaseModel):
    # Top-level record of this module. ``word``, ``lang_code``, ``lang`` and
    # ``pos`` are required; every other field has an empty default.

    # Only the title is set here; per the pydantic documentation a subclass
    # ``model_config`` is merged with the one inherited from the parent.
    model_config = ConfigDict(title="Chinese Wiktionary")

    # Identity of the entry.
    word: str = Field(
        description="Word string",
    )
    lang_code: str = Field(
        description="Wiktionary language code",
    )
    lang: str = Field(
        description="Localized language name",
    )
    pos: str = Field(
        description="Part of speech type",
    )
    pos_title: str = ""
    # ``exclude=True`` keeps this field out of serialized output.
    pos_level: NodeKind = Field(
        default=NodeKind.ROOT,
        exclude=True,
    )

    # Etymology, senses, forms, sounds and translations.
    etymology_texts: list[str] = []
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field(
        default=[],
        description="Sense list",
    )
    forms: list[Form] = Field(
        default=[],
        description="Inflection forms list",
    )
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # Linkage lists, one field per relation type.
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviations: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []

    # Redirect page data.
    title: str = Field(
        default="",
        description="Redirect page source title",
    )
    redirect: str = Field(
        default="",
        description="Redirect page target title",
    )

    # Entry-level metadata.
    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        default=[],
        description="Soft redirect page, extracted from template zh-see ja-see",
    )
    literal_meaning: str = ""
    original_title: str = ""
    anagrams: list[Linkage] = []
    hyphenations: list[Hyphenation] = []
    classifiers: list[Classifier] = []