Coverage for src / wiktextract / extractor / zh / models.py: 100%
142 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-05 07:46 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-05 07:46 +0000
1from pydantic import BaseModel, ConfigDict, Field
2from wikitextprocessor import NodeKind
class ChineseBaseModel(BaseModel):
    # Common parent of the models in this module; it only carries the shared
    # pydantic configuration.
    # NOTE: documented with `#` comments rather than a class docstring because
    # pydantic copies a model's docstring into its generated JSON schema.
    model_config = ConfigDict(
        # Reject input that contains field names the model does not declare.
        extra="forbid",
        # Validate in strict mode: values are not coerced between types.
        strict=True,
        # Run validation again when an attribute is assigned on an instance.
        validate_assignment=True,
        # Validate the declared default values as well.
        validate_default=True,
    )
class Example(ChineseBaseModel):
    # One example sentence together with its optional translation, literal
    # meaning, romanization and source reference.
    #
    # Each `bold_*_offsets` list holds pairs of ints for the neighbouring
    # text field.
    # NOTE(review): presumably (start, end) character offsets of the bold
    # spans in that string - confirm against the extractor code.

    # Required. The description previously contained a duplicated word
    # ("have have"), which surfaced in the generated JSON schema.
    text: str = Field(
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    bold_text_offsets: list[tuple[int, int]] = []

    translation: str = Field(
        default="",
        description="Chinese translation of the example sentence",
    )
    bold_translation_offsets: list[tuple[int, int]] = []

    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []

    roman: str = Field(
        default="",
        description="Romanization of the example sentence",
    )
    bold_roman_offsets: list[tuple[int, int]] = []

    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    # Variable-length tuples of strings, one tuple per ruby annotation.
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    tags: list[str] = []
    raw_tags: list[str] = []
class AltForm(ChineseBaseModel):
    # Item type of the `alt_of` and `form_of` lists on `Sense`.
    # `word` is the only required field.
    word: str
    tags: list[str] = []
    roman: str = ""
class Classifier(ChineseBaseModel):
    # Item type of `Sense.classifiers`; every field is optional.
    classifier: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
class ReferenceData(ChineseBaseModel):
    # Item type of `AttestationData.references`.
    # `text` is required; `refn` defaults to the empty string.
    text: str
    refn: str = ""
class AttestationData(ChineseBaseModel):
    # A required `date` string plus the references that back it.
    # Used as the item type of the `attestations` lists in this module.
    date: str
    references: list[ReferenceData] = []
class Sense(ChineseBaseModel):
    # Item type of `WordEntry.senses`. All fields are optional and default
    # to an empty list.

    # Plain string lists.
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []

    # Nested models and structured data.
    examples: list[Example] = []
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    alt_of: list[AltForm] = []
    form_of: list[AltForm] = []
    classifiers: list[Classifier] = []
    attestations: list[AttestationData] = []
class Form(ChineseBaseModel):
    # Item type of `WordEntry.forms`. Every field has a default, so an empty
    # `Form()` is valid.
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    source: str = ""
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    hiragana: str = ""
    roman: str = ""
    sense: str = ""
    attestations: list[AttestationData] = []
class Sound(ChineseBaseModel):
    # Item type of `WordEntry.sounds`. Every field is optional.

    # Fields that carry a schema description.
    zh_pron: str = Field(
        default="",
        description="Chinese word pronunciation",
    )
    ipa: str = Field(
        default="",
        description="International Phonetic Alphabet",
    )
    audio: str = Field(
        default="",
        description="Audio file name",
    )

    # One string field per audio format.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []
    homophone: str = ""
    enpr: str = Field(
        default="",
        description="English pronunciation",
    )
    other: str = ""
    roman: str = ""
    rhymes: str = ""
class Translation(ChineseBaseModel):
    # Item type of `WordEntry.translations`. Only `word` is required.
    lang_code: str = Field(
        default="",
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(
        default="",
        description="Translation language name",
    )
    # No default: a translation must always name its term.
    word: str = Field(
        description="Translation term",
    )
    sense: str = Field(
        default="",
        description="Translation gloss",
    )
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field(
        default="",
        description="Roman script",
    )
    alt: str = Field(
        default="",
        description="Alternative form",
    )
    lit: str = Field(
        default="",
        description="Literal translation for the term",
    )
    source: str = ""
class Linkage(ChineseBaseModel):
    # Shared item type of the linkage lists on `WordEntry` (`synonyms`,
    # `antonyms`, `derived`, `related`, ...). Every field is optional.
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    sense: str = ""
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    attestations: list[AttestationData] = []
class Descendant(ChineseBaseModel):
    # Item type of `WordEntry.descendants`. The model is recursive: each
    # instance can hold further `Descendant` items in `descendants`.
    lang_code: str = Field(
        default="",
        description="Wiktionary language code",
    )
    lang: str = Field(
        default="",
        description="Language name",
    )
    word: str = ""
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Quoted annotation: the class name is not bound yet while its own body
    # is being executed.
    descendants: list["Descendant"] = []
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    sense: str = ""
class Hyphenation(ChineseBaseModel):
    # Item type of `WordEntry.hyphenations`: a list of string parts.
    parts: list[str] = []
class WordEntry(ChineseBaseModel):
    # Top-level model of this module. `word`, `lang_code`, `lang` and `pos`
    # are required; everything else has a default.

    # Only the schema title is set here.
    # NOTE(review): pydantic merges `model_config` with the parent class
    # config, so the `ChineseBaseModel` settings should still apply - confirm.
    model_config = ConfigDict(title="Chinese Wiktionary")

    # --- required identification fields ---
    word: str = Field(
        description="Word string",
    )
    lang_code: str = Field(
        description="Wiktionary language code",
    )
    lang: str = Field(
        description="Localized language name",
    )
    pos: str = Field(
        description="Part of speech type",
    )

    pos_title: str = ""
    # `exclude=True` leaves this field out of serialized output.
    pos_level: NodeKind = Field(
        default=NodeKind.ROOT,
        exclude=True,
    )
    etymology_texts: list[str] = []
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field(
        default=[],
        description="Sense list",
    )
    forms: list[Form] = Field(
        default=[],
        description="Inflection forms list",
    )
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # --- linkage lists, all sharing the `Linkage` item type ---
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviations: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []

    # --- redirect pages ---
    title: str = Field(
        default="",
        description="Redirect page source title",
    )
    redirect: str = Field(
        default="",
        description="Redirect page target title",
    )

    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        default=[],
        description="Soft redirect page, extracted from template zh-see ja-see",
    )
    literal_meaning: str = ""
    original_title: str = ""
    anagrams: list[Linkage] = []
    hyphenations: list[Hyphenation] = []