Coverage for src/wiktextract/extractor/zh/models.py: 100%
138 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1from pydantic import BaseModel, ConfigDict, Field
2from wikitextprocessor import NodeKind
# Common base class for every data model in this module; subclasses pick up
# the pydantic configuration declared here.
class ChineseBaseModel(BaseModel):
    model_config = ConfigDict(
        # Unknown keys are an error instead of being silently dropped.
        extra="forbid",
        # No implicit type coercion of input values.
        strict=True,
        # Re-run validation when an attribute is assigned after construction.
        validate_assignment=True,
        # Declared default values are validated as well.
        validate_default=True,
    )
# A single usage example. Used by `Sense.examples` and
# `WordEntry.etymology_examples` in this module.
#
# Every field is optional and defaults to an empty string or empty list.
class Example(ChineseBaseModel):
    # Fixed: the description used to contain the doubled word "have have".
    text: str = Field(
        default="",
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    # The `bold_*_offsets` fields hold integer pairs; presumably (start, end)
    # positions of bold spans in the matching text field -- TODO confirm
    # against the extractor code that fills them.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="Chinese translation of the example sentence"
    )
    bold_translation_offsets: list[tuple[int, int]] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    roman: str = Field(
        default="", description="Romanization of the example sentence"
    )
    bold_roman_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    ruby: list[tuple[str, ...]] = Field(
        default=[], description="Japanese Kanji and furigana"
    )
    tags: list[str] = []
    raw_tags: list[str] = []
# Target of an "alternative form of" / "form of" relation; used by
# `Sense.alt_of` and `Sense.form_of` in this module.
class AltForm(ChineseBaseModel):
    # Required: the only field without a default.
    word: str

    # Optional extras.
    tags: list[str] = []
    roman: str = ""
# A classifier entry, collected per sense in `Sense.classifiers`.
# All fields are optional.
class Classifier(ChineseBaseModel):
    classifier: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []
# One reference attached to an attestation (see `AttestationData.references`).
class ReferenceData(ChineseBaseModel):
    # Required reference text.
    text: str

    # Optional; presumably the reference's name/identifier -- TODO confirm.
    refn: str = ""
# An attestation: a required date string plus any number of references.
# Used by the `attestations` field of `Sense`, `Form` and `Linkage`.
class AttestationData(ChineseBaseModel):
    # Required; kept as a plain string, no date parsing happens here.
    date: str

    references: list[ReferenceData] = []
# One sense (meaning) of a word entry; stored in `WordEntry.senses`.
# Every field is optional and defaults to an empty list.
class Sense(ChineseBaseModel):
    # Plain string lists.
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []

    # Nested models and structured data.
    examples: list[Example] = []
    ruby: list[tuple[str, ...]] = Field(
        [],
        description="Japanese Kanji and furigana",
    )
    alt_of: list[AltForm] = []
    form_of: list[AltForm] = []
    classifiers: list[Classifier] = []
    attestations: list[AttestationData] = []
# A word form; stored in `WordEntry.forms` ("Inflection forms list").
# Every field is optional.
class Form(ChineseBaseModel):
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    source: str = ""
    ruby: list[tuple[str, ...]] = Field(
        [],
        description="Japanese Kanji and furigana",
    )
    hiragana: str = ""
    roman: str = ""
    sense: str = ""
    attestations: list[AttestationData] = []
# Pronunciation data for a word entry; stored in `WordEntry.sounds`.
# Every field is optional.
class Sound(ChineseBaseModel):
    zh_pron: str = Field("", description="Chinese word pronunciation")
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")

    # One URL field per audio format.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []
    homophone: str = ""
    enpr: str = Field("", description="English pronunciation")
    other: str = ""
    roman: str = ""
# A translation of the entry into another language; stored in
# `WordEntry.translations`.
class Translation(ChineseBaseModel):
    lang_code: str = Field(
        "",
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field("", description="Translation language name")
    # Required: the only field declared without a default.
    word: str = Field(description="Translation term")
    sense: str = Field("", description="Translation gloss")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field("", description="Roman script")
    alt: str = Field("", description="Alternative form")
    lit: str = Field("", description="Literal translation for the term")
    source: str = ""
# A link to another term. `WordEntry` uses this model for all of its
# relation lists (synonyms, antonyms, derived, related, ...).
# Every field is optional.
class Linkage(ChineseBaseModel):
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    sense: str = ""
    ruby: list[tuple[str, ...]] = Field(
        [],
        description="Japanese Kanji and furigana",
    )
    attestations: list[AttestationData] = []
# A descendant term in another language; stored in `WordEntry.descendants`.
# The model is recursive: each descendant can list its own descendants.
class Descendant(ChineseBaseModel):
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = ""
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Quoted forward reference because the class refers to itself.
    descendants: list["Descendant"] = []
    ruby: list[tuple[str, ...]] = Field(
        [],
        description="Japanese Kanji and furigana",
    )
    sense: str = ""
# Top-level model of this module: one extracted entry (a word in one
# language with one part of speech), aggregating the other models above.
class WordEntry(ChineseBaseModel):
    # Only the title is set here. NOTE(review): this relies on pydantic
    # merging a subclass's config with the parent's, so the options from
    # ChineseBaseModel should still apply -- confirm against the pydantic docs.
    model_config = ConfigDict(title="Chinese Wiktionary")

    # Required identification fields (declared without defaults).
    word: str = Field(description="Word string")
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Localized language name")
    pos: str = Field(description="Part of speech type")

    pos_title: str = ""
    # Declared with exclude=True, so it is left out of serialized output.
    pos_level: NodeKind = Field(NodeKind.ROOT, exclude=True)
    etymology_text: str = ""
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field([], description="Sense list")
    forms: list[Form] = Field([], description="Inflection forms list")
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # Relations to other terms; each is a list of `Linkage`.
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviation: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []

    # Redirect pages.
    title: str = Field("", description="Redirect page source title")
    redirect: str = Field("", description="Redirect page target title")

    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        [],
        description="Soft redirect page, extracted from template zh-see ja-see",
    )
    literal_meaning: str = ""
    original_title: str = ""
    anagrams: list[Linkage] = []