Coverage for src/wiktextract/extractor/zh/models.py: 100%
136 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from pydantic import BaseModel, ConfigDict, Field
2from wikitextprocessor import NodeKind
class ChineseBaseModel(BaseModel):
    # Common base class: it declares no fields and only carries the pydantic
    # configuration shared by the model classes deriving from it.
    model_config = ConfigDict(
        validate_default=True,  # default values go through validation too
        validate_assignment=True,  # re-validate when an attribute is set
        strict=True,  # no lax type coercion of input values
        extra="forbid",  # unknown field names are a validation error
    )
class Example(ChineseBaseModel):
    # One usage example.  Every field has a default, so an empty `Example()`
    # is valid.  A comment is used instead of a docstring on purpose: pydantic
    # copies class docstrings into the generated JSON schema.
    #
    # NOTE(review): the `bold_*_offsets` lists presumably hold (start, end)
    # offsets of bold spans inside the matching string field (`text`,
    # `translation`, `literal_meaning`, `roman`) - confirm against the
    # extractor code that fills them.
    text: str = Field(
        default="",
        # Fixed the duplicated word ("have have") in this schema description.
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="Chinese translation of the example sentence"
    )
    bold_translation_offsets: list[tuple[int, int]] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    roman: str = Field(
        default="", description="Romanization of the example sentence"
    )
    bold_roman_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    # Each entry is a variable-length tuple of strings.
    ruby: list[tuple[str, ...]] = Field(
        default=[], description="Japanese Kanji and furigana"
    )
    tags: list[str] = []
    raw_tags: list[str] = []
class AltForm(ChineseBaseModel):
    # Target of an "alt_of" / "form_of" entry on a sense.
    # `word` is the only required field.
    word: str
    tags: list[str] = Field(default=[])
    roman: str = Field(default="")
class Classifier(ChineseBaseModel):
    # A classifier string plus its tag lists; all fields are optional.
    classifier: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
class ReferenceData(ChineseBaseModel):
    # A single reference: required `text`, optional `refn`.
    # NOTE(review): `refn` is presumably the reference name/number from the
    # wiki markup - confirm against the code that populates it.
    text: str
    refn: str = Field(default="")
class AttestationData(ChineseBaseModel):
    # An attestation: a required date string and the references backing it.
    date: str
    references: list[ReferenceData] = Field(default=[])
class Sense(ChineseBaseModel):
    # One sense of a word.  Every field is a list that defaults to empty, so
    # `Sense()` is valid.  Field order is kept because it determines the
    # order of keys in serialized output.
    glosses: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    topics: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    examples: list[Example] = Field(default=[])
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana", default=[]
    )
    alt_of: list[AltForm] = Field(default=[])
    form_of: list[AltForm] = Field(default=[])
    classifiers: list[Classifier] = Field(default=[])
    attestations: list[AttestationData] = Field(default=[])
class Form(ChineseBaseModel):
    # One word form together with its annotations; every field has an empty
    # default.
    form: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    source: str = Field(default="")
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana", default=[]
    )
    hiragana: str = Field(default="")
    roman: str = Field(default="")
    sense: str = Field(default="")
    attestations: list[AttestationData] = Field(default=[])
class Sound(ChineseBaseModel):
    # Pronunciation data; every field has an empty default.
    zh_pron: str = Field(description="Chinese word pronunciation", default="")
    ipa: str = Field(description="International Phonetic Alphabet", default="")
    audio: str = Field(description="Audio file name", default="")
    # One URL field per audio file format.
    wav_url: str = Field(default="")
    oga_url: str = Field(default="")
    ogg_url: str = Field(default="")
    mp3_url: str = Field(default="")
    opus_url: str = Field(default="")
    flac_url: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    homophone: str = Field(default="")
    enpr: str = Field(description="English pronunciation", default="")
    other: str = Field(default="")
    roman: str = Field(default="")
class Translation(ChineseBaseModel):
    # One translation of the entry word.  `word` is the only required field.
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
        default="",
    )
    lang: str = Field(description="Translation language name", default="")
    word: str = Field(description="Translation term")
    sense: str = Field(description="Translation gloss", default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    roman: str = Field(description="Roman script", default="")
    alt: str = Field(description="Alternative form", default="")
    lit: str = Field(
        description="Literal translation for the term", default=""
    )
class Linkage(ChineseBaseModel):
    # A linked term with its annotations; every field has an empty default.
    word: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    roman: str = Field(default="")
    sense: str = Field(default="")
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana", default=[]
    )
    attestations: list[AttestationData] = Field(default=[])
class Descendant(ChineseBaseModel):
    # A descendant term; the model is recursive through `descendants`.
    lang_code: str = Field(description="Wiktionary language code", default="")
    lang: str = Field(description="Language name", default="")
    word: str = Field(default="")
    roman: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    # The quoted name is a forward reference to this class itself, which is
    # not yet bound while the class body is being evaluated.
    descendants: list["Descendant"] = Field(default=[])
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana", default=[]
    )
class WordEntry(ChineseBaseModel):
    # Top-level record for one word entry.  `word`, `lang_code`, `lang` and
    # `pos` are required; every other field has a default.

    # Only the title is set here.
    # NOTE(review): pydantic is documented to merge a subclass config with
    # the inherited one, so the ChineseBaseModel options should still apply.
    model_config = ConfigDict(title="Chinese Wiktionary")

    word: str = Field(description="Word string")
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Localized language name")
    pos: str = Field(description="Part of speech type")
    pos_title: str = Field(default="")
    # exclude=True keeps this field out of the serialized output.
    pos_level: NodeKind = Field(exclude=True, default=NodeKind.ROOT)
    etymology_text: str = Field(default="")
    etymology_examples: list[Example] = Field(default=[])
    senses: list[Sense] = Field(description="Sense list", default=[])
    forms: list[Form] = Field(description="Inflection forms list", default=[])
    sounds: list[Sound] = Field(default=[])
    translations: list[Translation] = Field(default=[])
    # Linkage lists, one per relation kind.
    synonyms: list[Linkage] = Field(default=[])
    hyponyms: list[Linkage] = Field(default=[])
    hypernyms: list[Linkage] = Field(default=[])
    holonyms: list[Linkage] = Field(default=[])
    meronyms: list[Linkage] = Field(default=[])
    derived: list[Linkage] = Field(default=[])
    troponyms: list[Linkage] = Field(default=[])
    paronyms: list[Linkage] = Field(default=[])
    related: list[Linkage] = Field(default=[])
    abbreviation: list[Linkage] = Field(default=[])
    proverbs: list[Linkage] = Field(default=[])
    antonyms: list[Linkage] = Field(default=[])
    coordinate_terms: list[Linkage] = Field(default=[])
    various: list[Linkage] = Field(default=[])
    compounds: list[Linkage] = Field(default=[])
    title: str = Field(
        description="Redirect page source title", default=""
    )
    redirect: str = Field(
        description="Redirect page target title", default=""
    )
    categories: list[str] = Field(default=[])
    notes: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    descendants: list[Descendant] = Field(default=[])
    redirects: list[str] = Field(
        description="Soft redirect page, extracted from template zh-see ja-see",
        default=[],
    )
    literal_meaning: str = Field(default="")
    original_title: str = Field(default="")
    anagrams: list[Linkage] = Field(default=[])