Coverage for src/wiktextract/extractor/zh/models.py: 100%
124 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-11 10:26 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-11 10:26 +0000
1from pydantic import BaseModel, ConfigDict, Field
2from wikitextprocessor import NodeKind
# Common base class for the models in this module.  Kept as a comment rather
# than a docstring because pydantic copies class docstrings into the
# generated JSON schema.
class ChineseBaseModel(BaseModel):
    model_config = ConfigDict(
        strict=True,  # no implicit type coercion during validation
        extra="forbid",  # unknown field names are a validation error
        validate_default=True,  # default values go through validation too
        validate_assignment=True,  # re-validate whenever an attribute is set
    )
# An example sentence with its translation, literal meaning and romanization.
# Kept as a comment rather than a docstring because pydantic copies class
# docstrings into the generated JSON schema.
class Example(ChineseBaseModel):
    # Fixed the duplicated word ("have have") in this description; the text
    # is emitted in the JSON schema.
    text: str = Field(
        default="",
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    # Each ``bold_*_offsets`` list holds integer pairs for the text field it
    # is named after.
    # NOTE(review): presumably (start, end) character offsets of bold spans;
    # confirm against the extractor code that fills them.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="Chinese translation of the example sentence"
    )
    bold_translation_offsets: list[tuple[int, int]] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    roman: str = Field(
        default="", description="Romanization of the example sentence"
    )
    bold_roman_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    ruby: list[tuple[str, ...]] = Field(
        default=[], description="Japanese Kanji and furigana"
    )
    tags: list[str] = []
    raw_tags: list[str] = []
# Entry type of the ``alt_of`` and ``form_of`` lists on ``Sense``.
class AltForm(ChineseBaseModel):
    # The only required field of this model (no default).
    word: str
    tags: list[str] = []
# Entry type of the ``classifiers`` list on ``Sense``; every field is optional.
class Classifier(ChineseBaseModel):
    classifier: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
# One sense of a word entry: glosses plus the tags, examples and related
# forms attached to it.  Every field defaults to empty.
class Sense(ChineseBaseModel):
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []
    examples: list[Example] = []
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    # Both lists share the AltForm model.
    alt_of: list[AltForm] = []
    form_of: list[AltForm] = []
    classifiers: list[Classifier] = []
# Entry type of the ``forms`` list on ``WordEntry``; every field is optional.
class Form(ChineseBaseModel):
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    source: str = ""
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    hiragana: str = ""
    roman: str = ""
    sense: str = ""
# Pronunciation data: transcriptions, an audio file name and per-format URLs.
# Every field is optional.
class Sound(ChineseBaseModel):
    zh_pron: str = Field("", description="Chinese word pronunciation")
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL field per audio format.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    homophone: str = ""
    enpr: str = Field("", description="English pronunciation")
    other: str = ""
    roman: str = ""
# A translation of the entry's word into another language.
class Translation(ChineseBaseModel):
    lang_code: str = Field(
        "",
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field("", description="Translation language name")
    # Required: ``...`` marks a field without a default.
    word: str = Field(..., description="Translation term")
    sense: str = Field("", description="Translation gloss")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field("", description="Roman script")
    alt: str = Field("", description="Alternative form")
    lit: str = Field("", description="Literal translation for the term")
# Entry type shared by the linkage lists on ``WordEntry`` (``synonyms``,
# ``antonyms``, ``derived``, ...).  Every field is optional.
class Linkage(ChineseBaseModel):
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    sense: str = ""
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
# A descendant term.  The model is recursive: each descendant can carry its
# own ``descendants`` list.
class Descendant(ChineseBaseModel):
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = ""
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Quoted forward reference: the class name is not bound yet while the
    # class body is being evaluated.
    descendants: list["Descendant"] = []
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
# Top-level model that aggregates all the other models in this module.
# Kept as a comment rather than a docstring because pydantic copies class
# docstrings into the generated JSON schema.
class WordEntry(ChineseBaseModel):
    # Only the title is set here; pydantic merges this with the config
    # inherited from ChineseBaseModel, so the strict settings still apply.
    model_config = ConfigDict(title="Chinese Wiktionary")

    # Required fields (``...`` marks a field without a default).
    word: str = Field(..., description="Word string")
    lang_code: str = Field(..., description="Wiktionary language code")
    lang: str = Field(..., description="Localized language name")
    pos: str = Field(..., description="Part of speech type")

    pos_title: str = ""
    # ``exclude=True`` keeps this field out of serialized output.
    pos_level: NodeKind = Field(NodeKind.ROOT, exclude=True)
    etymology_text: str = ""
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field([], description="Sense list")
    forms: list[Form] = Field([], description="Inflection forms list")
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # Linkage lists; all share the Linkage model.
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviation: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []

    # Redirect pages.
    title: str = Field("", description="Redirect page source title")
    redirect: str = Field("", description="Redirect page target title")

    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        [],
        description="Soft redirect page, extracted from template zh-see ja-see",
    )
    literal_meaning: str = ""