Coverage for src/wiktextract/extractor/zh/models.py: 100%
114 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from pydantic import BaseModel, ConfigDict, Field
# Common pydantic base class for the models declared in this file.
class ChineseBaseModel(BaseModel):
    # Shared validation policy: unknown fields are rejected, values are
    # validated in strict mode, and validation also runs on attribute
    # assignment and on declared default values.
    model_config = ConfigDict(
        validate_default=True,
        validate_assignment=True,
        strict=True,
        extra="forbid",
    )
# A usage example; referenced by Sense.examples and
# WordEntry.etymology_examples elsewhere in this file.
# Documented with comments rather than a class docstring because pydantic
# would publish a docstring as the JSON-schema description of the model.
class Example(ChineseBaseModel):
    # Fixed the duplicated word ("have have") in the schema description.
    text: str = Field(
        default="",
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    translation: str = Field(
        default="", description="Chinese translation of the example sentence"
    )
    literal_meaning: str = ""
    roman: str = Field(
        default="", description="Romanization of the example sentence"
    )
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    # Variable-length string tuples, one per ruby annotation.
    ruby: list[tuple[str, ...]] = Field(
        default=[], description="Japanese Kanji and furigana"
    )
    tags: list[str] = []
    raw_tags: list[str] = []
# Target word of an "alt_of" / "form_of" entry on a Sense.
class AltForm(ChineseBaseModel):
    # Required: no default is provided.
    word: str = Field(...)
    tags: list[str] = []
# A classifier string plus its tags; used by Sense.classifiers.
class Classifier(ChineseBaseModel):
    classifier: str = Field("")
    tags: list[str] = []
    raw_tags: list[str] = []
# One sense of a word entry: glosses plus the data attached to them.
# Every field is optional and defaults to an empty list.
class Sense(ChineseBaseModel):
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []
    examples: list[Example] = []
    # Variable-length string tuples, one per ruby annotation.
    ruby: list[tuple[str, ...]] = Field(
        [],
        description="Japanese Kanji and furigana",
    )
    # Both relations share the AltForm shape.
    alt_of: list[AltForm] = []
    form_of: list[AltForm] = []
    classifiers: list[Classifier] = []
# One entry of WordEntry.forms; every field is optional.
class Form(ChineseBaseModel):
    form: str = Field("")
    tags: list[str] = []
    raw_tags: list[str] = []
    source: str = Field("")
    # Variable-length string tuples, one per ruby annotation.
    ruby: list[tuple[str, ...]] = Field(
        [],
        description="Japanese Kanji and furigana",
    )
    hiragana: str = Field("")
    roman: str = Field("")
# Pronunciation data for a word entry; every field is optional.
class Sound(ChineseBaseModel):
    zh_pron: str = Field("", description="Chinese word pronunciation")
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL field per audio format.
    wav_url: str = Field("")
    oga_url: str = Field("")
    ogg_url: str = Field("")
    mp3_url: str = Field("")
    opus_url: str = Field("")
    flac_url: str = Field("")
    tags: list[str] = []
    raw_tags: list[str] = []
    homophone: str = Field("")
    enpr: str = Field("", description="English pronunciation")
# A translation of the entry into another language.
# Only `word` is required; everything else has an empty default.
class Translation(ChineseBaseModel):
    lang_code: str = Field(
        "",
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field("", description="Translation language name")
    word: str = Field(..., description="Translation term")
    sense: str = Field("", description="Translation gloss")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field("", description="Roman script")
    alt: str = Field("", description="Alternative form")
    lit: str = Field("", description="Literal translation for the term")
# A linked term; the element type of the relation lists on WordEntry
# (synonyms, antonyms, derived, ...). Every field is optional.
class Linkage(ChineseBaseModel):
    word: str = Field("")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field("")
    sense: str = Field("")
    # Variable-length string tuples, one per ruby annotation.
    ruby: list[tuple[str, ...]] = Field(
        [],
        description="Japanese Kanji and furigana",
    )
# A descendant term; the model is recursive through `descendants`.
class Descendant(ChineseBaseModel):
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = Field("")
    roman: str = Field("")
    tags: list[str] = []
    raw_tags: list[str] = []
    # Quoted forward reference: the class name is not bound yet while the
    # class body is being evaluated.
    descendants: list["Descendant"] = []
    # Variable-length string tuples, one per ruby annotation.
    ruby: list[tuple[str, ...]] = Field(
        [],
        description="Japanese Kanji and furigana",
    )
# Top-level record for one word entry; aggregates the other models in this
# file. `word`, `lang_code`, `lang` and `pos` are required, all other
# fields default to an empty string or list.
class WordEntry(ChineseBaseModel):
    # Only the title is set here; pydantic merges it with the config
    # inherited from ChineseBaseModel.
    model_config = ConfigDict(title="Chinese Wiktionary")

    # Required identification fields.
    word: str = Field(..., description="Word string")
    lang_code: str = Field(..., description="Wiktionary language code")
    lang: str = Field(..., description="Localized language name")
    pos: str = Field(..., description="Part of speech type")

    etymology_text: str = Field("")
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field([], description="Sense list")
    forms: list[Form] = Field([], description="Inflection forms list")
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # Relation lists; all of them hold Linkage items.
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviation: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []

    # Redirect page data.
    title: str = Field("", description="Redirect page source title")
    redirect: str = Field("", description="Redirect page target title")

    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        [],
        description="Soft redirect page, extracted from template zh-see ja-see",
    )
    literal_meaning: str = Field("")