Coverage for src / wiktextract / extractor / zh / models.py: 100%

144 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2025-12-29 01:50 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2from wikitextprocessor import NodeKind 

3 

4 

class ChineseBaseModel(BaseModel):
    # Common base class for the models in this module; its only content is
    # the pydantic configuration below.
    # NOTE(review): documented with comments rather than a docstring because
    # pydantic copies a model's docstring into its JSON schema description.
    model_config = ConfigDict(
        strict=True,  # strict mode: values are not coerced between types
        extra="forbid",  # field names not declared on the model are rejected
        validate_default=True,  # declared default values are validated too
        validate_assignment=True,  # assigning to an attribute re-validates it
    )

12 

13 

class Example(ChineseBaseModel):
    # Data for one usage example.  `text` is the only required field.
    # NOTE(review): documented with comments rather than a docstring because
    # pydantic copies a model's docstring into its JSON schema description.
    text: str = Field(
        # Fixed a duplicated word ("have have") in this schema description.
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    # Each bold_*_offsets list holds pairs of ints; presumably (start, end)
    # positions of bold spans in the matching string field - TODO confirm
    # against the extractor code that fills them.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="Chinese translation of the example sentence"
    )
    bold_translation_offsets: list[tuple[int, int]] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    roman: str = Field(
        default="", description="Romanization of the example sentence"
    )
    bold_roman_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    # Variable-length tuples of strings.
    ruby: list[tuple[str, ...]] = Field(
        default=[], description="Japanese Kanji and furigana"
    )
    # NOTE(review): raw_tags presumably keeps labels as found on the page,
    # while tags holds normalized ones - confirm against the extractor.
    tags: list[str] = []
    raw_tags: list[str] = []

39 

40 

class AltForm(ChineseBaseModel):
    # Target of an `alt_of` / `form_of` entry on Sense (the two places this
    # model is used in this file).
    word: str  # required; no default
    tags: list[str] = []
    roman: str = ""  # presumably the romanization of `word` - TODO confirm

45 

46 

class Classifier(ChineseBaseModel):
    # Element type of the `classifiers` lists on Sense and WordEntry.
    # Every field is optional.
    classifier: str = ""
    tags: list[str] = []
    # NOTE(review): presumably labels kept as found on the page - confirm.
    raw_tags: list[str] = []

51 

52 

class ReferenceData(ChineseBaseModel):
    # Element type of AttestationData.references.
    text: str  # required; no default
    # NOTE(review): presumably a reference name/identifier - confirm against
    # the code that fills it.
    refn: str = ""

56 

57 

class AttestationData(ChineseBaseModel):
    # Element type of the `attestations` lists on Sense, Form and Linkage.
    date: str  # required; stored as a plain string, no date parsing here
    references: list[ReferenceData] = []

61 

62 

class Sense(ChineseBaseModel):
    # Element type of WordEntry.senses.  Every field is optional and the
    # list fields default to empty.
    # NOTE(review): documented with comments rather than a docstring because
    # pydantic copies a model's docstring into its JSON schema description.

    # Plain string lists.
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []

    # Nested models and ruby annotations.
    examples: list[Example] = []
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    alt_of: list[AltForm] = []
    form_of: list[AltForm] = []
    classifiers: list[Classifier] = []
    attestations: list[AttestationData] = []

77 

78 

class Form(ChineseBaseModel):
    # Element type of WordEntry.forms.  Every field is optional.
    # NOTE(review): documented with comments rather than a docstring because
    # pydantic copies a model's docstring into its JSON schema description.
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    source: str = ""
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    # Plain-string companions of `form`.
    hiragana: str = ""
    roman: str = ""
    sense: str = ""
    attestations: list[AttestationData] = []

91 

92 

class Sound(ChineseBaseModel):
    # Element type of WordEntry.sounds.  Every field is optional; the string
    # fields default to "" and the list fields to [].
    # NOTE(review): documented with comments rather than a docstring because
    # pydantic copies a model's docstring into its JSON schema description.
    zh_pron: str = Field("", description="Chinese word pronunciation")
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")

    # One URL field per audio format.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []
    homophone: str = ""
    enpr: str = Field("", description="English pronunciation")
    other: str = ""
    roman: str = ""
    rhymes: str = ""
    hangeul: str = ""

111 

112 

class Translation(ChineseBaseModel):
    # Element type of WordEntry.translations.  `word` is the only required
    # field.
    # NOTE(review): documented with comments rather than a docstring because
    # pydantic copies a model's docstring into its JSON schema description.
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field("", description="Translation language name")
    word: str = Field(description="Translation term")
    sense: str = Field("", description="Translation gloss")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field("", description="Roman script")
    alt: str = Field("", description="Alternative form")
    lit: str = Field("", description="Literal translation for the term")
    source: str = ""

127 

128 

class Linkage(ChineseBaseModel):
    # Element type of WordEntry's term-list fields (synonyms, antonyms,
    # derived, related, anagrams, ...).  Every field is optional.
    # NOTE(review): documented with comments rather than a docstring because
    # pydantic copies a model's docstring into its JSON schema description.
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    sense: str = ""
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    attestations: list[AttestationData] = []

139 

140 

class Descendant(ChineseBaseModel):
    # Element type of WordEntry.descendants.  The model nests: each item can
    # carry its own `descendants` list.  Every field is optional.
    # NOTE(review): documented with comments rather than a docstring because
    # pydantic copies a model's docstring into its JSON schema description.
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = ""
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Quoted forward reference: the name `Descendant` is not bound yet while
    # the class body is being executed.
    descendants: list["Descendant"] = []
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    sense: str = ""

153 

154 

class Hyphenation(ChineseBaseModel):
    # Element type of WordEntry.hyphenations.
    # NOTE(review): `parts` presumably holds the pieces of the word between
    # hyphenation points, in order - confirm against the extractor.
    parts: list[str] = []

157 

158 

class WordEntry(ChineseBaseModel):
    # Top-level model of this module; it aggregates all the other models.
    # `word`, `lang_code`, `lang` and `pos` are required, everything else
    # has a default.
    # NOTE(review): documented with comments rather than a docstring because
    # pydantic copies a model's docstring into its JSON schema description.

    # Only the title is set here; pydantic merges it with the configuration
    # inherited from ChineseBaseModel.
    model_config = ConfigDict(title="Chinese Wiktionary")

    word: str = Field(description="Word string")
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Localized language name")
    pos: str = Field(description="Part of speech type")
    pos_title: str = ""
    # exclude=True leaves this field out of the serialized output.
    pos_level: NodeKind = Field(NodeKind.ROOT, exclude=True)
    etymology_texts: list[str] = []
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field([], description="Sense list")
    forms: list[Form] = Field([], description="Inflection forms list")
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # Term lists; all share the Linkage item type.
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviations: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []

    title: str = Field("", description="Redirect page source title")
    redirect: str = Field("", description="Redirect page target title")
    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        [],
        description="Soft redirect page, extracted from template zh-see ja-see",
    )
    literal_meaning: str = ""
    original_title: str = ""
    anagrams: list[Linkage] = []
    hyphenations: list[Hyphenation] = []
    classifiers: list[Classifier] = []