Coverage for src / wiktextract / extractor / zh / models.py: 100%

142 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-05 07:46 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2from wikitextprocessor import NodeKind 

3 

4 

# Common base for every model in this module. The shared configuration
# rejects unknown fields (extra="forbid"), disables type coercion
# (strict=True), and validates both assignments and default values.
class ChineseBaseModel(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
        strict=True,
        validate_assignment=True,
        validate_default=True,
    )

12 

13 

# A usage example; referenced from ``Sense.examples`` and
# ``WordEntry.etymology_examples``.
#
# Each ``bold_*_offsets`` field is a list of (int, int) pairs that sits next
# to a text field (text, translation, literal_meaning, roman).
# NOTE(review): presumably these are start/end positions of bold spans in
# the matching string - confirm against the extractor code.
class Example(ChineseBaseModel):
    text: str = Field(
        # Fixed the doubled word ("have have") that leaked into the
        # published schema description.
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="Chinese translation of the example sentence"
    )
    bold_translation_offsets: list[tuple[int, int]] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    roman: str = Field(
        default="", description="Romanization of the example sentence"
    )
    bold_roman_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    ruby: list[tuple[str, ...]] = Field(
        default=[], description="Japanese Kanji and furigana"
    )
    tags: list[str] = []
    raw_tags: list[str] = []

39 

40 

# Item type of ``Sense.alt_of`` and ``Sense.form_of``.
# ``word`` is the only required field.
class AltForm(ChineseBaseModel):
    word: str
    tags: list[str] = []
    roman: str = ""

45 

46 

# Item type of ``Sense.classifiers``; every field is optional.
class Classifier(ChineseBaseModel):
    classifier: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []

51 

52 

# Item type of ``AttestationData.references``.
# ``text`` is required; ``refn`` defaults to the empty string.
class ReferenceData(ChineseBaseModel):
    text: str
    refn: str = ""

56 

57 

# Attestation record used by the ``attestations`` lists on Sense, Form and
# Linkage: a required date string plus optional references.
class AttestationData(ChineseBaseModel):
    date: str
    references: list[ReferenceData] = []

61 

62 

# Item type of ``WordEntry.senses``. Every field is optional and defaults
# to an empty list.
class Sense(ChineseBaseModel):
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []
    examples: list[Example] = []
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    alt_of: list[AltForm] = []
    form_of: list[AltForm] = []
    classifiers: list[Classifier] = []
    attestations: list[AttestationData] = []

77 

78 

# Item type of ``WordEntry.forms``; every field has an empty default.
class Form(ChineseBaseModel):
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    source: str = ""
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    hiragana: str = ""
    roman: str = ""
    sense: str = ""
    attestations: list[AttestationData] = []

91 

92 

# Item type of ``WordEntry.sounds``. All fields are optional; the ``*_url``
# fields are plain strings, one per audio format suffix.
class Sound(ChineseBaseModel):
    zh_pron: str = Field("", description="Chinese word pronunciation")
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    homophone: str = ""
    enpr: str = Field("", description="English pronunciation")
    other: str = ""
    roman: str = ""
    rhymes: str = ""

110 

111 

# Item type of ``WordEntry.translations``. ``word`` is the only required
# field; everything else falls back to an empty string or list.
class Translation(ChineseBaseModel):
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field("", description="Translation language name")
    word: str = Field(description="Translation term")
    sense: str = Field("", description="Translation gloss")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field("", description="Roman script")
    alt: str = Field("", description="Alternative form")
    lit: str = Field("", description="Literal translation for the term")
    source: str = ""

126 

127 

# Shared item type for the linkage lists on ``WordEntry`` (synonyms,
# antonyms, derived, related, anagrams, ...). All fields are optional.
class Linkage(ChineseBaseModel):
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    sense: str = ""
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    attestations: list[AttestationData] = []

138 

139 

# Item type of ``WordEntry.descendants``. The model is recursive: each
# item may carry its own ``descendants`` list, hence the quoted forward
# reference to the class name.
class Descendant(ChineseBaseModel):
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = ""
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list["Descendant"] = []
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    sense: str = ""

152 

153 

# Item type of ``WordEntry.hyphenations``: a list of string parts.
class Hyphenation(ChineseBaseModel):
    parts: list[str] = []

156 

157 

# Top-level record of the module. ``word``, ``lang_code``, ``lang`` and
# ``pos`` are required; every other field has an empty default.
class WordEntry(ChineseBaseModel):
    # Only the title is set here; pydantic merges this with the config
    # declared on ChineseBaseModel.
    model_config = ConfigDict(title="Chinese Wiktionary")

    word: str = Field(description="Word string")
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Localized language name")
    pos: str = Field(description="Part of speech type")
    pos_title: str = ""
    # exclude=True: the field is left out when the model is serialized.
    pos_level: NodeKind = Field(NodeKind.ROOT, exclude=True)
    etymology_texts: list[str] = []
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field([], description="Sense list")
    forms: list[Form] = Field([], description="Inflection forms list")
    sounds: list[Sound] = []
    translations: list[Translation] = []
    # Linkage lists; all share the Linkage item type.
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviations: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []
    title: str = Field("", description="Redirect page source title")
    redirect: str = Field("", description="Redirect page target title")
    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        [],
        description="Soft redirect page, extracted from template zh-see ja-see",
    )
    literal_meaning: str = ""
    original_title: str = ""
    anagrams: list[Linkage] = []
    hyphenations: list[Hyphenation] = []