Coverage for src / wiktextract / extractor / zh / models.py: 100%

142 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-01 03:02 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2from wikitextprocessor import NodeKind 

3 

4 

class ChineseBaseModel(BaseModel):
    # Common base for every model in this module; it only carries the
    # shared pydantic configuration and declares no fields of its own.
    model_config = ConfigDict(
        # Unknown field names are rejected instead of silently ignored.
        extra="forbid",
        # No implicit type coercion (e.g. "1" is not accepted for an int).
        strict=True,
        # Attribute assignment after construction is validated as well.
        validate_assignment=True,
        # Declared default values go through validation too.
        validate_default=True,
    )

12 

13 

class Example(ChineseBaseModel):
    # One example sentence together with its optional translation,
    # romanization, literal meaning and source reference.
    # Every field has a default, so an empty Example() is valid.
    #
    # NOTE: documentation is kept in `#` comments on purpose; a class
    # docstring on a pydantic model becomes the schema "description".

    # Fixed: the description used to read "might have have both".
    text: str = Field(
        default="",
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    # Presumably (start, end) offsets of bold spans inside `text`;
    # the same pattern repeats for the other bold_*_offsets fields.
    # TODO confirm the exact offset convention against the extractor.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="Chinese translation of the example sentence"
    )
    bold_translation_offsets: list[tuple[int, int]] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    roman: str = Field(
        default="", description="Romanization of the example sentence"
    )
    bold_roman_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    # Variable-length string tuples, one tuple per ruby annotation.
    ruby: list[tuple[str, ...]] = Field(
        default=[], description="Japanese Kanji and furigana"
    )
    tags: list[str] = []
    raw_tags: list[str] = []

40 

41 

class AltForm(ChineseBaseModel):
    # Target of an "alt_of" / "form_of" entry on a Sense.
    # `word` has no default and must always be supplied.
    word: str
    tags: list[str] = []
    roman: str = ""

46 

47 

class Classifier(ChineseBaseModel):
    # A classifier string plus its tags; used by Sense.classifiers.
    # All fields are optional.
    classifier: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []

52 

53 

class ReferenceData(ChineseBaseModel):
    # A single reference entry, listed under AttestationData.references.
    # `text` is required.
    text: str
    # Presumably a reference name/identifier - TODO confirm with caller.
    refn: str = ""

57 

58 

class AttestationData(ChineseBaseModel):
    # An attestation: a required date string with optional references.
    date: str
    references: list[ReferenceData] = []

62 

63 

class Sense(ChineseBaseModel):
    # One sense (meaning) of a word entry. Every field is optional and
    # defaults to an empty list.
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []
    examples: list[Example] = []
    # Variable-length string tuples, one tuple per ruby annotation.
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    # Both relation lists share the AltForm model.
    alt_of: list[AltForm] = []
    form_of: list[AltForm] = []
    classifiers: list[Classifier] = []
    attestations: list[AttestationData] = []

78 

79 

class Form(ChineseBaseModel):
    # One item of WordEntry.forms (described there as the inflection
    # forms list). Every field is optional.
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    source: str = ""
    # Variable-length string tuples, one tuple per ruby annotation.
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    hiragana: str = ""
    roman: str = ""
    sense: str = ""
    attestations: list[AttestationData] = []

92 

93 

class Sound(ChineseBaseModel):
    # Pronunciation data for a word entry. Every field is optional.
    zh_pron: str = Field("", description="Chinese word pronunciation")
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL field per audio container/codec.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    homophone: str = ""
    enpr: str = Field("", description="English pronunciation")
    other: str = ""
    roman: str = ""
    rhymes: str = ""

111 

112 

class Translation(ChineseBaseModel):
    # A translation of the entry word into another language.
    # Only `word` is required; everything else has a default.
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field("", description="Translation language name")
    # No default is given, so pydantic treats this field as required.
    word: str = Field(description="Translation term")
    sense: str = Field("", description="Translation gloss")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field("", description="Roman script")
    alt: str = Field("", description="Alternative form")
    lit: str = Field(
        "", description="Literal translation for the term"
    )
    source: str = ""

127 

128 

class Linkage(ChineseBaseModel):
    # A linked term; the element type of WordEntry's relation lists
    # (synonyms, antonyms, derived, ...). Every field is optional.
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    sense: str = ""
    # Variable-length string tuples, one tuple per ruby annotation.
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    attestations: list[AttestationData] = []

139 

140 

class Descendant(ChineseBaseModel):
    # A descendant term. The model is recursive: each descendant may
    # carry its own nested list of descendants. Every field is optional.
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = ""
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Quoted forward reference: the class name is not bound yet while
    # the class body is being evaluated.
    descendants: list["Descendant"] = []
    # Variable-length string tuples, one tuple per ruby annotation.
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    sense: str = ""

153 

154 

class Hyphenation(ChineseBaseModel):
    # One hyphenation of the entry word, stored as its list of parts.
    parts: list[str] = []

157 

158 

class WordEntry(ChineseBaseModel):
    # Top-level record for one extracted word entry.
    # `word`, `lang_code`, `lang` and `pos` are required; every other
    # field has a default.

    # Only the title is set here. Per pydantic's config inheritance a
    # subclass model_config is merged with the parent's, so the
    # ChineseBaseModel settings continue to apply.
    model_config = ConfigDict(title="Chinese Wiktionary")

    # --- identification (required) ---
    word: str = Field(description="Word string")
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Localized language name")
    pos: str = Field(description="Part of speech type")
    pos_title: str = ""
    # exclude=True keeps this field out of the serialized output.
    pos_level: NodeKind = Field(NodeKind.ROOT, exclude=True)

    # --- etymology, senses, forms, pronunciation ---
    etymology_text: str = ""
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field([], description="Sense list")
    forms: list[Form] = Field([], description="Inflection forms list")
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # --- linkage lists, all sharing the Linkage model ---
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviation: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []

    # --- redirect pages ---
    title: str = Field("", description="Redirect page source title")
    redirect: str = Field("", description="Redirect page target title")

    # --- miscellaneous entry-level data ---
    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        [],
        description="Soft redirect page, extracted from template zh-see ja-see",
    )
    literal_meaning: str = ""
    original_title: str = ""
    anagrams: list[Linkage] = []
    hyphenations: list[Hyphenation] = []