Coverage for src/wiktextract/extractor/zh/models.py: 100%

138 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-12 08:27 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2from wikitextprocessor import NodeKind 

3 

4 

# Common pydantic base class for the models declared in this module; it
# only carries the shared validation configuration.
# NOTE: documented with comments instead of a docstring because pydantic
# copies a model's docstring into the generated JSON schema description.
class ChineseBaseModel(BaseModel):
    model_config = ConfigDict(
        strict=True,  # strict mode: no lax type coercion
        extra="forbid",  # unknown field names are rejected
        validate_default=True,  # default values are validated as well
        validate_assignment=True,  # re-validate on attribute assignment
    )

12 

13 

# A usage example; instances are stored in `Sense.examples` and
# `WordEntry.etymology_examples`. Every field is optional.
# NOTE: documented with comments instead of a docstring because pydantic
# copies a model's docstring into the generated JSON schema description.
class Example(ChineseBaseModel):
    text: str = Field(
        default="",
        # Fixed the duplicated word ("have have") in this description; the
        # text is emitted in the generated JSON schema.
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    # The `bold_*_offsets` fields are presumably (start, end) index pairs of
    # bold spans in the matching text field — confirm against the extractor.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="Chinese translation of the example sentence"
    )
    bold_translation_offsets: list[tuple[int, int]] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    roman: str = Field(
        default="", description="Romanization of the example sentence"
    )
    bold_roman_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    ruby: list[tuple[str, ...]] = Field(
        default=[], description="Japanese Kanji and furigana"
    )
    tags: list[str] = []
    raw_tags: list[str] = []

40 

41 

# Entry type of the `Sense.alt_of` and `Sense.form_of` lists.
class AltForm(ChineseBaseModel):
    word: str  # required: declared without a default
    tags: list[str] = Field(default=[])
    roman: str = Field(default="")

46 

47 

# Entry type of the `Sense.classifiers` list; all fields are optional.
class Classifier(ChineseBaseModel):
    classifier: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])

52 

53 

# Entry type of the `AttestationData.references` list.
class ReferenceData(ChineseBaseModel):
    text: str  # required: declared without a default
    refn: str = Field(default="")  # optional; meaning not visible in this file

57 

58 

# Attestation record: a required `date` string plus optional references.
# Used by the `attestations` lists of `Sense`, `Form` and `Linkage`.
class AttestationData(ChineseBaseModel):
    date: str
    references: list[ReferenceData] = Field(default=[])

62 

63 

# One sense of a word entry (entry type of `WordEntry.senses`).
# Every field is optional and defaults to an empty list.
class Sense(ChineseBaseModel):
    glosses: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    topics: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    examples: list[Example] = Field(default=[])
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana", default=[]
    )
    alt_of: list[AltForm] = Field(default=[])
    form_of: list[AltForm] = Field(default=[])
    classifiers: list[Classifier] = Field(default=[])
    attestations: list[AttestationData] = Field(default=[])

78 

79 

# Entry type of `WordEntry.forms` (described there as the inflection forms
# list). Every field is optional.
class Form(ChineseBaseModel):
    form: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    source: str = Field(default="")
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana", default=[]
    )
    hiragana: str = Field(default="")
    roman: str = Field(default="")
    sense: str = Field(default="")
    attestations: list[AttestationData] = Field(default=[])

92 

93 

# Pronunciation data (entry type of `WordEntry.sounds`).
# Every field is optional.
class Sound(ChineseBaseModel):
    zh_pron: str = Field("", description="Chinese word pronunciation")
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL field per audio format; presumably these point at converted
    # versions of `audio` — confirm against the code that fills them.
    wav_url: str = Field(default="")
    oga_url: str = Field(default="")
    ogg_url: str = Field(default="")
    mp3_url: str = Field(default="")
    opus_url: str = Field(default="")
    flac_url: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    homophone: str = Field(default="")
    enpr: str = Field("", description="English pronunciation")
    other: str = Field(default="")
    roman: str = Field(default="")

110 

111 

# A translation of the entry's word (entry type of
# `WordEntry.translations`). Only `word` is required.
class Translation(ChineseBaseModel):
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field("", description="Translation language name")
    word: str = Field(description="Translation term")  # required
    sense: str = Field("", description="Translation gloss")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    roman: str = Field("", description="Roman script")
    alt: str = Field("", description="Alternative form")
    lit: str = Field("", description="Literal translation for the term")
    source: str = Field(default="")

126 

127 

# A linked term; entry type of the `WordEntry` relation lists such as
# `synonyms`, `antonyms` and `derived`. Every field is optional.
class Linkage(ChineseBaseModel):
    word: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    roman: str = Field(default="")
    sense: str = Field(default="")
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana", default=[]
    )
    attestations: list[AttestationData] = Field(default=[])

138 

139 

# A descendant term (entry type of `WordEntry.descendants`). The model is
# recursive: each descendant can hold its own `descendants` list.
class Descendant(ChineseBaseModel):
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = Field(default="")
    roman: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    # Quoted forward reference: the class name is not bound yet while the
    # class body is being evaluated.
    descendants: list["Descendant"] = Field(default=[])
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana", default=[]
    )
    sense: str = Field(default="")

152 

153 

# Top-level record for one word entry. `word`, `lang_code`, `lang` and
# `pos` are required; everything else has a default.
# NOTE: documented with comments instead of a docstring because pydantic
# copies a model's docstring into the generated JSON schema description.
class WordEntry(ChineseBaseModel):
    # Only `title` is set here; pydantic is expected to merge this with the
    # parent's `model_config`, keeping the ChineseBaseModel settings — confirm.
    model_config = ConfigDict(title="Chinese Wiktionary")

    # Required identification fields.
    word: str = Field(description="Word string")
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Localized language name")
    pos: str = Field(description="Part of speech type")
    pos_title: str = ""
    # `exclude=True` keeps this field out of the serialized output.
    pos_level: NodeKind = Field(NodeKind.ROOT, exclude=True)

    etymology_text: str = ""
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field([], description="Sense list")
    forms: list[Form] = Field([], description="Inflection forms list")
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # Relation lists; all share the `Linkage` entry type.
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviation: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []

    title: str = Field("", description="Redirect page source title")
    redirect: str = Field("", description="Redirect page target title")
    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        [],
        description="Soft redirect page, extracted from template zh-see ja-see",
    )
    literal_meaning: str = ""
    original_title: str = ""
    anagrams: list[Linkage] = []