Coverage for src/wiktextract/extractor/zh/models.py: 100%

136 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2from wikitextprocessor import NodeKind 

3 

4 

class ChineseBaseModel(BaseModel):
    # Common base class: every model in this module inherits this pydantic
    # configuration.
    model_config = ConfigDict(
        validate_default=True,  # default values are validated as well
        validate_assignment=True,  # re-validate on attribute assignment
        strict=True,  # strict mode: no implicit type coercion
        extra="forbid",  # undeclared fields are a validation error
    )

12 

13 

class Example(ChineseBaseModel):
    # A single usage example; this model is the element type of
    # ``Sense.examples`` and ``WordEntry.etymology_examples``.
    #
    # NOTE(review): each ``bold_*_offsets`` list presumably holds offset
    # pairs marking bold spans inside the string field declared directly
    # above it -- confirm against the extractor code that fills them.
    text: str = Field(
        default="",
        # The original description contained a duplicated word
        # ("have have"); it is emitted in the JSON schema, so it is fixed.
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="Chinese translation of the example sentence"
    )
    bold_translation_offsets: list[tuple[int, int]] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    roman: str = Field(
        default="", description="Romanization of the example sentence"
    )
    bold_roman_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    # Variable-length string tuples, as declared by ``tuple[str, ...]``.
    ruby: list[tuple[str, ...]] = Field(
        default=[], description="Japanese Kanji and furigana"
    )
    tags: list[str] = []
    raw_tags: list[str] = []

40 

41 

class AltForm(ChineseBaseModel):
    # Element type of ``Sense.alt_of`` and ``Sense.form_of``.
    # ``word`` has no default, so it must always be supplied.
    word: str = Field()
    tags: list[str] = Field(default=[])
    roman: str = Field(default="")

46 

47 

class Classifier(ChineseBaseModel):
    # Element type of ``Sense.classifiers``; every field is optional.
    classifier: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])

52 

53 

class ReferenceData(ChineseBaseModel):
    # Element type of ``AttestationData.references``.
    # ``text`` is required; ``refn`` defaults to the empty string.
    text: str = Field()
    refn: str = Field(default="")

57 

58 

class AttestationData(ChineseBaseModel):
    # Used as the ``attestations`` element type on Sense, Form and Linkage.
    # ``date`` is required and stored as a plain string.
    date: str = Field()
    references: list[ReferenceData] = Field(default=[])

62 

63 

class Sense(ChineseBaseModel):
    # Element type of ``WordEntry.senses``; every field has a default.

    # Plain string lists.
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []

    # Nested models and structured data.
    examples: list[Example] = []
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana",
        default=[],
    )
    alt_of: list[AltForm] = []
    form_of: list[AltForm] = []
    classifiers: list[Classifier] = []
    attestations: list[AttestationData] = []

78 

79 

class Form(ChineseBaseModel):
    # Element type of ``WordEntry.forms``; every field has a default.
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    source: str = ""
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana",
        default=[],
    )
    hiragana: str = ""
    roman: str = ""
    sense: str = ""
    attestations: list[AttestationData] = []

92 

93 

class Sound(ChineseBaseModel):
    # Element type of ``WordEntry.sounds``; every field has a default.
    zh_pron: str = Field(
        description="Chinese word pronunciation",
        default="",
    )
    ipa: str = Field(
        description="International Phonetic Alphabet",
        default="",
    )
    audio: str = Field(description="Audio file name", default="")

    # One string field per audio format suffix.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []
    homophone: str = ""
    enpr: str = Field(description="English pronunciation", default="")
    other: str = ""
    roman: str = ""

110 

111 

class Translation(ChineseBaseModel):
    # Element type of ``WordEntry.translations``.
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
        default="",
    )
    lang: str = Field(description="Translation language name", default="")
    # The only field without a default, hence required.
    word: str = Field(description="Translation term")
    sense: str = Field(description="Translation gloss", default="")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field(description="Roman script", default="")
    alt: str = Field(description="Alternative form", default="")
    lit: str = Field(
        description="Literal translation for the term",
        default="",
    )

125 

126 

class Linkage(ChineseBaseModel):
    # Element type of the linkage lists on ``WordEntry`` (synonyms,
    # antonyms, derived, anagrams, ...); every field has a default.
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    sense: str = ""
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana",
        default=[],
    )
    attestations: list[AttestationData] = []

137 

138 

class Descendant(ChineseBaseModel):
    # Element type of ``WordEntry.descendants``; every field has a default.
    lang_code: str = Field(
        description="Wiktionary language code",
        default="",
    )
    lang: str = Field(description="Language name", default="")
    word: str = ""
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Self-referential: the quoted name is a forward reference to this very
    # class, so descendants can nest.
    descendants: list["Descendant"] = []
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana",
        default=[],
    )

150 

151 

class WordEntry(ChineseBaseModel):
    # Top-level record; only ``word``, ``lang_code``, ``lang`` and ``pos``
    # are required.
    #
    # Only the title is set here; the remaining options come from
    # ChineseBaseModel, since pydantic merges a subclass ``model_config``
    # with the inherited one.
    model_config = ConfigDict(title="Chinese Wiktionary")

    # Required fields.
    word: str = Field(description="Word string")
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Localized language name")
    pos: str = Field(description="Part of speech type")

    pos_title: str = ""
    # ``exclude=True`` keeps this field out of serialized output.
    pos_level: NodeKind = Field(exclude=True, default=NodeKind.ROOT)
    etymology_text: str = ""
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field(description="Sense list", default=[])
    forms: list[Form] = Field(
        description="Inflection forms list",
        default=[],
    )
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # Linkage lists.
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviation: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []

    # Redirect page data.
    title: str = Field(
        description="Redirect page source title",
        default="",
    )
    redirect: str = Field(
        description="Redirect page target title",
        default="",
    )

    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        description="Soft redirect page, extracted from template zh-see ja-see",
        default=[],
    )
    literal_meaning: str = ""
    original_title: str = ""
    anagrams: list[Linkage] = []