Coverage for src/wiktextract/extractor/zh/models.py: 100%

124 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-11 10:26 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2from wikitextprocessor import NodeKind 

3 

4 

# Common base for the models in this module. The shared configuration
# rejects unknown fields, turns on strict validation, and validates both
# attribute assignments and default values.
class ChineseBaseModel(BaseModel):
    model_config = ConfigDict(
        validate_default=True,
        validate_assignment=True,
        strict=True,
        extra="forbid",
    )

12 

13 

# A usage example; used by `Sense.examples` and
# `WordEntry.etymology_examples`.
class Example(ChineseBaseModel):
    text: str = Field(
        default="",
        # The description previously read "might have have both"; the
        # duplicated word was emitted into the generated schema.
        description=(
            "Example usage sentences, some might have both "
            "Simplified and Traditional Chinese forms"
        ),
    )
    # The `bold_*_offsets` fields hold pairs of ints; presumably the
    # (start, end) offsets of bold spans in the matching text field.
    # TODO confirm against the extractor code.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="",
        description="Chinese translation of the example sentence",
    )
    bold_translation_offsets: list[tuple[int, int]] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    roman: str = Field(
        default="",
        description="Romanization of the example sentence",
    )
    bold_roman_offsets: list[tuple[int, int]] = []
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    tags: list[str] = []
    raw_tags: list[str] = []

40 

41 

# Target of an alt-of / form-of relation (see `Sense.alt_of` and
# `Sense.form_of`). `word` has no default, so it is required.
class AltForm(ChineseBaseModel):
    word: str
    tags: list[str] = []

45 

46 

# A classifier string with its tags; stored in `Sense.classifiers`.
class Classifier(ChineseBaseModel):
    classifier: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []

51 

52 

# One sense of a word entry. Every field is optional and defaults to an
# empty list.
class Sense(ChineseBaseModel):
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []
    examples: list[Example] = []
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    # Both relations share the `AltForm` shape.
    alt_of: list[AltForm] = []
    form_of: list[AltForm] = []
    classifiers: list[Classifier] = []

66 

67 

# One entry of `WordEntry.forms`. All fields are optional.
class Form(ChineseBaseModel):
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    source: str = ""
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    hiragana: str = ""
    roman: str = ""
    sense: str = ""

79 

80 

# Pronunciation data for one entry of `WordEntry.sounds`. All fields are
# optional.
class Sound(ChineseBaseModel):
    zh_pron: str = Field(
        default="",
        description="Chinese word pronunciation",
    )
    ipa: str = Field(
        default="",
        description="International Phonetic Alphabet",
    )
    audio: str = Field(
        default="",
        description="Audio file name",
    )
    # One URL field per audio format; presumably derived from `audio`.
    # TODO confirm where these are populated.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    homophone: str = ""
    enpr: str = Field(
        default="",
        description="English pronunciation",
    )
    other: str = ""
    roman: str = ""

97 

98 

# One entry of `WordEntry.translations`. Only `word` is required; every
# other field has a default.
class Translation(ChineseBaseModel):
    lang_code: str = Field(
        default="",
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(
        default="",
        description="Translation language name",
    )
    word: str = Field(description="Translation term")
    sense: str = Field(
        default="",
        description="Translation gloss",
    )
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field(
        default="",
        description="Roman script",
    )
    alt: str = Field(
        default="",
        description="Alternative form",
    )
    lit: str = Field(
        default="",
        description="Literal translation for the term",
    )

112 

113 

# A linked term; the element type of the relation lists on `WordEntry`
# (synonyms, antonyms, derived, ...). All fields are optional.
class Linkage(ChineseBaseModel):
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    sense: str = ""
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )

123 

124 

# A descendant term. The model is recursive: `descendants` holds further
# `Descendant` items, so entries form a tree.
class Descendant(ChineseBaseModel):
    lang_code: str = Field(
        default="",
        description="Wiktionary language code",
    )
    lang: str = Field(
        default="",
        description="Language name",
    )
    word: str = ""
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # The string annotation is a forward reference to this class itself.
    descendants: list["Descendant"] = []
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )

136 

137 

# Top-level model for one extracted word entry. `word`, `lang_code`, `lang`
# and `pos` are required; everything else has a default.
class WordEntry(ChineseBaseModel):
    # Sets only the schema title here.
    model_config = ConfigDict(title="Chinese Wiktionary")

    word: str = Field(description="Word string")
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Localized language name")
    pos: str = Field(description="Part of speech type")
    pos_title: str = ""
    # `exclude=True` keeps this field out of serialized output.
    pos_level: NodeKind = Field(
        default=NodeKind.ROOT,
        exclude=True,
    )
    etymology_text: str = ""
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field(
        default=[],
        description="Sense list",
    )
    forms: list[Form] = Field(
        default=[],
        description="Inflection forms list",
    )
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # Relation lists; all share the `Linkage` element type.
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviation: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []

    title: str = Field(
        default="",
        description="Redirect page source title",
    )
    redirect: str = Field(
        default="",
        description="Redirect page target title",
    )
    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        default=[],
        description="Soft redirect page, extracted from template zh-see ja-see",
    )
    literal_meaning: str = ""