Coverage for src/wiktextract/extractor/zh/models.py: 100%

1from pydantic import BaseModel, ConfigDict, Field

class ChineseBaseModel(BaseModel):
    # Common base class carrying the validation settings for the models
    # in this module that derive from it.
    model_config = ConfigDict(
        # Reject fields that are not declared on the model.
        extra="forbid",
        # No implicit type coercion of input values.
        strict=True,
        # Validate default values and later attribute assignments too.
        validate_default=True,
        validate_assignment=True,
    )

class Example(ChineseBaseModel):
    # A usage example; every field is optional and defaults to empty.
    text: str = Field(
        default="",
        # Fixed the duplicated word ("have have") in this description,
        # which is emitted in the generated JSON schema.
        description="Example usage sentences, some might have both "
        "Simplified and Traditional Chinese forms",
    )
    translation: str = Field(
        default="", description="Chinese translation of the example sentence"
    )
    literal_meaning: str = ""
    roman: str = Field(
        default="", description="Romanization of the example sentence"
    )
    ref: str = Field(
        default="",
        description="Source of the sentence, like book title and page number",
    )
    # Each entry is a variable-length tuple of strings.
    ruby: list[tuple[str, ...]] = Field(
        default=[], description="Japanese Kanji and furigana"
    )
    tags: list[str] = []
    raw_tags: list[str] = []

class AltForm(ChineseBaseModel):
    # A referenced word form; `word` is the only required field.
    word: str
    tags: list[str] = Field(default=[])

class Classifier(ChineseBaseModel):
    # A classifier string plus its tags; all fields default to empty.
    classifier: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])

class Sense(ChineseBaseModel):
    # One sense of a word entry; every field defaults to an empty list.

    # Plain string lists.
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []

    # Nested models and ruby annotations.
    examples: list[Example] = []
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    alt_of: list[AltForm] = []
    form_of: list[AltForm] = []
    classifiers: list[Classifier] = []

class Form(ChineseBaseModel):
    # A word form with optional tags, source and readings.
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    source: str = ""
    # Each entry is a variable-length tuple of strings.
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    hiragana: str = ""
    roman: str = ""

class Sound(ChineseBaseModel):
    # Pronunciation data; every field is optional and defaults to empty.
    zh_pron: str = Field("", description="Chinese word pronunciation")
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")

    # One URL field per audio file format.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []
    homophone: str = ""
    enpr: str = Field("", description="English pronunciation")

class Translation(ChineseBaseModel):
    # A translation of the entry word; `word` is the only required field.
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field("", description="Translation language name")
    word: str = Field(description="Translation term")
    sense: str = Field("", description="Translation gloss")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = Field("", description="Roman script")
    alt: str = Field("", description="Alternative form")
    lit: str = Field("", description="Literal translation for the term")

104

105

class Linkage(ChineseBaseModel):
    # A linked term (WordEntry uses it for synonyms, antonyms, etc.);
    # all fields default to empty.
    word: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    sense: str = ""
    # Each entry is a variable-length tuple of strings.
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )

115

116

class Descendant(ChineseBaseModel):
    # A descendant term; the model nests recursively via `descendants`.
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = ""
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Quoted forward reference: the class is not yet bound while its own
    # body is being evaluated.
    descendants: list["Descendant"] = []
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )

128

129

class WordEntry(ChineseBaseModel):
    # Top-level record for one extracted word entry.

    # Sets the model title. NOTE(review): pydantic is documented to merge
    # this with the config inherited from ChineseBaseModel - confirm.
    model_config = ConfigDict(title="Chinese Wiktionary")

    # Required identification fields.
    word: str = Field(description="Word string")
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Localized language name")
    pos: str = Field(description="Part of speech type")

    # Etymology, senses, forms, sounds and translations.
    etymology_text: str = ""
    etymology_examples: list[Example] = []
    senses: list[Sense] = Field([], description="Sense list")
    forms: list[Form] = Field([], description="Inflection forms list")
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # Linkage lists, one per relation type.
    synonyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    meronyms: list[Linkage] = []
    derived: list[Linkage] = []
    troponyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    abbreviation: list[Linkage] = []
    proverbs: list[Linkage] = []
    antonyms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    various: list[Linkage] = []
    compounds: list[Linkage] = []

    # Redirect page information.
    title: str = Field("", description="Redirect page source title")
    redirect: str = Field("", description="Redirect page target title")

    # Entry-level metadata.
    categories: list[str] = []
    notes: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    descendants: list[Descendant] = []
    redirects: list[str] = Field(
        [],
        description="Soft redirect page, extracted from template zh-see ja-see",
    )
    literal_meaning: str = ""