Coverage for src/wiktextract/extractor/zh/section_titles.py: 100%

10 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from wiktextract.config import POSSubtitleData 

2 

# Maps a Chinese part-of-speech section title to its POS data.
#
# Each value carries a "pos" string and may also carry a "tags" list and a
# "debug" message.  Traditional and Simplified spellings of the same title are
# listed as separate keys and are expected to map to the same data.
POS_TITLES: dict[str, POSSubtitleData] = {
    "不及物动词": {"pos": "verb", "tags": ["intransitive"]},
    "不及物動詞": {
        "debug": "part-of-speech Intransitive verb is proscribed",
        "pos": "verb",
        "tags": ["intransitive"],
    },
    "不定代词": {"pos": "pron"},
    "不定冠詞": {"pos": "article"},
    "不定冠词": {"pos": "article"},
    "专有名詞": {"pos": "name"},
    "专有名词": {"pos": "name"},
    # "中綴" is the Traditional spelling of "中缀" (infix); the interfix title
    # is "間綴", so both spellings map to "infix".
    "中綴": {"pos": "infix", "tags": ["morpheme"]},
    "中缀": {"pos": "infix", "tags": ["morpheme"]},
    "习语": {"pos": "phrase", "tags": ["idiomatic"]},
    "人称代词": {"pos": "pron", "tags": ["person"]},
    "介係詞": {"pos": "prep"},
    "介系詞": {"pos": "prep"},
    "介系词": {"pos": "prep"},
    "介詞": {"pos": "prep"},
    "介詞短語": {"pos": "prep_phrase"},
    "介词": {"pos": "prep"},
    "介词短语": {"pos": "prep_phrase"},
    "代名詞": {"pos": "pron"},
    "代名词": {"pos": "pron"},
    "代詞": {"pos": "pron"},
    "代词": {"pos": "pron"},
    "俗語": {"pos": "phrase", "tags": ["idiomatic"]},
    "俗语": {"pos": "phrase", "tags": ["idiomatic"]},
    "关系代词": {"pos": "pron"},
    "冠詞": {"pos": "article"},
    "冠词": {"pos": "article"},
    "分詞": {"pos": "verb", "tags": ["participle"]},
    "分词": {"pos": "verb", "tags": ["participle"]},
    "分類詞": {"pos": "classifier"},
    "前綴": {"pos": "prefix", "tags": ["morpheme"]},
    "前綴詞": {"pos": "prefix", "tags": ["morpheme"]},
    "前缀": {"pos": "prefix", "tags": ["morpheme"]},
    "前置詞": {"pos": "prep"},
    "前置词": {"pos": "prep"},
    "副助": {"pos": "particle"},
    "副詞": {"pos": "adv"},
    "副词": {"pos": "adv"},
    "动詞": {"pos": "verb"},
    "动词": {"pos": "verb"},
    "助動詞": {"pos": "verb"},
    "助数": {"pos": "classifier", "tags": ["measure word"]},
    "助數詞": {"pos": "counter"},
    "助詞": {"pos": "particle"},
    "助词": {"pos": "particle"},
    "動名詞": {
        "debug": "part-of-speech Gerund is proscribed",
        "pos": "verb",
        "tags": ["participle", "gerund"],
    },
    "動詞": {"pos": "verb"},
    "及物动词": {"pos": "verb", "tags": ["transitive"]},
    "及物動詞": {"pos": "verb", "tags": ["transitive"]},
    "叹词": {"pos": "intj"},
    "名称": {"pos": "noun"},
    "名稱": {"pos": "noun"},
    "名詞": {"pos": "noun"},
    "名词": {"pos": "noun"},
    "后缀": {"pos": "suffix", "tags": ["morpheme"]},
    "后置词": {"pos": "postp"},
    "基数": {"pos": "num"},
    "基数词": {"pos": "num"},
    "基數": {"pos": "num"},
    "字母": {"pos": "character", "tags": ["letter"]},
    "字綴": {"pos": "affix"},
    "字面": {"debug": "typo", "pos": "character", "tags": ["letter"]},
    "定冠词": {"pos": "article"},
    "寧詞": {"debug": "typo", "pos": "noun"},
    "对应汉字": {"pos": "romanization"},
    "对应词语": {"pos": "romanization"},
    "專有名詞": {"pos": "name"},
    "小品词": {"pos": "particle"},
    "平假名": {"pos": "syllable"},
    "序數": {
        "debug": "ordinal numbers should be adjectives",
        "pos": "adj",
        "tags": ["ordinal"],
    },
    "序數詞": {"pos": "num"},
    "康熙部首": {"pos": "symbol"},
    "形容动词": {"pos": "adj_noun"},
    "形容動詞": {"pos": "adj_noun"},
    "形容詞": {"pos": "adj"},
    "形容词": {"pos": "adj"},
    "後綴": {"pos": "suffix", "tags": ["morpheme"]},
    "後置詞": {"pos": "postp"},
    "後附語素": {"pos": "suffix", "tags": ["clitic"]},
    "惯用语": {"pos": "phrase"},
    "感叹词": {"pos": "intj"},
    "感嘆詞": {"pos": "intj"},
    "感歎詞": {"pos": "intj"},
    "慣用語": {"pos": "phrase"},
    "成句": {"pos": "proverb"},
    "成語": {"pos": "phrase", "tags": ["idiomatic"]},
    "成语": {"pos": "phrase", "tags": ["idiomatic"]},
    "拼音": {"pos": "romanization"},
    "接助": {"pos": "particle"},
    "接头": {"pos": "prefix"},
    "接头詞": {"pos": "prefix"},
    "接头词": {"pos": "prefix"},
    "接尾": {"pos": "suffix"},
    "接尾詞": {"pos": "suffix"},
    "接尾词": {"pos": "suffix"},
    # Abbreviated particle title, like "副助" and "接助" above, so it maps to
    # "particle" rather than "article".
    "提助": {"pos": "particle"},
    "擬態詞": {"pos": "noun", "tags": ["ideophone"]},
    "擬聲詞": {"pos": "noun", "tags": ["onomatopoeia"]},
    "数字": {"pos": "num", "tags": ["number"]},
    "数詞": {"pos": "num", "tags": ["number"]},
    "数词": {"pos": "num", "tags": ["number"]},
    "數字": {"pos": "num", "tags": ["number"]},
    "數字符號": {"pos": "num", "tags": ["number"]},
    "數詞": {"pos": "num", "tags": ["number"]},
    "标点": {"pos": "punct", "tags": ["punctuation"]},
    "标点符号": {"pos": "punct", "tags": ["punctuation"]},
    "標點符號": {
        "debug": "part-of-speech Punctuation should be Punctuation mark",
        "pos": "punct",
        "tags": ["punctuation"],
    },
    "歇后语": {"pos": "proverb", "tags": ["xiehouyu"]},
    "歇後語": {"pos": "proverb", "tags": ["xiehouyu"]},
    "汉语拼音": {"pos": "romanization"},
    "漢語拼音": {"pos": "romanization"},
    "注音符號": {"pos": "character"},
    "漢字": {"pos": "character", "tags": ["han"]},
    "汉字": {"pos": "character", "tags": ["han"]},
    "片語": {"pos": "phrase"},
    "物主代词": {"pos": "pron"},
    "環綴": {"pos": "circumfix", "tags": ["morpheme"]},
    "短語": {"debug": "usually used in singular", "pos": "phrase"},
    "短语": {"pos": "phrase", "tags": ["idiomatic"]},
    "符号": {"pos": "symbol"},
    "符號": {"pos": "symbol"},
    "简写": {"pos": "abbrev", "tags": ["abbreviation"]},
    "縮寫": {"pos": "abbrev", "tags": ["abbreviation"]},
    "縮約形": {"pos": "contraction", "tags": ["contraction"]},
    "结合形式": {"pos": "combining_form", "tags": ["morpheme"]},
    "缩写": {
        "debug": "part-of-speech Abbreviation is proscribed",
        "pos": "abbrev",
        "tags": ["abbreviation"],
    },
    # Simplified spelling of "縮約形"; kept in sync with that entry.
    "缩约形": {"pos": "contraction", "tags": ["contraction"]},
    "罗马化": {"pos": "romanization"},
    "罗马字": {"pos": "romanization"},
    "羅馬化": {"pos": "romanization"},
    "羅馬字": {"pos": "romanization"},
    "習語": {"pos": "phrase", "tags": ["idiomatic"]},
    "表語": {"pos": "adj", "tags": ["predicative"]},
    "詞綴": {"pos": "affix"},
    "諺語": {"pos": "proverb"},
    "词组": {"pos": "phrase"},
    "词缀": {"pos": "affix"},
    "语气助词": {"pos": "particle"},
    "谚语": {"pos": "proverb"},
    "连体词": {"pos": "adnominal"},
    "连词": {"pos": "conj"},
    "連詞": {"pos": "conj"},
    "連體詞": {"pos": "adnominal"},
    "部件": {"pos": "component"},
    "釋義": {
        # Means 'definition'; some pages don't have a POS but use this title
        "pos": "unknown"
    },
    "释义": {"pos": "unknown"},  # simplified form of "釋義"
    "解释": {"pos": "unknown"},
    "量詞": {"pos": "classifier"},
    "量词": {"pos": "classifier"},
    "間綴": {"pos": "interfix", "tags": ["morpheme"]},
    "關係詞": {"pos": "conj", "tags": ["relative"]},
    "附加符號": {"pos": "character", "tags": ["diacritic"]},
    "附著語素": {"pos": "suffix", "tags": ["morpheme"]},
    "限定詞": {"pos": "det"},
    "限定词": {"pos": "det"},
    "音節": {"pos": "syllable"},
    "音节": {"pos": "syllable"},
    "首字母縮略字": {
        "debug": "part-of-speech Initialism is proscribed",
        "pos": "abbrev",
        "tags": ["abbreviation"],
    },
    "首字母縮略詞": {"pos": "abbrev", "tags": ["abbreviation"]},
    "首字母缩略词": {"pos": "abbrev", "tags": ["abbreviation"]},
}

192 

193# map title to pydantic field 

194LINKAGE_TITLES: dict[str, str] = { 

195 "上下位關係": "hypernyms", 

196 "上义词": "hypernyms", 

197 "上位詞": "hypernyms", 

198 "上位語": "hypernyms", 

199 "上位词": "hypernyms", 

200 "上義詞": "hypernyms", 

201 "下义词": "hyponyms", 

202 "下位詞": "hyponyms", 

203 "下位語": "hyponyms", 

204 "下位词": "hyponyms", 

205 "下层词": "hyponyms", 

206 "下属词": "hyponyms", 

207 "下層概念": "derived", 

208 "下義詞": "hyponyms", 

209 "俗语": "related", 

210 "关联词": "related", 

211 "关联词条": "related", 

212 "其他书写系统": "synonyms", 

213 "其他写法": "synonyms", 

214 "其他变体": "synonyms", 

215 "其他字形": "synonyms", 

216 "其他字母": "synonyms", 

217 "其他字母系統": "synonyms", 

218 "其他字符系统": "synonyms", 

219 "其他字體": "synonyms", 

220 "其他寫法": "synonyms", 

221 "其他形式": "synonyms", 

222 "其他拼写方式": "synonyms", 

223 "其他拼写方法": "synonyms", 

224 "其他拼寫": "synonyms", 

225 "其他拼法": "synonyms", 

226 "其他文字": "synonyms", 

227 "其他文字系統": "synonyms", 

228 "其他書寫系統": "synonyms", 

229 "其他表記": "synonyms", 

230 "其他詞形": "synonyms", 

231 "其他词形": "synonyms", 

232 "其他译名": "synonyms", 

233 "其它词形": "synonyms", 

234 "分体词": "meronyms", 

235 "分體詞": "meronyms", 

236 "参看": "related", 

237 "參考詞彙": "synonyms", 

238 "反义符号": "antonyms", 

239 "反义词": "antonyms", 

240 "反義": "antonyms", 

241 "反義字": "antonyms", 

242 "反義詞": "antonyms", 

243 "另見": "related", 

244 "另见": "related", 

245 "可替代拼寫": "synonyms", 

246 "合寫": "related", 

247 "同一類別文字": "coordinate_terms", 

248 "同一類別詞彙": "coordinate_terms", 

249 "同义词": "synonyms", 

250 "同位詞": "coordinate_terms", 

251 "同意詞": "synonyms", 

252 "同根词": "related", 

253 "同类词汇": "related", 

254 "同級詞彙": "coordinate_terms", 

255 "同義字": "related", 

256 "同義詞": "synonyms", 

257 "同義語": "synonyms", 

258 "同類別詞彙": "coordinate_terms", 

259 "同類詞": "coordinate_terms", 

260 "同類詞彙": "coordinate_terms", 

261 "复合词": "derived", 

262 "對應詞": "coordinate_terms", 

263 "對等詞": "coordinate_terms", 

264 "局部關係詞": "meronyms", 

265 "延伸词": "related", 

266 "搭配詞": "derived", 

267 "整体词": "holonyms", 

268 "整體詞": "holonyms", 

269 "替代寫法": "synonyms", 

270 "替代形式": "synonyms", 

271 "杂项": "various", 

272 "派生": "derived", 

273 "派生字": "derived", 

274 "派生字母": "derived", 

275 "派生形式": "derived", 

276 "派生漢字": "derived", 

277 "派生詞": "derived", 

278 "派生詞彙": "derived", 

279 "派生詞語": "derived", 

280 "派生词": "derived", 

281 "派生词汇": "derived", 

282 "派生词组": "derived", 

283 "熟語": "related", 

284 "熟语": "related", 

285 "相似后缀": "related", 

286 "相似符號": "related", 

287 "相关后缀": "related", 

288 "相关形式": "related", 

289 "相关术语": "related", 

290 "相关条目": "related", 

291 "相关短语": "related", 

292 "相关词": "related", 

293 "相关词条": "related", 

294 "相关词汇": "related", 

295 "相关词组": "related", 

296 "相关词语": "related", 

297 "相关语": "related", 

298 "相關": "related", 

299 "相關字": "related", 

300 "相關派生": "related", 

301 "相關漢字": "related", 

302 "相關符號": "related", 

303 "相關詞": "related", 

304 "相關詞匯": "related", 

305 "相關詞彙": "related", 

306 "相關詞彙變格": "related", 

307 "相關詞會": "related", 

308 "相關詞條": "related", 

309 "相關詞語": "related", 

310 "相關語": "related", 

311 "类似中缀": "related", 

312 "类似后缀": "related", 

313 "組詞": "derived", 

314 "组词": "related", 

315 "衍生字": "derived", 

316 "衍生詞": "derived", 

317 "衍生詞彙": "derived", 

318 "衍生词": "derived", 

319 "衍生词汇": "derived", 

320 "複合詞": "compounds", 

321 "變體": "synonyms", 

322 "近义词": "synonyms", 

323 "近義詞": "synonyms", 

324 "近義語": "synonyms", 

325 "部分詞": "meronyms", 

326 "關聯詞": "related", 

327 "關聯詞彙": "related", 

328} 

329 

# Immutable set of etymology-type section titles (Traditional and Simplified).
ETYMOLOGY_TITLES: frozenset[str] = frozenset(
    {
        # "word origin" / "language origin" / "character origin"
        "詞源", "词源", "語源", "语源", "字源",
        # "allusion", "words", "composition"
        "典故", "詞語", "組成",
        # "source"
        "出處", "出处",
    }
)

344 

# Immutable set of section titles grouped under the "ignored" name:
# anagrams, sources/references, see-also, further reading, encoding,
# palindromes and external links.
IGNORED_TITLES: frozenset[str] = frozenset(
    {
        # anagrams
        "異序詞", "异序词", "異序词",
        # sources and references
        "來源", "參考文獻", "参考文献", "參考資料", "參考來源", "参考资料",
        "参考", "參考",
        # see also
        "參見", "参见", "參閱",
        # further reading (both "閱" and "閲" spellings occur)
        "拓展閱讀", "拓展閲讀", "拓展阅读",
        "延伸阅读", "延伸閲讀", "延伸閱讀",
        "扩展阅读",
        # encoding
        "編碼", "编码",
        # palindromes and anagram word play
        "回文", "回文構詞", "易位構詞",
        # external links
        "外部鏈接", "外部链接", "外部連結",
    }
)

378 

# Immutable set of inflection-type section titles: declension, conjugation,
# word-form change, consonant mutation and similar headings.
INFLECTION_TITLES: frozenset[str] = frozenset(
    {
        # declension / conjugation / form change
        "变格", "變格", "变位", "變位", "变形", "变位形式",
        # word-form, consonant and ending changes
        "詞形變化", "词形变化", "輔音變化", "辅音变化", "語尾變化",
        # "活用" headings
        "活用", "活用型", "活用形",
        # accusative case
        "賓格",
        # inflected forms and soft mutation
        "屈折", "屈折形式", "曲折形式", "軟化變形",
    }
)

402 

# Immutable set of pronunciation-type section titles.
PRONUNCIATION_TITLES: frozenset[str] = frozenset(
    {
        "發音", "发音",  # pronunciation
        "读音", "讀音",  # reading
        "注音", "讀法",  # phonetic notation, way of reading
    }
)

406 

# "Translation" section title in Traditional and Simplified script.
TRANSLATIONS_TITLES: frozenset[str] = frozenset({"翻譯", "翻译"})

408 

# Descendants-type section titles.
# NOTE(review): "派生詞" is also a key of LINKAGE_TITLES ("derived"); which
# table takes precedence depends on the caller - confirm.
DESCENDANTS_TITLES: frozenset[str] = frozenset({"派生語彙", "派生詞"})

410 

# Usage-notes section titles.  Both Traditional and Simplified spellings are
# listed, matching the other title sets in this module, so a page written in
# Simplified script is recognised too.
NOTES_TITLES: frozenset[str] = frozenset(
    {"使用說明", "使用说明", "用法說明", "用法说明"}
)