Coverage for src / wiktextract / extractor / zh / tags.py: 98%

35 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-12 08:09 +0000

1from .models import WordEntry 

2from .topics import LABEL_TOPICS 

3 

# Chinese gender labels mapped to English gender tags. Several spellings
# (e.g. "陰性" / "阴性") share one tag, so they are grouped per tag.
GENDER_TAGS: dict[str, str] = {
    **dict.fromkeys(("陰性", "阴性", "陰性形式", "陰性等價詞"), "feminine"),
    **dict.fromkeys(("陽性", "陽性形式"), "masculine"),
    **dict.fromkeys(("中性", "中性形式"), "neuter"),
}

14 

# Chinese grammatical-number labels mapped to English tags. A value is
# either a single tag (str) or a list of tags when the label combines
# number with definiteness, case or gender.
NUMBER_TAGS: dict[str, str | list[str]] = {
    "單數": "singular",
    "单数": "singular",
    "複數": "plural",
    "复数": "plural",
    "定單數": ["definite", "singular"],
    "定单数": ["definite", "singular"],
    "不定單數": ["indefinite", "singular"],
    "不定单数": ["indefinite", "singular"],
    "不定複數": ["indefinite", "plural"],
    "不定复数": ["indefinite", "plural"],
    "定複數": ["definite", "plural"],
    "斜格複數": ["oblique", "plural"],
    "主格單數": ["nominative", "singular"],
    "主格複數": ["nominative", "plural"],
    "屬格單數": ["genitive", "singular"],
    "屬格複數": ["genitive", "plural"],
    "陰性單數": ["feminine", "singular"],
    "陽性單數": ["masculine", "singular"],
    "陰性複數": ["feminine", "plural"],
    "陽性複數": ["masculine", "plural"],
    "中性複數": ["neuter", "plural"],
    "中性單數": ["neuter", "singular"],
    "賓格單數": ["accusative", "singular"],
    "賓格複數": ["accusative", "plural"],
    "無複數": "no-plural",
    # Template:nb-noun-m1
    "定指單數": ["definite", "singular"],
    "定指複數": ["definite", "plural"],
    "複": "plural",
    "單": "singular",
}

47 

48# https://en.wikipedia.org/wiki/Count_noun 

# Chinese countability labels mapped to English tags.
COUNT_TAGS: dict[str, str] = {
    "可數": "countable",
    "不可數": "uncountable",
}

53 

54OTHER_TAGS: dict[str, str] = { 

55 "指小詞": "diminutive", 

56 "指小": "diminutive", 

57 "變格類型": "declension-pattern-of", 

58 "屬格": "genitive", 

59 "部分格": "partitive", 

60 "個人": "person", 

61 "無屈折": "indeclinable", 

62 "諺文": "hangeul", 

63 "漢字": "hanja", 

64 # Template:cs-proper noun 

65 "相關形容詞": ["relational", "adjective"], 

66 "關係形容詞": ["relational", "adjective"], 

67 "居民稱謂詞": "demonym", 

68 "女性居民稱謂詞": ["feminine", "demonym"], 

69 "定賓格": ["definite", "accusative"], 

70 "定宾格": ["definite", "accusative"], 

71 "拉丁字母拼寫": "romanization", 

72 "定指賓格": ["definite", "accusative"], 

73 "前元音和諧變體": "front-vowel-harmony", 

74 # Template:zh-forms 

75 "正體": "Standard-Chinese", 

76 "繁體": "Traditional-Chinese", 

77 "簡體": "Simplified-Chinese", 

78 "異體": "alternative", 

79 "仿譯詞": "calque", 

80 "貶稱詞": "pejorative", 

81 # Template:ms-noun 

82 "爪夷文": "Jawi", 

83 "非正式第一人稱屬格": ["informal", "first-person", "genitive"], 

84 "不禮貌第二人稱屬格": ["impolite", "second-person", "genitive"], 

85 "第三人稱屬格": ["third-person", "genitive"], 

86 "印尼語": "Indonesian", 

87 "姓氏": "surname", 

88} 

89 

90VERB_TAGS: dict[str, str] = { 

91 "及物": "transitive", 

92 "不及物": "intransitive", 

93 "动宾结构": "verb-object", 

94 "非完": "imperfective", 

95 "完": "perfective", 

96 "強變化": "strong", 

97 "動名詞": "supine", 

98 "命令式": "imperative", 

99 # Template:ar-verb 

100 "第I類": "form-i", 

101 "第II類": "form-ii", 

102 "第III類": "form-iii", 

103 "第IV類": "form-iv", 

104 "第V類": "form-v", 

105 "第VI類": "form-vi", 

106 "第VII類": "form-vii", 

107 "第IX類": "form-ix", 

108 "第X類": "form-x", 

109 "第XII類": "form-xii", 

110 "第XIII類": "form-xiii", 

111 "第XIV類": "form-xiv", 

112 "第XV類": "form-xv", 

113 "第Iq類": "form-iq", 

114 "第IIq類": "form-iiq", 

115 "第IIIq類": "form-iiiq", 

116 "第IVq類": "form-ivq", 

117 # Module:Jpan-headword 

118 "他動詞": "transitive", 

119 "自動詞": "intransitive", 

120 "自、他動詞": ["transitive", "intransitive"], 

121 "五段": "godan", 

122 "一段": "ichidan", 

123 "サ行": "suru", 

124 "カ行": "kuru", 

125 "イ形": "-i", 

126 "ナ形": "-na", 

127 "四段": "yodan", 

128 "二段": "nidan", 

129 "ラ行": "-ri", 

130 "ナリ": "-nari", 

131 "タリ": "-tari", 

132 # Template:ja-conj-bungo 

133 "語幹形態": "stem", 

134 "對比連接詞": ["contrastive", "conjunction"], 

135 "因果連接詞": ["causative", "conjunction"], 

136 "條件連接詞": ["conditional", "conjunction"], 

137 "過去式 (第一手消息)": "past", 

138 "過去式 (第二手消息)": "past", 

139 "完成式 (自覺動作)": "perfect", 

140 "完成式 (自然事件)": ["perfect", "natural"], 

141 "完成進行式": ["perfect", "continuative"], 

142 "意志形": "volitional", 

143 "副詞": "adverbial", 

144 "無助動詞": "no-auxiliary", 

145 "有助動詞": "auxiliary", 

146 # Template:ja-na 

147 "簡體否定": ["informal", "negative"], 

148 "簡體過去": ["informal", "past"], 

149 "簡體過去否定": ["informal", "past", "negative"], 

150 "敬體": "formal", 

151 "敬體否定": ["formal", "negative"], 

152 "敬體過去": ["formal", "past"], 

153 "敬體過去否定": ["formal", "past", "negative"], 

154 "連接形": "conjunctive", 

155 "暫定形": "provisional", 

156 "推量形": "volitional", 

157 "連用形": "adverbial", 

158 "程度形": "degree", 

159 # Template:ko-verb 

160 "不定式": "infinitive", 

161 "連續式": "sequential", 

162} 

163 

164# https://en.wikipedia.org/wiki/Japanese_grammar#Stem_forms 

# Japanese stem-form names mapped to English tags. Where two spellings of
# the same form exist (e.g. "連體形" / "連体形") they are grouped on one
# line and share a tag.
JA_STEM_FORMS: dict[str, str] = {
    "未然形": "imperfective",
    "連用形": "continuative",
    "終止形": "terminal",
    **dict.fromkeys(("連體形", "連体形"), "attributive"),
    **dict.fromkeys(("假定形", "仮定形"), "hypothetical"),
    "命令形": "imperative",
    "已然形": "realis",
}

176 

177# https://en.wikipedia.org/wiki/Voice_(grammar) 

# Chinese labels for verb forms ("…形") mapped to English tags. A value is
# either a single tag (str) or a list of tags.
VOICE_TAGS: dict[str, str | list[str]] = {
    "被動形": "passive",
    "使役形": "causative",
    "可能形": "potential",
    "意志形": "volitional",
    "否定形": "negative",
    "否定連用形": ["negative", "continuative"],
    "尊敬形": "formal",
    "完成形": "perfective",
    "接續形": "conjunctive",
    "條件形": ["hypothetical", "conditional"],
}

190 

# Chinese degree-of-comparison labels mapped to English tags.
COMPARISON_TAGS: dict[str, str] = {
    # https://en.wikipedia.org/wiki/Comparison_(grammar)
    "原级": "positive",
    "比較級": "comparative",
    "最高級": "superlative",
}

197 

# Chinese tense / person / participle labels mapped to English tags. A
# value is either a single tag (str) or a list of tags. Some labels occur
# both with and without an inner space (e.g. "第一人稱單數 現在時"); both
# spellings are separate keys.
TENSE_TAGS: dict[str, str | list[str]] = {
    "過去時": "preterite",
    "過去式": "past",
    "過去分詞": ["past", "participle"],
    "現在時": "present",
    "第三人稱單數現在時": ["third-person", "singular", "present"],
    "助動詞": "auxiliary",
    # Template:de-verb
    "弱變化": "weak",
    "弱变化": "weak",
    "第三人稱單數簡單現在時": ["third-person", "singular", "present"],
    "現在分詞": ["present", "participle"],
    "一般過去時及過去分詞": ["past", "participle"],
    # Template:it-verb
    "第一人稱單數 現在時": ["first-person", "singular", "present"],
    "第一人稱單數 先過去時": ["first-person", "singular", "past", "historic"],
    # Template:de-adj
    "強變化主格陽性單數": ["strong", "nominative", "masculine", "singular"],
    # Template:la-verb
    "现在时不定式": ["present", "infinitive"],
    "完成时主动式": ["perfect", "active"],
    "目的动名词": "supine",
    # Template:ar-verb
    "非過去時": "non-past",
    "動詞性名詞": "noun-from-verb",
    "主動分詞": ["active", "participle"],
    "被動分詞": ["passive", "participle"],
    # Template:it-verb
    # https://zh.wiktionary.org/wiki/Module:It-headword
    "第一人稱單數現在時": ["first-person", "singular", "present"],
    "第一人稱單數先過去時": ["first-person", "singular", "past", "historic"],
    "第三人稱單數先過去時": ["third-person", "singular", "past", "historic"],
    "第一人稱單數過去分詞": ["first-person", "singular", "past", "participle"],
    "第三人稱單數過去分詞": ["third-person", "singular", "past", "participle"],
    "第一人稱單數未完成時": ["first-person", "singular", "imperfect"],
    "第三人稱單數未完成時": ["third-person", "singular", "imperfect"],
    "第一人稱單數未來時": ["first-person", "singular", "future"],
    "第三人稱單數未來時": ["third-person", "singular", "future"],
    # NOTE(review): the two present-subjunctive entries below carry no
    # "present" tag, unlike the imperfect-subjunctive entries which carry
    # "imperfect" -- confirm whether this is intentional.
    "第一人稱單數現在時虛擬式": ["first-person", "singular", "subjunctive"],
    "第三人稱單數現在時虛擬式": ["third-person", "singular", "subjunctive"],
    "第一人稱單數未完成時虛擬式": [
        "first-person",
        "singular",
        "imperfect",
        "subjunctive",
    ],
    "第三人稱單數未完成時虛擬式": [
        "third-person",
        "singular",
        "imperfect",
        "subjunctive",
    ],
    # Template:es-verb
    "第一人稱單數過去時": ["first-person", "singular", "past"],
    # Template:ast-verb-ar
    "第一人稱單數直陳現在時": [
        "first-person",
        "singular",
        "indicative",
        "present",
    ],
}

260 

# Union of all grammatical tag tables above. Several of the merged tables
# contain list values, so the value type must be ``str | list[str]``.
# When the same key appears in more than one table, the table unpacked
# later wins (e.g. "連用形" resolves to the JA_STEM_FORMS value).
GRAMMATICAL_TAGS: dict[str, str | list[str]] = {
    **GENDER_TAGS,
    **NUMBER_TAGS,
    **COUNT_TAGS,
    **OTHER_TAGS,
    **VERB_TAGS,
    **JA_STEM_FORMS,
    **VOICE_TAGS,
    **COMPARISON_TAGS,
    **TENSE_TAGS,
}

272 

273# https://zh.wiktionary.org/wiki/Template:Label 

274# https://zh.wiktionary.org/wiki/Module:Labels/data 

275# https://zh.wiktionary.org/wiki/Template:Qualifier 

276# https://zh.wiktionary.org/wiki/Template:古 

277# https://zh.wiktionary.org/wiki/Template:注释 

# Chinese usage/grammar labels (see the template and module links above)
# mapped to English tags. A value is either a single tag (str) or a list
# of tags. A few keys are English label names ("back slang",
# "synecdochically").
LABEL_TAGS: dict[str, str | list[str]] = {
    "棄用": "obsolete",
    "弃用": "obsolete",
    "比喻": "figuratively",
    "古": "archaic",
    "陽": "masculine",
    "陰": "feminine",
    "喻": "figuratively",
    "書": "literary",
    "口": "colloquial",
    "俚": "slang",
    "俗": "slang",
    "方": "dialectal",
    "废": "obsolete",
    "貶": "derogatory",
    "罕": "rare",
    "引": "broadly",
    "現已罕用": "archaic",
    # Module:Labels/data
    "back slang": "slang",
    "synecdochically": "synecdoche",
    "不再自由造詞": "idiomatic",
    "不及物": "intransitive",
    "不可數": "uncountable",
    "不定": "indefinite",
    "不常見": "uncommon",
    "不推薦使用": "proscribed",
    "中性": "neuter",
    "中間被動語態": "mediopassive",
    "中間語態": "middle",
    "主動語態": "active",
    "主要用於否定": ["usually", "with-negation"],
    "交互": "reciprocal",
    "以單數形式": "singular",
    "以複數形式": "in-plural",
    "作定語": "attributive",
    "作格": "ergative",
    "作表語": "predicative",
    "使役": "causative",
    "俗語": "idiomatic",
    "俚語": "slang",
    "俚语": "slang",
    "兒童用語": "childish",
    "公文": "bureaucratese",
    "冒犯": "offensive",
    "分詞": "participle",
    "前古典": "pre-Classical",
    "助動詞": "auxiliary",
    "助記符": "mnemonic",
    "及物": "transitive",
    "反問句": "rhetoric",
    "反身": "reflexive",
    "口語": "colloquial",
    "口语": "colloquial",
    "古舊": "archaic",
    "可數": "countable",
    "同性戀俚語": ["slang", "LGBT"],
    "名詞化": "noun-from-verb",
    "唯單": "singular-only",
    "唯複": "plural-only",
    "國際音標": "IPA",
    "基數詞": "cardinal",
    "大寫": "capitalized",
    "委婉": "euphemistic",
    "字面義": "literally",
    "完整": "perfect",
    "完整體": "perfective",
    "定語": "attributive",
    "實詞": "substantive",
    "尊敬": "honorific",
    "敬語": "honorific",
    "敬语": "honorific",
    "常用複數": "plural-normally",
    "幽默": "humorous",
    "序數詞": "ordinal",
    "廣義來說": "broadly",
    "引申": "broadly",
    "弱祈使式": "jussive",
    "強調": "emphatic",
    "後古典": "obsolete",
    "性別中立": "gender-neutral",
    "情態": "modal",
    "愛稱": "endearing",
    "所有格代詞": ["possessive", "pronoun", "without-noun"],
    "押韻俚語": "slang",
    "抽象名詞": "abstract-noun",
    "擬態詞": "ideophonic",
    "擬聲詞": "onomatopoeic",
    "新詞": "neologism",
    "方言": "dialectal",
    "書面": "literary",
    "书面": "literary",
    "有比較級": "comparable",
    "有生": "animate",
    "正式": "formal",
    "歷史": "historical",
    "比喻義": "figuratively",
    "無人稱": "impersonal",
    "無比較級": "not-comparable",
    "無生": "inanimate",
    "焦點": "focus",
    "狹義": "narrowly",
    "監獄俚語": "slang",
    "直陳語氣": "indicative",
    "短信": "Internet",
    "祈使語氣": "imperative",
    "禮貌": "polite",
    "種族歧視語": "slur",
    "粉絲用語": ["slang", "lifestyle"],
    "粗俗": "vulgar",
    "系動詞": "copulative",
    "網路用語": "Internet",
    "縮寫": "abbreviation",
    "罕用": "rare",
    "臨時語": "nonce-word",
    "虛擬語氣": "subjunctive",
    "表語": "predicative",
    "被動語態": "passive",
    "視覺方言": "pronunciation-spelling",
    "親切": "familiar",
    "詈語": "expletive",
    "詩歌": "poetic",
    "誇飾": "excessive",
    "語中音省略": "syncope",
    "諷刺": "sarcastic",
    "謙遜": "humble",
    "貶義": "derogatory",
    "轉喻義": "metonymically",
    "返璞詞": "retronym",
    "過時": "dated",
    "陰性": "feminine",
    "陽性": "masculine",
    "雙及物動詞": "ditransitive",
    "靜態動詞": "stative",
    "非完整": "imperfect",
    "非完整體": "imperfective",
    "非常罕用": "rare",
    "非標準": "nonstandard",
    "非标准": "nonstandard",
    "非標準形式": "nonstandard",
    "非正式": "informal",
    "首字母縮略詞": "initialism",
    "駭客語": ["Leet", "Internet"],
    "高語域": "honorific",
    "中醫": "Traditional-Chinese-Medicine",
    "修辭學": "rhetoric",
    "印度教": "Hinduism",
    "摩門教": "Mormonism",
    "物理": "particle",
    "猶太教": "Judaism",
    "納粹主義": "Nazism",
    "網際網路": "Internet",
    "耆那教": "Jainism",
    "聖經": "Biblical",
    "解剖學": "anatomy",
    "貴格會": "Quakerism",
    "錫克教": "Sikhism",
    "馬克思主義": "Marxism",
    # also from Module:Labels/data, but translated manually
    "喃字": "Chu-Nom",
    "反身代詞": "reflexive",
    "字面意義": "literally",
    "成語": "Chengyu",
    "及物、不及物": ["transitive", "intransitive"],
    "集合名詞": "collective",
    "控制動詞": "control-verb",
    "省略": "ellipsis",
    "分數": "fractional",
    "以雙數形式": "dual",
    "主要用於否定複數": ["negative", "plural"],
    "數詞縮寫": ["numeral", "abbreviation"],
    "主要用於肯定": "positive",
    "古典": "Classical",
    "中國大陸": "Mainland-China",
    "書面語": "literary",
    "文言": "literary",
    "詞幹": "stem",
    "烏爾都語寫法": "Urdu",
    "波斯-阿拉伯字母": "Perso-Arabic",
    # Template:kok-pos
    "拉丁字母": ["Latin", "character"],
    "卡納達文": ["Kannada", "character"],
    "尼瓦爾文拼寫": "Newa",
}

462 

463# example sentence template 

464# https://zh.wiktionary.org/wiki/Template:Zh-x 

465# https://zh.wiktionary.org/wiki/Module:Zh-usex/data 

# Labels from the Chinese example-sentence template (see links above)
# mapped to English tags: script, romanization and language-variety
# names. A value is either a single tag (str) or a list of tags.
ZH_X_TAGS: dict[str, str | list[str]] = {
    "繁體": "Traditional-Chinese",
    "繁體和": "Traditional-Chinese",
    "簡體": "Simplified-Chinese",
    "繁體和簡體": ["Traditional-Chinese", "Simplified-Chinese"],
    "漢語拼音": "Pinyin",
    "粵拼": "Jyutping",
    "現代標準漢語": "Standard-Chinese",
    "文言文": "Classical-Chinese",
    "官話白話文": "Written-vernacular-Chinese",
    "粵語": "Cantonese",
    "吳語": "Wu",
    "廣州話": "Cantonese",
    "臺灣華語": "Taiwanese-Mandarin",
}

481 

482# classifier tags 

483# https://zh.wiktionary.org/wiki/Template:zh-mw 

484# https://zh.wiktionary.org/wiki/Module:Zh/templates 

# Chinese names of Chinese language varieties mapped to English tags
# (used for classifier tags, per the comment above).
ZH_TAGS: dict[str, str] = {
    "官話": "Mandarin",
    "贛語": "Gan",
    "客家話": "Hakka",
    "晉語": "Jin",
    "閩北語": "Min-Bei",
    "閩東語": "Min-Dong",
    "閩南語": "Min-Nan",
    "潮州話": "Teochew",
    "湘語": "Xiang",
}

496 

497# https://zh.wiktionary.org/wiki/Template:Zh-pron 

498# https://zh.wiktionary.org/wiki/Module:Zh-pron 

# Labels from the Chinese pronunciation template (see links above) mapped
# to English tags: romanization systems, dialect/place names and reading
# qualifiers. A value is either a single tag (str) or a list of tags.
ZH_PRON_TAGS: dict[str, str | list[str]] = {
    "拼音": "Pinyin",
    "注音": "Bopomofo",
    "潮州話拼音": "Peng'im",
    "上海": "Shanghai",
    "吳語學堂拼音": "Wugniu",
    "通用拼音": "Tongyong-Pinyin",
    "威妥瑪拼音": "Wade–Giles",
    "耶魯官話拼音": "Yale",
    "國語羅馬字": "Gwoyeu-Romatsyh",
    "西里爾字母轉寫": "Cyrillic",
    "西里爾字母": "Cyrillic",
    "漢語國際音標": "Sinological-IPA",
    "耶魯粵拼": ["Yale", "Jyutping"],
    "廣州話拼音": ["Cantonese", "Pinyin"],
    "廣東拼音": "Guangdong-Romanization",
    "國際音標": "IPA",
    "模仿白話字": "POJ",
    "標準粵語": "Standard-Cantonese",
    # NOTE(review): "Hong Kong" contains a space while other multi-word
    # tags in this file are hyphenated -- confirm the expected tag form.
    "廣州–香港話": ["Guangzhou", "Hong Kong"],
    "福州話": "Fuzhou",
    "平話字": "Foochow-Romanized",
    "客家語": "Hakka",
    "白話字": "Phak-fa-su",
    "泉漳話": "Hokkien",
    "泉州": "Quanzhou",
    "廈門": "Xiamen",
    "輕尾聲異讀": "toneless-final-syllable-variant",
    "維基詞典": "Wiktionary-specific",
    "維基詞典拼音": ["Wiktionary-specific", "Pinyin"],
    "維基詞典轉寫": "Wiktionary-specific",
    "成都話": "Chengdu",
    "四川話拼音": ["Sichuanese", "Pinyin"],
    # NOTE(review): "Dongan" may be meant as "Dungan" -- confirm.
    "東干語": "Dongan",
    "台山話": "Taishanese",
    "四縣": "Sixian",
    "長沙話": "Changsha",
    "四川話拉丁化新文字": "Latinxua-Sin-Wenz",
    "台城": "Taicheng",
    "南昌話": "Nanchang",
    "四縣話": "Sixian",
    "苗栗": "Miaoli",
    # NOTE(review): the key reads "美濃" but the tag is "Neipu" -- confirm
    # this mapping is intentional.
    "美濃": "Neipu",
    "客家語拼音": "Hakka-Romanization-System",
    "客家話拼音方案": "Hagfa-Pinyim",
    "太原話": "Taiyuan",
    "老派": "dated",
    "新加坡": "Singapore",
    "臺羅": "Tâi-lô",
    "普實台文": "Phofsit-Daibuun",
    "太湖片": "Northern",
    "吳音小字典": "MiniDict",
    "維基詞典羅馬化": ["Wiktionary-specific", "romanization"],
    "上海話": "Shanghai",
    "中古漢語": "Middle-Chinese",
    "莆仙語": "Puxian-Min",
    "莆仙話拼音": "Pouseng-Ping'ing",
    "莆田": "Putian",
    "仙遊": "Xianyou",
    "漳州": "Zhangzhou",
    # NOTE(review): spelled "Taibei" rather than "Taipei" -- confirm.
    "臺北": "Taibei",
    "高雄": "Kaohsiung",
    "實際讀音": "phonetic",
    "臺灣話": "Taiwanese",
    "常用": "general",
    "檳城": "Penang",
    "兒化": "Erhua",
    "文讀": "literary",
    "中國大陸標準讀法": ["Mainland-China", "standard"],
    "臺灣異讀法": ["Taiwan", "variant"],
    "中國大陸與臺灣標準讀法": ["Mainland-China", "Taiwan", "standard"],
    "異讀": "variant",
    "上古": "Old-Chinese",
    "白–沙": "Baxter–Sagart",
    "鄭張": "Zhengzhang",
}

575 

# Chinese dialect-group and place names mapped to English tags.
ZH_DIAL_TAGS: dict[str, str] = {
    "白話文": "Written-vernacular-Chinese",
    "北京": "Beijing",
    "燕京官話": "Northeastern-Mandarin",
    "冀魯官話": "Jilu-Mandarin",
    "膠遼官話": "Jiaoliao-Mandarin",
    "中原官話": "Central-Plains-Mandarin",
    "蘭銀官話": "Lanyin-Mandarin",
    "西南官話": "Southwestern-Mandarin",
    "江淮官話": "Jianghuai-Mandarin",
    "徽語": "Huizhou",
    "南部平話": "Southern-Pinghua",
    "濟南": "Jinan",
    "臺灣": "Taiwan",
}

591 

# Chinese labels mapped to English tags; judging by the name and keys
# such as "泰語羅馬化", these are Thai pronunciation labels.
TH_PRON_TAGS: dict[str, str] = {
    "寫法": "orthographic",
    "音素": "phoneme",
    "泰語羅馬化": "romanization",
    "派汶拼音": "Paiboon",
    "皇家轉寫": "Royal-Institute",
    "非正字法": "unorthographical",
    "短音": "short",
}

601 

602 

# Lookup table used by translate_raw_tags(): the union of every tag table
# above plus a few template-specific entries. A value is either a single
# tag (str) or a list of tags. When a key occurs in several sources, the
# one listed later here takes precedence.
ALL_TAGS: dict[str, str | list[str]] = {
    **GRAMMATICAL_TAGS,
    **LABEL_TAGS,
    **ZH_X_TAGS,
    **ZH_TAGS,
    **ZH_PRON_TAGS,
    **ZH_DIAL_TAGS,
    **TH_PRON_TAGS,
    # Template:vi-ipa
    "河內": "Hà-Nội",
    "順化": "Huế",
    "胡志明市": "Saigon",
    # Template:hi-ipa
    "德里": "Delhi",
    # Template:hi-noun
    "烏爾都文拼寫": "Urdu",
    # Template:sa-ipa
    "吠陀": "Vedic",
    "古典梵語": "Classical-Sanskrit",
    # Template:ko-ipa
    "韓國標準語": "SK-Standard",
    "首爾": "Seoul",
    "首尔": "Seoul",
    "國語羅馬字(轉寫)": ["revised-romanization", "transliteration"],
    "馬科恩-賴肖爾式": "McCune–Reischauer",
    "耶魯拼音": ["Yale", "romanization"],
}

630 

631 

def translate_raw_tags(data: WordEntry) -> WordEntry:
    """Translate the raw tags of ``data`` into English tags and topics.

    Each entry of ``data.raw_tags`` is handled as follows:

    * If it is a key of ``ALL_TAGS``, the translated tag (or each tag of a
      list-valued translation) is added to ``data.tags``.
    * Otherwise, if it is a key of ``LABEL_TOPICS`` and ``data`` has a
      ``topics`` attribute, the topic is added to ``data.topics``.
    * Otherwise it is kept as an untranslated raw tag.

    Tags, topics and remaining raw tags are only added when not already
    present, so no branch introduces duplicates.

    Args:
        data: Object with ``raw_tags`` and ``tags`` lists (and optionally
            a ``topics`` list). It is modified in place.

    Returns:
        The same ``data`` object, with ``raw_tags`` replaced by the list
        of raw tags that could not be translated.
    """
    untranslated = []
    for raw_tag in data.raw_tags:
        if raw_tag in ALL_TAGS:
            translated = ALL_TAGS[raw_tag]
            # Normalise to a list so str and list values share one
            # de-duplicating code path.
            if isinstance(translated, str):
                translated = [translated]
            for tag in translated:
                if tag not in data.tags:
                    data.tags.append(tag)
        elif raw_tag in LABEL_TOPICS and hasattr(data, "topics"):
            topic = LABEL_TOPICS[raw_tag]
            if topic not in data.topics:
                data.topics.append(topic)
        elif raw_tag not in untranslated:
            untranslated.append(raw_tag)
    data.raw_tags = untranslated
    return data

647 

648 

649# https://zh.wiktionary.org/wiki/Template:T 

650# https://zh.wiktionary.org/wiki/Template:Head 

651# https://zh.wiktionary.org/wiki/Module:Gender_and_number 

# Short gender/number/aspect codes (see the template and module links
# above) mapped to English tags. A value is either a single tag (str) or
# a list of tags.
TEMPLATE_TAG_ARGS: dict[str, str | list[str]] = {
    "f": "feminine",
    "m": "masculine",
    "n": "neuter",
    "c": "common",
    # Animacy
    "an": "animate",
    "in": "inanimate",
    # Animal (for Ukrainian, Belarusian, Polish)
    "anml": "animal",
    # Personal (for Ukrainian, Belarusian, Polish)
    "pr": "personal",
    # Nonpersonal not currently used
    "np": "nonpersonal",
    # Virility (for Polish)
    "vr": "virile",
    "nv": "nonvirile",
    # Numbers
    # NOTE(review): these three values contain a space and the word
    # "number", unlike the "singular"/"plural" tags used in the other
    # tables of this file -- confirm this is intended.
    "s": "singular number",
    "d": "dual number",
    "p": "plural number",
    # Verb qualifiers
    "impf": "imperfective",
    "pf": "perfective",
    "mf": ["masculine", "feminine"],
}

677}