Coverage for src / wiktextract / extractor / zh / tags.py: 98%
35 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
1from .models import WordEntry
2from .topics import LABEL_TOPICS
# Grammatical gender labels -> gender tag.
# Both the traditional ("陰性") and simplified ("阴性") spelling of
# "feminine" are listed.
GENDER_TAGS: dict[str, str] = {
    "陰性": "feminine",
    "阴性": "feminine",
    "陰性形式": "feminine",
    "陰性等價詞": "feminine",
    "陽性": "masculine",
    "陽性形式": "masculine",
    "中性": "neuter",
    "中性形式": "neuter",
}
15NUMBER_TAGS: dict[str, str | list[str]] = {
16 "單數": "singular",
17 "单数": "singular",
18 "複數": "plural",
19 "复数": "plural",
20 "定單數": ["definite", "singular"],
21 "定单数": ["definite", "singular"],
22 "不定單數": ["indefinite", "singular"],
23 "不定单数": ["indefinite", "singular"],
24 "不定複數": ["indefinite", "plural"],
25 "不定复数": ["indefinite", "plural"],
26 "定複數": ["definite", "plural"],
27 "斜格複數": ["oblique", "plural"],
28 "主格單數": ["nominative", "singular"],
29 "主格複數": ["nominative", "plural"],
30 "屬格單數": ["genitive", "singular"],
31 "屬格複數": ["genitive", "plural"],
32 "陰性單數": ["feminine", "singular"],
33 "陽性單數": ["masculine", "singular"],
34 "陰性複數": ["feminine", "plural"],
35 "陽性複數": ["masculine", "plural"],
36 "中性複數": ["neuter", "plural"],
37 "中性單數": ["neuter", "singular"],
38 "賓格單數": ["accusative", "singular"],
39 "賓格複數": ["accusative", "plural"],
40 "無複數": "no-plural",
41 # Template:nb-noun-m1
42 "定指單數": ["definite", "singular"],
43 "定指複數": ["definite", "plural"],
44 "複": "plural",
45 "單": "singular",
46}
# Countability labels -> tag.
# https://en.wikipedia.org/wiki/Count_noun
COUNT_TAGS: dict[str, str] = {
    "可數": "countable",
    "不可數": "uncountable",
}
54OTHER_TAGS: dict[str, str] = {
55 "指小詞": "diminutive",
56 "指小": "diminutive",
57 "變格類型": "declension-pattern-of",
58 "屬格": "genitive",
59 "部分格": "partitive",
60 "個人": "person",
61 "無屈折": "indeclinable",
62 "諺文": "hangeul",
63 "漢字": "hanja",
64 # Template:cs-proper noun
65 "相關形容詞": ["relational", "adjective"],
66 "關係形容詞": ["relational", "adjective"],
67 "居民稱謂詞": "demonym",
68 "女性居民稱謂詞": ["feminine", "demonym"],
69 "定賓格": ["definite", "accusative"],
70 "定宾格": ["definite", "accusative"],
71 "拉丁字母拼寫": "romanization",
72 "定指賓格": ["definite", "accusative"],
73 "前元音和諧變體": "front-vowel-harmony",
74 # Template:zh-forms
75 "正體": "Standard-Chinese",
76 "繁體": "Traditional-Chinese",
77 "簡體": "Simplified-Chinese",
78 "異體": "alternative",
79 "仿譯詞": "calque",
80 "貶稱詞": "pejorative",
81 # Template:ms-noun
82 "爪夷文": "Jawi",
83 "非正式第一人稱屬格": ["informal", "first-person", "genitive"],
84 "不禮貌第二人稱屬格": ["impolite", "second-person", "genitive"],
85 "第三人稱屬格": ["third-person", "genitive"],
86 "印尼語": "Indonesian",
87 "姓氏": "surname",
88}
90VERB_TAGS: dict[str, str] = {
91 "及物": "transitive",
92 "不及物": "intransitive",
93 "动宾结构": "verb-object",
94 "非完": "imperfective",
95 "完": "perfective",
96 "強變化": "strong",
97 "動名詞": "supine",
98 "命令式": "imperative",
99 # Template:ar-verb
100 "第I類": "form-i",
101 "第II類": "form-ii",
102 "第III類": "form-iii",
103 "第IV類": "form-iv",
104 "第V類": "form-v",
105 "第VI類": "form-vi",
106 "第VII類": "form-vii",
107 "第IX類": "form-ix",
108 "第X類": "form-x",
109 "第XII類": "form-xii",
110 "第XIII類": "form-xiii",
111 "第XIV類": "form-xiv",
112 "第XV類": "form-xv",
113 "第Iq類": "form-iq",
114 "第IIq類": "form-iiq",
115 "第IIIq類": "form-iiiq",
116 "第IVq類": "form-ivq",
117 # Module:Jpan-headword
118 "他動詞": "transitive",
119 "自動詞": "intransitive",
120 "自、他動詞": ["transitive", "intransitive"],
121 "五段": "godan",
122 "一段": "ichidan",
123 "サ行": "suru",
124 "カ行": "kuru",
125 "イ形": "-i",
126 "ナ形": "-na",
127 "四段": "yodan",
128 "二段": "nidan",
129 "ラ行": "-ri",
130 "ナリ": "-nari",
131 "タリ": "-tari",
132 # Template:ja-conj-bungo
133 "語幹形態": "stem",
134 "對比連接詞": ["contrastive", "conjunction"],
135 "因果連接詞": ["causative", "conjunction"],
136 "條件連接詞": ["conditional", "conjunction"],
137 "過去式 (第一手消息)": "past",
138 "過去式 (第二手消息)": "past",
139 "完成式 (自覺動作)": "perfect",
140 "完成式 (自然事件)": ["perfect", "natural"],
141 "完成進行式": ["perfect", "continuative"],
142 "意志形": "volitional",
143 "副詞": "adverbial",
144 "無助動詞": "no-auxiliary",
145 "有助動詞": "auxiliary",
146 # Template:ja-na
147 "簡體否定": ["informal", "negative"],
148 "簡體過去": ["informal", "past"],
149 "簡體過去否定": ["informal", "past", "negative"],
150 "敬體": "formal",
151 "敬體否定": ["formal", "negative"],
152 "敬體過去": ["formal", "past"],
153 "敬體過去否定": ["formal", "past", "negative"],
154 "連接形": "conjunctive",
155 "暫定形": "provisional",
156 "推量形": "volitional",
157 "連用形": "adverbial",
158 "程度形": "degree",
159 # Template:ko-verb
160 "不定式": "infinitive",
161 "連續式": "sequential",
162}
# Japanese stem form names -> tag.
# https://en.wikipedia.org/wiki/Japanese_grammar#Stem_forms
JA_STEM_FORMS: dict[str, str] = {
    "未然形": "imperfective",
    "連用形": "continuative",
    "終止形": "terminal",
    "連體形": "attributive",
    "連体形": "attributive",
    "假定形": "hypothetical",
    "仮定形": "hypothetical",
    "命令形": "imperative",
    "已然形": "realis",
}
177# https://en.wikipedia.org/wiki/Voice_(grammar)
178VOICE_TAGS: dict[str, str | list[str]] = {
179 "被動形": "passive",
180 "使役形": "causative",
181 "可能形": "potential",
182 "意志形": "volitional",
183 "否定形": "negative",
184 "否定連用形": ["negative", "continuative"],
185 "尊敬形": "formal",
186 "完成形": "perfective",
187 "接續形": "conjunctive",
188 "條件形": ["hypothetical", "conditional"],
189}
# Degree-of-comparison labels -> tag.
COMPARISON_TAGS: dict[str, str] = {
    # https://en.wikipedia.org/wiki/Comparison_(grammar)
    "原级": "positive",
    "比較級": "comparative",
    "最高級": "superlative",
}
198TENSE_TAGS = {
199 "過去時": "preterite",
200 "過去式": "past",
201 "過去分詞": ["past", "participle"],
202 "現在時": "present",
203 "第三人稱單數現在時": ["third-person", "singular", "present"],
204 "助動詞": "auxiliary",
205 # Template:de-verb
206 "弱變化": "weak",
207 "弱变化": "weak",
208 "第三人稱單數簡單現在時": ["third-person", "singular", "present"],
209 "現在分詞": ["present", "participle"],
210 "一般過去時及過去分詞": ["past", "participle"],
211 # Template:it-verb
212 "第一人稱單數 現在時": ["first-person", "singular", "present"],
213 "第一人稱單數 先過去時": ["first-person", "singular", "past", "historic"],
214 # Template:de-adj
215 "強變化主格陽性單數": ["strong", "nominative", "masculine", "singular"],
216 # Template:la-verb
217 "现在时不定式": ["present", "infinitive"],
218 "完成时主动式": ["perfect", "active"],
219 "目的动名词": "supine",
220 # Template:ar-verb
221 "非過去時": "non-past",
222 "動詞性名詞": "noun-from-verb",
223 "主動分詞": ["active", "participle"],
224 "被動分詞": ["passive", "participle"],
225 # Template:it-verb
226 # https://zh.wiktionary.org/wiki/Module:It-headword
227 "第一人稱單數現在時": ["first-person", "singular", "present"],
228 "第一人稱單數先過去時": ["first-person", "singular", "past", "historic"],
229 "第三人稱單數先過去時": ["third-person", "singular", "past", "historic"],
230 "第一人稱單數過去分詞": ["first-person", "singular", "past", "participle"],
231 "第三人稱單數過去分詞": ["third-person", "singular", "past", "participle"],
232 "第一人稱單數未完成時": ["first-person", "singular", "imperfect"],
233 "第三人稱單數未完成時": ["third-person", "singular", "imperfect"],
234 "第一人稱單數未來時": ["first-person", "singular", "future"],
235 "第三人稱單數未來時": ["third-person", "singular", "future"],
236 "第一人稱單數現在時虛擬式": ["first-person", "singular", "subjunctive"],
237 "第三人稱單數現在時虛擬式": ["third-person", "singular", "subjunctive"],
238 "第一人稱單數未完成時虛擬式": [
239 "first-person",
240 "singular",
241 "imperfect",
242 "subjunctive",
243 ],
244 "第三人稱單數未完成時虛擬式": [
245 "third-person",
246 "singular",
247 "imperfect",
248 "subjunctive",
249 ],
250 # Template:es-verb
251 "第一人稱單數過去時": ["first-person", "singular", "past"],
252 # Template:ast-verb-ar
253 "第一人稱單數直陳現在時": [
254 "first-person",
255 "singular",
256 "indicative",
257 "present",
258 ],
259}
# Union of all grammatical tag tables above.
# The tables are unpacked in order, so when a key occurs in more than one
# table the LAST table wins: e.g. "連用形" is "adverbial" in VERB_TAGS but
# "continuative" in JA_STEM_FORMS, and the merged value is "continuative".
# Values are a single tag or a list of tags.
GRAMMATICAL_TAGS: dict[str, str | list[str]] = {
    **GENDER_TAGS,
    **NUMBER_TAGS,
    **COUNT_TAGS,
    **OTHER_TAGS,
    **VERB_TAGS,
    **JA_STEM_FORMS,
    **VOICE_TAGS,
    **COMPARISON_TAGS,
    **TENSE_TAGS,
}
273# https://zh.wiktionary.org/wiki/Template:Label
274# https://zh.wiktionary.org/wiki/Module:Labels/data
275# https://zh.wiktionary.org/wiki/Template:Qualifier
276# https://zh.wiktionary.org/wiki/Template:古
277# https://zh.wiktionary.org/wiki/Template:注释
278LABEL_TAGS = {
279 "棄用": "obsolete",
280 "弃用": "obsolete",
281 "比喻": "figuratively",
282 "古": "archaic",
283 "陽": "masculine",
284 "陰": "feminine",
285 "喻": "figuratively",
286 "書": "literary",
287 "口": "colloquial",
288 "俚": "slang",
289 "俗": "slang",
290 "方": "dialectal",
291 "废": "obsolete",
292 "貶": "derogatory",
293 "罕": "rare",
294 "引": "broadly",
295 "現已罕用": "archaic",
296 # Module:Labels/data
297 "back slang": "slang",
298 "synecdochically": "synecdoche",
299 "不再自由造詞": "idiomatic",
300 "不及物": "intransitive",
301 "不可數": "uncountable",
302 "不定": "indefinite",
303 "不常見": "uncommon",
304 "不推薦使用": "proscribed",
305 "中性": "neuter",
306 "中間被動語態": "mediopassive",
307 "中間語態": "middle",
308 "主動語態": "active",
309 "主要用於否定": ["usually", "with-negation"],
310 "交互": "reciprocal",
311 "以單數形式": "singular",
312 "以複數形式": "in-plural",
313 "作定語": "attributive",
314 "作格": "ergative",
315 "作表語": "predicative",
316 "使役": "causative",
317 "俗語": "idiomatic",
318 "俚語": "slang",
319 "俚语": "slang",
320 "兒童用語": "childish",
321 "公文": "bureaucratese",
322 "冒犯": "offensive",
323 "分詞": "participle",
324 "前古典": "pre-Classical",
325 "助動詞": "auxiliary",
326 "助記符": "mnemonic",
327 "及物": "transitive",
328 "反問句": "rhetoric",
329 "反身": "reflexive",
330 "口語": "colloquial",
331 "口语": "colloquial",
332 "古舊": "archaic",
333 "可數": "countable",
334 "同性戀俚語": ["slang", "LGBT"],
335 "名詞化": "noun-from-verb",
336 "唯單": "singular-only",
337 "唯複": "plural-only",
338 "國際音標": "IPA",
339 "基數詞": "cardinal",
340 "大寫": "capitalized",
341 "委婉": "euphemistic",
342 "字面義": "literally",
343 "完整": "perfect",
344 "完整體": "perfective",
345 "定語": "attributive",
346 "實詞": "substantive",
347 "尊敬": "honorific",
348 "敬語": "honorific",
349 "敬语": "honorific",
350 "常用複數": "plural-normally",
351 "幽默": "humorous",
352 "序數詞": "ordinal",
353 "廣義來說": "broadly",
354 "引申": "broadly",
355 "弱祈使式": "jussive",
356 "強調": "emphatic",
357 "後古典": "obsolete",
358 "性別中立": "gender-neutral",
359 "情態": "modal",
360 "愛稱": "endearing",
361 "所有格代詞": ["possessive", "pronoun", "without-noun"],
362 "押韻俚語": "slang",
363 "抽象名詞": "abstract-noun",
364 "擬態詞": "ideophonic",
365 "擬聲詞": "onomatopoeic",
366 "新詞": "neologism",
367 "方言": "dialectal",
368 "書面": "literary",
369 "书面": "literary",
370 "有比較級": "comparable",
371 "有生": "animate",
372 "正式": "formal",
373 "歷史": "historical",
374 "比喻義": "figuratively",
375 "無人稱": "impersonal",
376 "無比較級": "not-comparable",
377 "無生": "inanimate",
378 "焦點": "focus",
379 "狹義": "narrowly",
380 "監獄俚語": "slang",
381 "直陳語氣": "indicative",
382 "短信": "Internet",
383 "祈使語氣": "imperative",
384 "禮貌": "polite",
385 "種族歧視語": "slur",
386 "粉絲用語": ["slang", "lifestyle"],
387 "粗俗": "vulgar",
388 "系動詞": "copulative",
389 "網路用語": "Internet",
390 "縮寫": "abbreviation",
391 "罕用": "rare",
392 "臨時語": "nonce-word",
393 "虛擬語氣": "subjunctive",
394 "表語": "predicative",
395 "被動語態": "passive",
396 "視覺方言": "pronunciation-spelling",
397 "親切": "familiar",
398 "詈語": "expletive",
399 "詩歌": "poetic",
400 "誇飾": "excessive",
401 "語中音省略": "syncope",
402 "諷刺": "sarcastic",
403 "謙遜": "humble",
404 "貶義": "derogatory",
405 "轉喻義": "metonymically",
406 "返璞詞": "retronym",
407 "過時": "dated",
408 "陰性": "feminine",
409 "陽性": "masculine",
410 "雙及物動詞": "ditransitive",
411 "靜態動詞": "stative",
412 "非完整": "imperfect",
413 "非完整體": "imperfective",
414 "非常罕用": "rare",
415 "非標準": "nonstandard",
416 "非标准": "nonstandard",
417 "非標準形式": "nonstandard",
418 "非正式": "informal",
419 "首字母縮略詞": "initialism",
420 "駭客語": ["Leet", "Internet"],
421 "高語域": "honorific",
422 "中醫": "Traditional-Chinese-Medicine",
423 "修辭學": "rhetoric",
424 "印度教": "Hinduism",
425 "摩門教": "Mormonism",
426 "物理": "particle",
427 "猶太教": "Judaism",
428 "納粹主義": "Nazism",
429 "網際網路": "Internet",
430 "耆那教": "Jainism",
431 "聖經": "Biblical",
432 "解剖學": "anatomy",
433 "貴格會": "Quakerism",
434 "錫克教": "Sikhism",
435 "馬克思主義": "Marxism",
436 # also from Module:Labels/data, but translated manually
437 "喃字": "Chu-Nom",
438 "反身代詞": "reflexive",
439 "字面意義": "literally",
440 "成語": "Chengyu",
441 "及物、不及物": ["transitive", "intransitive"],
442 "集合名詞": "collective",
443 "控制動詞": "control-verb",
444 "省略": "ellipsis",
445 "分數": "fractional",
446 "以雙數形式": "dual",
447 "主要用於否定複數": ["negative", "plural"],
448 "數詞縮寫": ["numeral", "abbreviation"],
449 "主要用於肯定": "positive",
450 "古典": "Classical",
451 "中國大陸": "Mainland-China",
452 "書面語": "literary",
453 "文言": "literary",
454 "詞幹": "stem",
455 "烏爾都語寫法": "Urdu",
456 "波斯-阿拉伯字母": "Perso-Arabic",
457 # Template:kok-pos
458 "拉丁字母": ["Latin", "character"],
459 "卡納達文": ["Kannada", "character"],
460 "尼瓦爾文拼寫": "Newa",
461}
463# example sentence template
464# https://zh.wiktionary.org/wiki/Template:Zh-x
465# https://zh.wiktionary.org/wiki/Module:Zh-usex/data
466ZH_X_TAGS = {
467 "繁體": "Traditional-Chinese",
468 "繁體和": "Traditional-Chinese",
469 "簡體": "Simplified-Chinese",
470 "繁體和簡體": ["Traditional-Chinese", "Simplified-Chinese"],
471 "漢語拼音": "Pinyin",
472 "粵拼": "Jyutping",
473 "現代標準漢語": "Standard-Chinese",
474 "文言文": "Classical-Chinese",
475 "官話白話文": "Written-vernacular-Chinese",
476 "粵語": "Cantonese",
477 "吳語": "Wu",
478 "廣州話": "Cantonese",
479 "臺灣華語": "Taiwanese-Mandarin",
480}
# classifier tags
# https://zh.wiktionary.org/wiki/Template:zh-mw
# https://zh.wiktionary.org/wiki/Module:Zh/templates
ZH_TAGS: dict[str, str] = {
    "官話": "Mandarin",
    "贛語": "Gan",
    "客家話": "Hakka",
    "晉語": "Jin",
    "閩北語": "Min-Bei",
    "閩東語": "Min-Dong",
    "閩南語": "Min-Nan",
    "潮州話": "Teochew",
    "湘語": "Xiang",
}
497# https://zh.wiktionary.org/wiki/Template:Zh-pron
498# https://zh.wiktionary.org/wiki/Module:Zh-pron
499ZH_PRON_TAGS = {
500 "拼音": "Pinyin",
501 "注音": "Bopomofo",
502 "潮州話拼音": "Peng'im",
503 "上海": "Shanghai",
504 "吳語學堂拼音": "Wugniu",
505 "通用拼音": "Tongyong-Pinyin",
506 "威妥瑪拼音": "Wade–Giles",
507 "耶魯官話拼音": "Yale",
508 "國語羅馬字": "Gwoyeu-Romatsyh",
509 "西里爾字母轉寫": "Cyrillic",
510 "西里爾字母": "Cyrillic",
511 "漢語國際音標": "Sinological-IPA",
512 "耶魯粵拼": ["Yale", "Jyutping"],
513 "廣州話拼音": ["Cantonese", "Pinyin"],
514 "廣東拼音": "Guangdong-Romanization",
515 "國際音標": "IPA",
516 "模仿白話字": "POJ",
517 "標準粵語": "Standard-Cantonese",
518 "廣州–香港話": ["Guangzhou", "Hong Kong"],
519 "福州話": "Fuzhou",
520 "平話字": "Foochow-Romanized",
521 "客家語": "Hakka",
522 "白話字": "Phak-fa-su",
523 "泉漳話": "Hokkien",
524 "泉州": "Quanzhou",
525 "廈門": "Xiamen",
526 "輕尾聲異讀": "toneless-final-syllable-variant",
527 "維基詞典": "Wiktionary-specific",
528 "維基詞典拼音": ["Wiktionary-specific", "Pinyin"],
529 "維基詞典轉寫": "Wiktionary-specific",
530 "成都話": "Chengdu",
531 "四川話拼音": ["Sichuanese", "Pinyin"],
532 "東干語": "Dongan",
533 "台山話": "Taishanese",
534 "四縣": "Sixian",
535 "長沙話": "Changsha",
536 "四川話拉丁化新文字": "Latinxua-Sin-Wenz",
537 "台城": "Taicheng",
538 "南昌話": "Nanchang",
539 "四縣話": "Sixian",
540 "苗栗": "Miaoli",
541 "美濃": "Neipu",
542 "客家語拼音": "Hakka-Romanization-System",
543 "客家話拼音方案": "Hagfa-Pinyim",
544 "太原話": "Taiyuan",
545 "老派": "dated",
546 "新加坡": "Singapore",
547 "臺羅": "Tâi-lô",
548 "普實台文": "Phofsit-Daibuun",
549 "太湖片": "Northern",
550 "吳音小字典": "MiniDict",
551 "維基詞典羅馬化": ["Wiktionary-specific", "romanization"],
552 "上海話": "Shanghai",
553 "中古漢語": "Middle-Chinese",
554 "莆仙語": "Puxian-Min",
555 "莆仙話拼音": "Pouseng-Ping'ing",
556 "莆田": "Putian",
557 "仙遊": "Xianyou",
558 "漳州": "Zhangzhou",
559 "臺北": "Taibei",
560 "高雄": "Kaohsiung",
561 "實際讀音": "phonetic",
562 "臺灣話": "Taiwanese",
563 "常用": "general",
564 "檳城": "Penang",
565 "兒化": "Erhua",
566 "文讀": "literary",
567 "中國大陸標準讀法": ["Mainland-China", "standard"],
568 "臺灣異讀法": ["Taiwan", "variant"],
569 "中國大陸與臺灣標準讀法": ["Mainland-China", "Taiwan", "standard"],
570 "異讀": "variant",
571 "上古": "Old-Chinese",
572 "白–沙": "Baxter–Sagart",
573 "鄭張": "Zhengzhang",
574}
# Chinese dialect-group and place labels -> tag.
ZH_DIAL_TAGS: dict[str, str] = {
    "白話文": "Written-vernacular-Chinese",
    "北京": "Beijing",
    "燕京官話": "Northeastern-Mandarin",
    "冀魯官話": "Jilu-Mandarin",
    "膠遼官話": "Jiaoliao-Mandarin",
    "中原官話": "Central-Plains-Mandarin",
    "蘭銀官話": "Lanyin-Mandarin",
    "西南官話": "Southwestern-Mandarin",
    "江淮官話": "Jianghuai-Mandarin",
    "徽語": "Huizhou",
    "南部平話": "Southern-Pinghua",
    "濟南": "Jinan",
    "臺灣": "Taiwan",
}
# Thai pronunciation-table labels -> tag.
TH_PRON_TAGS: dict[str, str] = {
    "寫法": "orthographic",
    "音素": "phoneme",
    "泰語羅馬化": "romanization",
    "派汶拼音": "Paiboon",
    "皇家轉寫": "Royal-Institute",
    "非正字法": "unorthographical",
    "短音": "short",
}
# Lookup table used by translate_raw_tags(): every tag table in this module
# merged into one dict, plus a few template-specific pronunciation labels.
# Entries are unpacked in order, so on a key collision the later entry wins.
ALL_TAGS: dict[str, str | list[str]] = {
    **GRAMMATICAL_TAGS,
    **LABEL_TAGS,
    **ZH_X_TAGS,
    **ZH_TAGS,
    **ZH_PRON_TAGS,
    **ZH_DIAL_TAGS,
    **TH_PRON_TAGS,
    # Template:vi-ipa
    "河內": "Hà-Nội",
    "順化": "Huế",
    "胡志明市": "Saigon",
    # Template:hi-ipa
    "德里": "Delhi",
    # Template:hi-noun
    "烏爾都文拼寫": "Urdu",
    # Template:sa-ipa
    "吠陀": "Vedic",
    "古典梵語": "Classical-Sanskrit",
    # Template:ko-ipa
    "韓國標準語": "SK-Standard",
    "首爾": "Seoul",
    "首尔": "Seoul",
    "國語羅馬字(轉寫)": ["revised-romanization", "transliteration"],
    "馬科恩-賴肖爾式": "McCune–Reischauer",
    "耶魯拼音": ["Yale", "romanization"],
}
def translate_raw_tags(data: WordEntry) -> WordEntry:
    """Translate the raw (Chinese) tags of ``data`` into tags and topics.

    Each entry of ``data.raw_tags`` is handled as follows:

    * found in ``ALL_TAGS``: its translation (a single tag or a list of
      tags) is added to ``data.tags``, skipping tags already present;
    * otherwise, found in ``LABEL_TOPICS`` and ``data`` has a ``topics``
      attribute: the mapped topic is appended to ``data.topics``;
    * otherwise: the raw tag is kept, once, in ``data.raw_tags``.

    ``data`` is modified in place and also returned.
    """
    untranslated = []
    for raw_tag in data.raw_tags:
        if raw_tag in ALL_TAGS:
            tr_tag = ALL_TAGS[raw_tag]
            if isinstance(tr_tag, str):
                new_tags = [tr_tag]
            elif isinstance(tr_tag, list):
                new_tags = tr_tag
            else:
                new_tags = []
            # De-duplicate for both single and multi-tag translations so
            # that overlapping raw tags (e.g. "feminine" and
            # "feminine singular") do not produce repeated tags.
            for tag in new_tags:
                if tag not in data.tags:
                    data.tags.append(tag)
        elif raw_tag in LABEL_TOPICS and hasattr(data, "topics"):
            data.topics.append(LABEL_TOPICS[raw_tag])
        elif raw_tag not in untranslated:
            untranslated.append(raw_tag)
    data.raw_tags = untranslated
    return data
649# https://zh.wiktionary.org/wiki/Template:T
650# https://zh.wiktionary.org/wiki/Template:Head
651# https://zh.wiktionary.org/wiki/Module:Gender_and_number
652TEMPLATE_TAG_ARGS = {
653 "f": "feminine",
654 "m": "masculine",
655 "n": "neuter",
656 "c": "common",
657 # Animacy
658 "an": "animate",
659 "in": "inanimate",
660 # Animal (for Ukrainian, Belarusian, Polish)
661 "anml": "animal",
662 # Personal (for Ukrainian, Belarusian, Polish)
663 "pr": "personal",
664 # Nonpersonal not currently used
665 "np": "nonpersonal",
666 # Virility (for Polish)
667 "vr": "virile",
668 "nv": "nonvirile",
669 # Numbers
670 "s": "singular number",
671 "d": "dual number",
672 "p": "plural number",
673 # Verb qualifiers
674 "impf": "imperfective",
675 "pf": "perfective",
676 "mf": ["masculine", "feminine"],
677}