Coverage for src / wiktextract / extractor / ja / tags.py: 95%
24 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-02 00:27 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-02 00:27 +0000
# Maps a Japanese Wiktionary label (as it appears in ``raw_tags``) to its
# English tag.  A value is either a single tag string or a list of tag
# strings; ``add_tag`` appends a string as ONE tag, so a value that stands for
# several tags must be written as a list, never as a space-separated string.
TAGS = {
    "男性": "masculine",
    "女性": "feminine",
    "通性": "common",
    "中性": "neuter",
    "単数": "singular",
    "複数": "plural",
    "不変": "invariable",
    "男性複数": ["masculine", "plural"],
    "女性複数": ["feminine", "plural"],
    # Template:context/data (テンプレート:context/data)
    "くだけた表現": "informal",
    "しばしば": "often",
    "まれ": "rare",
    "アイルランド": "Ireland",
    "アフリカ": "Africa",
    "アメカメカ": "Amecameca",
    "アメリカ合衆国": "US",
    "アルスター": "Ulster",
    "アルゼンチン": "Argentina",
    "アングロ・ノルマン": "Anglo-Norman",
    "アンダルシア": "Andalusia",
    "イェ方言": "Ijekavian",
    "イギリス": "UK",
    "イラン": "Iran",
    "イロン方言": "Iron",
    "インターネット": "Internet",
    "インターネットスラング": "Internet",
    "ウクライナ": "Ukraine",
    "ウルグアイ": "Uruguay",
    "ウーリ語": "Uri",
    "エクアドル": "Ecuador",
    "エ方言": "Ekavian",
    "オノマトペ": "onomatopoeic",
    "オルムルム": "Ormulum",
    "オークニー": "Orkney",
    "オーストリア": "Austrian",
    "カイピラ方言": "Caipira",
    "カイ方言": "Kajkavian",
    "カサレヴサ": "Katharevousa",
    "カナダ": "Canada",
    "カルコーフォロ語": "Carcoforo",
    "ガスコーニュ": "Gascony",
    "ガーンジー": "Guernsey",
    "キューバ": "Cuba",
    "キリスト教": "Christian",
    "クロアチア": "Croatian",
    "グアテマラ": "Guatemala",
    "グレッソネイ語": "Gressoney",
    "ケニア": "Kenya",
    "ケベック": "Quebec",
    "ケルン語": "Kölsch",
    "ケント": "Kentish",
    "コイネー": "Koine",
    "コノート": "Connacht",
    "コロンビア": "Colombia",
    "コンゴ": "Congo",
    "サカティアングイス": "Zacatianguis",
    "サーク": "Sark",
    "シェットランド": "Shetland",
    "シク教": "Sikhism",
    "シンガポール": "Singapore",
    "ジャイナ教": "Jainism",
    "ジャマイカ": "Jamaica",
    "ジャージー": "Jersey",
    "ジンバブエ": "Zimbabwe",
    "スイス": "Switzerland",
    "スコットランド": "Scotland",
    "ストゥシルヴァン": "Sutsilvan",
    "スペイン": "Spain",
    "スルシルヴァン": "Sursilvan",
    "スルミラン": "Surmiran",
    "セルビア": "Serbian",
    "セルリング": "Sylt",
    "タイ英語": "Thailand",
    "タラシケヴィツァ": "Taraškievica",
    "タントユカ": "Tantoyuca",
    "ダリー語": "Dari",
    "チコナメル": "Chiconamel",
    "チコンテペク": "Chicontepec",
    "チャ方言": "Chakavian",
    "チリ": "Chile",
    "ティマウ": "Timau",
    "テペツィントラ": "Tepetzintla",
    "テマパチェ": "Temapache",
    "ディゴル方言": "Digor",
    "デモティキ": "Demotic",
    "トスカナ語": "Tuscany",
    "ドイツ": "Germany",
    "ドイツ南部": "Southern-Germany",
    "ナチズム": "Nazism",
    "ナミビア": "Namibia",
    "ニカラグア": "Nicaragua",
    "ニューカッスル": "Tyneside",
    "ニュージーランド": "New-Zealand",
    "ヌオロ": "Nuorese",
    "バレンシア": "Valencia",
    "パナマ": "Panama",
    "ヒンズー教": "Hinduism",
    "ビザンツ": "Byzantine",
    "ビバロ・アルピーネ語": "Vivaro-Alpine",
    "フィリピン": "Philippines",
    "フェリング・エームラング": "Föhr-Amrum",
    "フォルマッツァ語": "Formazza",
    "フランス": "French",
    "ブラジル": "Brazil",
    "プロテスタント": "Protestant",
    "プロヴァンス": "Provençal",
    "プーター": "Puter",
    "ヘルゴラント": "Helgoland",
    "ベネズエラ": "Venezuela",
    "ベルギー": "Belgium",
    "ペルー": "Peru",
    "ホンジュラス": "Honduras",
    "ボスニア": "Bosnian",
    "ボリビア": "Bolivia",
    "ポルトガル": "Portugal",
    "マンスター": "Munster",
    "マーシア": "Mercian",
    "ミストラル式綴り": "Mistralian",
    "メキシコ": "Mexico",
    "モンテネグロ": "Montenegro",
    "モーリング": "Mooring",
    "ユダヤ教": "Judaism",
    "ヨーロッパ": "Europe",
    "ラングドック": "Languedoc",
    "リヒテンシュタイン": "Liechtenstein",
    "リプアーリ語": "Ripuarian",
    "リムーザン": "Limousin",
    "リメッラ語": "Rimella",
    "ルイジアナ": "Louisiana",
    "ルゼルナ": "Luserna",
    "ログドーロ": "Logudorese",
    "ロシア": "Russia",
    "ヴァラダール": "Vallander",
    "ヴィーディングハルデ": "Wiedingharde",
    "不可算": "uncountable",
    "不変化名詞": "indeclinable",
    "不活動体": "inanimate",
    "他動詞": "transitive",
    "代名詞的用法": "pronominal",
    "俗語": "slang",
    "修辞学": "rhetoric",
    "倒語": "slang",
    "再帰動詞": "reflexive",
    "初期中英語": "Early-Middle-English",
    "助動詞": "auxiliary",
    "卑語": "vulgar",
    "単数形で": "singular",
    # Was one space-separated string, which produced a single malformed tag.
    "単数形のみ": ["singular-only", "singular"],
    "印": "India",
    "叙法": "modal",
    "叙述用法のみ": "predicative",
    "口語": "informal",
    "古用法": "dated",
    "古語・廃語": "archaic",
    "可算": "countable",
    "地名": "place",
    "地域": "regional",
    "基数": "cardinal",
    "多文化的ロンドン英語": "Multicultural-London-English",
    "婉曲表現": "euphemistic",
    "幼児語": "childish",
    "序数": "ordinal",
    "廃語": "obsolete",
    "強い": "strong",
    "形容詞的": "attributive",
    "後期中英語": "Late-Middle-English",
    "恐らく": "possibly",
    "慣用的表現": "idiomatic",
    "排斥された語": "proscribed",
    "控えめに": "mildly",
    "換喩的に": "metonymically",
    "文章語": "literary",
    "方言": "dialectal",
    "時々": "sometimes",
    "欠如動詞": "defective",
    "正式・堅": "formal",
    "歴史": "historical",
    "比喩": "figuratively",
    "比喩的に": "figuratively",
    "比較形有り": "comparable",
    "比較形無し": "not-comparable",
    "活動体": "animate",
    "滑稽": "humorous",
    "特に": "especially",
    "状態動詞": "stative",
    "略語": "abbreviation",
    "疑問詞": "interrogative",
    "皮肉": "ironic",
    "破格": "nonstandard",
    "筋肉": "anatomy",
    "米語": "US",
    # Both were space-separated strings; split into proper tag lists.
    "絶対単数": ["singular-only", "singular"],
    "絶対複数": ["plural-only", "plural"],
    "能格動詞": "ergative",
    "自他動詞": "ambitransitive",
    "自動詞": "intransitive",
    "英連邦": "Commonwealth",
    "蔑称": "offensive",
    "複合語で": "in-compounds",
    "複数形で": "plural",
    "西部": "Western",
    "視覚方言": "pronunciation-spelling",
    "詩的表現": "poetic",
    "豪": "Australian",
    "転じて": "broadly",
    "軽侮語": "pejorative",
    # 近代ラテン語 is "New Latin"; it was previously mapped to "Netherlands".
    "近代ラテン語": "New-Latin",
    "逐語的に": "literally",
    "通常": "usually",
    "通常複数形で": "plural-normally",
    "造語": "neologism",
    "関係詞": "relative",
    "限定": "definite",
    "集合名詞": "collective",
    "集合的に": "collective",
    "非人称": "impersonal",
    "人称": "personal",
    "非標準": "uncommon",
    "頭字語": "initialism",
    # NOTE(review): 首都 means "capital city"; "uppercase" looks like a
    # mistranslation of "capital" - confirm the intended tag.
    "首都": "uppercase",
    # "en-verb" template
    "三単現": ["third-person", "singular", "present"],
    "現在分詞": ["present", "participle"],
    "過去形": "past",
    "過去分詞": ["past", "participle"],
    "繁": "Traditional-Chinese",
    "簡": "Simplified-Chinese",
    # zh sound
    "標準中国語": "Standard-Chinese",
    "ピンイン": "Pinyin",
    "注音符号": "Bopomofo",
    "ウェード式": "Wade–Giles",
    "IPA": "IPA",
    "広東語": "Cantonese",
    "改イェール式": ["Yale", "romanization", "Cantonese"],
    "イェール式": "Yale",
    "粤拼": "Jyutping",
    "教院式": ["ILE", "romanization", "Cantonese"],
    "広東拼音方案": "Guangdong-Romanization",
    "台山語": "Taishanese",
    "閩南語": "Min-Nan",
    "漳州": "Zhangzhou",
    "漳浦": "Zhangpu",
    "高雄": "Kaohsiung",
    "ペナン州": "Penang",
    "白話字": "POJ",
    "台湾ローマ字": "Tai-lo",
    "普実台文": "Phofsit-Daibuun",
    "廈門": "Xiamen",
    "泉州": "Quanzhou",
    "台北": "Taipei",
    "潮州語": "Teochew",
    "莆仙語": "Puxian-Min",
    "客家語": "Hakka",
    "呉語": "Wu",
    "晋語": "Jin",
    "ドンガン語": "Dungan",
    # Module:gender and number (モジュール:gender and number)
    "非有生": "inanimate",
    "有生": "animate",
    "男性人間": "virile",
    "非男性人間": "nonvirile",
    # Template:ru-noun+
    "生格": "genitive",
    "複数主格": ["nominative", "plural"],
    "複数生格": ["genitive", "plural"],
    "形容詞": ["relational", "adjective"],
    "指小形": "diminutive",
    "不完了体": "imperfective",
    "完了体": "perfective",
    # Template:日本語ダ活用, Template:日本語サ変活用
    # (Japanese "da" and "sa-irregular" conjugation tables)
    "未然形": "imperfective",
    "連用形": "continuative",
    "終止形": "terminal",
    "連体形": "attributive",
    "仮定形": "hypothetical",
    "命令形": "imperative",
    "命令": "imperative",
    "過去・完了": ["past", "completive"],
    "過去・完了・状態": ["past", "completive"],
    "否定形": "negative",
    "否定": "negative",
    "否定(古風)": ["negative", "archaic"],
    "自動詞化": "intransitive",
    "言い切り": "definitive",
    "名詞化": "noun-from-verb",
    "自発・受身\n可能・尊敬": ["active", "passive", "possibly", "honorific"],
    "丁寧": "polite",
    "東京式": "Tokyo",
    "京阪式": ["Kyoto", "Osaka"],
    "推量・意志": "volitional",
    "様態": "appearance",
    "語幹": "stem",
    # Template:日本語下一段活用 (Japanese lower-monograde conjugation table)
    "意志・勧誘": "volitional",
    "仮定条件": "conditional",
    # Template:hu-noun
    "複数・主格": "plural",
    "・主格": "nominative",
    "単数対格": ["singular", "accusative"],
    "複数対格": ["plural", "accusative"],
    "属格": "genitive",
    "ラテン文字": "romanization",
    "キリル文字": "Cyrillic",
    # Template:de-noun
    "複数形無し": "no-plural",
    "複数形": "plural",
    "指小辞無し": "no-diminutive",
    "指小辞": "diminutive",
    "廃用": "obsolete",
    # Template:ja-noun
    "旧字体": "kyūjitai",
    # Template:ca-verb
    "現在第一人称単数形": ["first-person", "singular", "present"],
    # Template:en-adj
    "比較形なし": "not-comparable",
    "比較級": "comparative",
    "最上級": "superlative",
    "旧綴り": "archaic",
    # Template:古典日本語四段活用 (Classical Japanese quadrigrade conjugation)
    "已然形": "realis",
}
# Maps a Japanese Wiktionary subject label (as it appears in ``raw_tags``) to
# a single English topic string.
TOPICS = {
    # Template:context/data (テンプレート:context/data)
    "BDSM": "BDSM",
    "LGBT": "LGBT",
    "SF": "science-fiction",
    "アイスホッケー": "ice-hockey",
    "アメリカンフットボール": "American-football",
    "アーチェリー": "archery",
    "イスラム教": "Islam",
    "イデオロギー": "ideology",
    "ウイルス学": "virology",
    "エネルギー": "energy",
    "カトリック": "Catholicism",
    "カードゲーム": "cards",
    "カーリング": "curling",
    "キリスト教": "Christianity",
    "クリケット": "cricket",
    "グラフィカルユーザインタフェース": "graphical-user-interface",
    "グラフ理論": "graph-theory",
    "コンピュータグラフィックス": "computer-graphics",
    "ゴルフ": "golf",
    "サイクリング": "cycling",
    "サッカー": "soccer",
    "サーフィン": "surfing",
    "シャンチー": "xiangqi",
    "スカッシュ": "squash",
    "スキー": "skiing",
    "スケート": "skating",
    "スケートボード": "skateboarding",
    "スヌーカー": "snooker",
    "スノーボード": "snowboarding",
    "スポーツ": "sports",
    "ソフトウェア": "software",
    "ソフトボール": "softball",
    "ゾロアスター教": "Zoroastrianism",
    "ダンス": "dance",
    "ダーツ": "darts",
    "チアリーディング": "cheerleading",
    "チェス": "chess",
    "テニス": "tennis",
    "テレビ": "television",
    "ハンドボール": "handball",
    "ハードウェア": "computer-hardware",
    "バスケットボール": "basketball",
    "バレーボール": "volleyball",
    "ビジネス": "business",
    "ビリヤード": "billiards",
    "ファシズム": "fascism",
    "ファッション": "fashion",
    "フェミニズム": "feminism",
    "フェンシング": "fencing",
    "フットボール": "football",
    "ブリッジ": "bridge",
    "プログラミング": "programming",
    "ボウリング": "bowling",
    "ボクシング": "boxing",
    "ボディビル": "bodybuilding",
    "ボート競技": "rowing",
    "ポーカー": "poker",
    "モータースポーツ": "motor-racing",
    "ラクロス": "lacrosse",
    "ラグビー": "rugby",
    "レスリング": "wrestling",
    "交通": "transport",
    "人口学": "demography",
    "仏教": "Buddhism",
    "代数学": "algebra",
    "代数幾何学": "algebraic-geometry",
    "会計": "accounting",
    "体操": "gymnastics",
    "保険": "insurance",
    "倫理学": "ethics",
    "光学": "optics",
    "免疫学": "immunology",
    "共産主義": "communism",
    "写真": "photography",
    "分類学": "taxonomy",
    "力学": "mechanics",
    "動物学": "zoology",
    "化学": "chemistry",
    "化粧品": "cosmetics",
    "医学": "medicine",
    "医療": "healthcare",
    "単位": "units-of-measure",
    "占星術": "astrology",
    "印刷": "printing",
    "古生物学": "paleontology",
    "哲学": "philosophy",
    "哺乳類学": "mammalogy",
    "商取引": "trading",
    "園芸": "horticulture",
    "地理": "geography",
    "地質学": "geology",
    "地震学": "seismology",
    "外科学": "surgery",
    "大工仕事": "carpentry",
    "天体物理学": "astrophysics",
    "天文学": "astronomy",
    "娯楽": "entertainment",
    "季節": "seasons",
    "宗教": "religion",
    "宝飾": "jewelry",
    "家具": "furniture",
    "寄生虫学": "parasitology",
    "将棋": "shogi",
    "岩石学": "petrology",
    "工学": "engineering",
    "幾何学": "geometry",
    "建築": "architecture",
    "微生物学": "microbiology",
    "心理学": "psychology",
    "性": "sexuality",
    "性行為": "sex",
    "情報学": "information-science",
    "情報技術": "computing",
    "戦争": "war",
    "技術": "technology",
    "政府": "government",
    "政治": "politics",
    "教育": "education",
    "数学": "mathematics",
    "数論": "number-theory",
    "文学": "literature",
    "文法": "grammar",
    "文献学": "philology",
    "料理": "cuisine",
    "旅行": "travel",
    "昆虫学": "entomology",
    "時間": "time",
    "有機化学": "organic-chemistry",
    "林業": "forestry",
    "柔道": "judo",
    "植物学": "botany",
    "武器": "weapon",
    "歯学": "dentistry",
    "歴史": "history",
    "歴史学": "historiography",
    "気候": "climatology",
    "気象": "weather",
    "水泳": "swimming",
    "泌尿器科学": "urology",
    "法律": "legal",
    "活版印刷": "typography",
    "流体力学": "fluid-dynamics",
    "海事": "nautical",
    "海洋学": "oceanography",
    "消防": "firefighting",
    "火器": "firearms",
    "火山学": "volcanology",
    "無機化学": "inorganic-chemistry",
    "熱力学": "thermodynamics",
    "物理学": "physics",
    "犯罪学": "criminology",
    "狩猟": "hunting",
    "生化学": "biochemistry",
    "生態学": "ecology",
    "生物学": "biology",
    "生理学": "physiology",
    "疑似科学": "pseudoscience",
    "疫学": "epidemiology",
    "病理学": "pathology",
    "発生学": "embryology",
    "相撲": "sumo",
    "眼科学": "ophthalmology",
    "社会学": "sociology",
    "社会科学": "social-science",
    "社会言語学": "sociolinguistics",
    "神学": "theology",
    "神経学": "neurology",
    "神経解剖学": "neuroanatomy",
    "神話": "mythology",
    "神道": "Shinto",
    "科学": "sciences",
    "競馬": "horse-racing",
    "精神医学": "psychiatry",
    "紋章学": "heraldry",
    "紡績": "weaving",
    "細胞学": "cytology",
    "細菌学": "bacteriology",
    "経営学": "management",
    "経済": "economics",
    "統計学": "statistics",
    "線型代数学": "linear-algebra",
    "翻訳研究": "translation-studies",
    "老年学": "gerontology",
    "考古学": "archaeology",
    "肉": "meat",
    "腫瘍学": "oncology",
    "自動車": "automobile",
    "自動車機器": "automotive",
    "航空": "aviation",
    "航空工学": "aeronautics",
    "色": "color",
    "花粉学": "palynology",
    "芸術": "arts",
    "著作権": "copyright",
    "薬理学": "pharmacology",
    "藻類学": "phycology",
    # 蠍 means "scorpion"; it was previously mapped to "beer", duplicating
    # the 酒 entry below.
    "蠍": "scorpions",
    "血液学": "hematology",
    "衣類": "clothing",
    "製造": "manufacturing",
    "解剖学": "anatomy",
    "解析学": "mathematical-analysis",
    "言語": "language",
    "言語学": "linguistics",
    "詩": "poetry",
    "語彙論": "lexicology",
    "語用論": "pragmatics",
    "調理": "cooking",
    "論理学": "logic",
    "資本主義": "capitalism",
    "超心理学": "parapsychology",
    "軍事": "military",
    "辞書学": "lexicography",
    "農業": "agriculture",
    "通貨": "numismatics",
    "運動": "exercise",
    "道路": "road",
    "遺伝学": "genetics",
    "都市": "city",
    "都道府県": "prefectures-of-Japan",
    # NOTE(review): 酒 is alcoholic drink in general, not only beer -
    # confirm that "beer" is the intended topic.
    "酒": "beer",
    "重量挙げ": "weightlifting",
    "野球": "baseball",
    "野菜": "vegetable",
    "金融": "finance",
    "釣り": "fishing",
    "鉄道": "rail-transport",
    "鉱物学": "mineralogy",
    "陸上競技": "athletics",
    "集合論": "set-theory",
    "電子工学": "electronics",
    "電気": "electricity",
    "電磁気学": "electromagnetism",
    "電話": "telephone",
    "音声学": "phonetics",
    "音楽": "music",
    "音韻論": "phonology",
    "韻律": "prosody",
    "食品": "food",
    "馬術": "equestrianism",
    "魚": "fish",
    "魚類学": "ichthyology",
    "鳥類学": "ornithology",
    "麻雀": "mahjong",
    "演算": "arithmetic",
    "ゲーム": "games",
}
def translate_raw_tags(data):
    """Translate ``data.raw_tags`` into English tags and topics, in place.

    Each raw tag is handled by the first rule that matches:

    1. It is a key of ``TAGS``: its translation is added to ``data.tags``
       via ``add_tag``.  ``TAGS`` is checked before ``TOPICS``, so a label
       present in both tables is treated as a tag.
    2. It contains ``/``: it is split on ``/`` and every stripped, non-empty
       part found in ``TAGS`` is translated.  If any part is not in ``TAGS``
       the original raw tag is kept in ``data.raw_tags`` so that the
       untranslated information is not silently lost.
    3. It is a key of ``TOPICS`` and ``data`` has a ``topics`` attribute:
       the topic is appended to ``data.topics``.
    4. Otherwise it is kept unchanged in ``data.raw_tags``.

    Args:
        data: Object with list attributes ``raw_tags`` and ``tags`` and,
            optionally, ``topics``.  It is mutated; nothing is returned.
    """
    untranslated = []
    for raw_tag in data.raw_tags:
        if raw_tag in TAGS:
            add_tag(raw_tag, data)
        elif "/" in raw_tag:
            fully_translated = True
            for part in raw_tag.split("/"):
                part = part.strip()
                if not part:
                    # Ignore empty pieces such as the tail of "男性/".
                    continue
                if part in TAGS:
                    add_tag(part, data)
                else:
                    fully_translated = False
            if not fully_translated:
                # At least one piece had no translation: keep the raw tag
                # instead of dropping it.
                untranslated.append(raw_tag)
        elif raw_tag in TOPICS and hasattr(data, "topics"):
            data.topics.append(TOPICS[raw_tag])
        else:
            untranslated.append(raw_tag)
    data.raw_tags = untranslated
def add_tag(raw_tag, data):
    """Append the ``TAGS`` translation of ``raw_tag`` to ``data.tags``.

    The ``TAGS`` value may be a single string or a list of strings; each
    resulting tag is appended only if it is not already in ``data.tags``.
    A value of any other type adds nothing.

    Args:
        raw_tag: A key of ``TAGS``.
        data: Object with a list attribute ``tags``; it is mutated.

    Raises:
        KeyError: If ``raw_tag`` is not a key of ``TAGS``.
    """
    translated = TAGS[raw_tag]
    if isinstance(translated, str):
        candidates = [translated]
    elif isinstance(translated, list):
        candidates = translated
    else:
        candidates = []
    for tag in candidates:
        if tag not in data.tags:
            data.tags.append(tag)