Coverage for src/wiktextract/extractor/ja/tags.py: 95%
24 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 06:55 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 06:55 +0000
# Maps a raw Japanese Wiktionary label to its English tag.  A value is either
# a single tag string or a list of tag strings when one label expands to
# several tags.
TAGS = {
    "男性": "masculine",
    "女性": "feminine",
    "通性": "common",
    "中性": "neuter",
    "単数": "singular",
    "複数": "plural",
    "不変": "invariable",
    "男性複数": ["masculine", "plural"],
    "女性複数": ["feminine", "plural"],
    # Template:context/data
    "くだけた表現": "informal",
    "しばしば": "often",
    "まれ": "rare",
    "アイルランド": "Ireland",
    "アフリカ": "Africa",
    "アメカメカ": "Amecameca",
    "アメリカ合衆国": "US",
    "アルスター": "Ulster",
    "アルゼンチン": "Argentina",
    "アングロ・ノルマン": "Anglo-Norman",
    "アンダルシア": "Andalusia",
    "イェ方言": "Ijekavian",
    "イギリス": "UK",
    "イラン": "Iran",
    "イロン方言": "Iron",
    "インターネット": "Internet",
    "インターネットスラング": ["Internet", "slang"],
    "ウクライナ": "Ukraine",
    "ウルグアイ": "Uruguay",
    "ウーリ語": "Uri",
    "エクアドル": "Ecuador",
    "エ方言": "Ekavian",
    "オノマトペ": "onomatopoeic",
    "オルムルム": "Ormulum",
    "オークニー": "Orkney",
    "オーストリア": "Austrian",
    "カイピラ方言": "Caipira",
    "カイ方言": "Kajkavian",
    "カサレヴサ": "Katharevousa",
    "カナダ": "Canada",
    "カルコーフォロ語": "Carcoforo",
    "ガスコーニュ": "Gascony",
    "ガーンジー": "Guernsey",
    "キューバ": "Cuba",
    "キリスト教": "Christian",
    "クロアチア": "Croatian",
    "グアテマラ": "Guatemala",
    "グレッソネイ語": "Gressoney",
    "ケニア": "Kenya",
    "ケベック": "Quebec",
    "ケルン語": "Kölsch",
    "ケント": "Kentish",
    "コイネー": "Koine",
    "コノート": "Connacht",
    "コロンビア": "Colombia",
    "コンゴ": "Congo",
    "サカティアングイス": "Zacatianguis",
    "サーク": "Sark",
    "シェットランド": "Shetland",
    "シク教": "Sikhism",
    "シンガポール": "Singapore",
    "ジャイナ教": "Jainism",
    "ジャマイカ": "Jamaica",
    "ジャージー": "Jersey",
    "ジンバブエ": "Zimbabwe",
    "スイス": "Switzerland",
    "スコットランド": "Scotland",
    "ストゥシルヴァン": "Sutsilvan",
    "スペイン": "Spain",
    "スルシルヴァン": "Sursilvan",
    "スルミラン": "Surmiran",
    "セルビア": "Serbian",
    "セルリング": "Sylt",
    "タイ英語": "Thailand",
    "タラシケヴィツァ": "Taraškievica",
    "タントユカ": "Tantoyuca",
    "ダリー語": "Dari",
    "チコナメル": "Chiconamel",
    "チコンテペク": "Chicontepec",
    "チャ方言": "Chakavian",
    "チリ": "Chile",
    "ティマウ": "Timau",
    "テペツィントラ": "Tepetzintla",
    "テマパチェ": "Temapache",
    "ディゴル方言": "Digor",
    "デモティキ": "Demotic",
    "トスカナ語": "Tuscany",
    "ドイツ": "Germany",
    "ドイツ南部": "Southern-Germany",
    "ナチズム": "Nazism",
    "ナミビア": "Namibia",
    "ニカラグア": "Nicaragua",
    "ニューカッスル": "Tyneside",
    "ニュージーランド": "New-Zealand",
    "ヌオロ": "Nuorese",
    "バレンシア": "Valencia",
    "パナマ": "Panama",
    "ヒンズー教": "Hinduism",
    "ビザンツ": "Byzantine",
    "ビバロ・アルピーネ語": "Vivaro-Alpine",
    "フィリピン": "Philippines",
    "フェリング・エームラング": "Föhr-Amrum",
    "フォルマッツァ語": "Formazza",
    "フランス": "French",
    "ブラジル": "Brazil",
    "プロテスタント": "Protestant",
    "プロヴァンス": "Provençal",
    "プーター": "Puter",
    "ヘルゴラント": "Helgoland",
    "ベネズエラ": "Venezuela",
    "ベルギー": "Belgium",
    "ペルー": "Peru",
    "ホンジュラス": "Honduras",
    "ボスニア": "Bosnian",
    "ボリビア": "Bolivia",
    "ポルトガル": "Portugal",
    "マンスター": "Munster",
    "マーシア": "Mercian",
    "ミストラル式綴り": "Mistralian",
    "メキシコ": "Mexico",
    "モンテネグロ": "Montenegro",
    "モーリング": "Mooring",
    "ユダヤ教": "Judaism",
    "ヨーロッパ": "Europe",
    "ラングドック": "Languedoc",
    "リヒテンシュタイン": "Liechtenstein",
    "リプアーリ語": "Ripuarian",
    "リムーザン": "Limousin",
    "リメッラ語": "Rimella",
    "ルイジアナ": "Louisiana",
    "ルゼルナ": "Luserna",
    "ログドーロ": "Logudorese",
    "ロシア": "Russia",
    "ヴァラダール": "Vallander",
    "ヴィーディングハルデ": "Wiedingharde",
    "不可算": "uncountable",
    "不変化名詞": "indeclinable",
    "不活動体": "inanimate",
    "他動詞": "transitive",
    "代名詞的用法": "pronominal",
    "俗語": "slang",
    "修辞学": "rhetoric",
    "倒語": "slang",
    "再帰動詞": "reflexive",
    "初期中英語": "Early-Middle-English",
    "助動詞": "auxiliary",
    "卑語": "vulgar",
    "単数形で": "singular",
    "単数形のみ": ["singular", "singular-only"],
    "印": "India",
    "叙法": "modal",
    "叙述用法のみ": "predicative",
    "口語": "informal",
    "古用法": "dated",
    "古語・廃語": "archaic",
    "可算": "countable",
    "地名": "place",
    "地域": "regional",
    "基数": "cardinal",
    "多文化的ロンドン英語": "Multicultural-London-English",
    "婉曲表現": "euphemistic",
    "幼児語": "childish",
    "序数": "ordinal",
    "廃語": "obsolete",
    "強い": "strong",
    "形容詞的": "attributive",
    "後期中英語": "Late-Middle-English",
    "恐らく": "possibly",
    "慣用的表現": "idiomatic",
    "排斥された語": "proscribed",
    "控えめに": "mildly",
    "換喩的に": "metonymically",
    "文章語": "literary",
    "方言": "dialectal",
    "時々": "sometimes",
    "欠如動詞": "defective",
    "正式・堅": "formal",
    "歴史": "historical",
    "比喩": "figuratively",
    "比喩的に": "figuratively",
    "比較形有り": "comparable",
    "比較形無し": "not-comparable",
    "活動体": "animate",
    "滑稽": "humorous",
    "特に": "especially",
    "状態動詞": "stative",
    "略語": "abbreviation",
    "疑問詞": "interrogative",
    "皮肉": "ironic",
    "破格": "nonstandard",
    "米語": "US",
    "絶対単数": ["singular", "singular-only"],
    "絶対複数": ["plural", "plural-only"],
    "能格動詞": "ergative",
    "自他動詞": "ambitransitive",
    "自動詞": "intransitive",
    "英連邦": "Commonwealth",
    "蔑称": "offensive",
    "複合語で": "in-compounds",
    "複数形で": "plural",
    "西部": "Western",
    "視覚方言": "pronunciation-spelling",
    "詩的表現": "poetic",
    "豪": "Australian",
    "転じて": "broadly",
    "軽侮語": "pejorative",
    # 近代ラテン語 is "New Latin"; it was previously mapped to "Netherlands".
    "近代ラテン語": "New-Latin",
    "逐語的に": "literally",
    "通常": "usually",
    "通常複数形で": "plural-normally",
    "造語": "neologism",
    "関係詞": "relative",
    "限定": "definite",
    "集合名詞": "collective",
    "集合的に": "collective",
    "非人称": "impersonal",
    "人称": "personal",
    "非標準": "uncommon",
    "頭字語": "initialism",
    # "en-verb" template
    "三単現": ["third-person", "singular", "present"],
    "現在分詞": ["present", "participle"],
    "過去形": "past",
    "過去分詞": ["past", "participle"],
    "繁": "Traditional-Chinese",
    "簡": "Simplified-Chinese",
    # zh sound
    "標準中国語": "Standard-Chinese",
    "ピンイン": "Pinyin",
    "注音符号": "Bopomofo",
    "ウェード式": "Wade-Giles",
    "IPA": "IPA",
    "広東語": "Cantonese",
    "改イェール式": ["Yale", "romanization", "Cantonese"],
    "イェール式": "Yale",
    "粤拼": "Jyutping",
    "教院式": ["ILE", "romanization", "Cantonese"],
    "広東拼音方案": "Guangdong-Romanization",
    "台山語": "Taishanese",
    "閩南語": "Min-Nan",
    "漳州": "Zhangzhou",
    "漳浦": "Zhangpu",
    "高雄": "Kaohsiung",
    "ペナン州": "Penang",
    "白話字": "POJ",
    "台湾ローマ字": "Tai-lo",
    "普実台文": "Phofsit-Daibuun",
    "廈門": "Xiamen",
    "泉州": "Quanzhou",
    "台北": "Taipei",
    "潮州語": "Teochew",
    "莆仙語": "Puxian-Min",
    "客家語": "Hakka",
    "呉語": "Wu",
    "晋語": "Jin",
    "ドンガン語": "Dungan",
    # Module:gender and number
    "非有生": "inanimate",
    "有生": "animate",
    "男性人間": "virile",
    "非男性人間": "nonvirile",
    # Template:ru-noun+
    "生格": "genitive",
    "複数主格": ["nominative", "plural"],
    "複数生格": ["genitive", "plural"],
    "形容詞": ["relational", "adjective"],
    "指小形": "diminutive",
    "不完了体": "imperfective",
    "完了体": "perfective",
    # Template:日本語ダ活用 and Template:日本語サ変活用
    # (Japanese conjugation table templates)
    "未然形": "imperfective",
    "連用形": "continuative",
    "終止形": "conclusive",
    "連体形": "attributive",
    "仮定形": "hypothetical",
    "命令形": "imperative",
    "命令": "imperative",
    "過去・完了": ["past", "completive"],
    "過去・完了・状態": ["past", "completive"],
    "否定形": "negative",
    "否定": "negative",
    "否定(古風)": ["negative", "archaic"],
    "自動詞化": "intransitive",
    "言い切り": "definitive",
    "名詞化": "noun-from-verb",
    # 可能 in this table header is the potential form, not the adverb
    # "possibly" (which is 恐らく above).
    # NOTE(review): 自発 is the "spontaneous" form; "active" looks doubtful
    # but is kept until a suitable tag name is confirmed.
    "自発・受身\n可能・尊敬": ["active", "passive", "potential", "honorific"],
    "丁寧": "polite",
    "東京式": "Tokyo",
    "京阪式": ["Kyoto", "Osaka"],
    "推量・意志": "volitional",
    "様態": "evidential",
    "語幹": "stem",
    # Template:日本語下一段活用 (Japanese conjugation table template)
    "意志・勧誘": "volitional",
    "仮定条件": "conditional",
    # Template:hu-noun
    "複数・主格": "plural",
    "・主格": "nominative",
    "単数対格": ["singular", "accusative"],
    "複数対格": ["plural", "accusative"],
    "属格": "genitive",
    "ラテン文字": "romanization",
    "キリル文字": "Cyrillic",
    # Template:de-noun
    "複数形無し": "no-plural",
    "複数形": "plural",
    "指小辞無し": "no-diminutive",
    "指小辞": "diminutive",
    "廃用": "obsolete",
    # Template:ja-noun
    "旧字体": "kyūjitai",
    # Template:ca-verb
    "現在第一人称単数形": ["first-person", "singular", "present"],
    # Template:en-adj
    "比較形なし": "not-comparable",
    "比較級": "comparative",
    "最上級": "superlative",
    "旧綴り": "archaic",
    # Template:古典日本語四段活用 (Classical Japanese conjugation table template)
    "已然形": "realis",
}
# Maps a raw Japanese Wiktionary label to a single English topic name.
TOPICS = {
    # Template:context/data
    "BDSM": "BDSM",
    "LGBT": "LGBT",
    "SF": "science-fiction",
    "アイスホッケー": "ice-hockey",
    "アメリカンフットボール": "American-football",
    "アーチェリー": "archery",
    "イスラム教": "Islam",
    "イデオロギー": "ideology",
    "ウイルス学": "virology",
    "エネルギー": "energy",
    "カトリック": "Catholicism",
    "カードゲーム": "cards",
    "カーリング": "curling",
    "キリスト教": "Christianity",
    "クリケット": "cricket",
    "グラフィカルユーザインタフェース": "graphical-user-interface",
    "グラフ理論": "graph-theory",
    "コンピュータグラフィックス": "computer-graphics",
    "ゴルフ": "golf",
    "サイクリング": "cycling",
    "サッカー": "soccer",
    "サーフィン": "surfing",
    "シャンチー": "xiangqi",
    "スカッシュ": "squash",
    "スキー": "skiing",
    "スケート": "skating",
    "スケートボード": "skateboarding",
    "スヌーカー": "snooker",
    "スノーボード": "snowboarding",
    "スポーツ": "sports",
    "ソフトウェア": "software",
    "ソフトボール": "softball",
    "ゾロアスター教": "Zoroastrianism",
    "ダンス": "dance",
    "ダーツ": "darts",
    "チアリーディング": "cheerleading",
    "チェス": "chess",
    "テニス": "tennis",
    "テレビ": "television",
    "ハンドボール": "handball",
    "ハードウェア": "computer-hardware",
    "バスケットボール": "basketball",
    "バレーボール": "volleyball",
    "ビジネス": "business",
    "ビリヤード": "billiards",
    "ファシズム": "fascism",
    "ファッション": "fashion",
    "フェミニズム": "feminism",
    "フェンシング": "fencing",
    "フットボール": "football",
    "ブリッジ": "bridge",
    "プログラミング": "programming",
    "ボウリング": "bowling",
    "ボクシング": "boxing",
    "ボディビル": "bodybuilding",
    "ボート競技": "rowing",
    "ポーカー": "poker",
    "モータースポーツ": "motor-racing",
    "ラクロス": "lacrosse",
    "ラグビー": "rugby",
    "レスリング": "wrestling",
    "交通": "transport",
    "人口学": "demography",
    "仏教": "Buddhism",
    "代数学": "algebra",
    "代数幾何学": "algebraic-geometry",
    "会計": "accounting",
    "体操": "gymnastics",
    "保険": "insurance",
    "倫理学": "ethics",
    "光学": "optics",
    "免疫学": "immunology",
    "共産主義": "communism",
    "写真": "photography",
    "分類学": "taxonomy",
    "力学": "mechanics",
    "動物学": "zoology",
    "化学": "chemistry",
    "化粧品": "cosmetics",
    "医学": "medicine",
    "医療": "healthcare",
    "単位": "units-of-measure",
    "占星術": "astrology",
    "印刷": "printing",
    "古生物学": "paleontology",
    "哲学": "philosophy",
    "哺乳類学": "mammalogy",
    "商取引": "trading",
    "園芸": "horticulture",
    "地理": "geography",
    "地質学": "geology",
    "地震学": "seismology",
    "外科学": "surgery",
    "大工仕事": "carpentry",
    "天体物理学": "astrophysics",
    "天文学": "astronomy",
    "娯楽": "entertainment",
    "季節": "seasons",
    "宗教": "religion",
    "宝飾": "jewelry",
    "家具": "furniture",
    "寄生虫学": "parasitology",
    "将棋": "shogi",
    "岩石学": "petrology",
    "工学": "engineering",
    "幾何学": "geometry",
    "建築": "architecture",
    "微生物学": "microbiology",
    "心理学": "psychology",
    "性": "sexuality",
    "性行為": "sex",
    "情報学": "information-science",
    "情報技術": "computing",
    "戦争": "war",
    "技術": "technology",
    "政府": "government",
    "政治": "politics",
    "教育": "education",
    "数学": "mathematics",
    "数論": "number-theory",
    "文学": "literature",
    "文法": "grammar",
    "文献学": "philology",
    "料理": "cuisine",
    "旅行": "travel",
    "昆虫学": "entomology",
    "時間": "time",
    "有機化学": "organic-chemistry",
    "林業": "forestry",
    "柔道": "judo",
    "植物学": "botany",
    "武器": "weapon",
    "歯学": "dentistry",
    "歴史": "history",
    "歴史学": "historiography",
    "気候": "climatology",
    "気象": "weather",
    "水泳": "swimming",
    "泌尿器科学": "urology",
    "法律": "law",
    "活版印刷": "typography",
    "流体力学": "fluid-dynamics",
    "海事": "nautical",
    "海洋学": "oceanography",
    "消防": "firefighting",
    "火器": "firearms",
    "火山学": "volcanology",
    "無機化学": "inorganic-chemistry",
    "熱力学": "thermodynamics",
    "物理学": "physics",
    "犯罪学": "criminology",
    "狩猟": "hunting",
    "生化学": "biochemistry",
    "生態学": "ecology",
    "生物学": "biology",
    "生理学": "physiology",
    "疑似科学": "pseudoscience",
    "疫学": "epidemiology",
    "病理学": "pathology",
    "発生学": "embryology",
    "相撲": "sumo",
    "眼科学": "ophthalmology",
    "社会学": "sociology",
    "社会科学": "social-science",
    "社会言語学": "sociolinguistics",
    "神学": "theology",
    "神経学": "neurology",
    "神経解剖学": "neuroanatomy",
    "神話": "mythology",
    "神道": "Shinto",
    "科学": "sciences",
    "競馬": "horse-racing",
    "精神医学": "psychiatry",
    "紋章学": "heraldry",
    "紡績": "weaving",
    "細胞学": "cytology",
    "細菌学": "bacteriology",
    "経営学": "management",
    "経済": "economics",
    "統計学": "statistics",
    "線型代数学": "linear-algebra",
    "翻訳研究": "translation-studies",
    "老年学": "gerontology",
    "考古学": "archaeology",
    "肉": "meat",
    "腫瘍学": "oncology",
    "自動車": "automobile",
    "自動車機器": "automotive",
    "航空": "aviation",
    "航空工学": "aeronautics",
    "色": "color",
    "花粉学": "palynology",
    "芸術": "arts",
    "著作権": "copyright",
    "薬理学": "pharmacology",
    "藻類学": "phycology",
    # 蠍 means "scorpion"; it was previously mapped to "beer".
    # NOTE(review): confirm "scorpions" is the topic name used downstream.
    "蠍": "scorpions",
    "血液学": "hematology",
    "衣類": "clothing",
    "製造": "manufacturing",
    "解剖学": "anatomy",
    "解析学": "mathematical-analysis",
    "言語": "language",
    "言語学": "linguistics",
    "詩": "poetry",
    "語彙論": "lexicology",
    "語用論": "pragmatics",
    "調理": "cooking",
    "論理学": "logic",
    "資本主義": "capitalism",
    "超心理学": "parapsychology",
    "軍事": "military",
    "辞書学": "lexicography",
    "農業": "agriculture",
    "通貨": "numismatics",
    "運動": "exercise",
    "道路": "road",
    "遺伝学": "genetics",
    "都市": "city",
    "都道府県": "prefectures-of-Japan",
    # NOTE(review): 酒 is alcoholic drink in general, not only beer; the
    # existing mapping is kept unchanged pending confirmation.
    "酒": "beer",
    "重量挙げ": "weightlifting",
    "野球": "baseball",
    "野菜": "vegetable",
    "金融": "finance",
    "釣り": "fishing",
    "鉄道": "rail-transport",
    "鉱物学": "mineralogy",
    "陸上競技": "athletics",
    "集合論": "set-theory",
    "電子工学": "electronics",
    "電気": "electricity",
    "電磁気学": "electromagnetism",
    "電話": "telephone",
    "音声学": "phonetics",
    "音楽": "music",
    "音韻論": "phonology",
    "韻律": "prosody",
    "食品": "food",
    "馬術": "equestrianism",
    "魚": "fish",
    "魚類学": "ichthyology",
    "鳥類学": "ornithology",
    "麻雀": "mahjong",
    "演算": "arithmetic",
    "ゲーム": "games",
    "首都": "capital-city",
    "筋肉": "anatomy",
}
def _translate_single_raw_tag(raw_tag, data):
    """Translate one label via TAGS or TOPICS; return True if it was consumed.

    TAGS takes precedence over TOPICS, so a label present in both tables only
    produces tags.  A TOPICS match is consumed only when ``data`` has a
    ``topics`` attribute; otherwise the label is reported as untranslated.
    """
    if raw_tag in TAGS:
        add_tag(raw_tag, data)
        return True
    if raw_tag in TOPICS and hasattr(data, "topics"):
        topic = TOPICS[raw_tag]
        # Avoid duplicate topics, mirroring how add_tag treats tags.
        if topic not in data.topics:
            data.topics.append(topic)
        return True
    return False


def translate_raw_tags(data):
    """Move translatable entries of ``data.raw_tags`` into tags and topics.

    Each raw tag is looked up as a whole first.  If that fails and the raw
    tag contains "/", it is split on "/" and every stripped part is looked up
    separately.  ``data.raw_tags`` is then replaced by the labels that could
    not be translated.

    ``data`` must expose ``raw_tags`` and ``tags`` lists; ``topics`` is
    optional.  Returns ``None``; ``data`` is modified in place.
    """
    untranslated = []
    for raw_tag in data.raw_tags:
        if _translate_single_raw_tag(raw_tag, data):
            continue
        if "/" not in raw_tag:
            untranslated.append(raw_tag)
            continue

        parts = [part.strip() for part in raw_tag.split("/")]
        unknown_parts = [
            part
            for part in parts
            if not _translate_single_raw_tag(part, data)
        ]
        if len(unknown_parts) == len(parts):
            # Nothing was recognised: keep the label verbatim instead of
            # silently discarding it.
            untranslated.append(raw_tag)
        else:
            # Keep only the parts that could not be translated; empty parts
            # (e.g. from a trailing "/") carry no information.
            untranslated.extend(part for part in unknown_parts if part)
    data.raw_tags = untranslated
def add_tag(raw_tag, data):
    """Append the TAGS translation of ``raw_tag`` to ``data.tags``.

    The TAGS value may be a single string or a list of strings; each
    resulting tag is appended only if it is not already in ``data.tags``.
    Raises ``KeyError`` when ``raw_tag`` is not a key of TAGS.
    """
    translated = TAGS[raw_tag]
    if isinstance(translated, str):
        candidates = [translated]
    elif isinstance(translated, list):
        candidates = translated
    else:
        # Any other value type contributes nothing.
        candidates = []
    for tag in candidates:
        if tag not in data.tags:
            data.tags.append(tag)