Coverage for src / wiktextract / extractor / ko / tags.py: 90%
20 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
1from .models import WordEntry
# Label data on Korean Wiktionary that these tables were written against:
# https://ko.wiktionary.org/wiki/모듈:labels/data/topical
# https://ko.wiktionary.org/wiki/모듈:labels/data
#
# Korean gloss label -> English tag.  Every value in this table is a single
# tag string.  Commented-out keys are labels that have no tag assigned yet.
GLOSS_TAGS = {
    "인명": "name",
    "고어": "archaic",
    "구식": "archaic",
    # "대명동사": "",
    # "말고름": "",
    "비유": "metaphoric",
    "사어": "obsolete",  # dead language
    "유아어": "baby-talk",
    "자동사": "intransitive",
    "직역": "literally",
    "타동사": "transitive",
    "드물게": "rare",
    "원래의 의미": "naturally",
    "문학적": "literary",
    "해학적": "humorous",
    "완곡적": "euphemistic",
    "가산": "countable",
    "불가산": "uncountable",
}
# Korean pronunciation label -> English tag(s).  A value is either one tag
# string or a list of tag strings that are all applied together.
# Keys containing "\n" are two-line labels: the Korean name on the first
# line and the English name on the second.
SOUND_TAGS = {
    # Template 틀:ko-IPA
    "국어의 로마자 표기\nRevised Romanization": ["revised", "romanization"],
    "국어의 로마자 표기 (음역)\nRevised Romanization (translit.)": [
        "revised",
        "romanization",
        "transliteration",
    ],
    "매큔-라이샤워 표기\nMcCune-Reischauer": "McCune-Reischauer",
    "예일 표기\nYale Romanization": ["Yale", "romanization"],
    "표준어": "SK-Standard",
    "서울": "Seoul",
    # Template 틀:ja-pron
    "도쿄": "Tokyo",
    # Templates 틀:발음 듣기 ("listen to pronunciation") and 틀:IPA
    "영국": "UK",
    "미국": "US",
    "영": "UK",
    "미": "US",
    "표준": "standard",
    "남부": "South",
    "북부": "North",
    "고대": "archaic",
    "동부": "East",
    "서부": "West",
    "포르투갈": "Portugal",
    "이집트": "Egypt",
    "시리아": "Syria",
    "브라질": "Brazil",
    "독일": "Germany",
    "현대": "modern",
    "캐나다": "Canada",
    "하노이": "Hanoi",
    "브라질 남부": "Southern-Brazil",
    "벨기에": "Belgium",
    "이란": "Iran",
    "파리": "Paris",
    "모로코": "Morocco",
    "베를린": "Berlin",
    "비격식체": "informal",
    "민난어 장저우": ["Min-Nan", "Zhangzhou"],
}
# Headword-line label -> English tag(s).  A value is either one tag string
# or a list of tag strings that are all applied together.
HEADER_TAGS = {
    # Template 틀:한국어_동사 ("Korean verb")
    "활용": "infinitive",
    "연결형": "sequential",
    "명사형": "noun",
    "사동사": "causative",
    "한글": "hangeul",
    "한자": "hanja",
    # Module 모듈:Jpan-headword
    "자동사 및 타동사": ["transitive", "intransitive"],
    "연용형": "stem",
    "과거형": "past",
    "5단 활용": "godan",
    "1단 활용": "ichidan",
    "サ행 변격": "suru",
    "kuru": "kuru",
}
# Korean label -> English tag(s) for translation entries; the same labels
# also occur in linkage lists.  A value is either one tag string or a list of
# tag strings that are all applied together.
TRANSLATION_TAGS = {
    "남성": "masculine",
    "여성": "feminine",
    "라틴": "Latin",
    "중성": "neuter",
    "간체": "Simplified-Chinese",
    "번체": "Traditional-Chinese",
    "번체자": "Traditional-Chinese",
    "오스트리아": "Austria",
    "히브리 문자": ["Hebrew", "letter"],
    "아랍 문자": ["Arabic", "letter"],
    "복수형": "plural",
    "단수": "singular",
    "복수": "plural",
    # 불완료체 / 완료체 name the imperfective / perfective verb *aspects*,
    # not the imperfect tense or the completive aspect.
    "불완료체": "imperfective",
    "완료체": "perfective",
    "양성": "masculine",
    "바이에른 방언": ["Bavarian", "dialectal"],
    "광둥어": "Cantonese",
    "오스트레일리아": "Australia",
    "글라골 문자": ["Glagolitic", "letter"],
    "속어": "slang",
    "멕시코 속어": ["Mexico", "slang"],
    "에스파냐": "Spain",
    "가타카나": "katakana",
    "고어": "archaic",
    "쯔놈": "Chu-Nom",
    "형용사": "adjective",
    "사투리": "dialectal",
    "약자": "abbreviation",
    "동사": "verb",
    "드문 단어": "rare",
}
# Combined lookup table used by translate_raw_tags(): the four category
# tables are merged in the order listed, then a few extra labels are added.
# On a key collision the later entry wins; the collisions visible in this
# file ("고어", "간체") are given the same tag in every table they appear in.
TAGS = {
    **GLOSS_TAGS,
    **SOUND_TAGS,
    **HEADER_TAGS,
    **TRANSLATION_TAGS,
    # Template:zh-forms
    "정체": "Traditional-Chinese",
    "간체": "Simplified-Chinese",  # already supplied by TRANSLATION_TAGS
    # Template:zh-x
    "대만 관화": "Taiwanese-Mandarin",
    "표준 중국어": "Standard-Chinese",
    "한어병음": "Pinyin",
    "광저우 광둥어": "Guangzhou-Cantonese",
    "월병": "Jyutping",
    # Template:ja-kanjitab
    "구자체": "kyūjitai",
}
# Korean topical label -> English topic name.  translate_raw_tags() consults
# this table only for labels that are not found in TAGS.  Commented-out keys
# are currently not translated.
TOPICS = {
    "금융": "finance",
    "광고": "advertising",
    "군사": "military",
    "어류": "fish",
    "물리": "physics",
    "법률": "law",
    "식물": "botany",
    "역사": "history",
    "의류": "clothing",
    "의학": "medicine",
    "전기": "electricity",
    # "조류": "birds",
    "지리": "geography",
    "프로그래밍": "programming",
    "컴퓨터": "computer",
    "해부학": "anatomy",
    "정치": "politics",
    "종교": "religion",
    "가톨릭": "Catholicism",
    "축구": "football",
    # "체육": "physical-education",
}
def translate_raw_tags(data: WordEntry) -> None:
    """Resolve the Korean labels in ``data.raw_tags`` in place.

    Each label is looked up in ``TAGS`` first: a string value is appended to
    ``data.tags`` and a list value extends it.  A label that is not in
    ``TAGS`` but is in ``TOPICS`` is appended to ``data.topics``, provided
    ``data`` has a ``topics`` attribute.  Labels that match neither table are
    kept, in their original order, as the new ``data.raw_tags``.
    """
    leftover = []
    for label in data.raw_tags:
        if label in TAGS:
            mapped = TAGS[label]
            # TAGS values are either a list of tags or a single tag string.
            if isinstance(mapped, list):
                data.tags.extend(mapped)
            elif isinstance(mapped, str):
                data.tags.append(mapped)
            continue

        # Objects without a ``topics`` attribute cannot store topics, so for
        # them topical labels fall through and stay in raw_tags.
        if hasattr(data, "topics") and label in TOPICS:
            data.topics.append(TOPICS[label])
            continue

        leftover.append(label)

    data.raw_tags = leftover