Coverage for src / wiktextract / extractor / ko / tags.py: 90%
20 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-21 08:01 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-21 08:01 +0000
1from .models import WordEntry
# Reference pages for the labels below:
# https://ko.wiktionary.org/wiki/모듈:labels/data/topical
# https://ko.wiktionary.org/wiki/모듈:labels/data
#
# Korean gloss label -> English tag.
GLOSS_TAGS: dict[str, str] = {
    "인명": "name",
    "고어": "archaic",
    "구식": "archaic",
    # Labels left unmapped (kept commented out, no tag assigned):
    # "대명동사": "",
    # "말고름": "",
    "비유": "metaphoric",
    "사어": "obsolete",  # dead language
    "유아어": "baby-talk",
    "자동사": "intransitive",
    "직역": "literally",
    "타동사": "transitive",
    "드물게": "rare",
    "원래의 의미": "naturally",
    "문학적": "literary",
    "해학적": "humorous",
    "완곡적": "euphemistic",
    "가산": "countable",
    "불가산": "uncountable",
}
26SOUND_TAGS = {
27 # 틀:ko-IPA
28 "Revised Romanization": ["revised", "romanization"],
29 "Revised Romanization (translit.)": [
30 "revised",
31 "romanization",
32 "transliteration",
33 ],
34 "McCune-Reischauer": "McCune-Reischauer",
35 "Yale Romanization": ["Yale", "romanization"],
36 "표준어/서울": ["SK-Standard", "Seoul"],
37 # 틀:ja-pron
38 "도쿄": "Tokyo",
39 # 틀:발음 듣기, 틀:IPA
40 "영국": "UK",
41 "미국": "US",
42 "영": "UK",
43 "미": "US",
44 "표준": "standard",
45 "남부": "South",
46 "북부": "North",
47 "고대": "archaic",
48 "동부": "East",
49 "서부": "West",
50 "포르투갈": "Portugal",
51 "이집트": "Egypt",
52 "시리아": "Syria",
53 "브라질": "Brazil",
54 "독일": "Germany",
55 "현대": "modern",
56 "캐나다": "Canada",
57 "하노이": "Hanoi",
58 "브라질 남부": "Southern-Brazil",
59 "벨기에": "Belgium",
60 "이란": "Iran",
61 "파리": "Paris",
62 "모로코": "Morocco",
63 "베를린": "Berlin",
64 "비격식체": "informal",
65 "민난어 장저우": ["Min-Nan", "Zhangzhou"],
66}
68HEADER_TAGS = {
69 # 틀:한국어_동사
70 "활용": "infinitive",
71 "연결형": "sequential",
72 "명사형": "noun",
73 "사동사": "causative",
74 "한글": "hangeul",
75 "한자": "hanja",
76 # 모듈:Jpan-headword
77 "자동사 및 타동사": ["transitive", "intransitive"],
78 "연용형": "stem",
79 "과거형": "past",
80 "5단 활용": "godan",
81 "1단 활용": "ichidan",
82 "サ행 변격": "suru",
83 "kuru": "kuru",
84}
86# also in linkage lists
87TRANSLATION_TAGS = {
88 "남성": "masculine",
89 "여성": "feminine",
90 "라틴": "Latin",
91 "중성": "neuter",
92 "간체": "Simplified-Chinese",
93 "번체": "Traditional-Chinese",
94 "번체자": "Traditional-Chinese",
95 "오스트리아": "Austria",
96 "표준어": "standard",
97 "히브리 문자": ["Hebrew", "letter"],
98 "아랍 문자": ["Arabic", "letter"],
99 "복수형": "plural",
100 "단수": "singular",
101 "복수": "plural",
102 "불완료체": "imperfect",
103 "완료체": "completive",
104 "양성": "masculine",
105 "바이에른 방언": ["Bavarian", "dialectal"],
106 "광둥어": "Cantonese",
107 "오스트레일리아": "Australia",
108 "글라골 문자": ["Glagolitic", "letter"],
109 "속어": "slang",
110 "멕시코 속어": ["Mexico", "slang"],
111 "에스파냐": "Spain",
112 "가타카나": "katakana",
113 "고어": "archaic",
114 "쯔놈": "Chu-Nom",
115 "형용사": "adjective",
116 "사투리": "dialectal",
117 "약자": "abbreviation",
118 "동사": "verb",
119 "드문 단어": "rare",
120}
# Combined lookup table consulted by translate_raw_tags().
# The four category tables are unpacked in order and the literal entries
# below come last; when a key occurs more than once, the last occurrence
# wins (dict-display semantics).
TAGS: dict[str, str | list[str]] = {
    **GLOSS_TAGS,
    **SOUND_TAGS,
    **HEADER_TAGS,
    **TRANSLATION_TAGS,
    # Template:zh-forms
    "정체": "Traditional-Chinese",
    "간체": "Simplified-Chinese",
    # Template:zh-x
    "대만 관화": "Taiwanese-Mandarin",
    "표준 중국어": "Standard-Chinese",
    "한어병음": "Pinyin",
    "광저우 광둥어": "Guangzhou-Cantonese",
    "월병": "Jyutping",
}
# Korean topic label -> English topic name.
TOPICS: dict[str, str] = {
    "금융": "finance",
    "광고": "advertising",
    "군사": "military",
    "어류": "fish",
    "물리": "physics",
    "법률": "law",
    "식물": "botany",
    "역사": "history",
    "의류": "clothing",
    "의학": "medicine",
    "전기": "electricity",
    # Disabled entry (kept commented out):
    # "조류": "birds",
    "지리": "geography",
    "프로그래밍": "programming",
    "컴퓨터": "computer",
    "해부학": "anatomy",
    "정치": "politics",
    "종교": "religion",
    "가톨릭": "Catholicism",
    "축구": "football",
    # Disabled entry (kept commented out):
    # "체육": "physical-education",
}
def translate_raw_tags(data: WordEntry) -> None:
    """Move recognised entries of ``data.raw_tags`` into tags/topics.

    Each raw tag is handled by the first rule that matches:

    1. It is a key of ``TAGS``: a string value is appended to
       ``data.tags``, a list value is used to extend ``data.tags``. The
       raw tag is consumed either way (a value of any other type would
       be dropped without adding anything).
    2. ``data`` has a ``topics`` attribute and the raw tag is a key of
       ``TOPICS``: the mapped topic is appended to ``data.topics``.
    3. Otherwise the raw tag is kept.

    ``data.raw_tags`` is finally rebound to a new list holding only the
    kept raw tags, in their original order. ``data`` is modified in
    place; nothing is returned.
    """
    unmatched = []
    for label in data.raw_tags:
        if label in TAGS:
            mapped = TAGS[label]
            if isinstance(mapped, str):
                data.tags.append(mapped)
            elif isinstance(mapped, list):
                data.tags.extend(mapped)
            # TAGS takes precedence: TOPICS is never consulted here.
            continue
        # The hasattr() guard lets objects without a `topics` attribute
        # keep such labels as raw tags.
        if hasattr(data, "topics") and label in TOPICS:
            data.topics.append(TOPICS[label])
            continue
        unmatched.append(label)
    data.raw_tags = unmatched