Coverage for src / wiktextract / extractor / ko / tags.py: 90%
20 statements
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-29 01:50 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-29 01:50 +0000
1from .models import WordEntry
# Label data on Korean Wiktionary:
# https://ko.wiktionary.org/wiki/모듈:labels/data/topical
# https://ko.wiktionary.org/wiki/모듈:labels/data
#
# Maps a Korean label string to a single English tag string.
# This table is merged into TAGS further down in this module.
GLOSS_TAGS = {
    "인명": "name",
    "고어": "archaic",
    "구식": "archaic",
    # Commented-out labels with no tag assigned:
    # "대명동사": "",
    # "말고름": "",
    "비유": "metaphoric",
    "사어": "obsolete",  # dead language
    "유아어": "baby-talk",
    "자동사": "intransitive",
    "직역": "literally",
    "타동사": "transitive",
    "드물게": "rare",
    "원래의 의미": "naturally",
    "문학적": "literary",
    "해학적": "humorous",
    "완곡적": "euphemistic",
    "가산": "countable",
    "불가산": "uncountable",
}
# Maps a Korean label string to either one English tag (str) or several
# (list of str). Some keys contain a literal "\n" separating a Korean name
# from its English name; presumably this mirrors the text extracted from
# the pronunciation template -- verify against the caller.
# This table is merged into TAGS further down in this module.
SOUND_TAGS = {
    # Template:ko-IPA (틀:ko-IPA)
    "국어의 로마자 표기\nRevised Romanization": ["revised", "romanization"],
    "국어의 로마자 표기 (음역)\nRevised Romanization (translit.)": [
        "revised",
        "romanization",
        "transliteration",
    ],
    "매큔-라이샤워 표기\nMcCune-Reischauer": "McCune-Reischauer",
    "예일 표기\nYale Romanization": ["Yale", "romanization"],
    "표준어": "SK-Standard",
    "서울": "Seoul",
    # Template:ja-pron (틀:ja-pron)
    "도쿄": "Tokyo",
    # Templates 틀:발음 듣기 ("listen to pronunciation") and 틀:IPA
    "영국": "UK",
    "미국": "US",
    "영": "UK",
    "미": "US",
    "표준": "standard",
    "남부": "South",
    "북부": "North",
    "고대": "archaic",
    "동부": "East",
    "서부": "West",
    "포르투갈": "Portugal",
    "이집트": "Egypt",
    "시리아": "Syria",
    "브라질": "Brazil",
    "독일": "Germany",
    "현대": "modern",
    "캐나다": "Canada",
    "하노이": "Hanoi",
    "브라질 남부": "Southern-Brazil",
    "벨기에": "Belgium",
    "이란": "Iran",
    "파리": "Paris",
    "모로코": "Morocco",
    "베를린": "Berlin",
    "비격식체": "informal",
    "민난어 장저우": ["Min-Nan", "Zhangzhou"],
    "대한민국": "South-Korea",
}
# Maps a label string to either one English tag (str) or several
# (list of str).
# This table is merged into TAGS further down in this module.
HEADER_TAGS = {
    # Template 틀:한국어_동사 ("Korean verb")
    "활용": "infinitive",
    "연결형": "sequential",
    "명사형": "noun",
    "사동사": "causative",
    "한글": "hangeul",
    "한자": "hanja",
    # Module:Jpan-headword (모듈:Jpan-headword)
    "자동사 및 타동사": ["transitive", "intransitive"],
    "연용형": "stem",
    "과거형": "past",
    "5단 활용": "godan",
    "1단 활용": "ichidan",
    "サ행 변격": "suru",
    "kuru": "kuru",
}
# Maps a Korean label string to either one English tag (str) or several
# (list of str).
# also in linkage lists
# This table is merged into TAGS further down in this module.
TRANSLATION_TAGS = {
    "남성": "masculine",
    "여성": "feminine",
    "라틴": "Latin",
    "중성": "neuter",
    "간체": "Simplified-Chinese",
    "번체": "Traditional-Chinese",
    "번체자": "Traditional-Chinese",
    "오스트리아": "Austria",
    "히브리 문자": ["Hebrew", "letter"],
    "아랍 문자": ["Arabic", "letter"],
    "복수형": "plural",
    "단수": "singular",
    "복수": "plural",
    "불완료체": "imperfect",
    "완료체": "completive",
    "양성": "masculine",
    "바이에른 방언": ["Bavarian", "dialectal"],
    "광둥어": "Cantonese",
    "오스트레일리아": "Australia",
    "글라골 문자": ["Glagolitic", "letter"],
    "속어": "slang",
    "멕시코 속어": ["Mexico", "slang"],
    "에스파냐": "Spain",
    "가타카나": "katakana",
    "고어": "archaic",
    "쯔놈": "Chu-Nom",
    "형용사": "adjective",
    "사투리": "dialectal",
    "약자": "abbreviation",
    "동사": "verb",
    "드문 단어": "rare",
}
# Combined lookup table used by translate_raw_tags(): the four category
# tables are unpacked in order, followed by extra entries. When the same
# key occurs more than once, the later occurrence wins (dict-literal
# semantics), so the order below is significant.
TAGS = {
    **GLOSS_TAGS,
    **SOUND_TAGS,
    **HEADER_TAGS,
    **TRANSLATION_TAGS,
    # Template:zh-forms
    "정체": "Traditional-Chinese",
    "간체": "Simplified-Chinese",
    # Template:zh-x
    "대만 관화": "Taiwanese-Mandarin",
    "표준 중국어": "Standard-Chinese",
    "한어병음": "Pinyin",
    "광저우 광둥어": "Guangzhou-Cantonese",
    "월병": "Jyutping",
    # Template:ja-kanjitab
    "구자체": "kyūjitai",
}
# Maps a Korean label string to a single English topic string.
# Consulted by translate_raw_tags() only for labels that are not in TAGS.
TOPICS = {
    "금융": "finance",
    "광고": "advertising",
    "군사": "military",
    "어류": "fish",
    "물리": "physics",
    "법률": "law",
    "식물": "botany",
    "역사": "history",
    "의류": "clothing",
    "의학": "medicine",
    "전기": "electricity",
    # Commented-out entry (not active):
    # "조류": "birds",
    "지리": "geography",
    "프로그래밍": "programming",
    "컴퓨터": "computer",
    "해부학": "anatomy",
    "정치": "politics",
    "종교": "religion",
    "가톨릭": "Catholicism",
    "축구": "football",
    # Commented-out entry (not active):
    # "체육": "physical-education",
}
def translate_raw_tags(data: WordEntry) -> None:
    """Convert the raw labels on *data* into tags and topics, in place.

    Each entry of ``data.raw_tags`` is handled as follows:

    * If it is a key of ``TAGS``, the mapped value is added to
      ``data.tags``: a ``str`` is appended, a ``list`` is extended into
      it. The label is dropped from ``raw_tags`` either way.
    * Otherwise, if *data* has a ``topics`` attribute and the label is a
      key of ``TOPICS``, the mapped topic is appended to ``data.topics``.
    * Otherwise the label is kept.

    Finally ``data.raw_tags`` is replaced with the kept labels, in their
    original order.
    """
    unresolved = []
    for label in data.raw_tags:
        if label not in TAGS:
            # Not a known tag: try topics (only for objects that carry a
            # ``topics`` attribute), else keep the label untouched.
            if hasattr(data, "topics") and label in TOPICS:
                data.topics.append(TOPICS[label])
            else:
                unresolved.append(label)
            continue

        mapped = TAGS[label]
        if isinstance(mapped, str):
            data.tags.append(mapped)
        elif isinstance(mapped, list):
            data.tags.extend(mapped)
    data.raw_tags = unresolved