Coverage for src/wiktextract/extractor/id/tags.py: 62%
25 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1from .models import WordEntry
# Maps gloss labels found on Indonesian Wiktionary entries to the English tag
# names written to the extracted data. Merged into ``TAGS`` below.
GLOSS_TAGS = {
    # https://id.wiktionary.org/wiki/Templat:Istilah
    "cak": "slang",
    "kiasan": "figuratively",
    "mdt": "morpheme",
    # NOTE: "Sas" is also a key of TOPICS ("literature").
    "Sas": "literary",
    "arkais": "archaic",
    "klasik": "Classical",
    "nonformal": "informal",
    "akar": "root",
    "akronim": "acronym",
    "kasar": "impolite",
    "hormat": "honorific",
    "generalisasi": "general",
    "neologisme": "neologism",
    "eufemisme": "euphemism",
    "Sunda": "Sundanese",
    "Aceh": "Acehnese",
    "Banjar": "Banjarese",
    "Minangkabau": "Minangkabau",
    "Gorontalo": "Gorontalo",
    "Madura": "Madurese",
    "Batak": "Batak",
}
# Maps gender/number codes to English tag names. Merged into ``TAGS`` below.
TRANSLATION_TAGS = {
    # https://id.wiktionary.org/wiki/Modul:gender_and_number/data
    "m": "masculine",
    "f": "feminine",
    "n": "neuter",
    "jamak": "plural",
}
# Maps grammatical-number labels to English tag names. Merged into ``TAGS``
# below.
NUM_TAGS = {
    "tunggal": "singular",
    "plural": "plural",
}
# Maps pronunciation accent labels to English tag names. Merged into ``TAGS``
# below.
SOUND_TAGS = {
    "RP": "Received-Pronunciation",
    "US": "US",
}
# Maps Indonesian grammatical labels (voice, transitivity, mood) to English
# tag names. Merged into ``TAGS`` below.
POS_HEADER_TAGS = {
    "pasif": "passive",
    "transitif": "transitive",
    "imperatif": "imperative",
    "aktif": "active",
}
# Combined raw-label -> English-tag lookup table consulted by
# ``translate_raw_tags``. Later tables win on duplicate keys, so the merge
# order below is significant and must be kept.
TAGS = {
    **TRANSLATION_TAGS,
    **GLOSS_TAGS,
    **NUM_TAGS,
    **SOUND_TAGS,
    **POS_HEADER_TAGS,
}
# Maps abbreviated subject-field labels to English topic names. A value is
# either a single topic string or a list of topic strings (see "Pang").
# Entries that are commented out were already disabled in the original table.
TOPICS = {
    "Kim": "chemistry",
    "Mat": "mathematics",
    "Ling": "linguistics",
    "Dok": "medicine",
    "Bio": "biology",
    # NOTE: "Sas" is also a key of GLOSS_TAGS ("literary").
    "Sas": "literature",
    "Mus": "music",
    "Antr": "anthropology",
    "Ars": "architecture",
    "Ark": "archaeology",
    "Psi": "psychology",
    "Isl": "Islam",
    "Geo": "geology",
    "Hin": "Hinduism",
    "Hid": "hydrology",
    "Huk": "law",
    "Kat": "Catholicism",
    "Fis": "physics",
    "Olr": "sports",
    "Dik": "education",
    "Far": "pharmacology",
    "Lay": "shipping",
    "Komp": "computer",
    "Kris": "Christianity",
    "Bot": "botany",
    "Stat": "statistics",
    "Tan": "agriculture",
    "Elek": "electronics",
    "Anat": "anatomy",
    "Adm": "administration",
    "Sen": "arts",
    "Tas": "Sufism",
    "Graf": "printing",
    "Astron": "astronomy",
    "Met": "meteorology",
    "Dag": "commerce",
    "Zool": "zoology",
    "Min": "mineralogy",
    "Kom": "communications",
    "Hut": "forestry",
    "Eko": "economy",
    "Tek": "technology",
    "Mil": "military",
    "Sos": "sociology",
    "Pol": "politics",
    # "Tern": "",
    "Man": "management",
    "Ikn": "fishing",
    "Fil": "philosophy",
    "Astrol": "astrology",
    "Keu": "finance",
    "Filol": "philology",
    "Metal": "metallurgy",
    "Ent": "entomology",
    "Bud": "Buddhism",
    # "Idt": "industry",
    "Mik": "mycology",
    "Pet": "petrology",
    "Dem": "demography",
    # "Hidm": "hydrometeorology",
    "Film": "film",
    "Tbg": "culinary",
    "Bakt": "bacteriology",
    "Foto": "photography",
    "Pang": ["food", "sciences"],
    "Hindu Bali": "Balinese Hinduism",
}
def translate_raw_tags(data: WordEntry) -> None:
    """Translate the raw tags of *data* into English tags and topics in place.

    Each entry of ``data.raw_tags`` is looked up in ``TAGS`` first and in
    ``TOPICS`` second. A lookup only counts as a match when *data* also has
    the corresponding attribute (``tags`` or ``topics``). A matching string
    value is appended to that attribute and a matching list value is
    extended onto it. Entries that match neither table are kept, in their
    original order, and become the new ``data.raw_tags``.

    NOTE: ``TAGS`` takes precedence, so a key present in both tables (such
    as ``"Sas"``) is emitted as a tag, not a topic, whenever *data* has a
    ``tags`` attribute.
    """
    unmatched = []
    for raw_tag in data.raw_tags:
        if raw_tag in TAGS and hasattr(data, "tags"):
            target, value = data.tags, TAGS[raw_tag]
        elif raw_tag in TOPICS and hasattr(data, "topics"):
            target, value = data.topics, TOPICS[raw_tag]
        else:
            # Not translatable for this object: keep the raw label.
            unmatched.append(raw_tag)
            continue
        # Table values are either one name or a list of names; a matched
        # label is consumed either way.
        if isinstance(value, str):
            target.append(value)
        elif isinstance(value, list):
            target.extend(value)
    data.raw_tags = unmatched