Coverage for src / wiktextract / extractor / it / tags.py: 97%
21 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
1from .models import WordEntry
# Italian labels (from the templates cited in the section comments below)
# mapped to English tag(s). A value is either a single tag (str) or several
# tags (list of str); translate_raw_tags appends a str and extends with a list.
TABLE_TAGS = {
    # https://it.wiktionary.org/wiki/Template:It-decl-agg4
    "singolare": "singular",
    "plurale": "plural",
    "positivo": "positive",
    "superlativo assoluto": ["absolute", "superlative"],
    "maschile": "masculine",
    "femminile": "feminine",
    # https://it.wiktionary.org/wiki/Template:It-decl-agg2
    "m e f": ["masculine", "feminine"],
    # https://it.wiktionary.org/wiki/Template:It-conj
    "infinito": "infinitive",
    "verbo ausiliare": "auxiliary",
    "gerundio": "gerund",
    "participio presente": ["present", "participle"],
    "participio passato": ["past", "participle"],
    "prima": "first-person",
    "seconda": "second-person",
    "terza": "third-person",
    "presente": "present",
    "imperfetto": "imperfect",
    "passato remoto": "past-remote",
    "futuro": "future",
    "passato prossimo": ["past", "perfect"],
    "trapassato prossimo": ["pluperfect", "past", "perfect"],
    "trapassato remoto": ["historic", "past-remote"],
    "futuro anteriore": ["future", "perfect"],
    "passato": "past",
    "trapassato": ["past", "perfect"],
    "imperativo": "imperative",
    "riflessivo pronominale": ["reflexive", "pronominal"],
    # Template:Fr-conj
    # Keys in this section are an Italian label and a French label joined by
    # a newline; the whole string must match for the lookup to succeed.
    "verbo ausiliare\nauxiliaire": "auxiliary",
    "gerundio\ngérondif": "gerund",
    "participio presente\nparticipe présent": ["present", "participle"],
    "participio passato\nparticipe passé": ["past", "participle"],
    "presente\nprésent": "present",
    "imperfetto\nimparfait": "imperfect",
    "passato remoto\npassé simple": "past-remote",
    "futuro\nfutur simple": "future",
    "passato prossimo\npassé composé": ["past", "perfect"],
    "trapassato prossimo\nplus-que-parfait": ["pluperfect", "past", "perfect"],
    # NOTE(review): "anterieur" is spelled without an accent here; presumably
    # this mirrors the template's own text -- confirm before "fixing" it.
    "trapassato remoto\npassé anterieur": ["historic", "past-remote"],
    "futuro anteriore\nfutur antérieur": ["future", "perfect"],
    "passato\npassé": "past",
    "trapassato\nplus-que-parfait": ["past", "perfect"],
    "imperativo\nimpératif": "imperative",
    # Template:La-conj
    "paradigma": "paradigm",
    "attivo": "active",
    "passivo": "passive",
    "perfetto": "perfect",
    "participio": "participle",
    "nominativo": "nominative",
    "genitivo": "genitive",
    "dativo": "dative",
    "accusativo": "accusative",
    "ablativo": "ablative",
    # NOTE(review): "gerundivo" is the gerundive, which differs from the
    # gerund ("gerundio" above) -- confirm that "gerund" is the intended tag.
    "gerundivo": "gerund",
    "supino": "supine",
    "futuro semplice": "future",
    "piuccheperfetto": "pluperfect",
}
# Labels (mostly capitalised, unlike the lowercase keys of TABLE_TAGS) mapped
# to English tag(s). Lookups are case-sensitive, so "Passivo" here and
# "passivo" in TABLE_TAGS are distinct keys. Values are a str or a list of str.
FORM_LINE_TEMPLATE_TAGS = {
    # https://it.wiktionary.org/wiki/Template:A_cmp
    "comparativo": "comparative",
    "superlativo": "superlative",
    "plur": "plural",
    "Participio presente": ["present", "participle"],
    "Participio passato": ["past", "participle"],
    "Intransitivo pronominale": ["intransitive", "pronominal"],  # Template:inpr
    "Deponente": "deponent",
    "Ausiliare": "auxiliary",
    "Passivo": "passive",
    "Reciproco": "reciprocal",
    "Riflessivo": "reflexive",
    "Attivo": "active",
    "Riflessivo pronominale": ["reflexive", "pronominal"],
    # Template:en-verb
    "3ª persona sing. presente": ["third-person", "singular", "present"],
    "passato semplice": "past",
    "passato semplice e participio passato": ["past", "participle"],
}
# https://it.wiktionary.org/wiki/Template:Term/d
# Italian subject labels mapped to a single English topic name (always a str,
# never a list). translate_raw_tags appends the mapped value to `data.topics`.
# Commented-out entries are labels with no English topic assigned; because
# they are absent from the dict, such labels are left in `raw_tags`.
TERM_TEMPLATE_TOPICS = {
    "abbigliamento": "clothing",
    "aeronautica": "aeronautics",
    "agricoltura": "agriculture",
    "algebra": "algebra",
    "ambito sportivo": "sports",
    "anatomia": "anatomy",
    # "animali": "",
    "antropologia": "anthropology",
    "araldica": "heraldry",
    "archeologia": "archaeology",
    "architettura": "architecture",
    "aritmetica": "arithmetic",
    "arm.": "weaponry",
    "arma": "weaponry",
    "armamento": "weaponry",
    "armi": "weaponry",
    "arte": "arts",
    "astrologia": "astrology",
    "astronomia": "astronomy",
    "botanica": "botany",
    "biochimica": "biochemistry",
    "biologia": "biology",
    "biotecnologia": "biotechnology",
    # "burocrazia": "",
    "calcio": "soccer",
    "carte": "card-games",
    "chimica": "chemistry",
    "chimica generale": "chemistry",
    "chimica inorganica": "chemistry",
    "chimica organica": "chemistry",
    "chimica analitica": "chemistry",
    "chimica industriale": "chemistry",
    "chirurgia": "surgery",
    "cinematografia": "cinematography",
    "colore": "color",
    "commercio": "commerce",
    # "composti organici": "",
    # "composti inorganici": "",
    "cristianesimo": "Christianity",
    "danza": "dance",
    # "diritto": "",
    "ecclesiastico": "ecclesiastical",
    "ecologia": "ecology",
    "economia": "economics",
    "edilizia": "construction",
    "elementi chimici": "chemistry",
    "elettronica": "electronics",
    "elettrotecnica": "electrical-engineering",
    "entomologia": "entomology",
    "equitazione": "equitation",
    "erpetologia": "herpetology",
    # "esoterismo": "",
    "etnologia": "ethnology",
    "falegnameria": "carpentry",
    # "familiare": "",
    "farmacologia": "pharmacology",
    "ferrovia": "railways",
    "filosofia": "philosophy",
    "finanza": "finance",
    "fisica": "physics",
    "fisiologia": "physiology",
    "fonologia": "phonology",
    # "forestierismo": "",
    "fotografia": "photography",
    # "gastronomia": "gastronomy",
    "genetica": "genetics",
    "geografia": "geography",
    "geologia": "geology",
    "geometria": "geometry",
    "gioco": "games",
    "giornalistico": "journalism",
    "grammatica": "grammar",
    "idraulica": "hydraulics",
    "informatica": "informatics",
    "ingegneria": "engineering",
    "internet": "Internet",
    "ittiologia": "ichthyology",
    "legale": "law",
    "letteratura": "literature",
    "linguistica": "linguistics",
    # "macelleria": "",
    "malacologia": "malacology",
    "mammalogia": "mammalogy",
    "marina": "navy",
    "matematica": "mathematics",
    "meccanica": "mechanics",
    "medicina": "medicine",
    "metallurgia": "metallurgy",
    "meteorologia": "meteorology",
    # "Metrica": "",
    # "Metrica classica": "",
    # "Metrica contemporanea": "",
    # "Metrica latina": "",
    "militare": "military",
    "minerale": "mineralogy",
    "mineralogia": "mineralogy",
    "mitologia": "mythology",
    "moda": "fashion",
    "musica": "music",
    "numismatica": "numismatics",
    "ornitologia": "ornithology",
    # "pedagogia": "pedagogy",
    # "pittura": "painting",
    "poesia": "poetry",
    # "polimeri": "",
    "politica": "politics",
    # "Popolare": "",
    # "Professioni": "",
    "psichiatria": "psychiatry",
    "psicanalisi": "psychoanalysis",
    "psicologia": "psychology",
    "religione": "religion",
    "topografia": "topography",
    # "Toponimi": "",
    "paleontologia": "paleontology",
    "pianta": "botany",
    "scacchi": "chess",
    # "Scuola": "",
    "sessualità": "sexuality",
    "sociologia": "sociology",
    "sport": "sports",
    "sport invernali": "sports",
    "statistica": "statistics",
    "storia": "history",
    # "strumenti musicali": "",
    "teatro": "theater",
    "tecnica": "technology",
    "tecnologia": "technology",
    "telecomunicazioni": "telecommunications",
    "tessile": "textiles",
    "tipografia": "typography",
    "veterinaria": "veterinary",
    "zoologia": "zoology",
    # "zootecnica": "",
}
# Italian usage/register labels mapped to one English tag each (all values
# are str). Merged into TAGS, so they are written to `data.tags`, unlike the
# entries of TERM_TEMPLATE_TOPICS, which go to `data.topics`.
TERM_TEMPLATE_TAGS = {
    "antico": "archaic",
    "obsoleto": "obsolete",
    "formale": "formal",
    "gergale": "slang",
    "informale": "informal",
    "letterario": "literary",
    "neologismo": "neologism",
    "offensivo": "offensive",
    "raro": "rare",
    "regionale": "regional",
    "volgare": "vulgar",
}
# https://it.wiktionary.org/wiki/Categoria:Template_ambito
# Italian labels mapped to English tag(s); the trailing comment on each entry
# names the template the label is associated with. Values are a str or a list
# of str. "volgare" also appears in TERM_TEMPLATE_TAGS with the same value, so
# the duplicate key is harmless when both dicts are merged into TAGS.
GLOSS_LIST_TEMPLATE_TAGS = {
    "accrescitivo": "augmentative",  # Template:Accr
    "colloquiale": "colloquial",  # Template:Coll
    "diminutivo": "diminutive",  # Template:Dim
    "per estensione": "broadly",  # Template:Est
    "senso figurato": "figuratively",  # Template:Fig
    "letteralmente": "literally",  # Template:Lett
    "peggiorativo": "pejorative",  # Template:Pegg
    "riferito solo a persone": "person",  # Template:Pers
    "per sineddoche": "synecdoche",  # Template:Sndc
    "specialmente al plurale": ["especially", "in-plural"],  # Template:Spec pl
    "spregiativo": "pejorative",  # Template:Spreg
    "vezzeggiativo": "endearing",  # Template:Vezz
    "volgare": "vulgar",  # Template:Vulg
}
# Labels that are not grouped under any of the template-specific dicts,
# mapped to one English tag each. Keys are matched case-sensitively
# ("Transitivo"/"Intransitivo" are capitalised, the others are not).
OTHER_TAGS = {
    "Transitivo": "transitive",
    "Intransitivo": "intransitive",
    "toponimo": "toponymic",
    "classe": "class",
}
# Single lookup table used by translate_raw_tags: every tag mapping merged
# into one dict. The mappings are applied in the order listed, so when the
# same key occurs in more than one of them the value applied last is kept.
TAGS = dict(TABLE_TAGS)
TAGS.update(FORM_LINE_TEMPLATE_TAGS)
TAGS.update(TERM_TEMPLATE_TAGS)
TAGS.update(GLOSS_LIST_TEMPLATE_TAGS)
TAGS.update(OTHER_TAGS)
def translate_raw_tags(data: WordEntry) -> None:
    """Move recognised entries of ``data.raw_tags`` into tags or topics.

    Each raw tag is looked up (case-sensitively) first in ``TAGS`` and then
    in ``TERM_TEMPLATE_TOPICS``:

    * a ``TAGS`` hit is added to ``data.tags`` (a ``str`` value is appended,
      a ``list`` value is extended), provided ``data`` has a ``tags``
      attribute;
    * a ``TERM_TEMPLATE_TOPICS`` hit is appended to ``data.topics``,
      provided ``data`` has a ``topics`` attribute;
    * anything else is kept.

    ``data`` is modified in place: ``data.raw_tags`` is replaced with a new
    list holding only the raw tags that were not translated, in their
    original order. Nothing is returned.
    """
    untranslated = []
    for label in data.raw_tags:
        if label in TAGS and hasattr(data, "tags"):
            mapped = TAGS[label]
            if isinstance(mapped, str):
                data.tags.append(mapped)
            elif isinstance(mapped, list):
                data.tags.extend(mapped)
            # A TAGS hit is consumed even if the value was neither str nor
            # list; it is never retried as a topic or kept as a raw tag.
            continue
        if label in TERM_TEMPLATE_TOPICS and hasattr(data, "topics"):
            data.topics.append(TERM_TEMPLATE_TOPICS[label])
            continue
        untranslated.append(label)
    data.raw_tags = untranslated