Coverage for src/wiktextract/extractor/it/tags.py: 97%
21 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-03 05:44 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-03 05:44 +0000
from .models import WordEntry
# Italian labels found in declension/conjugation templates, mapped to the
# English tag(s) they translate to. A value is either a single tag string or
# a list of tag strings when one label expands to several tags.
TABLE_TAGS = {
    # https://it.wiktionary.org/wiki/Template:It-decl-agg4
    "singolare": "singular",
    "plurale": "plural",
    "positivo": "positive",
    "superlativo assoluto": ["absolute", "superlative"],
    "maschile": "masculine",
    "femminile": "feminine",
    # https://it.wiktionary.org/wiki/Template:It-decl-agg2
    "m e f": ["masculine", "feminine"],
    # https://it.wiktionary.org/wiki/Template:It-conj
    "infinito": "infinitive",
    "verbo ausiliare": "auxiliary",
    "gerundio": "gerund",
    "participio presente": ["present", "participle"],
    "participio passato": ["past", "participle"],
    "prima": "first-person",
    "seconda": "second-person",
    "terza": "third-person",
    "presente": "present",
    "imperfetto": "imperfect",
    "passato remoto": "past-remote",
    "futuro": "future",
    "passato prossimo": ["past", "perfect"],
    "trapassato prossimo": ["pluperfect", "past", "perfect"],
    "trapassato remoto": ["historic", "past-remote"],
    "futuro anteriore": ["future", "perfect"],
    "passato": "past",
    "trapassato": ["past", "perfect"],
    "imperativo": "imperative",
    "riflessivo pronominale": ["reflexive", "pronominal"],
    # Template:Fr-conj
    # Keys here are the Italian label and the French label joined by "\n".
    "verbo ausiliare\nauxiliaire": "auxiliary",
    "gerundio\ngérondif": "gerund",
    "participio presente\nparticipe présent": ["present", "participle"],
    "participio passato\nparticipe passé": ["past", "participle"],
    "presente\nprésent": "present",
    "imperfetto\nimparfait": "imperfect",
    "passato remoto\npassé simple": "past-remote",
    "futuro\nfutur simple": "future",
    "passato prossimo\npassé composé": ["past", "perfect"],
    "trapassato prossimo\nplus-que-parfait": ["pluperfect", "past", "perfect"],
    "trapassato remoto\npassé anterieur": ["historic", "past-remote"],
    "futuro anteriore\nfutur antérieur": ["future", "perfect"],
    "passato\npassé": "past",
    "trapassato\nplus-que-parfait": ["past", "perfect"],
    "imperativo\nimpératif": "imperative",
    # Template:La-conj
    "paradigma": "paradigm",
    "attivo": "active",
    "passivo": "passive",
    "perfetto": "perfect",
    "participio": "participle",
    "nominativo": "nominative",
    "genitivo": "genitive",
    "dativo": "dative",
    "accusativo": "accusative",
    "ablativo": "ablative",
    # The gerundive is a distinct form from the gerund ("gerundio" above),
    # so it gets its own tag instead of being collapsed into "gerund".
    # NOTE(review): assumes "gerundive" is in the project's valid tag list.
    "gerundivo": "gerundive",
    "supino": "supine",
    "futuro semplice": "future",
    "piuccheperfetto": "pluperfect",
}
# Italian labels mapped to English tag(s); a value is a single tag string or
# a list of tag strings. Keys are case-sensitive (note the capitalised ones).
FORM_LINE_TEMPLATE_TAGS = {
    # https://it.wiktionary.org/wiki/Template:A_cmp
    "comparativo": "comparative",
    "superlativo": "superlative",
    "plur": "plural",
    "Participio presente": ["present", "participle"],
    "Participio passato": ["past", "participle"],
    "Intransitivo pronominale": ["intransitive", "pronominal"],  # Template:inpr
    "Deponente": "deponent",
    "Ausiliare": "auxiliary",
    "Passivo": "passive",
    "Reciproco": "reciprocal",
    "Riflessivo": "reflexive",
    "Attivo": "active",
    "Riflessivo pronominale": ["reflexive", "pronominal"],
}
# https://it.wiktionary.org/wiki/Template:Term/d
# Italian subject labels mapped to a single English topic name each.
# Commented-out entries are labels that currently have no mapping and are
# therefore not translated.
TERM_TEMPLATE_TOPICS = {
    "abbigliamento": "clothing",
    "aeronautica": "aeronautics",
    "agricoltura": "agriculture",
    "algebra": "algebra",
    "ambito sportivo": "sports",
    "anatomia": "anatomy",
    # "animali": "",
    "antropologia": "anthropology",
    "araldica": "heraldry",
    "archeologia": "archaeology",
    "architettura": "architecture",
    "aritmetica": "arithmetic",
    "arm.": "weaponry",
    "arma": "weaponry",
    "armamento": "weaponry",
    "armi": "weaponry",
    "arte": "arts",
    "astrologia": "astrology",
    "astronomia": "astronomy",
    "botanica": "botany",
    "biochimica": "biochemistry",
    "biologia": "biology",
    "biotecnologia": "biotechnology",
    # "burocrazia": "",
    "calcio": "soccer",
    "carte": "card-games",
    "chimica": "chemistry",
    "chimica generale": "chemistry",
    "chimica inorganica": "chemistry",
    "chimica organica": "chemistry",
    "chimica analitica": "chemistry",
    "chimica industriale": "chemistry",
    "chirurgia": "surgery",
    "cinematografia": "cinematography",
    "colore": "color",
    "commercio": "commerce",
    # "composti organici": "",
    # "composti inorganici": "",
    "cristianesimo": "Christianity",
    "danza": "dance",
    # "diritto": "",
    "ecclesiastico": "ecclesiastical",
    "ecologia": "ecology",
    "economia": "economics",
    "edilizia": "construction",
    "elementi chimici": "chemistry",
    "elettronica": "electronics",
    "elettrotecnica": "electrical-engineering",
    "entomologia": "entomology",
    "equitazione": "equitation",
    "erpetologia": "herpetology",
    # "esoterismo": "",
    "etnologia": "ethnology",
    "falegnameria": "carpentry",
    # "familiare": "",
    "farmacologia": "pharmacology",
    "ferrovia": "railways",
    "filosofia": "philosophy",
    "finanza": "finance",
    "fisica": "physics",
    "fisiologia": "physiology",
    "fonologia": "phonology",
    # "forestierismo": "",
    "fotografia": "photography",
    # "gastronomia": "gastronomy",
    "genetica": "genetics",
    "geografia": "geography",
    "geologia": "geology",
    "geometria": "geometry",
    "gioco": "games",
    "giornalistico": "journalism",
    "grammatica": "grammar",
    "idraulica": "hydraulics",
    "informatica": "informatics",
    "ingegneria": "engineering",
    "internet": "Internet",
    "ittiologia": "ichthyology",
    "legale": "law",
    "letteratura": "literature",
    "linguistica": "linguistics",
    # "macelleria": "",
    "malacologia": "malacology",
    "mammalogia": "mammalogy",
    "marina": "navy",
    "matematica": "mathematics",
    "meccanica": "mechanics",
    "medicina": "medicine",
    "metallurgia": "metallurgy",
    "meteorologia": "meteorology",
    # "Metrica": "",
    # "Metrica classica": "",
    # "Metrica contemporanea": "",
    # "Metrica latina": "",
    "militare": "military",
    "minerale": "mineralogy",
    "mineralogia": "mineralogy",
    "mitologia": "mythology",
    "moda": "fashion",
    "musica": "music",
    "numismatica": "numismatics",
    "ornitologia": "ornithology",
    # "pedagogia": "pedagogy",
    # "pittura": "painting",
    "poesia": "poetry",
    # "polimeri": "",
    "politica": "politics",
    # "Popolare": "",
    # "Professioni": "",
    "psichiatria": "psychiatry",
    "psicanalisi": "psychoanalysis",
    "psicologia": "psychology",
    "religione": "religion",
    "topografia": "topography",
    # "Toponimi": "",
    "paleontologia": "paleontology",
    "pianta": "botany",
    "scacchi": "chess",
    # "Scuola": "",
    "sessualità": "sexuality",
    "sociologia": "sociology",
    "sport": "sports",
    "sport invernali": "sports",
    "statistica": "statistics",
    "storia": "history",
    # "strumenti musicali": "",
    "teatro": "theater",
    "tecnica": "technology",
    "tecnologia": "technology",
    "telecomunicazioni": "telecommunications",
    "tessile": "textiles",
    "tipografia": "typography",
    "veterinaria": "veterinary",
    "zoologia": "zoology",
    # "zootecnica": "",
}
# Italian usage/register labels mapped to a single English tag each.
TERM_TEMPLATE_TAGS = {
    "antico": "archaic",
    "obsoleto": "obsolete",
    "formale": "formal",
    "gergale": "slang",
    "informale": "informal",
    "letterario": "literary",
    "neologismo": "neologism",
    "offensivo": "offensive",
    "raro": "rare",
    "regionale": "regional",
    "volgare": "vulgar",
}
# https://it.wiktionary.org/wiki/Categoria:Template_ambito
# Italian labels mapped to English tag(s); the trailing comment on each entry
# names the wiki template the label is associated with.
# NOTE(review): the constant name is misspelled ("TEMPATE"); it is kept as-is
# because other modules may reference it by this name.
GLOSS_LIST_TEMPATE_TAGS = {
    "accrescitivo": "augmentative",  # Template:Accr
    "colloquiale": "colloquial",  # Template:Coll
    "diminutivo": "diminutive",  # Template:Dim
    "per estensione": "broadly",  # Template:Est
    "senso figurato": "figuratively",  # Template:Fig
    "letteralmente": "literally",  # Template:Lett
    "peggiorativo": "pejorative",  # Template:Pegg
    "riferito solo a persone": "person",  # Template:Pers
    "per sineddoche": "synecdoche",  # Template:Sndc
    "specialmente al plurale": ["especially", "in-plural"],  # Template:Spec pl
    "spregiativo": "pejorative",  # Template:Spreg
    "vezzeggiativo": "endearing",  # Template:Vezz
    "volgare": "vulgar",  # Template:Vulg
}
# Remaining Italian labels mapped to a single English tag each.
# Keys are case-sensitive.
OTHER_TAGS = {
    "Transitivo": "transitive",
    "Intransitivo": "intransitive",
    "toponimo": "toponymic",
    "classe": "class",
}
# Single lookup table built by merging every tag mapping in this module.
# With ``**`` unpacking a later mapping overrides an earlier one when the
# same key appears in both.
TAGS = {
    **TABLE_TAGS,
    **FORM_LINE_TEMPLATE_TAGS,
    **TERM_TEMPLATE_TAGS,
    **GLOSS_LIST_TEMPATE_TAGS,
    **OTHER_TAGS,
}
def translate_raw_tags(data: WordEntry) -> None:
    """Translate the Italian raw tags on ``data`` into English tags/topics.

    Each entry of ``data.raw_tags`` is handled as follows:

    * if it is a key of ``TAGS`` and ``data`` has a ``tags`` attribute, its
      translation (one string, or every string of a list) is added to
      ``data.tags``;
    * otherwise, if it is a key of ``TERM_TEMPLATE_TOPICS`` and ``data`` has
      a ``topics`` attribute, the topic is added to ``data.topics``;
    * otherwise it is kept as a raw tag.

    ``data`` is modified in place: ``data.raw_tags`` is replaced by the list
    of entries that could not be translated, in their original order.
    Nothing is returned.
    """
    untranslated = []
    for raw_tag in data.raw_tags:
        if raw_tag in TAGS and hasattr(data, "tags"):
            tr_tag = TAGS[raw_tag]
            if isinstance(tr_tag, str):
                data.tags.append(tr_tag)
            elif isinstance(tr_tag, list):
                # One Italian label can expand to several English tags.
                data.tags.extend(tr_tag)
        elif raw_tag in TERM_TEMPLATE_TOPICS and hasattr(data, "topics"):
            data.topics.append(TERM_TEMPLATE_TOPICS[raw_tag])
        else:
            untranslated.append(raw_tag)
    data.raw_tags = untranslated