Coverage for src/wiktextract/extractor/it/tags.py: 97%
21 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
1from .models import WordEntry
# Labels found in Italian Wiktionary inflection/conjugation tables, mapped to
# English tags. A value is either one tag (str) or several tags (list of str).
TABLE_TAGS = {
    # Adjective declension, four forms:
    # https://it.wiktionary.org/wiki/Template:It-decl-agg4
    "singolare": "singular",
    "plurale": "plural",
    "positivo": "positive",
    "superlativo assoluto": ["absolute", "superlative"],
    "maschile": "masculine",
    "femminile": "feminine",
    # Adjective declension, two forms:
    # https://it.wiktionary.org/wiki/Template:It-decl-agg2
    "m e f": ["masculine", "feminine"],
    # Verb conjugation:
    # https://it.wiktionary.org/wiki/Template:It-conj
    "infinito": "infinitive",
    "verbo ausiliare": "auxiliary",
    "gerundio": "gerund",
    "participio presente": ["present", "participle"],
    "participio passato": ["past", "participle"],
    "prima": "first-person",
    "seconda": "second-person",
    "terza": "third-person",
    "presente": "present",
    "imperfetto": "imperfect",
    "passato remoto": "past-remote",
    "futuro": "future",
    "passato prossimo": ["past", "perfect"],
    "trapassato prossimo": ["pluperfect", "past", "perfect"],
    "trapassato remoto": ["historic", "past-remote"],
    "futuro anteriore": ["future", "perfect"],
    "passato": "past",
    "trapassato": ["past", "perfect"],
    "imperativo": "imperative",
    "riflessivo pronominale": ["reflexive", "pronominal"],
}
# Labels emitted by form-line templates, mapped to English tags.
# Keys are case-sensitive: the capitalised verb labels are distinct from the
# lowercase keys used in other tables of this module.
FORM_LINE_TEMPLATE_TAGS = {
    # https://it.wiktionary.org/wiki/Template:A_cmp
    "comparativo": "comparative",
    "superlativo": "superlative",
    "plur": "plural",
    "Participio presente": ["present", "participle"],
    "Participio passato": ["past", "participle"],
    # Template:inpr
    "Intransitivo pronominale": ["intransitive", "pronominal"],
    "Deponente": "deponent",
    "Ausiliare": "auxiliary",
    "Passivo": "passive",
    "Reciproco": "reciprocal",
    "Riflessivo": "reflexive",
    "Attivo": "active",
    "Riflessivo pronominale": ["reflexive", "pronominal"],
}
# Subject-area labels of the Italian "Term" template, mapped to English topic
# names. Source list: https://it.wiktionary.org/wiki/Template:Term/d
#
# Entries that are commented out are not part of the mapping at runtime.
# NOTE(review): presumably they have no accepted English topic yet -- confirm
# before enabling any of them.
TERM_TEMPLATE_TOPICS = {
    "abbigliamento": "clothing",
    "aeronautica": "aeronautics",
    "agricoltura": "agriculture",
    "algebra": "algebra",
    "ambito sportivo": "sports",
    "anatomia": "anatomy",
    # "animali": "",
    "antropologia": "anthropology",
    "araldica": "heraldry",
    "archeologia": "archaeology",
    "architettura": "architecture",
    "aritmetica": "arithmetic",
    "arm.": "weaponry",
    "arma": "weaponry",
    "armamento": "weaponry",
    "armi": "weaponry",
    "arte": "arts",
    "astrologia": "astrology",
    "astronomia": "astronomy",
    "botanica": "botany",
    "biochimica": "biochemistry",
    "biologia": "biology",
    "biotecnologia": "biotechnology",
    # "burocrazia": "",
    "calcio": "soccer",
    "carte": "card-games",
    "chimica": "chemistry",
    "chimica generale": "chemistry",
    "chimica inorganica": "chemistry",
    "chimica organica": "chemistry",
    "chimica analitica": "chemistry",
    "chimica industriale": "chemistry",
    "chirurgia": "surgery",
    "cinematografia": "cinematography",
    "colore": "color",
    "commercio": "commerce",
    # "composti organici": "",
    # "composti inorganici": "",
    "cristianesimo": "Christianity",
    "danza": "dance",
    # "diritto": "",
    "ecclesiastico": "ecclesiastical",
    "ecologia": "ecology",
    "economia": "economics",
    "edilizia": "construction",
    "elementi chimici": "chemistry",
    "elettronica": "electronics",
    "elettrotecnica": "electrical-engineering",
    "entomologia": "entomology",
    "equitazione": "equitation",
    "erpetologia": "herpetology",
    # "esoterismo": "",
    "etnologia": "ethnology",
    "falegnameria": "carpentry",
    # "familiare": "",
    "farmacologia": "pharmacology",
    "ferrovia": "railways",
    "filosofia": "philosophy",
    "finanza": "finance",
    "fisica": "physics",
    "fisiologia": "physiology",
    "fonologia": "phonology",
    # "forestierismo": "",
    "fotografia": "photography",
    # "gastronomia": "gastronomy",
    "genetica": "genetics",
    "geografia": "geography",
    "geologia": "geology",
    "geometria": "geometry",
    "gioco": "games",
    "giornalistico": "journalism",
    "grammatica": "grammar",
    "idraulica": "hydraulics",
    "informatica": "informatics",
    "ingegneria": "engineering",
    "internet": "Internet",
    "ittiologia": "ichthyology",
    "legale": "law",
    "letteratura": "literature",
    "linguistica": "linguistics",
    # "macelleria": "",
    "malacologia": "malacology",
    "mammalogia": "mammalogy",
    "marina": "navy",
    "matematica": "mathematics",
    "meccanica": "mechanics",
    "medicina": "medicine",
    "metallurgia": "metallurgy",
    "meteorologia": "meteorology",
    # "Metrica": "",
    # "Metrica classica": "",
    # "Metrica contemporanea": "",
    # "Metrica latina": "",
    "militare": "military",
    "minerale": "mineralogy",
    "mineralogia": "mineralogy",
    "mitologia": "mythology",
    "moda": "fashion",
    "musica": "music",
    "numismatica": "numismatics",
    "ornitologia": "ornithology",
    # "pedagogia": "pedagogy",
    # "pittura": "painting",
    "poesia": "poetry",
    # "polimeri": "",
    "politica": "politics",
    # "Popolare": "",
    # "Professioni": "",
    "psichiatria": "psychiatry",
    "psicanalisi": "psychoanalysis",
    "psicologia": "psychology",
    "religione": "religion",
    "topografia": "topography",
    # "Toponimi": "",
    "paleontologia": "paleontology",
    "pianta": "botany",
    "scacchi": "chess",
    # "Scuola": "",
    "sessualità": "sexuality",
    "sociologia": "sociology",
    "sport": "sports",
    "sport invernali": "sports",
    "statistica": "statistics",
    "storia": "history",
    # "strumenti musicali": "",
    "teatro": "theater",
    "tecnica": "technology",
    "tecnologia": "technology",
    "telecomunicazioni": "telecommunications",
    "tessile": "textiles",
    "tipografia": "typography",
    "veterinaria": "veterinary",
    "zoologia": "zoology",
    # "zootecnica": "",
}
# Usage/register labels, mapped to English tags. Every value here is a single
# tag string.
TERM_TEMPLATE_TAGS = {
    "antico": "archaic",
    "obsoleto": "obsolete",
    "formale": "formal",
    "gergale": "slang",
    "informale": "informal",
    "letterario": "literary",
    "neologismo": "neologism",
    "offensivo": "offensive",
    "raro": "rare",
    "regionale": "regional",
    "volgare": "vulgar",
}
# Labels of gloss-list "ambito" templates, mapped to English tags.
# https://it.wiktionary.org/wiki/Categoria:Template_ambito
# A value is either one tag (str) or several tags (list of str).
GLOSS_LIST_TEMPLATE_TAGS = {
    "accrescitivo": "augmentative",  # Template:Accr
    "colloquiale": "colloquial",  # Template:Coll
    "diminutivo": "diminutive",  # Template:Dim
    "per estensione": "broadly",  # Template:Est
    "senso figurato": "figuratively",  # Template:Fig
    "letteralmente": "literally",  # Template:Lett
    "peggiorativo": "pejorative",  # Template:Pegg
    "riferito solo a persone": "person",  # Template:Pers
    "per sineddoche": "synecdoche",  # Template:Sndc
    "specialmente al plurale": ["especially", "in-plural"],  # Template:Spec pl
    "spregiativo": "pejorative",  # Template:Spreg
    "vezzeggiativo": "endearing",  # Template:Vezz
    "volgare": "vulgar",  # Template:Vulg
}
# Backward-compatible alias: the historical name misspells "TEMPLATE". It is
# bound to the very same dict object so existing references keep working.
GLOSS_LIST_TEMPATE_TAGS = GLOSS_LIST_TEMPLATE_TAGS
# Miscellaneous labels that belong to none of the template-specific tables.
# Keys are case-sensitive ("Transitivo" is capitalised, "toponimo" is not).
OTHER_TAGS = {
    "Transitivo": "transitive",
    "Intransitivo": "intransitive",
    "toponimo": "toponymic",
    "classe": "class",
}
# Single lookup table combining every tag mapping of this module.
# Tables are merged in the order listed; when a key occurs in more than one
# table, the value from the later table wins.
TAGS = dict(TABLE_TAGS)
TAGS.update(FORM_LINE_TEMPLATE_TAGS)
TAGS.update(TERM_TEMPLATE_TAGS)
TAGS.update(GLOSS_LIST_TEMPATE_TAGS)
TAGS.update(OTHER_TAGS)
def translate_raw_tags(data: WordEntry) -> None:
    """Translate the Italian raw tags of *data* in place.

    Each entry of ``data.raw_tags`` is handled as follows:

    * If it is a key of ``TAGS`` and *data* has a ``tags`` attribute, the
      mapped value is added to ``data.tags`` (appended when it is a string,
      extended when it is a list).
    * Otherwise, if it is a key of ``TERM_TEMPLATE_TOPICS`` and *data* has a
      ``topics`` attribute, the mapped topic is appended to ``data.topics``.
    * Otherwise it is kept as a raw tag.

    Finally ``data.raw_tags`` is replaced by the list of entries that were
    kept, in their original order.
    """
    unmatched = []
    for label in data.raw_tags:
        if label in TAGS and hasattr(data, "tags"):
            mapped = TAGS[label]
            if isinstance(mapped, str):
                data.tags.append(mapped)
            elif isinstance(mapped, list):
                data.tags.extend(mapped)
            # A mapped value of any other type adds nothing, and the label
            # is still treated as consumed (it is not kept as a raw tag).
            continue
        if label in TERM_TEMPLATE_TOPICS and hasattr(data, "topics"):
            data.topics.append(TERM_TEMPLATE_TOPICS[label])
            continue
        unmatched.append(label)
    data.raw_tags = unmatched