Coverage for src/wiktextract/extractor/it/tags.py: 97%
20 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from .models import WordEntry
# Italian labels used in inflection/conjugation table templates, mapped to
# their English tags. A value is either one tag (str) or several tags (list
# of str). The comments inside the literal link to the template each group of
# labels was taken from.
TABLE_TAGS = {
    # https://it.wiktionary.org/wiki/Template:It-decl-agg4
    "singolare": "singular",
    "plurale": "plural",
    "positivo": "positive",
    "superlativo assoluto": ["absolute", "superlative"],
    "maschile": "masculine",
    "femminile": "feminine",
    # https://it.wiktionary.org/wiki/Template:It-decl-agg2
    "m e f": ["masculine", "feminine"],
    # https://it.wiktionary.org/wiki/Template:It-conj
    "infinito": "infinitive",
    "verbo ausiliare": "auxiliary",
    "gerundio": "gerund",
    "participio presente": ["present", "participle"],
    "participio passato": ["past", "participle"],
    "prima": "first-person",
    "seconda": "second-person",
    "terza": "third-person",
    "presente": "present",
    "imperfetto": "imperfect",
    "passato remoto": "past-remote",
    "futuro": "future",
    "passato prossimo": ["past", "perfect"],
    "trapassato prossimo": ["pluperfect", "past", "perfect"],
    "trapassato remoto": ["historic", "past-remote"],
    "futuro anteriore": ["future", "perfect"],
    "passato": "past",
    "trapassato": ["past", "perfect"],
    "imperativo": "imperative",
    "riflessivo pronominale": ["reflexive", "pronominal"],
}
# Italian degree-of-comparison labels mapped to their English tags.
FORM_LINE_TEMPLATE_TAGS = {
    # https://it.wiktionary.org/wiki/Template:A_cmp
    "comparativo": "comparative",
    "superlativo": "superlative",
}
# https://it.wiktionary.org/wiki/Template:Term/d
# Italian subject-field labels mapped to English topic names; consulted by
# translate_raw_tags() to fill the `topics` list of an entry.
# Commented-out entries are labels that currently have no active mapping
# (presumably awaiting a suitable English topic -- confirm before enabling).
TERM_TEMPLATE_TOPICS = {
    "abbigliamento": "clothing",
    "aeronautica": "aeronautics",
    "agricoltura": "agriculture",
    "algebra": "algebra",
    "ambito sportivo": "sports",
    "anatomia": "anatomy",
    # "animali": "",
    "antropologia": "anthropology",
    "araldica": "heraldry",
    "archeologia": "archaeology",
    "architettura": "architecture",
    "aritmetica": "arithmetic",
    "arm.": "weaponry",
    "arma": "weaponry",
    "armamento": "weaponry",
    "armi": "weaponry",
    "arte": "arts",
    "astrologia": "astrology",
    "astronomia": "astronomy",
    "botanica": "botany",
    "biochimica": "biochemistry",
    "biologia": "biology",
    "biotecnologia": "biotechnology",
    # "burocrazia": "",
    "calcio": "soccer",
    "carte": "card-games",
    "chimica": "chemistry",
    "chimica generale": "chemistry",
    "chimica inorganica": "chemistry",
    "chimica organica": "chemistry",
    "chimica analitica": "chemistry",
    "chimica industriale": "chemistry",
    "chirurgia": "surgery",
    "cinematografia": "cinematography",
    "colore": "color",
    "commercio": "commerce",
    # "composti organici": "",
    # "composti inorganici": "",
    "cristianesimo": "Christianity",
    "danza": "dance",
    # "diritto": "",
    "ecclesiastico": "ecclesiastical",
    "ecologia": "ecology",
    "economia": "economics",
    "edilizia": "construction",
    "elementi chimici": "chemistry",
    "elettronica": "electronics",
    "elettrotecnica": "electrical-engineering",
    "entomologia": "entomology",
    "equitazione": "equitation",
    "erpetologia": "herpetology",
    # "esoterismo": "",
    "etnologia": "ethnology",
    "falegnameria": "carpentry",
    # "familiare": "",
    "farmacologia": "pharmacology",
    "ferrovia": "railways",
    "filosofia": "philosophy",
    "finanza": "finance",
    "fisica": "physics",
    "fisiologia": "physiology",
    "fonologia": "phonology",
    # "forestierismo": "",
    "fotografia": "photography",
    # "gastronomia": "gastronomy",
    "genetica": "genetics",
    "geografia": "geography",
    "geologia": "geology",
    "geometria": "geometry",
    "gioco": "games",
    "giornalistico": "journalism",
    "grammatica": "grammar",
    "idraulica": "hydraulics",
    "informatica": "informatics",
    "ingegneria": "engineering",
    "internet": "Internet",
    "ittiologia": "ichthyology",
    "legale": "law",
    "letteratura": "literature",
    "linguistica": "linguistics",
    # "macelleria": "",
    "malacologia": "malacology",
    "mammalogia": "mammalogy",
    "marina": "navy",
    "matematica": "mathematics",
    "meccanica": "mechanics",
    "medicina": "medicine",
    "metallurgia": "metallurgy",
    "meteorologia": "meteorology",
    # "Metrica": "",
    # "Metrica classica": "",
    # "Metrica contemporanea": "",
    # "Metrica latina": "",
    "militare": "military",
    "minerale": "mineralogy",
    "mineralogia": "mineralogy",
    "mitologia": "mythology",
    "moda": "fashion",
    "musica": "music",
    "numismatica": "numismatics",
    "ornitologia": "ornithology",
    # "pedagogia": "pedagogy",
    # "pittura": "painting",
    "poesia": "poetry",
    # "polimeri": "",
    "politica": "politics",
    # "Popolare": "",
    # "Professioni": "",
    "psichiatria": "psychiatry",
    "psicanalisi": "psychoanalysis",
    "psicologia": "psychology",
    "religione": "religion",
    "topografia": "topography",
    # "Toponimi": "",
    "paleontologia": "paleontology",
    "pianta": "botany",
    "scacchi": "chess",
    # "Scuola": "",
    "sessualità": "sexuality",
    "sociologia": "sociology",
    "sport": "sports",
    "sport invernali": "sports",
    "statistica": "statistics",
    "storia": "history",
    # "strumenti musicali": "",
    "teatro": "theater",
    "tecnica": "technology",
    "tecnologia": "technology",
    "telecomunicazioni": "telecommunications",
    "tessile": "textiles",
    "tipografia": "typography",
    "veterinaria": "veterinary",
    "zoologia": "zoology",
    # "zootecnica": "",
}
# Italian usage/register labels mapped to their English tags.
TERM_TEMPLATE_TAGS = {
    "antico": "archaic",
    "obsoleto": "obsolete",
    "formale": "formal",
    "gergale": "slang",
    "informale": "informal",
    "letterario": "literary",
    "neologismo": "neologism",
    "offensivo": "offensive",
    "raro": "rare",
    "regionale": "regional",
    "volgare": "vulgar",
}
# https://it.wiktionary.org/wiki/Categoria:Template_ambito
# Italian gloss-level labels mapped to their English tag(s); the trailing
# comment on each entry names the wiki template the label belongs to.
# NOTE: the identifier is spelled "TEMPATE" (sic). The spelling is kept
# because TAGS in this module refers to the constant by this exact name.
GLOSS_LIST_TEMPATE_TAGS = {
    "accrescitivo": "augmentative",  # Template:Accr
    "colloquiale": "colloquial",  # Template:Coll
    "diminutivo": "diminutive",  # Template:Dim
    "per estensione": "broadly",  # Template:Est
    "senso figurato": "figuratively",  # Template:Fig
    "letteralmente": "literally",  # Template:Lett
    "peggiorativo": "pejorative",  # Template:Pegg
    "riferito solo a persone": "person",  # Template:Pers
    "per sineddoche": "synecdoche",  # Template:Sndc
    "specialmente al plurale": ["especially", "in-plural"],  # Template:Spec pl
    "spregiativo": "pejorative",  # Template:Spreg
    "vezzeggiativo": "endearing",  # Template:Vezz
    "volgare": "vulgar",  # Template:Vulg
}
# Single lookup table used by translate_raw_tags(): the union of all tag
# mappings above. On a duplicate key the mapping unpacked last wins; the only
# key shared between the source tables is "volgare", which has the same
# translation in TERM_TEMPLATE_TAGS and GLOSS_LIST_TEMPATE_TAGS.
TAGS = {
    **TABLE_TAGS,
    **FORM_LINE_TEMPLATE_TAGS,
    **TERM_TEMPLATE_TAGS,
    **GLOSS_LIST_TEMPATE_TAGS,
}
def translate_raw_tags(data: WordEntry) -> None:
    """Translate the Italian labels in ``data.raw_tags``, modifying *data*.

    Each raw tag is handled by the first rule that applies:

    * it is a key of ``TAGS`` and *data* has a ``tags`` attribute: the
      English tag (or every tag of the list) is added to ``data.tags``;
    * it is a key of ``TERM_TEMPLATE_TOPICS`` and *data* has a ``topics``
      attribute: the English topic is appended to ``data.topics``;
    * otherwise it is kept as an untranslated raw tag.

    Finally ``data.raw_tags`` is replaced by a new list holding only the
    kept labels, in their original order. Nothing is returned.
    """
    untranslated = []
    for raw_tag in data.raw_tags:
        if raw_tag in TAGS and hasattr(data, "tags"):
            tr_tag = TAGS[raw_tag]
            if isinstance(tr_tag, str):
                data.tags.append(tr_tag)
            else:
                # Every non-string value in TAGS is a list of tags, so no
                # second isinstance() test is needed here.
                data.tags.extend(tr_tag)
        elif raw_tag in TERM_TEMPLATE_TOPICS and hasattr(data, "topics"):
            data.topics.append(TERM_TEMPLATE_TOPICS[raw_tag])
        else:
            untranslated.append(raw_tag)
    data.raw_tags = untranslated