Coverage for src/wiktextract/extractor/es/tags.py: 88%
37 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1from .models import WordEntry
3# https://es.wiktionary.org/wiki/Wikcionario:Lista_de_etiquetas
5# https://es.wiktionary.org/wiki/Plantilla:uso
# Usage-label table: maps a label string to its English tag(s).
# A value is either a single tag (str) or several tags (list of str).
# Commented-out entries are labels that currently have no mapping.
# NOTE(review): this table is not merged into ALL_TAGS and is not read by
# translate_raw_tags in this file; presumably other modules import it —
# confirm against callers.
USO_TAGS = {
    "académico": "academic",
    "afectado": "literary",
    # "afectuoso": "affectionate",
    "anticuado": "outdated",
    "antiguo": "obsolete",
    "arcaico": "obsolete",
    "arcaizante": "obsolete",
    "arcaísmo": "obsolete",
    "bajo": "colloquial",
    "chistoso": "jocular",
    "clásico": "Classical",
    "coloq": "colloquial",
    "coloq.": "colloquial",
    "coloquial": "colloquial",
    # "culto": "worship",
    "cómico": "jocular",
    "despect.": "derogatory",
    "despectivo": "derogatory",
    "desus": "outdated",
    "desus.": "outdated",
    "desusada": "outdated",
    "desusado": "outdated",
    "elevado": "literary",
    "epiceno": "epicene",
    "eufemismo": "euphemism",
    "eutca": "adjective",
    "familiar": "colloquial",
    "figurado": "figurative",
    "formal": "formal",
    "germanía": "slang",
    "grosero": "vulgar",
    "humorístico": "jocular",
    "infantil": "childish",
    "informal": "colloquial",
    "infrecuente": "rare",
    "irónico": "ironic",
    "jerga": "slang",
    # "jerga legal": "legal jargon",
    # "jerga metalera": "metal slang",
    # "jerga skinhead": "skinhead slang",
    "jergal": "slang",
    "jocoso": "jocular",
    "literario": "literary",
    "malsonante": "vulgar",
    # "mayúscula": "capital letter",
    # "mayúscula disciplinas": "usually lowercase",
    # "mayúscula puntos cardinales":
    # "It is lowercase except when it is part of a proper noun",
    # "mayúscula taxones": "taxonomy rules for capitalization",
    "medieval": "Medieval",
    "minúscula": "lower case",
    "moderno": "modern",
    "neologismo": "neologism",
    "obsoleto": "obsolete",
    # "ordinal compuesto": "rules for compound ordinal numbers",
    # "ordinal-compuesto": "rules for compound ordinal numbers",
    "peyorativo": "derogatory",
    "poco frecuente": "rare",
    "poco usado": "rare",
    "popular": "colloquial",
    "poético": "literary",
    "raro": "rare",
    # "rur": "rural",
    # "rural": "rural",
    "sebtc": "noun",
    "setc": "noun",
    # "sin uso": "The editors have been unable to find any examples of use of
    # this word in published texts or corpus",
    "slang": "slang",
    "soez": "vulgar",
    "sutp": "plural",
    "suts": "singular",
    # "trans-prefijo": "",
    "ucsp": ["plural", "noun"],
    "umcp": "pronominal",
    "umcs": "noun",
    "umcv": "vocative",
    "umep": "plural",
    "umpl": "plural",
    "utca": "adjective",
    "utcadv": "adverb",
    "utcaf": ["feminine", "adjective"],
    "utcam": ["masculine", "adjective"],
    "utcc": "conjunction",
    "utcf": ["feminine", "noun"],
    "utcg": "gerund",
    # "utci": "It is also used as intransitive",
    "utcm": ["masculine", "noun"],
    "utcp": "pronominal",
    "utcprnl": "pronominal",
    "utcs": "noun",
    "utcsf": ["feminine", "noun"],
    "utcsm": ["masculine", "noun"],
    "utcsma": ["masculine", "feminine", "noun"],
    "utcsmf": "noun",
    "utct": "transitive",
    "utmp": ["pronominal", "verb"],
    "utrep": "repeated",
    "vulg": "vulgar",
    "vulgar": "vulgar",
}
109# https://es.wiktionary.org/wiki/Plantilla:ámbito
# Region/dialect label table: maps a label string, exactly as written
# (translate_raw_tags looks keys up case-sensitively), to its English tag(s).
# A value is either a single tag (str) or several tags (list of str).
# Spelling variants of the same label must map to the same tag so that
# downstream consumers see one canonical value per region or dialect.
AMBITO_TAGS = {
    "Acaxochitlán": "Acaxochitlán",
    "Alemania": "Germany",
    "Algarve": "Algarve",
    "Almería": "Almería",
    "Am.": "America",
    "Amecameca": "Amecameca",
    "América": "America",
    "América Central": "Central-America",
    "América Latina": "America",
    "América central": "Central-America",
    "América del Sur": "South-America",
    "Andalucía": "Andalusia",
    "Antioquia": "Antioquia",
    "Aragón": "Aragon",
    "Aragón Oriental": "Eastern-Aragon",
    "Arg.": "Argentina",
    "Argentina": "Argentina",
    "Asturias": "Asturias",
    "Atlapexco": "Atlapexco",
    "Australia": "Australia",
    "Baja California": "Lower-California",
    "Baja Navarra": "Lower-Navarre",
    "Bearn": "Bearn",
    "Bearne": "Bearn",
    "Belluno": "Belluno",
    "Bolivia": "Bolivia",
    "Bolonia": "Bologna",
    "Brasil": "Brazil",
    "Brasil meridional": "Southern-Brazil",
    "Burgos": "Burgos",
    "Calpan": "Calpan",
    "Campeche": "Campeche",
    "Canadá": "Canada",
    "Canarias": "Canaries",
    # Kept as a proper name: every entry here is a region label, so the
    # common noun "canoe" cannot be the intended meaning.
    # NOTE(review): presumably the locality San Miguel Canoa — confirm.
    "Canoa": "Canoa",
    "Cantabria": "Cantabria",
    "Caribe": "Caribbean",
    "Carpi": "Carpi",
    "Cartagena": "Cartagena",
    "Castilla": "Castile",
    "Cataluña": "Catalonia",
    "Centro América": "Central-America",
    "Centro de Chile": "Central-Chile",
    "Centro de México": "Central-Mexico",
    "Centroamérica": "Central-America",
    "Ceuta": "Ceuta",
    "Chiapas": "Chiapas",
    "Chicontepec": "Chicontepec",
    "Chihuahua": "Chihuahua",
    "Chile": "Chile",
    "Chiloé": "Chiloé",
    "Chipilo": "Chipilo",
    "Chl.": "Chile",
    "Cholula": "Cholula",
    "Chubut": "Chubut",
    "Ciudad de México": "Mexico-City",
    "Colombia": "Colombia",
    "Connacht": "Connacht",
    "Connemara": "Connemara",
    "Cono Sur": "South-Cone",
    "Costa Rica": "Costa-Rica",
    "Cuba": "Cuba",
    "Cuentepec": "Cuentepec",
    "Cuetzalan": "Cuetzalan",
    "Cádiz": "Cádiz",
    "Córdoba (España)": "Córdoba",
    "Dominicana": "Dominican-Republic",
    "EEUU": "US",
    "Ecuador": "Ecuador",
    "El Salv.": "El-Salvador",
    "El Salvador": "El-Salvador",
    "Escocia": "Scotland",
    "España": "Spain",
    "Estados Unidos": "US",
    "Europa": "Europe",
    "Euskadi": "Basque Country",
    "Extremadura": "Extremadura",
    "Extremadura (España)": "Extremadura",
    "Filipinas": "Philippines",
    "Francia": "France",
    "Galicia": "Galicia",
    "Gascuña": "Gascony",
    # NOTE(review): "Grenada" is the Caribbean country; confirm this label
    # is not meant to be the Spanish province/city of Granada.
    "Granada": "Grenada",
    # "Grischun": "grisón",
    "Guadalajara": "Guadalajara",
    "Guanajuato": "Guanajuato",
    "Guatemala": "Guatemala",
    "Guernsey": "Guernsey",
    "Guinea Ecuatorial": "Equatorial-Guinea",
    "Hidalgo": "Hidalgo",
    "Hond.": "Honduras",
    "Honduras": "Honduras",
    "Huauchinango": "Huauchinango",
    "Huejutla": "Huejutla",
    "Huelva": "Huelva",
    "Hueyapan": "Hueyapan",
    "Inglaterra": "England",
    "Iparralde": "Iparralde",
    "Irlanda": "Ireland",
    "Islas Baleares": "Balearic-Islands",
    "Italia": "Italy",
    "Jalisco": "Jalisco",
    "Jaltocán": "Jaltocan",
    "Jamaica": "Jamaica",
    "Jersey": "Jersey",
    "La Rioja": "La Rioja",
    "La Rioja (Argentina)": "La Rioja (Argentina)",
    "Labort": "Labort",
    "Languedoc": "Languedoc",
    "Lapurdi": "Labort",
    "León": "León",
    "Liechtenstein": "Liechtenstein",
    "Logudoro": "Logudoro",
    "Lunfardo": "Lunfardo",
    "Mallorca": "Mallorca",
    "Marruecos": "Morocco",
    "Matlapa": "Matlapa",
    "Michoacán": "Michoacán",
    "Milpa Alta": "Milpa Alta",
    "Mirandola": "Mirandola",
    "Munster": "Munster",
    "Murcia": "Murcia",
    "Méjico": "Mexico",
    "México": "Mexico",
    "México (México)": "Mexico",
    "Módena": "Modena",
    "Navarra": "Navarra",
    "Nicaragua": "Nicaragua",
    "Normandía": "Normandy",
    "Norte de Argentina": "Northern-Argentina",
    "Norte de Chile": "Northern-Chile",
    "Nueva Zelanda": "New-Zealand",
    "Nueva Zelandia": "New-Zealand",
    "Nuevo León": "Nuevo-León",
    "Nuevo México": "New-Mexico",
    "Oaxaca": "Oaxaca",
    "Orizatlán": "Orizatlán",
    "Palencia": "Palencia",
    "Panamá": "Panama",
    "Paraguay": "Paraguay",
    "País Vasco": "Basque Country",
    "País Vasco francés": "French Basque Country",
    "Perú": "Peru",
    "Portugal": "Portugal",
    "Provenza": "Provence",
    "Puebla": "Puebla",
    "Puerto Rico": "Puerto-Rico",
    # "Puter": "high engadino"
    "Quebec": "Quebec",
    "Querétaro": "Querétaro",
    "Reino Unido": "UK",
    "República Dominicana": "Dominican-Republic",
    "Ribera de Navarra": "Ribera-Navarra",
    "Rioja": "Rioja",
    "Rioplatense": "Río-de-la-Plata",
    "Río de la Plata": "Río-de-la-Plata",
    "Salamanca": "Salamanca",
    "Salamanca (España)": "Salamanca",
    "San Juan Quiahije": "San-Juan-Quiahije",
    "San Luis Potosí": "San-Luis-Potosí",
    "Santa María Yosoyúa": "Santa-María-Yosoyúa",
    "Sevilla": "Seville",
    "Sinaloa": "Sinaloa",
    "Sola": "Sola",
    "Sonora": "Sonora",
    "Soria": "Soria",
    "Soule": "Sola",
    "Sudamérica": "South-America",
    "Suiza": "Switzerland",
    "Sur de Chile": "Southern-Chile",
    "Suramérica": "South-America",
    # Romansh dialect labels: capitalised and lowercase spellings share the
    # same English tag (see the lowercase entries near the end of the table).
    "Surmiran": "Surmiran",
    "Sursilvan": "Sursilvan",
    "Sutsilvan": "Sutsilvan",
    "Tamazunchale": "Tamazunchale",
    "Tepeojuma": "Tepeojuma",
    "Texcoco": "Texcoco",
    "Tlaxcala": "Tlaxcala",
    "Tlaxpanaloya": "Tlaxpanaloya",
    "Toulouse": "Toulouse",
    # Same tag as "EEUU" and "Estados Unidos".
    "USA": "US",
    "Ulster": "Ulster",
    "Uruguay": "Uruguay",
    "Valencia": "Valencia",
    # Same tag as the lowercase "vallader" entry.
    "Vallader": "Lower-Engadine",
    "Venecia": "Venice",
    "Venezuela": "Venezuela",
    "Veracruz": "Veracruz",
    "Vizcaya": "Vizcaya",
    "Waterford": "Waterford",
    "Xayacatlán": "Xayacatlán",
    "Xilitla": "Xilitla",
    "Xochiatipan": "Xochiatipan",
    "Yaganiza": "Yaganiza",
    "Yahualica": "Yahualica",
    "Yojovi": "Yojovi",
    "Yucatán": "Yucatán",
    "Zacatecas": "Zacatecas",
    "Zamora": "Zamora",
    "Zuberoa": "Sola",
    "Zulia": "Zulia",
    # "alto engadino": "alto engadino",
    "anglonormando": "Anglo-Norman",
    # "bajo engadino": "bajo engadino",
    # "grischun": "grisón",
    # "grisón": "grisón",
    # "haquetía": "haquety",
    "logudorés": "Logudoro",
    "lunf": "Lunfardo",
    "lunfardismo": "Lunfardo",
    "lunfardo": "Lunfardo",
    "parlache": ["Colombia", "slang"],
    # "puter": "high engadino",
    "rioplatense": "Río-de-la-Plata",
    "rpl": "Río-de-la-Plata",
    # "subsilvano"/"sutsilvano" name the Sutsilvan dialect, which is distinct
    # from Sursilvan ("suprasilvano"/"sursilvano").
    "subsilvano": "Sutsilvan",
    "supramirano": "Surmiran",
    "suprasilvano": "Sursilvan",
    "sur de Chile": "Southern-Chile",
    "surmirano": "Surmiran",
    "sursilvano": "Sursilvan",
    "sutsilvano": "Sutsilvan",
    "vallader": "Lower-Engadine",
    "Á. R. Plata": "Río-de-la-Plata",
    "Álava": "Álava",
}
339# https://es.wiktionary.org/wiki/Plantilla:csem
# Topic table: maps a topic label to its English topic name.
# translate_raw_tags looks keys up with the lowercased raw tag, so every
# active key must stay lowercase. Commented-out entries are labels that
# currently have no mapping.
CSEM_TOPICS = {
    "aeronáutica": "aeronautics",
    "agricultura": "agriculture",
    "ajedrez": "chess",
    # "Algas": "algae",
    "alimento": "food",
    "alimentos": "food",
    "alpinismo": "mountaineering",
    "alquimia": "alchemy",
    "anatomía": "anatomy",
    # "Anfibios": "amphibians",
    # "Angiología": "angiology",
    # "Animales": "animals",
    # "Animales extintos": "extinct animals",
    "antropología": "anthropology",
    # "Antropotomía": "anthropotomy",
    # "Antropónimos": "anthroponyms",
    # "Anélidos": "annelids",
    # "Apellidos": "surnames",
    "apicultura": "beekeeping",
    # "Arbustos": "shrubbery",
    "aritmética": "arithmetic",
    "armas": "weaponry",
    "arqueología": "archeology",
    "arquitectura": "architecture",
    "arte": "art",
    "arte marciales": "martial arts",
    "artes marciales": "martial arts",
    # "Artrópodos": "arthropods",
    # "Arácnidos": "arachnids",
    "astrofísica": "astrophysics",
    "astrología": "astrology",
    "astronomía": "astronomy",
    "atletismo": "athletics",
    # "Audiología": "audiology",
    # "Automovilismo": "motoring",
    # "Aves": "birds",
    # "Bacterias": "bacteria",
    "danza": "dance",
    "baloncesto": "basketball",
    "balonmano": "handball",
    # "Batería de cocina": "cookware",
    # "Bebidas": "drinks",
    "billar": "billiards",
    "biología": "biology",
    "bioquímica": "biochemistry",
    "bolos": "bowling",
    "botánica": "botany",
    "béisbol": "baseball",
    # "Caza": "hunt",
    # "Cactus": "cactus",
    # "Campamento": "camp",
    "cardiología": "cardiology",
    "carpintería": "carpentry",
    # "Casos gramaticales": "grammatical cases",
    # "Cereales": "cereals",
    # "Cerrajería": "locksmith",
    "cetrería": "falconry",
    "ciclismo": "cycling",
    "ciencia": "science",
    "ciencia ficción": "science fiction",
    # "Cine": "cinema",
    "cinegética": "hunting",
    "cinematografía": "cinematography",
    # "Cinología": "cynology",
    "cirugía": "surgery",
    "ciudades": "cities",
    # "Cnidarios": "cnidarians",
    # "Colores": "colors",
    # "Comercio": "trade",
    "comunicación": "communication",
    # "Condimentos": "condiments",
    # "Constelaciones": "constellations",
    "construcción": "construction",
    "contabilidad": "accounting",
    "continentes": "continents",
    # "Cordilleras": "mountain ranges",
    "correos": "mail",
    # "Cosmetología": "cosmetology",
    "cosmología": "cosmology",
    "cosmética": "cosmetics",
    "costura": "sewing",
    "cristianismo": "Christianity",
    "cronología": "chronology",
    # "Crustáceos": "crustaceans",
    # "Cubertería": "cutlery",
    # "Cultura": "culture",
    "deporte": "sports",
    # "Derecho": "right",
    # "Dinosaurios": "dinosaurs",
    # "Dioses": "gods",
    "diseño": "design",
    "días de la semana": "weekdays",
    "ecología": "ecology",
    "economía": "economics",
    # "Edafología": "edaphology",
    "educación": "education",
    "electricidad": "electricity",
    "electromagnetismo": "electromagnetism",
    "electrónica": "electronics",
    # "Elementos químicos": "chemical elements",
    # "Emojis": "emoji",
    # "Emoticonos": "emoticons",
    # "Enfermedades": "diseases",
    "enología": "oenology",
    # "Enseñanza": "teaching",
    "entomología": "entomology",
    # "Equinodermos": "echinoderms",
    # "Equitación": "horse riding",
    # "Eras históricas": "historical eras",
    # "Escultura": "sculpture",
    "esgrima": "fencing",
    # "Especias": "spices",
    "estaciones": "seasons",
    "estadística": "statistics",
    # "Estomatología": "stomatology",
    # "Estética": "esthetic",
    "fabril": "manufacturing",
    # "Fantasía": "fancy",
    # "Farmacia": "pharmacy",
    "farmacología": "pharmacology",
    "feminismo": "feminism",
    # "Festividades": "festivities",
    "ficción": "fiction",
    # "Ficción fantástica": "fantasy fiction",
    "ficología": "phycology",
    "filatelia": "philately",
    "filosofía": "philosophy",
    "finanzas": "finance",
    "fisiología": "physiology",
    # "Flores": "flowers",
    # "Fobias": "phobias",
    "folclore": "folklore",
    "fonética": "phonetics",
    # "Formas": "shapes",
    "fotografía": "photography",
    # "Fraccionarios": "fractionals",
    # "Frutas": "fruit",
    # "Frutos": "fruits",
    "fármacos": "drugs",
    # "Física": "physical",
    "fútbol": "soccer",
    # "Ganadería": "cattle raising",
    # "Gastronomía": "gastronomy",
    # "Gentilicios": "gentilicios",
    "genética": "genetics",
    "geografía": "geography",
    "geología": "geology",
    "geometría": "geometry",
    # "Gimnasia": "gym",
    # "Glotónimos": "gluttonyms",
    "gramática": "grammar",
    # "Granjería": "farming",
    # "Guarismos": "figures",
    "halconería": "falconry",
    "herramientas": "tools",
    "heráldica": "heraldry",
    "hidrología": "hydrology",
    # "Hierbas": "herbs",
    # "Higiene": "hygiene",
    # "Hipocorísticos": "hypocoristic",
    "historia": "history",
    # "Historieta": "cartoon",
    # "Hockey sobre césped": "field hockey",
    # "Hongos": "fungus",
    "horticultura": "horticulture",
    # "Hostelería": "hostelry",
    # "Huesos": "bones",
    # "Humanidades": "humanities",
    "ictiología": "ichthyology",
    # "Lenguas": "languages",
    "imprenta": "printing",
    # "Industria": "industry",
    "informática": "computing",
    "ingeniería": "engineering",
    "inmunología": "immunology",
    "insectos": "insects",
    # "Instrumentos": "instruments",
    # "Instrumentos de medición": "measurement tools",
    # "Instrumentos musicales": "musical instruments",
    "interfaz gráfica de usuario": "graphical user interface",
    "internet": "Internet",
    # "Invertebrados": "invertebrates",
    "islam": "Islam",
    # "Islas": "islands",
    # "Judaísmo": "judaism",
    "juegos": "games",
    # "Juguetes": "toys",
    "lgbt": "LGBT",
    # "Lagos": "lakes",
    "lexicografía": "lexicography",
    "lingüística": "linguistics",
    "literatura": "literature",
    # "Logística": "logistics",
    # "Lucha": "struggle",
    # "Líquidos": "liquids",
    "lógica": "logic",
    "malabarismo": "juggling",
    "mamíferos": "mammals",
    # "Mares": "seas",
    "náutica": "nautical",
    "matemática": "mathematics",
    # "Materia": "subject",
    # "Materiales": "materials",
    "mecánica": "mechanics",
    "medicina": "medicine",
    # "Meses": "months",
    # "Metales": "metals",
    "metalurgia": "metallurgy",
    "meteorología": "meteorology",
    "metrología": "metrology",
    "micología": "mycology",
    "microbiología": "microbiology",
    "milicia": "military",
    # "Minerales": "minerals",
    "mineralogía": "mineralogy",
    "minería": "mining",
    "mitología": "mythology",
    "mobiliario": "furniture",
    # "Moluscos": "mollusks",
    # "Monedas": "coins",
    # "Moneras": "moneras",
    # "Montañas": "mountains",
    "muebles": "furniture",
    "música": "music",
    # "Naipes": "playing cards",
    "natación": "swimming",
    # "Naturaleza": "nature",
    # "Nemátodos": "nematodes",
    "neurología": "neurology",
    "numismática": "numismatics",
    # "Nutrición": "nutrition",
    # "Números": "numbers",
    "ocultismo": "occultism",
    # "Ocupaciones": "activities",
    # "Océanos": "oceans",
    "odontología": "odontology",
    # "Oficios": "trades",
    "oftalmología": "ophthalmology",
    "ornitología": "ornithology",
    "paleontología": "paleontology",
    "parapsicología": "parapsychology",
    # "Parentesco": "relationship",
    # "Partes del día": "parts of the day",
    "países": "countries",
    "peces": "fish",
    # "Penínsulas": "peninsulas",
    "periodismo": "journalism",
    "perros": "dogs",
    # "Personajes bíblicos": "biblical characters",
    # "Personajes ficticios": "fictional characters",
    # "Personajes históricos": "historical figures",
    "pesca": "fishing",
    # "Pesos y medidas": "weights and measures",
    # "Pintura": "paint",
    "planetas": "planets",
    # "Plantas": "floors",
    # "Platelmintos": "platyhelminths",
    # "Platos": "dishes",
    # "Poblaciones": "populations",
    "poesía": "poetry",
    # "Política": "policy",
    "pragmática": "pragmatics",
    # "Prehistoria": "prehistory",
    # "Vestimenta": "outfit",
    # "Profesiones": "professions",
    # "Protistas": "protists",
    # "Pseudociencias": "pseudosciences",
    "psicología": "psychology",
    "psiquiatría": "psychiatry",
    "química": "chemistry",
    "química orgánica": "organic chemistry",
    # "Radiocomunicación": "radiocomunication",
    # "Regiones": "regions",
    # "Reinos biológicos": "biological kingdoms",
    "religión": "religion",
    # "Relojería": "watchmaking",
    # "Reptiles": "reptiles",
    # "Restaurantes": "restaurants",
    # "Retórica": "rhetoric",
    "rugby": "rugby",
    # "Ríos": "rivers",
    # "Sabores": "flavors",
    "salud": "health",
    # "Saludos": "greetings",
    # "Satélites": "satellites",
    # "Seguridad": "security",
    # "Semiología": "semiology",
    "semiótica": "semiotics",
    # "Sentidos": "senses",
    # "Sentimientos": "feelings",
    "serpientes": "snakes",
    "sexualidad": "sexuality",
    # "Signos": "signs",
    # "Signos ortográficos": "spelling signs",
    "silvicultura": "forestry",
    # "Sociedad": "society",
    "sociología": "sociology",
    # "Símbolos": "symbols",
    # "Símbolos alquímicos": "alchemical symbols",
    # "Símbolos astronómicos": "astronomical symbols",
    # "Símbolos de unidades monetarias": "symbols of monetary units",
    "tauromaquia": "bullfighting",
    "taxonomía": "taxonomy",
    "teatro": "theater",
    "tecnología": "technology",
    "textiles": "textiles",
    "telecomunicaciones": "telecommunications",
    # "Telecomunicación": "telecommunication",
    "tenis": "tennis",
    "teología": "theology",
    "termodinámica": "thermodynamics",
    "tiempo": "time",
    "tipografía": "typography",
    "topografía": "topography",
    # "Topónimos": "toponyms",
    "transporte": "transport",
    # "Tribus urbanas": "urban tribes",
    "turismo": "tourism",
    # "Unidades de tiempo": "time units",
    # "Urbanismo": "town planning",
    # "Utensilios": "utensils",
    "vegetales": "vegetable",
    "vehículos": "vehicles",
    "verduras": "vegetable",
    # "Vertebrados": "vertebrates",
    # "Veterinaria": "vet",
    # "Virtudes": "virtues",
    # "Vivienda": "living place",
    # "Waterpolo": "water polo",
    "zoología": "zoology",
    "zootomía": "zootomy",
    "álgebra": "algebra",
    # "Árboles": "trees",
    "ética": "ethics",
    "óptica": "optics",
}
# Grammatical-number labels mapped to English tags; merged into ALL_TAGS.
NUMBER_TAGS = {
    # "inflect.*" templates
    "singular": "singular",
    "plural": "plural",
    "dual": "dual",
}
# Grammatical-gender labels mapped to English tags; merged into ALL_TAGS.
GENDER_TAGS = {
    "masculino": "masculine",
    "femenino": "feminine",
    "neutro": "neuter",
}
# Degree-of-comparison labels mapped to English tags; merged into ALL_TAGS.
COMPARISON_TAGS = {
    "positivo": "positive",
    "comparativo": "comparative",
    "superlativo": "superlative",
}
# Grammatical-person labels mapped to English tags; merged into ALL_TAGS.
PERSON_TAGS = {
    "primera": "first-person",
    "segunda": "second-person",
    "tercera": "third-person",
}
# Verb-tense labels mapped to English tags; merged into ALL_TAGS.
# A value is one tag (str) or, for compound labels, a list of tags.
TENSE_TAGS = {
    "presente": "present",
    "pretérito imperfecto": ["past", "imperfect"],
    "pretérito indefinido": ["indefinite", "preterite"],
    "futuro": "future",
    "condicional": "conditional",
    "pretérito perfecto": ["present", "perfect"],
    "pretérito pluscuamperfecto": "pluperfect",
    "pretérito anterior": ["past", "anterior"],
    "futuro perfecto": ["future", "perfect"],
    "condicional perfecto": ["conditional", "perfect"],
}
# Non-finite verb-form labels mapped to English tags; merged into ALL_TAGS.
VERB_FORM_TAGS = {
    "infinitivo": "infinitive",
    "gerundio": "gerund",
    "participio": "participle",
}
# Labels found in inflection/conjugation tables, grouped below by the
# template they come from, mapped to English tags; merged into ALL_TAGS.
# A value is one tag (str) or a list of tags.
TABLE_TAGS = {
    # Plantilla:es.v
    "formas no personales (verboides)": "impersonal",
    "futuro compuesto": ["future", "compound"],
    "pretérito perfecto compuesto": ["present", "perfect", "compound"],
    "condicional simple": "conditional",
    "condicional compuesto": ["conditional", "compound"],
    "indicativo": "indicative",
    "subjuntivo": "subjunctive",
    "imperativo": "imperative",
    # Plantilla:es.v.conj.ar
    "formas no personales": "impersonal",
    "formas personales": "personal",
    "modo indicativo": "indicative",
    "modo subjuntivo": "subjunctive",
    "modo imperativo": "imperative",
    "pretérito": "preterite",
    # Template:inflect.ine.sust.atem.his.mf-C
    "nominativo": "nominative",
    "vocativo": "vocative",
    "acusativo": "accusative",
    "genitivo": "genitive",
    "ablativo": "ablative",
    "dativo": "dative",
    "locativo": "locative",
    "instrumental": "instrumental",
    # Template:inflect.eu.sust.inanim.prop
    "indefinido": "indefinite",
    "definido": "definite",
    "ergativo": "ergative",
    "comitativo": "comitative",
    "benefactivo": "benefactive",
    "causativo": "causative",
    "inesivo": "inessive",
    "separativo": "separative",
    "adlativo": "allative",
    # "adl. extremo": ["allative"],
    "adverbial": "adverbial",
    "partitivo": "partitive",
    "prolativo": "prolative",
}
# Pronunciation-variety labels mapped to English tags; merged into ALL_TAGS.
# Keys are lowercase because translate_raw_tags lowercases the raw tag before
# looking it up in ALL_TAGS. A value is one tag (str) or a list of tags; keys
# that contain several comma-separated varieties map to one tag per variety.
SOUND_TAGS = {
    # Template:pron-graf
    "brasilero": "Brazilian",
    "carioca": "Rio-de-Janeiro",
    # The Brazilian state is "Rio Grande do Sul" (Portuguese "do", not "De").
    "gaúcho": "Rio-Grande-do-Sul",
    "europeo": "European",
    "central": "Central",
    "valenciano": "Valencian",
    "baleárico": "Balearic",
    "eclesiástico": "Ecclesiastical",
    "received pronunciation": "Received-Pronunciation",
    "received pronunciation anticuado": ["Received-Pronunciation", "obsolete"],
    "reino unido, obsoleto o dialectal": ["UK", "obsolete", "dialectal"],
    "ee. uu.": "US",
    "australia": "Australia",
    "nueva zelanda": "New-Zealand",
    "california": "California",
    "received pronunciation, general american, canadá": [
        "Received-Pronunciation",
        "General-American",
        "Canada",
    ],
    "australia, nueva zelanda": ["Australia", "New-Zealand"],
    "india": "India",
    "general american": "General-American",
    "general american, canadá": ["General-American", "Canada"],
    "general american, standard canadian": [
        "General-American",
        "Standard-Canadian",
    ],
    "londres": "London",
}
797# Template:es.v
# Short labels mapped to English tags; merged into ALL_TAGS.
# Keys joining two labels with "/" map to a list with one tag per label.
ES_V_SUP_TAGS = {
    "adj/adv": ["adjective", "adverb"],
    "part": "participle",
    "part/adj": ["participle", "adjective"],
    "arg/uru": ["Argentina", "Uruguay"],
}
# Feminine-form modifier words mapped to English tags; merged into ALL_TAGS.
# NOTE(review): the name suggests these come from part-of-speech section
# titles — confirm against the caller that fills raw_tags.
POS_TITLE_TAGS = {
    "sustantiva": "substantive",
    "femenina": "feminine",
    "masculina": "masculine",
    "ordinal": "ordinal",
    "partitiva": "partitive",
    "adjetiva": "adjectival",
}
# Combined lookup table used by translate_raw_tags for lowercased raw tags.
# The tables are unpacked in the order listed, so if two of them ever share
# a key the later table (or the literal entries at the end) wins.
# USO_TAGS, AMBITO_TAGS and CSEM_TOPICS are intentionally not part of this
# merge here: translate_raw_tags consults CSEM_TOPICS and AMBITO_TAGS
# separately, and USO_TAGS is not read in this file.
ALL_TAGS = {
    **NUMBER_TAGS,
    **GENDER_TAGS,
    **COMPARISON_TAGS,
    **PERSON_TAGS,
    **TENSE_TAGS,
    **VERB_FORM_TAGS,
    **TABLE_TAGS,
    **SOUND_TAGS,
    **ES_V_SUP_TAGS,
    **POS_TITLE_TAGS,
    "afirmativo": "affirmative",
    "negativo": "negative",
    "simples": "simple",
    "compuestas": "compound",
    "invariante": "invariable",
}
def _append_unique(target: list, translated) -> None:
    """Append ``translated`` to ``target``, skipping values already present.

    ``translated`` is either a single string or a list of strings, matching
    the two value shapes used by the tag tables in this module.
    """
    values = [translated] if isinstance(translated, str) else translated
    for value in values:
        if value not in target:
            target.append(value)


def translate_raw_tags(data: WordEntry):
    """Translate the raw tags of ``data`` in place.

    Each entry of ``data.raw_tags`` is resolved with the first matching rule:

    1. its lowercased form is a key of ``ALL_TAGS``: the translation is
       added to ``data.tags``;
    2. its lowercased form is a key of ``CSEM_TOPICS`` and ``data`` has a
       ``topics`` attribute: the topic is added to ``data.topics``;
    3. its exact (case-sensitive) form is a key of ``AMBITO_TAGS``: the
       translation is added to ``data.tags``;
    4. otherwise the raw tag is kept.

    Values are never added twice to ``data.tags`` or ``data.topics``; this
    holds for single-string and list translations alike. When the function
    returns, ``data.raw_tags`` contains only the tags that matched no rule,
    in their original order.

    Args:
        data: object exposing ``raw_tags`` and ``tags`` lists and,
            optionally, a ``topics`` list.

    Returns:
        None. ``data`` is modified in place.
    """
    untranslated = []
    for raw_tag in data.raw_tags:
        lower_raw_tag = raw_tag.lower()
        if lower_raw_tag in ALL_TAGS:
            _append_unique(data.tags, ALL_TAGS[lower_raw_tag])
        elif lower_raw_tag in CSEM_TOPICS and hasattr(data, "topics"):
            _append_unique(data.topics, CSEM_TOPICS[lower_raw_tag])
        elif raw_tag in AMBITO_TAGS:
            # Region labels are looked up with their original casing because
            # AMBITO_TAGS keys are capitalised place names.
            _append_unique(data.tags, AMBITO_TAGS[raw_tag])
        else:
            untranslated.append(raw_tag)
    data.raw_tags = untranslated