Coverage for src/wiktextract/extractor/es/tags.py: 86%
25 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from .models import WordEntry
3# https://es.wiktionary.org/wiki/Wikcionario:Lista_de_etiquetas
5# https://es.wiktionary.org/wiki/Plantilla:uso
# Lookup table from Spanish usage labels (and their abbreviations) to English
# tag names.  A value is either a single tag string or a list of tag strings
# when one label implies several tags (e.g. "utcf" -> feminine + noun).
# Entries left commented out have no English tag assigned in this table.
USO_TAGS = {
    "académico": "academic",
    "afectado": "literary",
    # "afectuoso": "affectionate",
    "anticuado": "outdated",
    "antiguo": "obsolete",
    "arcaico": "obsolete",
    "arcaizante": "obsolete",
    "arcaísmo": "obsolete",
    "bajo": "colloquial",
    "chistoso": "jocular",
    "coloq": "colloquial",
    "coloq.": "colloquial",
    "coloquial": "colloquial",
    # "culto": "worship",
    "cómico": "jocular",
    "despect.": "derogatory",
    "despectivo": "derogatory",
    "desus": "outdated",
    "desus.": "outdated",
    "desusada": "outdated",
    "desusado": "outdated",
    "elevado": "literary",
    "epiceno": "epicene",
    "eufemismo": "euphemism",
    "eutca": "adjective",
    "familiar": "colloquial",
    "figurado": "figurative",
    "formal": "formal",
    "germanía": "slang",
    "grosero": "vulgar",
    "humorístico": "jocular",
    "infantil": "childish",
    "informal": "colloquial",
    "infrecuente": "rare",
    "irónico": "ironic",
    "jerga": "slang",
    # "jerga legal": "legal jargon",
    # "jerga metalera": "metal slang",
    # "jerga skinhead": "skinhead slang",
    "jergal": "slang",
    "jocoso": "jocular",
    "literario": "literary",
    "malsonante": "vulgar",
    # "mayúscula": "capital letter",
    # "mayúscula disciplinas": "usually lowercase",
    # "mayúscula puntos cardinales":
    # "It is lowercase except when it is part of a proper noun",
    # "mayúscula taxones": "taxonomy rules for capitalization",
    "medieval": "Medieval",
    "minúscula": "lower case",
    "neologismo": "neologism",
    "obsoleto": "obsolete",
    # "ordinal compuesto": "rules for compound ordinal numbers",
    # "ordinal-compuesto": "rules for compound ordinal numbers",
    "peyorativo": "derogatory",
    "poco frecuente": "rare",
    "poco usado": "rare",
    "popular": "colloquial",
    "poético": "literary",
    "raro": "rare",
    # "rur": "rural",
    # "rural": "rural",
    "sebtc": "noun",
    "setc": "noun",
    # "sin uso": "The editors have been unable to find any examples of use of
    # this word in published texts or corpus",
    "slang": "slang",
    "soez": "vulgar",
    "sutp": "plural",
    "suts": "singular",
    # "trans-prefijo": "",
    "ucsp": ["plural", "noun"],
    "umcp": "pronominal",
    "umcs": "noun",
    "umcv": "vocative",
    "umep": "plural",
    "umpl": "plural",
    "utca": "adjective",
    "utcadv": "adverb",
    "utcaf": ["feminine", "adjective"],
    "utcam": ["masculine", "adjective"],
    "utcc": "conjunction",
    "utcf": ["feminine", "noun"],
    "utcg": "gerund",
    # "utci": "It is also used as intransitive",
    "utcm": ["masculine", "noun"],
    "utcp": "pronominal",
    "utcprnl": "pronominal",
    "utcs": "noun",
    "utcsf": ["feminine", "noun"],
    "utcsm": ["masculine", "noun"],
    "utcsma": ["masculine", "feminine", "noun"],
    "utcsmf": "noun",
    "utct": "transitive",
    "utmp": ["pronominal", "verb"],
    "utrep": "repeated",
    "vulg": "vulgar",
    "vulgar": "vulgar",
}
107# https://es.wiktionary.org/wiki/Plantilla:ámbito
# Lookup table from Spanish region/dialect labels to English tag names.
# A value is either a single tag string or a list of tag strings
# (e.g. "parlache" -> Colombia + slang).  Several spellings of the same label
# (capitalised, lowercase, abbreviated) intentionally map to the same tag.
# Entries left commented out have no English tag assigned in this table.
AMBITO_TAGS = {
    "Acaxochitlán": "Acaxochitlán",
    "Alemania": "Germany",
    "Algarve": "Algarve (Portugal)",
    "Almería": "Almería",
    "Am.": "America",
    "Amecameca": "Amecameca",
    "América": "America",
    "América Central": "Central America",
    "América Latina": "America",
    "América central": "Central America",
    "América del Sur": "South America",
    "Andalucía": "Andalusia",
    "Antioquia": "Antioquia",
    "Aragón": "Aragon",
    "Aragón Oriental": "Eastern Aragon",
    "Arg.": "Argentina",
    "Argentina": "Argentina",
    "Asturias": "Asturias",
    "Atlapexco": "Atlapexco",
    "Australia": "Australia",
    "Baja California": "Lower California",
    "Baja Navarra": "Lower Navarre (France)",
    "Bearn": "Bearn (France)",
    "Bearne": "Bearn (France)",
    "Belluno": "Belluno",
    "Bolivia": "Bolivia",
    "Bolonia": "Bologna",
    "Brasil": "Brazil",
    "Brasil meridional": "Southern Brazil",
    "Burgos": "Burgos",
    "Calpan": "Calpan",
    "Campeche": "Campeche",
    "Canadá": "Canada",
    "Canarias": "Canaries",
    # "Canoa" is a place name here (it sits among other Mexican localities),
    # so it must not be translated as the common noun "canoe".
    "Canoa": "Canoa",
    "Cantabria": "Cantabria",
    "Caribe": "Caribbean",
    "Carpi": "Carpi",
    "Cartagena": "Cartagena",
    "Castilla": "Castile",
    "Cataluña": "Catalonia",
    "Centro América": "Central America",
    "Centro de Chile": "Center of Chile",
    "Centro de México": "Mexico Center",
    "Centroamérica": "Central America",
    "Ceuta": "Ceuta",
    "Chiapas": "Chiapas",
    "Chicontepec": "Chicontepec",
    "Chihuahua": "Chihuahua",
    "Chile": "Chile",
    "Chiloé": "Chiloé (Chile)",
    "Chipilo": "Chipilo",
    "Chl.": "Chile",
    "Cholula": "Cholula",
    "Chubut": "Chubut",
    "Ciudad de México": "Mexico City",
    "Colombia": "Colombia",
    "Connacht": "Connacht (Ireland)",
    "Connemara": "Connemara (Ireland)",
    "Cono Sur": "South Cone",
    "Costa Rica": "Costa Rica",
    "Cuba": "Cuba",
    "Cuentepec": "Cuentepec",
    "Cuetzalan": "Cuetzalan",
    "Cádiz": "Cádiz",
    "Córdoba (España)": "Córdoba (Spain)",
    "Dominicana": "Dominican Republic",
    "EEUU": "United States",
    "Ecuador": "Ecuador",
    "El Salv.": "El Salvador",
    "El Salvador": "El Salvador",
    "Escocia": "Scotland",
    "España": "Spain",
    "Estados Unidos": "United States",
    "Europa": "Europe",
    "Euskadi": "Basque Country",
    "Extremadura": "Extremadura",
    "Extremadura (España)": "Extremadura",
    "Filipinas": "Philippines",
    "Francia": "France",
    "Galicia": "Galicia",
    "Gascuña": "Gascony (France)",
    # NOTE(review): "Granada" may refer to the Spanish province rather than
    # the Caribbean country Grenada -- confirm against the template data.
    "Granada": "Grenada",
    # "Grischun": "grisón",
    "Guadalajara": "Guadalajara (Spain)",
    "Guanajuato": "Guanajuato",
    "Guatemala": "Guatemala",
    "Guernsey": "Guernsey",
    "Guinea Ecuatorial": "Equatorial Guinea",
    "Hidalgo": "Hidalgo",
    "Hond.": "Honduras",
    "Honduras": "Honduras",
    "Huauchinango": "Huauchinango",
    "Huejutla": "Huejutla",
    "Huelva": "Huelva (Spain)",
    "Hueyapan": "Hueyapan",
    "Inglaterra": "England",
    "Iparralde": "Iparralde",
    "Irlanda": "Ireland",
    "Islas Baleares": "Balearic Islands",
    "Italia": "Italy",
    "Jalisco": "Jalisco",
    "Jaltocán": "Jaltocan",
    "Jamaica": "Jamaica",
    "Jersey": "Jersey",
    "La Rioja": "La Rioja (Spain)",
    "La Rioja (Argentina)": "La Rioja (Argentina)",
    "Labort": "Labort (France)",
    "Languedoc": "Languedoc (France)",
    "Lapurdi": "Labort (France)",
    "León": "León (Spain)",
    "Liechtenstein": "Liechtenstein",
    "Logudoro": "Logudoro",
    "Lunfardo": "Lunfardo",
    "Mallorca": "Mallorca (Balearic Islands)",
    "Marruecos": "Morocco",
    "Matlapa": "Matlapa",
    "Michoacán": "Michoacán (Mexico)",
    "Milpa Alta": "Milpa Alta",
    "Mirandola": "Mirandola",
    "Munster": "Munster (Ireland)",
    "Murcia": "Murcia",
    "Méjico": "Mexico",
    "México": "Mexico",
    "México (México)": "Mexico DF",
    "Módena": "Modena",
    "Navarra": "Navarra",
    "Nicaragua": "Nicaragua",
    "Normandía": "Normandy",
    "Norte de Argentina": "Northern Argentina",
    "Norte de Chile": "Northern Chile",
    "Nueva Zelanda": "New Zealand",
    "Nueva Zelandia": "New Zealand",
    "Nuevo León": "Nuevo León",
    "Nuevo México": "New Mexico",
    "Oaxaca": "Oaxaca",
    "Orizatlán": "Orizatlán",
    "Palencia": "Palencia",
    "Panamá": "Panama",
    "Paraguay": "Paraguay",
    "País Vasco": "Basque Country",
    "País Vasco francés": "French Basque Country",
    "Perú": "Peru",
    "Portugal": "Portugal",
    "Provenza": "Provence (France)",
    "Puebla": "Puebla",
    "Puerto Rico": "Puerto Rico",
    # "Puter": "high engadino"
    "Quebec": "Quebec",
    "Querétaro": "Querétaro",
    "Reino Unido": "United Kingdom",
    "República Dominicana": "Dominican Republic",
    "Ribera de Navarra": "Ribera Navarra",
    "Rioja": "Rioja (Spain)",
    "Rioplatense": "Río de la Plata",
    "Río de la Plata": "Río de la Plata",
    "Salamanca": "Salamanca",
    "Salamanca (España)": "Salamanca",
    "San Juan Quiahije": "San Juan Quiahije",
    "San Luis Potosí": "San Luis Potosí",
    "Santa María Yosoyúa": "Santa María Yosoyúa",
    "Sevilla": "Seville",
    "Sinaloa": "Sinaloa",
    "Sola": "Sola (France)",
    "Sonora": "Sonora",
    "Soria": "Soria (Spain)",
    "Soule": "Sola (France)",
    "Sudamérica": "South America",
    "Suiza": "Switzerland",
    "Sur de Chile": "Southern Chile",
    "Suramérica": "South America",
    # Romansh idioms: use the same English names as the lowercase Spanish
    # spellings further down, so both spellings yield one tag.
    "Surmiran": "Surmiran",
    "Sursilvan": "Sursilvan",
    "Sutsilvan": "Sutsilvan",
    "Tamazunchale": "Tamazunchale",
    "Tepeojuma": "Tepeojuma",
    "Texcoco": "Texcoco",
    "Tlaxcala": "Tlaxcala",
    "Tlaxpanaloya": "Tlaxpanaloya",
    "Toulouse": "Toulouse",
    "USA": "United States",
    "Ulster": "Ulster (Ireland, UK)",
    "Uruguay": "Uruguay",
    "Valencia": "Valencia",
    "Vallader": "Vallader",
    "Venecia": "Venice",
    "Venezuela": "Venezuela",
    "Veracruz": "Veracruz",
    "Vizcaya": "Vizcaya (Spain)",
    "Waterford": "Waterford (Ireland)",
    "Xayacatlán": "Xayacatlán",
    "Xilitla": "Xilitla",
    "Xochiatipan": "Xochiatipan",
    "Yaganiza": "Yaganiza",
    "Yahualica": "Yahualica",
    "Yojovi": "Yojovi",
    "Yucatán": "Yucatán",
    "Zacatecas": "Zacatecas",
    "Zamora": "Zamora (Spain)",
    "Zuberoa": "Sola (France)",
    "Zulia": "Zulia (Venezuela)",
    # "alto engadino": "alto engadino",
    "anglonormando": "Anglo-Norman",
    # "bajo engadino": "bajo engadino",
    # "grischun": "grisón",
    # "grisón": "grisón",
    # "haquetía": "haquety",
    "logudorés": "Logudoro",
    "lunf": "Lunfardo",
    "lunfardismo": "Lunfardo",
    "lunfardo": "Lunfardo",
    "parlache": ["Colombia", "slang"],
    # "puter": "high engadino",
    "rioplatense": "Río de la Plata",
    "rpl": "Río de la Plata",
    # "subsilvano"/"sutsilvano" name the Sutsilvan idiom, which is distinct
    # from Sursilvan ("suprasilvano"/"sursilvano").
    "subsilvano": "Sutsilvan",
    "supramirano": "Surmiran",
    "suprasilvano": "Sursilvan",
    "sur de Chile": "Southern Chile",
    "surmirano": "Surmiran",
    "sursilvano": "Sursilvan",
    "sutsilvano": "Sutsilvan",
    "vallader": "Vallader",
    "Á. R. Plata": "Río de la Plata",
    "Álava": "Álava",
}
337# https://es.wiktionary.org/wiki/Plantilla:csem
# Lookup table from Spanish subject-field names to English topic names.
# translate_raw_tags() looks raw tags up here after lowercasing them, so
# every active key must be lowercase.  The commented-out entries still carry
# capitalised keys and would need lowercasing if they were ever enabled.
CSEM_TOPICS = {
    "aeronáutica": "aeronautics",
    "agricultura": "agriculture",
    "ajedrez": "chess",
    # "Algas": "algae",
    "alimento": "food",
    "alimentos": "food",
    "alpinismo": "mountaineering",
    "alquimia": "alchemy",
    "anatomía": "anatomy",
    # "Anfibios": "amphibians",
    # "Angiología": "angiology",
    # "Animales": "animals",
    # "Animales extintos": "extinct animals",
    "antropología": "anthropology",
    # "Antropotomía": "anthropotomy",
    # "Antropónimos": "anthroponyms",
    # "Anélidos": "annelids",
    # "Apellidos": "surnames",
    "apicultura": "beekeeping",
    # "Arbustos": "shrubbery",
    "aritmética": "arithmetic",
    "armas": "weaponry",
    "arqueología": "archeology",
    "arquitectura": "architecture",
    "arte": "art",
    "arte marciales": "martial arts",
    "artes marciales": "martial arts",
    # "Artrópodos": "arthropods",
    # "Arácnidos": "arachnids",
    "astrofísica": "astrophysics",
    "astrología": "astrology",
    "astronomía": "astronomy",
    "atletismo": "athletics",
    # "Audiología": "audiology",
    # "Automovilismo": "motoring",
    # "Aves": "birds",
    # "Bacterias": "bacteria",
    "danza": "dance",
    "baloncesto": "basketball",
    "balonmano": "handball",
    # "Batería de cocina": "cookware",
    # "Bebidas": "drinks",
    "billar": "billiards",
    "biología": "biology",
    "bioquímica": "biochemistry",
    "bolos": "bowling",
    "botánica": "botany",
    "béisbol": "baseball",
    # "Caza": "hunt",
    # "Cactus": "cactus",
    # "Campamento": "camp",
    "cardiología": "cardiology",
    "carpintería": "carpentry",
    # "Casos gramaticales": "grammatical cases",
    # "Cereales": "cereals",
    # "Cerrajería": "locksmith",
    "cetrería": "falconry",
    "ciclismo": "cycling",
    "ciencia": "science",
    "ciencia ficción": "science fiction",
    # "Cine": "cinema",
    "cinegética": "hunting",
    "cinematografía": "cinematography",
    # "Cinología": "cynology",
    "cirugía": "surgery",
    "ciudades": "cities",
    # "Cnidarios": "cnidarians",
    # "Colores": "colors",
    # "Comercio": "trade",
    "comunicación": "communication",
    # "Condimentos": "condiments",
    # "Constelaciones": "constellations",
    "construcción": "construction",
    "contabilidad": "accounting",
    "continentes": "continents",
    # "Cordilleras": "mountain ranges",
    "correos": "mail",
    # "Cosmetología": "cosmetology",
    "cosmología": "cosmology",
    "cosmética": "cosmetics",
    "costura": "sewing",
    "cristianismo": "Christianity",
    "cronología": "chronology",
    # "Crustáceos": "crustaceans",
    # "Cubertería": "cutlery",
    # "Cultura": "culture",
    "deporte": "sports",
    # "Derecho": "right",
    # "Dinosaurios": "dinosaurs",
    # "Dioses": "gods",
    "diseño": "design",
    "días de la semana": "weekdays",
    "ecología": "ecology",
    "economía": "economics",
    # "Edafología": "edaphology",
    "educación": "education",
    "electricidad": "electricity",
    "electromagnetismo": "electromagnetism",
    "electrónica": "electronics",
    # "Elementos químicos": "chemical elements",
    # "Emojis": "emoji",
    # "Emoticonos": "emoticons",
    # "Enfermedades": "diseases",
    "enología": "oenology",
    # "Enseñanza": "teaching",
    "entomología": "entomology",
    # "Equinodermos": "echinoderms",
    # "Equitación": "horse riding",
    # "Eras históricas": "historical eras",
    # "Escultura": "sculpture",
    "esgrima": "fencing",
    # "Especias": "spices",
    "estaciones": "seasons",
    "estadística": "statistics",
    # "Estomatología": "stomatology",
    # "Estética": "esthetic",
    "fabril": "manufacturing",
    # "Fantasía": "fancy",
    # "Farmacia": "pharmacy",
    "farmacología": "pharmacology",
    "feminismo": "feminism",
    # "Festividades": "festivities",
    "ficción": "fiction",
    # "Ficción fantástica": "fantasy fiction",
    "ficología": "phycology",
    "filatelia": "philately",
    "filosofía": "philosophy",
    "finanzas": "finance",
    "fisiología": "physiology",
    # "Flores": "flowers",
    # "Fobias": "phobias",
    "folclore": "folklore",
    "fonética": "phonetics",
    # "Formas": "shapes",
    "fotografía": "photography",
    # "Fraccionarios": "fractionals",
    # "Frutas": "fruit",
    # "Frutos": "fruits",
    "fármacos": "drugs",
    # "Física": "physical",
    "fútbol": "soccer",
    # "Ganadería": "cattle raising",
    # "Gastronomía": "gastronomy",
    # "Gentilicios": "gentilicios",
    "genética": "genetics",
    "geografía": "geography",
    "geología": "geology",
    "geometría": "geometry",
    # "Gimnasia": "gym",
    # "Glotónimos": "gluttonyms",
    "gramática": "grammar",
    # "Granjería": "farming",
    # "Guarismos": "figures",
    "halconería": "falconry",
    "herramientas": "tools",
    "heráldica": "heraldry",
    "hidrología": "hydrology",
    # "Hierbas": "herbs",
    # "Higiene": "hygiene",
    # "Hipocorísticos": "hypocoristic",
    "historia": "history",
    # "Historieta": "cartoon",
    # "Hockey sobre césped": "field hockey",
    # "Hongos": "fungus",
    "horticultura": "horticulture",
    # "Hostelería": "hostelry",
    # "Huesos": "bones",
    # "Humanidades": "humanities",
    "ictiología": "ichthyology",
    # "Lenguas": "languages",
    "imprenta": "printing",
    # "Industria": "industry",
    "informática": "computing",
    "ingeniería": "engineering",
    "inmunología": "immunology",
    "insectos": "insects",
    # "Instrumentos": "instruments",
    # "Instrumentos de medición": "measurement tools",
    # "Instrumentos musicales": "musical instruments",
    "interfaz gráfica de usuario": "graphical user interface",
    "internet": "Internet",
    # "Invertebrados": "invertebrates",
    "islam": "Islam",
    # "Islas": "islands",
    # "Judaísmo": "judaism",
    "juegos": "games",
    # "Juguetes": "toys",
    "lgbt": "LGBT",
    # "Lagos": "lakes",
    "lexicografía": "lexicography",
    "lingüística": "linguistics",
    "literatura": "literature",
    # "Logística": "logistics",
    # "Lucha": "struggle",
    # "Líquidos": "liquids",
    "lógica": "logic",
    "malabarismo": "juggling",
    "mamíferos": "mammals",
    # "Mares": "seas",
    "náutica": "nautical",
    "matemática": "mathematics",
    # "Materia": "subject",
    # "Materiales": "materials",
    "mecánica": "mechanics",
    "medicina": "medicine",
    # "Meses": "months",
    # "Metales": "metals",
    "metalurgia": "metallurgy",
    "meteorología": "meteorology",
    "metrología": "metrology",
    "micología": "mycology",
    "microbiología": "microbiology",
    "milicia": "military",
    # "Minerales": "minerals",
    "mineralogía": "mineralogy",
    "minería": "mining",
    "mitología": "mythology",
    "mobiliario": "furniture",
    # "Moluscos": "mollusks",
    # "Monedas": "coins",
    # "Moneras": "moneras",
    # "Montañas": "mountains",
    "muebles": "furniture",
    "música": "music",
    # "Naipes": "playing cards",
    "natación": "swimming",
    # "Naturaleza": "nature",
    # "Nemátodos": "nematodes",
    "neurología": "neurology",
    "numismática": "numismatics",
    # "Nutrición": "nutrition",
    # "Números": "numbers",
    "ocultismo": "occultism",
    # "Ocupaciones": "activities",
    # "Océanos": "oceans",
    "odontología": "odontology",
    # "Oficios": "trades",
    "oftalmología": "ophthalmology",
    "ornitología": "ornithology",
    "paleontología": "paleontology",
    "parapsicología": "parapsychology",
    # "Parentesco": "relationship",
    # "Partes del día": "parts of the day",
    "países": "countries",
    "peces": "fish",
    # "Penínsulas": "peninsulas",
    "periodismo": "journalism",
    "perros": "dogs",
    # "Personajes bíblicos": "biblical characters",
    # "Personajes ficticios": "fictional characters",
    # "Personajes históricos": "historical figures",
    "pesca": "fishing",
    # "Pesos y medidas": "weights and measures",
    # "Pintura": "paint",
    "planetas": "planets",
    # "Plantas": "floors",
    # "Platelmintos": "platyhelminths",
    # "Platos": "dishes",
    # "Poblaciones": "populations",
    "poesía": "poetry",
    # "Política": "policy",
    "pragmática": "pragmatics",
    # "Prehistoria": "prehistory",
    # "Vestimenta": "outfit",
    # "Profesiones": "professions",
    # "Protistas": "protists",
    # "Pseudociencias": "pseudosciences",
    "psicología": "psychology",
    "psiquiatría": "psychiatry",
    "química": "chemistry",
    "química orgánica": "organic chemistry",
    # "Radiocomunicación": "radiocomunication",
    # "Regiones": "regions",
    # "Reinos biológicos": "biological kingdoms",
    "religión": "religion",
    # "Relojería": "watchmaking",
    # "Reptiles": "reptiles",
    # "Restaurantes": "restaurants",
    # "Retórica": "rhetoric",
    "rugby": "rugby",
    # "Ríos": "rivers",
    # "Sabores": "flavors",
    "salud": "health",
    # "Saludos": "greetings",
    # "Satélites": "satellites",
    # "Seguridad": "security",
    # "Semiología": "semiology",
    "semiótica": "semiotics",
    # "Sentidos": "senses",
    # "Sentimientos": "feelings",
    "serpientes": "snakes",
    "sexualidad": "sexuality",
    # "Signos": "signs",
    # "Signos ortográficos": "spelling signs",
    "silvicultura": "forestry",
    # "Sociedad": "society",
    "sociología": "sociology",
    # "Símbolos": "symbols",
    # "Símbolos alquímicos": "alchemical symbols",
    # "Símbolos astronómicos": "astronomical symbols",
    # "Símbolos de unidades monetarias": "symbols of monetary units",
    "tauromaquia": "bullfighting",
    "taxonomía": "taxonomy",
    "teatro": "theater",
    "tecnología": "technology",
    "textiles": "textiles",
    "telecomunicaciones": "telecommunications",
    # "Telecomunicación": "telecommunication",
    "tenis": "tennis",
    "teología": "theology",
    "termodinámica": "thermodynamics",
    "tiempo": "time",
    "tipografía": "typography",
    "topografía": "topography",
    # "Topónimos": "toponyms",
    "transporte": "transport",
    # "Tribus urbanas": "urban tribes",
    "turismo": "tourism",
    # "Unidades de tiempo": "time units",
    # "Urbanismo": "town planning",
    # "Utensilios": "utensils",
    "vegetales": "vegetable",
    "vehículos": "vehicles",
    "verduras": "vegetable",
    # "Vertebrados": "vertebrates",
    # "Veterinaria": "vet",
    # "Virtudes": "virtues",
    # "Vivienda": "living place",
    # "Waterpolo": "water polo",
    "zoología": "zoology",
    "zootomía": "zootomy",
    "álgebra": "algebra",
    # "Árboles": "trees",
    "ética": "ethics",
    "óptica": "optics",
}
# Grammatical-number labels -> English tags.
NUMBER_TAGS = {
    # "inflect.*" templates
    "singular": "singular",
    "plural": "plural",
}

# Grammatical-gender labels -> English tags.
GENDER_TAGS = {
    "masculino": "masculine",
    "femenino": "feminine",
    "neutro": "neuter",
}

# Degree-of-comparison labels -> English tags.
COMPARISON_TAGS = {
    "positivo": "positive",
    "comparativo": "comparative",
    "superlativo": "superlative",
}

# Grammatical-person labels -> English tags.
PERSON_TAGS = {
    "primera": "first-person",
    "segunda": "second-person",
    "tercera": "third-person",
}

# Tense labels -> English tag(s); compound tenses map to a list of tags.
TENSE_TAGS = {
    "presente": "present",
    "pretérito imperfecto": ["past", "imperfect"],
    "pretérito indefinido": ["indefinite", "preterite"],
    "futuro": "future",
    "condicional": "conditional",
    "pretérito perfecto": ["present", "perfect"],
    "pretérito pluscuamperfecto": "pluperfect",
    "pretérito anterior": ["past", "anterior"],
    "futuro perfecto": ["future", "perfect"],
    "condicional perfecto": ["conditional", "perfect"],
}

# Non-finite verb form labels -> English tags.
VERB_FORM_TAGS = {
    "infinitivo": "infinitive",
    "gerundio": "gerund",
    "participio": "participle",
}


# Combined table consulted by translate_raw_tags().  Keys are matched against
# the lowercased raw tag, so they must be lowercase.  The tables are merged in
# the order listed; on a duplicate key the later entry would win.
ALL_TAGS = {
    **NUMBER_TAGS,
    **GENDER_TAGS,
    **COMPARISON_TAGS,
    **PERSON_TAGS,
    **TENSE_TAGS,
    **VERB_FORM_TAGS,
    "afirmativo": "affirmative",
    "negativo": "negative",
    "simples": "simple",
    "compuestas": "compound",
}
def translate_raw_tags(data: WordEntry):
    """Convert recognised entries of ``data.raw_tags`` into tags or topics.

    Each raw tag is lowercased and looked up:

    * found in ``ALL_TAGS``: the mapped value is added to ``data.tags``
      (a string is appended, a list is extended);
    * otherwise found in ``CSEM_TOPICS`` and ``data`` has a ``topics``
      attribute: the mapped topic is appended to ``data.topics``;
    * otherwise the raw tag is kept, in its original casing.

    ``data`` is modified in place; ``data.raw_tags`` is replaced by the list
    of tags that could not be translated.  Nothing is returned.
    """
    unmatched = []
    for original in data.raw_tags:
        key = original.lower()

        if key in ALL_TAGS:
            mapped = ALL_TAGS[key]
            if isinstance(mapped, list):
                data.tags.extend(mapped)
            elif isinstance(mapped, str):
                data.tags.append(mapped)
            # A recognised tag is consumed even if its mapped value is
            # neither a string nor a list.
            continue

        if key in CSEM_TOPICS and hasattr(data, "topics"):
            data.topics.append(CSEM_TOPICS[key])
            continue

        unmatched.append(original)

    data.raw_tags = unmatched