Coverage for src/wiktextract/extractor/es/translation.py: 84%
69 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1import itertools
2import re
4from mediawiki_langcodes import code_to_name
5from wikitextprocessor.parser import LevelNode, NodeKind, TemplateNode
7from ...page import clean_node
8from ...wxr_context import WiktextractContext
9from .models import Translation, WordEntry
10from .tags import translate_raw_tags
def extract_translation_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
    is_translation: bool = True,
) -> None:
    """Collect the translation items of a section and attach them to entries.

    Walks the template children of ``level_node``:

    * ``t`` and ``d`` templates are parsed by ``process_t_template`` using
      the sense text and sense index currently in effect.
    * a ``trad-arriba`` template (only honoured when ``is_translation`` is
      true) sets the sense text from its first parameter; a leading
      ``[index]`` prefix is split off into the sense index.

    The collected items and categories are then appended to every entry of
    ``page_data`` whose ``lang_code`` and ``etymology_text`` equal those of
    the last entry: items go to ``translations`` when ``is_translation`` is
    true, otherwise to ``descendants``.
    """
    tr_data = []
    cats = []
    sense = ""
    sense_index = ""
    for t_node in level_node.find_child(NodeKind.TEMPLATE):
        if t_node.template_name in ("t", "d"):
            new_tr_list, new_cats = process_t_template(
                wxr, t_node, sense, sense_index
            )
            tr_data.extend(new_tr_list)
            cats.extend(new_cats)
        elif t_node.template_name == "trad-arriba" and is_translation:
            sense = clean_node(
                wxr, None, t_node.template_parameters.get(1, "")
            )
            m = re.match(r"\[([\d.a-z]+)\]", sense)
            if m is not None:
                sense_index = m.group(1)
                sense = sense[m.end() :].strip()
            else:
                # A new table header without an "[index]" prefix must not
                # inherit the index of the previous table.
                sense_index = ""

    if not page_data:
        return
    last_entry = page_data[-1]
    for data in page_data:
        if (
            data.lang_code == last_entry.lang_code
            and data.etymology_text == last_entry.etymology_text
        ):
            if is_translation:
                data.translations.extend(tr_data)
            else:
                data.descendants.extend(tr_data)
            data.categories.extend(cats)
# Abbreviation tables for the translation template parameters, see
# https://es.wiktionary.org/wiki/Módulo:t
# T_GENDERS is looked up with the value of a "g<N>" parameter; one
# abbreviation may expand to several tags, in which case the value is a list.
T_GENDERS = dict(
    m="masculine",
    f="feminine",
    mf=["masculine", "feminine"],
    n="neuter",
    c="common",
)
# T_NUMBERS is looked up with the value of an "n<N>" parameter; every
# grammatical number has a short and a long abbreviation.
T_NUMBERS = {
    abbreviation: number
    for number, abbreviations in (
        ("singular", ("s", "sg")),
        ("plural", ("p", "pl")),
        ("dual", ("d", "du")),
    )
    for abbreviation in abbreviations
}
def process_t_template(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    sense: str,
    sense_index: str,
) -> tuple[list[Translation], list[str]]:
    """Parse one ``t``/``d`` template into a list of ``Translation`` items.

    The template carries numbered parameter groups (``t1``/``d1``, ``a1``,
    ``tl1``, ``nota1``, ``g1``, ``n1``, then ``t2``...). Groups are read in
    ascending order until neither ``t<N>`` nor ``d<N>`` is present.

    Args:
        wxr: extraction context passed through to ``clean_node``.
        template_node: the template to parse.
        sense: sense text stored on every produced item.
        sense_index: sense index used until an ``a<N>`` parameter gives
            another one; that value then also applies to the following
            groups of this template.

    Returns:
        The items that ended up with a non-empty ``word``, and the
        ``"categories"`` list collected by ``clean_node`` while expanding
        the template (empty list when there is none).
    """
    # https://es.wiktionary.org/wiki/Plantilla:t
    params = template_node.template_parameters
    tr_list = []
    cats = {}
    lang_code = params.get(1, "") or "unknown"
    template_text = clean_node(wxr, cats, template_node)
    # The language name is the text before the first colon of the expanded
    # template. ``partition`` is used instead of slicing with ``find``:
    # ``find`` returns -1 when there is no colon, which would silently turn
    # almost the whole text into the language name.
    name_part, colon, _ = template_text.partition(":")
    lang_name = name_part.strip("* ") if colon else ""
    if lang_name == "":  # in case Lua error
        lang_name = code_to_name(lang_code, "es") or "unknown"

    # (parameter prefix, Translation field) pairs, in processing order
    param_fields = (
        ("t", "word"),
        ("d", "word"),
        ("a", "sense_index"),
        ("tl", "roman"),
        ("nota", "raw_tags"),
        ("g", "tags"),
        ("n", "tags"),
    )

    for tr_index in itertools.count(1):
        suffix = str(tr_index)
        if "t" + suffix not in params and "d" + suffix not in params:
            break
        tr_data = Translation(
            lang_code=lang_code,
            lang=lang_name,
            word="",
            sense=sense,
            sense_index=sense_index,
        )
        for param_prefix, field in param_fields:
            param = param_prefix + suffix
            if param not in params:
                continue
            value = clean_node(wxr, None, params[param])
            if param_prefix == "g":
                value = T_GENDERS.get(value)
            elif param_prefix == "n":
                value = T_NUMBERS.get(value)
            elif param_prefix == "a" and value != "":
                # Usually only the first word has an index parameter:
                # remember it so the next groups are created with it.
                sense_index = value
            if value is None or value == "":
                continue

            pre_value = getattr(tr_data, field)
            if isinstance(pre_value, list):
                if isinstance(value, list):
                    pre_value.extend(value)
                else:
                    pre_value.append(value)
            else:
                setattr(tr_data, field, value)

        if tr_data.word:
            translate_raw_tags(tr_data)
            tr_list.append(tr_data)
    return tr_list, cats.get("categories", [])