Coverage for src/wiktextract/extractor/es/pos.py: 86%
145 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1import re
3from wikitextprocessor.parser import (
4 LEVEL_KIND_FLAGS,
5 LevelNode,
6 NodeKind,
7 TemplateNode,
8 WikiNode,
9)
11from ...page import clean_node
12from ...wxr_context import WiktextractContext
13from .example import process_ejemplo_template
14from .inflection import process_inflect_template
15from .linkage import process_linkage_template
16from .models import AltForm, Form, Sense, WordEntry
17from .section_titles import LINKAGE_TITLES
18from .tags import ALL_TAGS, translate_raw_tags
def extract_pos_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    section_title: str,
) -> None:
    """Fill ``word_entry`` with the data found in one part-of-speech section.

    Every whitespace-separated word of ``section_title`` that is a key of
    ``ALL_TAGS`` contributes its mapped tag(s) to ``word_entry.tags``, without
    adding duplicates. The direct children of ``level_node`` are then
    scanned: list nodes are dispatched on their marker (``sarg``) and
    template nodes on their template name. When no list node was seen at
    all, the cleaned text of the nodes returned by
    ``level_node.invert_find_child(LEVEL_KIND_FLAGS)`` is stored as the only
    gloss of a new sense, provided that text is not empty.
    """
    # Title words that are known tags become tags of the entry.
    for title_word in section_title.split():
        if title_word not in ALL_TAGS:
            continue
        mapped = ALL_TAGS[title_word]
        if isinstance(mapped, str) and mapped not in word_entry.tags:
            word_entry.tags.append(mapped)
        elif isinstance(mapped, list):
            for tag in mapped:
                if tag not in word_entry.tags:
                    word_entry.tags.append(tag)

    saw_list = False
    for child in level_node.children:
        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            saw_list = True
            marker = child.sarg
            if marker == ";":
                # Each item is handed a brand-new sense.
                for item in child.find_child(NodeKind.LIST_ITEM):
                    extract_gloss_list_item(wxr, word_entry, item, Sense())
            elif re.fullmatch(r":+;", marker) is not None:
                # Nested gloss: the parent is the most recent sense whose
                # number of glosses equals the number of leading colons.
                # An empty sense is used when no such sense exists.
                depth = len(marker) - 1
                base_sense = next(
                    (
                        known
                        for known in reversed(word_entry.senses)
                        if len(known.glosses) == depth
                    ),
                    Sense(),
                )
                for item in child.find_child(NodeKind.LIST_ITEM):
                    # Work on a copy so the parent sense stays untouched.
                    nested = base_sense.model_copy(deep=True)
                    nested.sense_index = ""
                    extract_gloss_list_item(wxr, word_entry, item, nested)
            elif marker == ":" and len(word_entry.senses) > 0:
                # Items are processed against the latest sense itself.
                for item in child.find_child(NodeKind.LIST_ITEM):
                    extract_gloss_list_item(
                        wxr, word_entry, item, word_entry.senses[-1]
                    )
        elif isinstance(child, TemplateNode):
            name = child.template_name
            has_sense = len(word_entry.senses) > 0
            if name.startswith("inflect."):
                process_inflect_template(wxr, word_entry, child)
            elif name in ("es.sust", "es.adj", "es.v"):
                extract_pos_header_template(wxr, word_entry, child)
            elif name.removesuffix("s") in LINKAGE_TITLES:
                process_linkage_template(wxr, word_entry, child)
            elif name == "ejemplo" and has_sense:
                process_ejemplo_template(wxr, word_entry.senses[-1], child)
            elif name == "uso" and has_sense:
                process_uso_template(wxr, word_entry.senses[-1], child)
            elif name == "ámbito" and has_sense:
                process_ambito_template(wxr, word_entry.senses[-1], child)

    if saw_list:
        return

    # No gloss list in this section: fall back to the section's own text.
    fallback = Sense()
    text = clean_node(
        wxr, fallback, list(level_node.invert_find_child(LEVEL_KIND_FLAGS))
    )
    if text != "":
        fallback.glosses.append(text)
        word_entry.senses.append(fallback)
def extract_gloss_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: Sense,
) -> None:
    """Read one gloss list item into ``sense`` and recurse into its sublists.

    For an item whose marker (``sarg``) ends with ``;`` the item's children
    supply the sense index and a comma-separated list of raw tags, and the
    sense is appended to ``word_entry.senses`` once a non-empty gloss has
    been found. The gloss itself is built from ``list_item.definition`` when
    that is not ``None`` and from ``list_item.children`` otherwise, leaving
    out nested list nodes. Each nested list item is then processed
    recursively with a deep copy of ``sense`` whose ``sense_index`` is reset.
    """
    opens_sense = list_item.sarg.endswith(";")

    if opens_sense:
        tag_text = clean_node(wxr, sense, list_item.children)
        for position, child in enumerate(list_item.children):
            if not isinstance(child, str) or sense.sense_index != "":
                continue
            found = re.search(r"[\d.a-z]+", child)
            if found is None:
                continue
            sense.sense_index = found.group(0)
            # Only what follows the index node is treated as tag text.
            tag_text = clean_node(
                wxr, sense, list_item.children[position + 1 :]
            )
            break
        stripped_parts = (part.strip() for part in tag_text.split(","))
        sense.raw_tags.extend(part for part in stripped_parts if part != "")

    if list_item.definition is not None:
        body_nodes = list_item.definition
    else:
        body_nodes = list_item.children

    gloss_nodes = []
    for child in body_nodes:
        if isinstance(
            child, TemplateNode
        ) and child.template_name.startswith(("f.", "forma ", "plural")):
            # Form-of templates add data to the sense and stay in the gloss.
            process_forma_template(wxr, sense, child)
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            # Nested lists are handled separately below.
            continue
        gloss_nodes.append(child)

    gloss_text = clean_node(wxr, sense, gloss_nodes)
    if gloss_text != "":
        sense.glosses.append(gloss_text)
        translate_raw_tags(sense)
        if opens_sense:
            word_entry.senses.append(sense)

    for child in body_nodes:
        if not (isinstance(child, WikiNode) and child.kind == NodeKind.LIST):
            continue
        for sub_item in child.find_child(NodeKind.LIST_ITEM):
            # Each nested item starts from a copy of the current sense.
            sub_sense = sense.model_copy(deep=True)
            sub_sense.sense_index = ""
            extract_gloss_list_item(wxr, word_entry, sub_item, sub_sense)
def process_forma_template(
    wxr: WiktextractContext, sense: Sense, template: TemplateNode
) -> None:
    """Record the word named by a form-of template on ``sense``.

    The cleaned first positional argument is added to ``sense.form_of``.
    When the template has a ``pronominal`` or ``pronom`` argument, the same
    word with ``se`` appended is added as well. The ``form-of`` tag is added
    to ``sense.tags`` if it is not already there. Nothing happens when the
    first argument cleans to an empty string.
    """
    # https://es.wiktionary.org/wiki/Plantilla:forma_verbo
    params = template.template_parameters
    lemma = clean_node(wxr, None, params.get(1, ""))
    if lemma == "":
        return

    sense.form_of.append(AltForm(word=lemma))
    if "pronominal" in params or "pronom" in params:
        sense.form_of.append(AltForm(word=f"{lemma}se"))
    if "form-of" not in sense.tags:
        sense.tags.append("form-of")
def process_uso_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Turn the positional arguments of a ``uso`` template into tags.

    Each positional argument is cleaned to plain text. A value that is a key
    of ``USO_TAGS`` adds its mapped tag (``str``) or tags (``list``) to
    ``sense.tags``; any other non-empty value is kept in ``sense.raw_tags``.
    Values that clean to an empty string are skipped so that no empty raw
    tag is stored. Named arguments are ignored.
    """
    # https://es.wiktionary.org/wiki/Plantilla:uso
    from .tags import USO_TAGS

    for arg_name, arg_value in t_node.template_parameters.items():
        if not isinstance(arg_name, int):
            continue
        usage = clean_node(wxr, None, arg_value)
        if usage == "":
            # An empty argument carries no information; do not record it.
            continue
        if usage in USO_TAGS:
            tr_tags = USO_TAGS[usage]
            if isinstance(tr_tags, str):
                sense.tags.append(tr_tags)
            elif isinstance(tr_tags, list):
                sense.tags.extend(tr_tags)
        else:
            sense.raw_tags.append(usage)

    clean_node(wxr, sense, t_node)  # save category links
def process_ambito_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Turn the positional arguments of an ``ámbito`` template into tags.

    Each positional argument is cleaned to plain text; when the result is a
    key of ``AMBITO_TAGS`` its mapped tag (``str``) or tags (``list``) are
    added to ``sense.tags``. Values not found in ``AMBITO_TAGS`` and named
    arguments are ignored.
    """
    # https://es.wiktionary.org/wiki/Plantilla:ámbito
    # location data
    from .tags import AMBITO_TAGS

    positional_values = (
        value
        for key, value in t_node.template_parameters.items()
        if isinstance(key, int)
    )
    for raw_value in positional_values:
        place = clean_node(wxr, None, raw_value)
        if place not in AMBITO_TAGS:
            continue
        mapped = AMBITO_TAGS[place]
        if isinstance(mapped, str):
            sense.tags.append(mapped)
        elif isinstance(mapped, list):
            sense.tags.extend(mapped)

    clean_node(wxr, sense, t_node)  # save category links
def extract_pos_header_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Collect forms and raw tags from an expanded POS header template.

    The template is converted back to wikitext, fully expanded and parsed.
    Walking the top-level nodes of the result: a string ending with ``:``
    (ignoring surrounding whitespace) sets the current label; every link
    with non-empty text becomes a ``Form`` carrying the words of the current
    label as raw tags; the non-empty text of every italic node is added to
    ``word_entry.raw_tags``.
    """
    # https://es.wiktionary.org/wiki/Plantilla:es.sust
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)

    label = ""
    for child in expanded.children:
        if isinstance(child, str):
            if child.strip().endswith(":"):
                # The label applies to the links that follow it.
                label = clean_node(wxr, None, child).strip(": ¦()")
            continue
        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LINK:
            form = Form(form=clean_node(wxr, None, child))
            if form.form != "":
                if label != "":
                    form.raw_tags.extend(label.split())
                    translate_raw_tags(form)
                word_entry.forms.append(form)
        elif child.kind == NodeKind.ITALIC:
            italic_text = clean_node(wxr, None, child)
            if italic_text != "":
                word_entry.raw_tags.append(italic_text)