Coverage for src/wiktextract/extractor/es/pos.py: 86%
145 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-13 10:14 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-13 10:14 +0000
1import re
3from wikitextprocessor.parser import (
4 LEVEL_KIND_FLAGS,
5 LevelNode,
6 NodeKind,
7 TemplateNode,
8 WikiNode,
9)
11from ...page import clean_node
12from ...wxr_context import WiktextractContext
13from .example import process_ejemplo_template
14from .inflection import process_inflect_template
15from .linkage import process_linkage_template
16from .models import AltForm, Form, Sense, WordEntry
17from .section_titles import LINKAGE_TITLES
18from .tags import ALL_TAGS, translate_raw_tags
def extract_pos_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    section_title: str,
) -> None:
    """Fill ``word_entry`` with the tags and senses of a POS section.

    Whitespace-separated words of ``section_title`` that are keys of
    ``ALL_TAGS`` are added to ``word_entry.tags``. The direct children of
    ``level_node`` are then visited in order: list nodes are handed to
    ``extract_gloss_list_item`` and template nodes are dispatched by name.
    If no list node was seen, the cleaned text of the nodes returned by
    ``level_node.invert_find_child(LEVEL_KIND_FLAGS, ...)`` is stored as the
    only gloss of a new sense (when that text is not empty).
    """
    # Section title words -> entry tags, skipping tags already present.
    for title_word in section_title.split():
        if title_word not in ALL_TAGS:
            continue
        translated = ALL_TAGS[title_word]
        if isinstance(translated, str):
            new_tags = [translated]
        elif isinstance(translated, list):
            new_tags = translated
        else:
            new_tags = []
        for new_tag in new_tags:
            if new_tag not in word_entry.tags:
                word_entry.tags.append(new_tag)

    saw_list = False
    for child in level_node.children:
        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            saw_list = True
            marker = child.sarg
            if marker == ";":
                # Every item of a ";" list starts from an empty sense.
                for item in child.find_child(NodeKind.LIST_ITEM):
                    extract_gloss_list_item(wxr, word_entry, item, Sense())
            elif re.fullmatch(r":+;", marker) is not None:  # nested gloss
                # The parent is the most recent sense whose gloss count
                # equals the number of ":" in the marker; an empty sense
                # is used when no such sense exists.
                wanted_gloss_count = len(marker) - 1
                base_sense = next(
                    (
                        prior
                        for prior in reversed(word_entry.senses)
                        if len(prior.glosses) == wanted_gloss_count
                    ),
                    None,
                )
                if base_sense is None:
                    base_sense = Sense()
                for item in child.find_child(NodeKind.LIST_ITEM):
                    nested_sense = base_sense.model_copy(deep=True)
                    nested_sense.sense_index = ""
                    extract_gloss_list_item(
                        wxr, word_entry, item, nested_sense
                    )
            elif marker == ":" and len(word_entry.senses) > 0:
                # ":" items are applied to whichever sense is last at the
                # time each item is processed.
                for item in child.find_child(NodeKind.LIST_ITEM):
                    extract_gloss_list_item(
                        wxr, word_entry, item, word_entry.senses[-1]
                    )
        elif isinstance(child, TemplateNode):
            name = child.template_name
            if name.startswith("inflect."):
                process_inflect_template(wxr, word_entry, child)
            elif name in ["es.sust", "es.adj", "es.v"]:
                extract_pos_header_template(wxr, word_entry, child)
            elif name.removesuffix("s") in LINKAGE_TITLES:
                process_linkage_template(wxr, word_entry, child)
            elif len(word_entry.senses) > 0:
                # The remaining templates annotate the latest sense.
                last_sense = word_entry.senses[-1]
                if name == "ejemplo":
                    process_ejemplo_template(wxr, last_sense, child)
                elif name == "uso":
                    process_uso_template(wxr, last_sense, child)
                elif name == "ámbito":
                    process_ambito_template(wxr, last_sense, child)

    if saw_list:
        return

    # No list in the section: use the section text itself as the gloss.
    fallback_sense = Sense()
    section_nodes = list(
        level_node.invert_find_child(LEVEL_KIND_FLAGS, include_empty_str=True)
    )
    fallback_gloss = clean_node(wxr, fallback_sense, section_nodes)
    if fallback_gloss != "":
        fallback_sense.glosses.append(fallback_gloss)
        word_entry.senses.append(fallback_sense)
def extract_gloss_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: Sense,
) -> None:
    """Extract one gloss list item into ``sense``.

    For an item whose marker ends with ";", the first ``[\\d.a-z]+`` match
    found in a string child becomes ``sense.sense_index`` (only when the
    index is still empty) and the comma-separated text after it is stored as
    raw tags. The gloss is the cleaned text of the item's definition (or of
    its children when there is no definition), nested lists excluded. A
    non-empty gloss is appended to ``sense``, and for ";" items the sense is
    appended to ``word_entry.senses``. Nested lists are processed
    recursively, each item starting from a deep copy of ``sense``.
    """
    starts_sense = list_item.sarg.endswith(";")
    if starts_sense:
        # Default: the whole item is tag text; narrowed below once the
        # sense index has been located.
        raw_tag_text = clean_node(wxr, sense, list_item.children)
        if sense.sense_index == "":
            for position, child in enumerate(list_item.children):
                if not isinstance(child, str):
                    continue
                index_match = re.search(r"[\d.a-z]+", child)
                if index_match is not None:
                    sense.sense_index = index_match.group(0)
                    raw_tag_text = clean_node(
                        wxr, sense, list_item.children[position + 1 :]
                    )
                    break
        for piece in raw_tag_text.split(","):
            piece = piece.strip()
            if piece != "":
                sense.raw_tags.append(piece)

    body_nodes = (
        list_item.definition
        if list_item.definition is not None
        else list_item.children
    )

    # Split the body into gloss content and nested lists in one pass.
    gloss_nodes = []
    nested_lists = []
    for child in body_nodes:
        if isinstance(child, TemplateNode) and child.template_name.startswith(
            ("f.", "forma ", "plural")
        ):
            # Form-of templates add form_of data and stay in the gloss.
            process_forma_template(wxr, sense, child)
            gloss_nodes.append(child)
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            nested_lists.append(child)
        else:
            gloss_nodes.append(child)

    gloss_text = clean_node(wxr, sense, gloss_nodes)
    if gloss_text != "":
        sense.glosses.append(gloss_text)
        translate_raw_tags(sense)
        if starts_sense:
            word_entry.senses.append(sense)

    for nested in nested_lists:
        for nested_item in nested.find_child(NodeKind.LIST_ITEM):
            nested_sense = sense.model_copy(deep=True)
            nested_sense.sense_index = ""
            extract_gloss_list_item(wxr, word_entry, nested_item, nested_sense)
def process_forma_template(
    wxr: WiktextractContext, sense: Sense, template: TemplateNode
) -> None:
    """Record the form-of target named by a form template on ``sense``.

    The cleaned first positional parameter is appended to ``sense.form_of``.
    When the template has a ``pronominal`` or ``pronom`` parameter, the same
    word with "se" appended is recorded as well. The "form-of" tag is added
    once. Nothing happens when the first parameter cleans to an empty string.
    """
    # https://es.wiktionary.org/wiki/Plantilla:forma_verbo
    params = template.template_parameters
    base_word = clean_node(wxr, None, params.get(1, ""))
    if base_word == "":
        return

    sense.form_of.append(AltForm(word=base_word))
    if any(flag in params for flag in ("pronominal", "pronom")):
        sense.form_of.append(AltForm(word=base_word + "se"))
    if "form-of" not in sense.tags:
        sense.tags.append("form-of")
def process_uso_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Apply the positional arguments of a "uso" template to ``sense``.

    Each positional (integer-keyed) argument is cleaned to text. Values that
    are keys of ``USO_TAGS`` add the mapped tag or tags to ``sense.tags``;
    any other non-empty value is kept in ``sense.raw_tags``. Empty values
    are skipped, and values already present are not added again. The whole
    template is finally cleaned against ``sense`` to save category links.
    """
    # https://es.wiktionary.org/wiki/Plantilla:uso
    from .tags import USO_TAGS

    for arg_name, arg_value in t_node.template_parameters.items():
        if not isinstance(arg_name, int):
            continue  # named parameters are not usage labels
        usage = clean_node(wxr, None, arg_value)
        if usage == "":
            continue  # an empty argument would become an empty raw tag
        if usage in USO_TAGS:
            tr_tags = USO_TAGS[usage]
            if isinstance(tr_tags, str):
                tr_tags = [tr_tags]
            elif not isinstance(tr_tags, list):
                tr_tags = []
            for tag in tr_tags:
                if tag not in sense.tags:
                    sense.tags.append(tag)
        elif usage not in sense.raw_tags:
            sense.raw_tags.append(usage)

    clean_node(wxr, sense, t_node)  # save category links
def process_ambito_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Apply the positional arguments of an "ámbito" template to ``sense``.

    Each positional (integer-keyed) argument is cleaned to text. Values that
    are keys of ``AMBITO_TAGS`` add the mapped tag or tags to ``sense.tags``,
    skipping tags that are already present; other values are ignored. The
    whole template is finally cleaned against ``sense`` to save category
    links.
    """
    # https://es.wiktionary.org/wiki/Plantilla:ámbito
    # location data
    from .tags import AMBITO_TAGS

    for arg_name, arg_value in t_node.template_parameters.items():
        if not isinstance(arg_name, int):
            continue  # only positional arguments are looked up
        location = clean_node(wxr, None, arg_value)
        if location not in AMBITO_TAGS:
            continue
        tr_tags = AMBITO_TAGS[location]
        if isinstance(tr_tags, str):
            tr_tags = [tr_tags]
        elif not isinstance(tr_tags, list):
            tr_tags = []
        for tag in tr_tags:
            if tag not in sense.tags:
                sense.tags.append(tag)

    clean_node(wxr, sense, t_node)  # save category links
def extract_pos_header_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Extract forms and raw tags from an expanded POS header template.

    The template is converted back to wikitext and re-parsed with all
    templates expanded. While walking the top-level nodes of the result:
    a string ending with ":" becomes the current label, each non-empty link
    becomes a ``Form`` carrying the words of the current label as raw tags,
    and non-empty italic text is appended to ``word_entry.raw_tags``.
    """
    # https://es.wiktionary.org/wiki/Plantilla:es.sust
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)

    label = ""
    for child in expanded.children:
        if isinstance(child, str):
            if child.strip().endswith(":"):
                label = clean_node(wxr, None, child).strip(": ¦()")
            continue
        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LINK:
            form = Form(form=clean_node(wxr, None, child))
            if form.form == "":
                continue
            if label != "":
                form.raw_tags.extend(label.split())
                translate_raw_tags(form)
            word_entry.forms.append(form)
        elif child.kind == NodeKind.ITALIC:
            italic_text = clean_node(wxr, None, child)
            if italic_text != "":
                word_entry.raw_tags.append(italic_text)