Coverage for src/wiktextract/extractor/es/example.py: 91%
61 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor.parser import (
2 NodeKind,
3 TemplateNode,
4 WikiNode,
5 WikiNodeChildrenList,
6)
8from ...page import clean_node
9from ...wxr_context import WiktextractContext
10from .models import Example, Sense, TemplateData
def process_ejemplo_template(
    wxr: WiktextractContext,
    sense_data: Sense,
    template_node: TemplateNode,
):
    """Build an ``Example`` from an "ejemplo" template and add it to a sense.

    The template is expanded and its ``<span>`` tags are inspected by their
    ``class`` attribute: "cita" supplies the example text (and the reference
    when the span ends with a URL node), "trad" supplies the translation and
    "ref" supplies the reference.  If no text is found that way, the
    template's first positional argument is used.  Nothing is appended to
    ``sense_data.examples`` when the text is still empty.
    """
    # https://es.wiktionary.org/wiki/Plantilla:ejemplo
    # https://es.wiktionary.org/wiki/Módulo:ejemplo
    example = Example(text="")
    wikitext = wxr.wtp.node_to_wikitext(template_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)

    for span in expanded.find_html_recursively("span"):
        css_class = span.attrs.get("class")
        if css_class == "cita":
            children = span.children
            ends_with_url = (
                len(children) > 1
                and isinstance(children[-1], WikiNode)
                and children[-1].kind == NodeKind.URL
            )
            if ends_with_url:
                # Trailing URL node is the reference; the rest is the quote.
                example.text = clean_node(wxr, None, children[:-1])
                example.ref = clean_node(wxr, None, children[-1])
            else:
                example.text = clean_node(wxr, None, span)
        elif css_class == "trad":
            translation = clean_node(wxr, None, span)
            example.translation = translation.removeprefix("Traducción: ")
        elif css_class == "ref":
            example.ref = clean_node(wxr, None, span)

    if not example.text:
        # Fall back to the raw first positional template argument.
        first_arg = template_node.template_parameters.get(1, "")
        example.text = clean_node(wxr, None, first_arg)

    if not example.text:
        return

    # Keep the template name, arguments and expansion alongside the example.
    expansion = clean_node(wxr, None, expanded)
    template_info = TemplateData(expansion=expansion)
    template_info.name = template_node.template_name
    for key, raw_value in template_node.template_parameters.items():
        template_info.args[str(key)] = clean_node(wxr, None, raw_value)
    example.example_templates.append(template_info)
    sense_data.examples.append(example)
def extract_example(
    wxr: WiktextractContext,
    sense_data: Sense,
    nodes: WikiNodeChildrenList,
):
    """Extract examples for a sense from a flat list of nodes.

    "ejemplo" templates are handed to ``process_ejemplo_template``.  A URL
    node becomes the ``ref`` of the most recently added example; it is
    dropped if the sense has no example yet.  Every other node is collected.
    If the sense ends up with no examples, the collected nodes are cleaned
    into a single example; otherwise any collected nodes are reported through
    a debug message.
    """
    leftover: WikiNodeChildrenList = []
    for node in nodes:
        if not isinstance(node, WikiNode):
            leftover.append(node)
            continue

        if node.kind == NodeKind.TEMPLATE:
            if node.template_name == "ejemplo":
                process_ejemplo_template(wxr, sense_data, node)
            else:
                leftover.append(node)
        elif node.kind == NodeKind.URL:
            # A URL is only kept when there is an example to attach it to.
            if sense_data.examples:
                sense_data.examples[-1].ref = clean_node(wxr, None, node)
        else:
            leftover.append(node)

    if not leftover:
        return

    if not sense_data.examples:
        sense_data.examples.append(
            Example(text=clean_node(wxr, None, leftover))
        )
    else:
        wxr.wtp.debug(
            f"Unprocessed nodes from example group: {leftover}",
            sortid="extractor/es/example/extract_example/87",
        )
def process_example_list(
    wxr: WiktextractContext,
    sense_data: Sense,
    list_item: WikiNode,
):
    """Extract examples from a list item and its nested list items.

    Each nested list item yields one example: templates whose name starts
    with "cita " provide the reference, "referencia incompleta" templates are
    skipped, and all remaining children form the example text.  When
    ``sense_data.examples`` is still empty afterwards, the text of
    ``list_item`` itself (minus a leading "Ejemplo: ") is used instead.
    """
    for nested_item in list_item.find_child_recursively(NodeKind.LIST_ITEM):
        example = Example(text="")
        kept_children: WikiNodeChildrenList = []
        for child in nested_item.children:
            if isinstance(child, TemplateNode):
                name = child.template_name
                # "cita *" templates are obsolete
                if name.startswith("cita "):
                    example.ref = clean_node(wxr, None, child)
                    continue
                # ignore empty ref template
                if name == "referencia incompleta":
                    continue
            kept_children.append(child)

        example.text = clean_node(wxr, None, kept_children)
        if example.text:
            sense_data.examples.append(example)

    if sense_data.examples:
        return

    # If no example was found in sublists,
    # assume example is in list_item.children directly.
    cleaned = clean_node(wxr, None, list_item.children)
    fallback_text = cleaned.removeprefix("Ejemplo: ")
    if fallback_text:
        sense_data.examples.append(Example(text=fallback_text))