Coverage for src/wiktextract/extractor/es/linkage.py: 88%
79 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1from wikitextprocessor.parser import (
2 LEVEL_KIND_FLAGS,
3 LevelNode,
4 NodeKind,
5 TemplateNode,
6 WikiNode,
7)
9from ...page import clean_node
10from ...wxr_context import WiktextractContext
11from .models import Form, Linkage, WordEntry
12from .section_titles import LINKAGE_TITLES
def extract_linkage_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
    linkage_type: str,
):
    """Collect linkage entries from the list items of a section.

    Inside each list item, link nodes and ``l`` templates found before the
    first colon are cleaned into words; the text after that colon plus all
    following nodes are cleaned into one sense string shared by every word
    of the item. Words that clean to an empty value are dropped.

    The collected entries are appended to the ``linkage_type`` attribute of
    every entry in ``page_data`` whose ``lang_code`` and ``etymology_text``
    equal those of the last entry in ``page_data``.
    """
    collected = []
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        terms = []
        gloss_nodes = []
        colon_seen = False
        for child in item.children:
            if colon_seen:
                # Everything after the colon belongs to the sense text.
                gloss_nodes.append(child)
                continue
            if (isinstance(child, WikiNode) and child.kind == NodeKind.LINK) or (
                isinstance(child, TemplateNode) and child.template_name == "l"
            ):
                terms.append(clean_node(wxr, None, child))
            elif isinstance(child, str) and ":" in child:
                colon_seen = True
                # Keep only the part of the string after the first colon.
                gloss_nodes.append(child.partition(":")[2])
        gloss = clean_node(wxr, None, gloss_nodes)
        collected.extend(
            Linkage(word=term, sense=gloss) for term in terms if term
        )

    for entry in page_data:
        newest = page_data[-1]
        if (
            entry.lang_code != newest.lang_code
            or entry.etymology_text != newest.etymology_text
        ):
            continue
        getattr(entry, linkage_type).extend(collected)
def process_linkage_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
):
    """Add the words listed in a linkage template to ``word_entry``.

    The template name, with a trailing "s" removed, is looked up in
    ``LINKAGE_TITLES`` to find the ``word_entry`` attribute that receives
    the data. Positional arguments 1 to 40 are read in order, stopping at
    the first missing index. Each one becomes a ``Linkage`` that is tied to
    the last sense of ``word_entry`` when there is one, and is completed
    from the ``nota<N>``/``alt<N>`` arguments. For the first word the
    un-numbered ``nota``/``alt`` arguments are applied afterwards, so they
    take precedence over ``nota1``/``alt1``.

    Returns without changing anything when the template name has no entry
    in ``LINKAGE_TITLES``.
    """
    # https://es.wiktionary.org/wiki/Plantilla:sinónimo
    linkage_type = LINKAGE_TITLES.get(t_node.template_name.removesuffix("s"))
    if linkage_type is None:
        # ``dict.get`` yields None for an unmapped name and ``getattr``
        # rejects a non-string attribute name, so bail out early.
        return

    # The last sense does not change while the arguments are processed.
    last_sense = word_entry.senses[-1] if word_entry.senses else None
    for index in range(1, 41):
        if index not in t_node.template_parameters:
            break
        linkage_data = Linkage(
            word=clean_node(wxr, None, t_node.template_parameters[index])
        )
        if last_sense is not None:
            linkage_data.sense_index = last_sense.sense_index
            linkage_data.sense = " ".join(last_sense.glosses)
        getattr(word_entry, linkage_type).append(linkage_data)
        process_linkage_template_parameter(
            wxr, linkage_data, t_node, f"nota{index}"
        )
        process_linkage_template_parameter(
            wxr, linkage_data, t_node, f"alt{index}"
        )
        if index == 1:
            process_linkage_template_parameter(
                wxr, linkage_data, t_node, "nota"
            )
            process_linkage_template_parameter(wxr, linkage_data, t_node, "alt")
def process_linkage_template_parameter(
    wxr: WiktextractContext,
    linkage_data: Linkage,
    template_node: TemplateNode,
    param: str,
) -> None:
    """Copy one optional template argument onto ``linkage_data``.

    Does nothing when ``param`` is not among the template's arguments.
    Otherwise the cleaned argument value is stored in ``note`` if the
    parameter name starts with "nota", or in ``alternative_spelling`` if it
    starts with "alt".
    """
    arguments = template_node.template_parameters
    if param not in arguments:
        return

    cleaned = clean_node(wxr, None, arguments[param])
    if param.startswith("nota"):
        linkage_data.note = cleaned
        return
    if param.startswith("alt"):
        linkage_data.alternative_spelling = cleaned
def extract_alt_form_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Append the forms listed in a section to ``word_entry.forms``.

    Every direct link child that cleans to non-empty text becomes a
    ``Form`` tagged "alt-of". Only when the section has no link child at
    all is the fallback used: the children that are not level nodes are
    cleaned to text, one trailing "." is removed, and each non-empty
    comma-separated piece becomes a form.
    """
    link_count = 0
    for link in level_node.find_child(NodeKind.LINK):
        form_text = clean_node(wxr, None, link)
        link_count += 1
        if form_text == "":
            continue
        word_entry.forms.append(Form(form=form_text, tags=["alt-of"]))

    if link_count > 0:
        return

    # No links: fall back to the section's plain text.
    plain_nodes = list(level_node.invert_find_child(LEVEL_KIND_FLAGS))
    plain_text = clean_node(wxr, None, plain_nodes).removesuffix(".")
    for piece in plain_text.split(","):
        form_text = piece.strip()
        if form_text == "":
            continue
        word_entry.forms.append(Form(form=form_text, tags=["alt-of"]))
def extract_additional_information_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Process the supported templates among the section's direct children.

    Each child template named "cognados", "derivad" or "morfología" is
    passed to ``extract_cognados_template``; all other children are
    ignored.
    """
    handled_templates = ("cognados", "derivad", "morfología")
    for child in level_node.children:
        if not isinstance(child, TemplateNode):
            continue
        if child.template_name in handled_templates:
            extract_cognados_template(wxr, word_entry, child)
def extract_cognados_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Turn the ``span`` tags of an expanded template into linkage data.

    The template is converted back to wikitext and parsed again with full
    expansion. Every ``span`` tag in the result that cleans to non-empty
    text becomes a ``Linkage``. The list is then added to
    ``word_entry.cognates``, ``word_entry.derived`` or
    ``word_entry.morphologies`` for the templates "cognados", "derivad"
    and "morfología" respectively; for any other template name nothing is
    stored.
    """
    # https://es.wiktionary.org/wiki/Plantilla:cognados
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_root = wxr.wtp.parse(wikitext, expand_all=True)
    linkages = [
        Linkage(word=text)
        for span in expanded_root.find_html_recursively("span")
        if (text := clean_node(wxr, None, span)) != ""
    ]

    # Template name -> attribute of the word entry that receives the data.
    target_attr = {
        "cognados": "cognates",
        "derivad": "derived",
        "morfología": "morphologies",
    }.get(t_node.template_name)
    if target_attr is not None:
        getattr(word_entry, target_attr).extend(linkages)