Coverage for src/wiktextract/extractor/it/linkage.py: 74%
59 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1from wikitextprocessor import LevelNode, NodeKind, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from .models import Form, Linkage, WordEntry
6from .tags import translate_raw_tags
def extract_linkage_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Gather linkages from the lists under ``level_node`` and attach them.

    Every list item found in the direct child lists of ``level_node`` is
    parsed with ``extract_proverb_list_item`` when ``linkage_type`` is
    ``"proverbs"``, otherwise with ``extract_linkage_list_item``.  Each
    resulting item is passed through ``translate_raw_tags`` and the whole
    batch is appended to the ``linkage_type`` attribute of every entry in
    ``page_data`` whose ``lang_code`` equals that of the last entry.
    """
    # The parser only depends on the section type, so pick it once.
    parse_item = (
        extract_proverb_list_item
        if linkage_type == "proverbs"
        else extract_linkage_list_item
    )
    collected = [
        linkage
        for list_node in level_node.find_child(NodeKind.LIST)
        for item_node in list_node.find_child(NodeKind.LIST_ITEM)
        for linkage in parse_item(wxr, item_node)
    ]

    for linkage in collected:
        translate_raw_tags(linkage)

    for entry in page_data:
        # Only entries in the same language as the most recent one
        # receive the linkages.
        if entry.lang_code != page_data[-1].lang_code:
            continue
        getattr(entry, linkage_type).extend(collected)
def extract_linkage_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Linkage]:
    """Turn the children of one list item into ``Linkage`` objects.

    Link nodes and comma-separated pieces of plain text become linkage
    words.  Template/italic nodes and text pieces that are wrapped in
    parentheses are collected as raw tags and attached to the next word
    that is produced.
    """
    # NOTE(review): ``pending_tags`` is cleared right after being handed to
    # ``Linkage``; this presumes the model keeps its own copy of the list.
    # Confirm against the ``Linkage`` definition.
    pending_tags = []
    results = []
    for child in list_item.children:
        if isinstance(child, str):
            for piece in child.split(","):
                piece = piece.strip()
                if piece == "":
                    continue
                if piece.startswith("(") and piece.endswith(")"):
                    pending_tags.append(piece.strip("()"))
                else:
                    results.append(Linkage(word=piece, raw_tags=pending_tags))
                    pending_tags.clear()
            continue

        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LINK:
            link_text = clean_node(wxr, None, child)
            if link_text != "":
                results.append(Linkage(word=link_text, raw_tags=pending_tags))
                pending_tags.clear()
        elif child.kind in (NodeKind.TEMPLATE, NodeKind.ITALIC):
            tag_text = clean_node(wxr, None, child)
            # Only parenthesised text counts as a tag.
            if tag_text.startswith("(") and tag_text.endswith(")"):
                pending_tags.append(tag_text.strip("()"))
    return results
def extract_proverb_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Linkage]:
    """Extract a proverb and its explanation from one list item.

    The cleaned text of an italic child becomes the proverb's ``word``.
    The first string child containing ``":"`` ends the scan: the text
    after that colon, together with all following children, is cleaned
    and stored as ``sense``.  Returns a one-element list, or an empty
    list when no proverb text was found.
    """
    proverb = Linkage(word="")
    children = list_item.children
    for position, child in enumerate(children):
        if isinstance(child, str):
            if ":" not in child:
                continue
            # Everything after the first colon belongs to the explanation.
            after_colon = child.partition(":")[2]
            proverb.sense = clean_node(
                wxr, None, [after_colon] + children[position + 1 :]
            )
            break
        if isinstance(child, WikiNode) and child.kind == NodeKind.ITALIC:
            proverb.word = clean_node(wxr, None, child)

    if proverb.word == "":
        return []
    return [proverb]
def extract_form_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Collect linked words under ``level_node`` as ``Form`` objects.

    Each link node found directly inside the list items of the section's
    child lists yields one ``Form`` (links whose cleaned text is empty
    are skipped).  The forms are appended to ``forms`` of every entry in
    ``page_data`` whose ``lang_code`` equals that of the last entry.
    """
    found_forms = [
        Form(form=text)
        for list_node in level_node.find_child(NodeKind.LIST)
        for item_node in list_node.find_child(NodeKind.LIST_ITEM)
        for link_node in item_node.find_child(NodeKind.LINK)
        if (text := clean_node(wxr, None, link_node)) != ""
    ]

    for entry in page_data:
        if entry.lang_code != page_data[-1].lang_code:
            continue
        entry.forms.extend(found_forms)