Coverage for src/wiktextract/extractor/it/linkage.py: 92%
45 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor import LevelNode, NodeKind, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from .models import Linkage, WordEntry
def extract_linkage_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Collect the linkages of a section and attach them to matching entries.

    Every list item found in the lists directly under ``level_node`` is
    parsed; the ``"proverbs"`` linkage type uses the proverb parser and every
    other type uses the generic linkage parser.  The collected items are then
    appended to the attribute named ``linkage_type`` of each entry in
    ``page_data`` whose ``lang_code`` equals that of the last entry.

    Args:
        wxr: Extraction context forwarded to the list-item parsers.
        page_data: Word entries gathered so far; extended in place.
        level_node: Section node whose child lists are scanned.
        linkage_type: Name of the ``WordEntry`` list attribute to extend.
    """
    if linkage_type == "proverbs":
        parse_item = extract_proverb_list_item
    else:
        parse_item = extract_linkage_list_item

    collected = [
        linkage
        for list_node in level_node.find_child(NodeKind.LIST)
        for list_item in list_node.find_child(NodeKind.LIST_ITEM)
        for linkage in parse_item(wxr, list_item)
    ]

    # Nothing to attach to; also keeps the [-1] lookup below safe.
    if not page_data:
        return

    target_lang_code = page_data[-1].lang_code
    for entry in page_data:
        if entry.lang_code == target_lang_code:
            getattr(entry, linkage_type).extend(collected)
28def extract_linkage_list_item(
29 wxr: WiktextractContext, list_item: WikiNode
30) -> list[Linkage]:
31 raw_tags = []
32 linkages = []
33 for node in list_item.children:
34 if isinstance(node, WikiNode):
35 match node.kind:
36 case NodeKind.LINK:
37 node_str = clean_node(wxr, None, node)
38 if node_str != "": 38 ↛ 33line 38 didn't jump to line 33 because the condition on line 38 was always true
39 linkages.append(
40 Linkage(word=node_str, raw_tags=raw_tags)
41 )
42 raw_tags.clear()
43 case NodeKind.TEMPLATE | NodeKind.ITALIC: 43 ↛ 33line 43 didn't jump to line 33 because the pattern on line 43 always matched
44 node_str = clean_node(wxr, None, node)
45 if node_str.startswith("(") and node_str.endswith(")"): 45 ↛ 33line 45 didn't jump to line 33 because the condition on line 45 was always true
46 raw_tags.append(node_str.strip("()"))
47 elif isinstance(node, str): 47 ↛ 33line 47 didn't jump to line 33 because the condition on line 47 was always true
48 for word_str in node.split(","):
49 word_str = word_str.strip()
50 if word_str.startswith("(") and word_str.endswith(")"):
51 raw_tags.append(word_str.strip("()"))
52 elif word_str != "":
53 linkages.append(Linkage(word=word_str, raw_tags=raw_tags))
54 raw_tags.clear()
56 return linkages
def extract_proverb_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Linkage]:
    """Parse one proverb list item into at most one ``Linkage``.

    The proverb text is taken from an italic child node.  The first plain
    string child containing ``":"`` ends the scan: everything after that
    colon, together with all following children, is cleaned and stored as
    the sense.

    Args:
        wxr: Extraction context passed to ``clean_node``.
        list_item: The list item node to parse.

    Returns:
        A one-element list holding the proverb, or an empty list when no
        non-empty proverb text was found.
    """
    children = list_item.children
    proverb = Linkage(word="")

    for position, child in enumerate(children):
        if isinstance(child, WikiNode):
            if child.kind == NodeKind.ITALIC:
                proverb.word = clean_node(wxr, None, child)
            continue
        if not (isinstance(child, str) and ":" in child):
            continue
        # Explanation starts right after the first colon and runs to the
        # end of the list item.
        _, _, after_colon = child.partition(":")
        proverb.sense = clean_node(
            wxr, None, [after_colon] + children[position + 1 :]
        )
        break

    if proverb.word == "":
        return []
    return [proverb]