Coverage for src/wiktextract/extractor/pt/example.py: 90%
62 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-13 10:14 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-13 10:14 +0000
1import re
3from wikitextprocessor import (
4 HTMLNode,
5 NodeKind,
6 TemplateNode,
7 WikiNode,
8)
10from ...page import clean_node
11from ...wxr_context import WiktextractContext
12from ..share import calculate_bold_offsets
13from .models import Example, Sense
16def extract_example_list_item(
17 wxr: WiktextractContext,
18 sense: Sense,
19 list_item: WikiNode,
20) -> None:
21 example = Example()
22 ref_nodes = []
24 for index, node in enumerate(list_item.children):
25 if (
26 isinstance(node, WikiNode)
27 and node.kind == NodeKind.ITALIC
28 and example.text == ""
29 ):
30 example.text = clean_node(wxr, None, node)
31 calculate_bold_offsets(
32 wxr, node, example.text, example, "bold_text_offsets"
33 )
34 elif isinstance(node, HTMLNode) and node.tag == "small":
35 example.translation = clean_node(wxr, None, node)
36 if example.translation.startswith( 36 ↛ 39line 36 didn't jump to line 39 because the condition on line 36 was never true
37 "("
38 ) and example.translation.endswith(")"):
39 example.translation = example.translation.strip("()")
40 elif isinstance(node, TemplateNode):
41 match node.template_name:
42 case "OESP":
43 example.ref = clean_node(wxr, sense, node).strip("()")
44 case "tradex": 44 ↛ 58line 44 didn't jump to line 58 because the pattern on line 44 always matched
45 second_arg = node.template_parameters.get(2, "")
46 example.text = clean_node(wxr, None, second_arg)
47 calculate_bold_offsets(
48 wxr,
49 wxr.wtp.parse(wxr.wtp.node_to_wikitext(second_arg)),
50 example.text,
51 example,
52 "bold_text_offsets",
53 )
54 example.translation = clean_node(
55 wxr, None, node.template_parameters.get(3, "")
56 )
57 clean_node(wxr, sense, node)
58 case "Ex.":
59 example.text = clean_node(
60 wxr, sense, node.template_parameters.get(1, "")
61 )
62 elif isinstance(node, WikiNode) and node.kind == NodeKind.BOLD:
63 bold_str = clean_node(wxr, None, node)
64 if re.fullmatch(r"\d+", bold_str) is not None:
65 list_item_str = clean_node(
66 wxr,
67 None,
68 list(
69 list_item.invert_find_child(
70 NodeKind.LIST, include_empty_str=True
71 )
72 ),
73 )
74 if list_item_str.endswith(":"): 74 ↛ 24line 74 didn't jump to line 24 because the condition on line 74 was always true
75 ref_nodes.clear()
76 example.ref = list_item_str
77 for child_list in list_item.find_child(NodeKind.LIST):
78 for child_list_item in child_list.find_child(
79 NodeKind.LIST_ITEM
80 ):
81 example.text = clean_node(
82 wxr, None, child_list_item.children
83 )
84 calculate_bold_offsets(
85 wxr,
86 child_list_item,
87 example.text,
88 example,
89 "bold_text_offsets",
90 )
91 break
92 elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
93 ref_nodes.clear()
94 for child_list_item in node.find_child(NodeKind.LIST_ITEM):
95 ref_nodes.append(child_list_item.children)
96 else:
97 ref_nodes.append(node)
99 if example.text != "":
100 if example.ref == "":
101 example.ref = clean_node(wxr, sense, ref_nodes).strip(":() \n")
102 sense.examples.append(example)
103 else:
104 extract_example_text_list(wxr, sense, list_item)
def extract_example_text_list(
    wxr: WiktextractContext,
    sense: Sense,
    list_item: WikiNode,
) -> None:
    """Extract a plain-text example of the form ``text - translation``.

    The text of ``list_item`` (without its nested lists) becomes the
    example text. If it contains a hyphen with whitespace on at least one
    side, the part before that hyphen is kept as the text and the part
    after it becomes the translation. Hyphens inside a word, such as
    "guarda-chuva", are not treated as the separator.

    The example is appended to ``sense.examples`` only when its text is
    not empty.

    Args:
        wxr: Extraction context passed through to the helper functions.
        sense: Sense that receives the example in ``sense.examples``.
        list_item: The list item node holding the example.
    """
    e_nodes = list(
        list_item.invert_find_child(NodeKind.LIST, include_empty_str=True)
    )
    example = Example(text=clean_node(wxr, sense, e_nodes))

    # A separator hyphen has whitespace before or after it; a hyphen that
    # joins two parts of one word has neither and must not split the text.
    separator = re.search(r"(?<=\s)-|-(?=\s)", example.text)
    if separator is not None:
        example.translation = example.text[separator.end() :].strip()
        example.text = example.text[: separator.start()].strip()

    if example.text:
        # The nodes are re-parsed from their wikitext so that
        # calculate_bold_offsets receives a parsed node tree.
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(e_nodes)),
            example.text,
            example,
            "bold_text_offsets",
        )
        sense.examples.append(example)