Coverage for src/wiktextract/extractor/it/example.py: 91%
85 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1from wikitextprocessor import NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..ruby import extract_ruby
6from ..share import calculate_bold_offsets
7from .models import Example, Sense
10def extract_example_list_item(
11 wxr: WiktextractContext, sense: Sense, list_item: WikiNode, lang_code: str
12) -> None:
13 examples = []
14 before_italic = True
15 text_nodes = []
16 shared_example = Example()
17 has_zh_tradsem = False
18 for index, node in enumerate(list_item.children):
19 if (
20 isinstance(node, TemplateNode)
21 and node.template_name == "zh-tradsem"
22 ):
23 examples.extend(extract_zh_tradsem(wxr, node))
24 has_zh_tradsem = True
25 elif isinstance(node, WikiNode):
26 match node.kind:
27 case NodeKind.ITALIC:
28 if lang_code in ["zh", "ja"]:
29 if before_italic: 29 ↛ 18line 29 didn't jump to line 18 because the condition on line 29 was always true
30 shared_example.roman = clean_node(wxr, sense, node)
31 calculate_bold_offsets(
32 wxr,
33 node,
34 shared_example.roman,
35 shared_example,
36 "bold_roman_offsets",
37 )
38 before_italic = False
39 else:
40 e_data = Example(text=clean_node(wxr, sense, node))
41 calculate_bold_offsets(
42 wxr, node, e_data.text, e_data, "bold_text_offsets"
43 )
44 examples.append(e_data)
45 case NodeKind.LIST:
46 for tr_list_item in node.find_child(NodeKind.LIST_ITEM):
47 shared_example.translation = clean_node(
48 wxr, sense, tr_list_item.children
49 )
50 calculate_bold_offsets(
51 wxr,
52 tr_list_item,
53 shared_example.translation,
54 shared_example,
55 "bold_translation_offsets",
56 )
57 case _ if lang_code in ["zh", "ja"]: 57 ↛ 18line 57 didn't jump to line 18 because the pattern on line 57 always matched
58 if before_italic: 58 ↛ 18line 58 didn't jump to line 18 because the condition on line 58 was always true
59 text_nodes.append(node)
60 elif isinstance(node, str) and "-" in node:
61 for t_node in list_item.find_child(NodeKind.TEMPLATE):
62 if t_node.template_name == "Term":
63 shared_example.ref = clean_node(wxr, None, t_node).strip(
64 "()"
65 )
66 break
67 tr_nodes = wxr.wtp.parse(
68 wxr.wtp.node_to_wikitext(
69 [node[node.index("-") + 1 :]]
70 + [
71 n
72 for n in list_item.children[index + 1 :]
73 if not (
74 isinstance(n, TemplateNode)
75 and n.template_name == "Term"
76 )
77 ]
78 )
79 )
80 shared_example.translation = clean_node(wxr, sense, tr_nodes)
81 calculate_bold_offsets(
82 wxr,
83 tr_nodes,
84 shared_example.translation,
85 shared_example,
86 "bold_translation_offsets",
87 )
88 if not has_zh_tradsem and len(examples) > 1: 88 ↛ 89line 88 didn't jump to line 89 because the condition on line 88 was never true
89 examples.clear()
90 text_node = wxr.wtp.parse(
91 wxr.wtp.node_to_wikitext(
92 list_item.children[:index] + [node[: node.index("-")]]
93 )
94 )
95 e_data = Example(text=clean_node(wxr, None, text_node))
96 calculate_bold_offsets(
97 wxr, text_node, e_data.text, e_data, "bold_text_offsets"
98 )
99 examples.append(e_data)
100 break
101 elif lang_code in ["zh", "ja"] and len(examples) == 0 and before_italic:
102 text_nodes.append(node)
104 if lang_code in ["zh", "ja"] and len(examples) == 0 and len(text_nodes) > 0:
105 expanded_nodes = wxr.wtp.parse(
106 wxr.wtp.node_to_wikitext(text_nodes), expand_all=True
107 )
108 example = Example()
109 example.ruby, node_without_ruby = extract_ruby(
110 wxr, expanded_nodes.children
111 )
112 example.text = (
113 clean_node(wxr, sense, node_without_ruby)
114 .replace(" ", "")
115 .strip("(")
116 )
117 calculate_bold_offsets(
118 wxr,
119 wxr.wtp.parse(wxr.wtp.node_to_wikitext(node_without_ruby)),
120 example.text,
121 example,
122 "bold_text_offsets",
123 )
124 examples.append(example)
126 if not has_zh_tradsem and len(examples) > 1:
127 examples.clear()
128 text_node = wxr.wtp.parse(
129 wxr.wtp.node_to_wikitext(
130 list(list_item.invert_find_child(NodeKind.LIST))
131 )
132 )
133 e_data = Example(text=clean_node(wxr, None, text_node))
134 calculate_bold_offsets(
135 wxr, text_node, e_data.text, e_data, "bold_text_offsets"
136 )
137 examples.append(e_data)
139 for example in examples:
140 for attr in [
141 "roman",
142 "bold_roman_offsets",
143 "translation",
144 "bold_translation_offsets",
145 "ref",
146 "text",
147 "bold_text_offsets",
148 ]:
149 value = getattr(shared_example, attr)
150 if len(value) > 0:
151 setattr(example, attr, value)
152 if len(example.text) > 0: 152 ↛ 139line 152 didn't jump to line 139 because the condition on line 152 was always true
153 sense.examples.append(example)
def extract_zh_tradsem(
    wxr: WiktextractContext, t_node: TemplateNode
) -> list[Example]:
    """Build examples from the positional arguments of a ``zh-tradsem`` node.

    Positional argument 1 is tagged "Traditional Chinese" and argument 2
    "Simplified Chinese". Each argument is cleaned and has its spaces
    removed; arguments that end up empty produce no example.

    Args:
        wxr: Extraction context; its ``wtp`` parser is used for re-parsing.
        t_node: The ``zh-tradsem`` template node.

    Returns:
        The examples created from the non-empty arguments, in argument order.
    """
    # https://it.wiktionary.org/wiki/Template:zh-tradsem
    tagged_args = (
        (1, "Traditional Chinese"),
        (2, "Simplified Chinese"),
    )
    examples = []
    for arg_index, tag in tagged_args:
        arg_value = t_node.template_parameters.get(arg_index, "")
        arg_value_str = clean_node(wxr, None, arg_value).replace(" ", "")
        if arg_value_str == "":
            continue
        example = Example(text=arg_value_str)
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(arg_value)),
            example.text,
            example,
            "bold_text_offsets",
        )
        example.tags.append(tag)
        examples.append(example)

    return examples