Coverage for src/wiktextract/extractor/it/example.py: 93%
73 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor import NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..ruby import extract_ruby
6from .models import Example, Sense
def extract_example_list_item(
    wxr: WiktextractContext, sense: Sense, list_item: WikiNode, lang_code: str
) -> None:
    """Collect the example(s) of one list item and attach them to `sense`.

    The children of `list_item` are scanned in order:

    * a `zh-tradsem` template contributes the examples returned by
      `extract_zh_tradsem`;
    * an italic node is taken as the romanization for "zh"/"ja" entries
      (first italic node only) and as the example text itself for every
      other language code;
    * a nested list supplies the translation (the last list item wins);
    * a plain string containing "-" ends the scan: the text after the
      dash plus the following siblings (minus any `Term` template) become
      the translation, and the first `Term` template of the list item,
      stripped of parentheses, becomes the reference.

    For "zh"/"ja" entries, nodes met before the first italic node are kept
    aside; if no example was produced otherwise, they are expanded and run
    through `extract_ruby` to build the example text and ruby data.

    Unless a `zh-tradsem` template was seen, more than one collected
    example is collapsed into a single example built from the raw nodes.

    Returns nothing; every example with non-empty text is appended to
    `sense.examples`.
    """
    is_cjk = lang_code in ["zh", "ja"]
    found = []
    pending_nodes = []
    roman = ""
    translation = ""
    ref = ""
    no_italic_yet = True
    has_zh_tradsem = False

    for pos, child in enumerate(list_item.children):
        if (
            isinstance(child, TemplateNode)
            and child.template_name == "zh-tradsem"
        ):
            found.extend(extract_zh_tradsem(wxr, child))
            has_zh_tradsem = True
            continue

        if isinstance(child, WikiNode):
            if child.kind == NodeKind.ITALIC:
                if not is_cjk:
                    found.append(Example(text=clean_node(wxr, sense, child)))
                elif no_italic_yet:
                    # Only the first italic node is the romanization.
                    roman = clean_node(wxr, sense, child)
                    no_italic_yet = False
            elif child.kind == NodeKind.LIST:
                for tr_item in child.find_child(NodeKind.LIST_ITEM):
                    translation = clean_node(wxr, sense, tr_item.children)
            elif is_cjk and no_italic_yet:
                pending_nodes.append(child)
            continue

        if isinstance(child, str) and "-" in child:
            dash_at = child.index("-")
            for template in list_item.find_child(NodeKind.TEMPLATE):
                if template.template_name == "Term":
                    ref = clean_node(wxr, None, template).strip("()")
                    break

            # Translation: text after the dash plus the remaining siblings,
            # leaving out the "Term" template already used for `ref`.
            after_dash = [child[dash_at + 1 :]]
            after_dash.extend(
                sibling
                for sibling in list_item.children[pos + 1 :]
                if not (
                    isinstance(sibling, TemplateNode)
                    and sibling.template_name == "Term"
                )
            )
            translation = clean_node(
                wxr, sense, wxr.wtp.node_to_wikitext(after_dash)
            )

            if not has_zh_tradsem and len(found) > 1:
                # Several fragments were collected: rebuild one example
                # from everything that precedes the dash.
                before_dash = list_item.children[:pos] + [child[:dash_at]]
                found.clear()
                found.append(Example(text=clean_node(wxr, None, before_dash)))
            break

        if is_cjk and not found and no_italic_yet:
            pending_nodes.append(child)

    if is_cjk and not found and pending_nodes:
        parsed = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(pending_nodes), expand_all=True
        )
        ruby_example = Example()
        ruby_example.ruby, plain_nodes = extract_ruby(wxr, parsed.children)
        cleaned = clean_node(wxr, sense, plain_nodes)
        ruby_example.text = cleaned.replace(" ", "").strip("(")
        found.append(ruby_example)

    if not has_zh_tradsem and len(found) > 1:
        # Collapse the fragments into one example made of every child
        # that is not a nested list.
        non_list_children = list(list_item.invert_find_child(NodeKind.LIST))
        found = [Example(text=clean_node(wxr, None, non_list_children))]

    for item in found:
        if roman != "":
            item.roman = roman
        if translation != "":
            item.translation = translation
        if ref != "":
            item.ref = ref
        if item.text != "":
            sense.examples.append(item)
def extract_zh_tradsem(
    wxr: WiktextractContext, t_node: TemplateNode
) -> list[Example]:
    """Build examples from the first two arguments of a `zh-tradsem` template.

    Argument 1 is tagged "Traditional Chinese" and argument 2 "Simplified
    Chinese". Each argument is cleaned with `clean_node`, has its spaces
    removed, and is skipped when the result is empty.

    Returns the examples in argument order.
    """
    # https://it.wiktionary.org/wiki/Template:zh-tradsem
    script_tags = (
        (1, "Traditional Chinese"),
        (2, "Simplified Chinese"),
    )
    results = []
    for position, tag in script_tags:
        raw_value = t_node.template_parameters.get(position, "")
        cleaned = clean_node(wxr, None, raw_value).replace(" ", "")
        if cleaned == "":
            continue
        entry = Example(text=cleaned)
        entry.tags.append(tag)
        results.append(entry)

    return results