Coverage for src/wiktextract/extractor/id/example.py: 41%
61 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1from wikitextprocessor import NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import calculate_bold_offsets
6from .linkage import extract_syn_template
7from .models import Example, Sense, WordEntry
def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sense: Sense,
    list_item: WikiNode,
) -> None:
    """Collect example data from one example list item.

    Walks the direct children of ``list_item`` and handles three shapes:

    * ``ux``/``uxi`` templates are passed to ``extract_ux_template``.
    * Synonym and antonym templates are passed to ``extract_syn_template``
      with ``"synonyms"`` or ``"antonyms"`` as the last argument.
    * An italic node is remembered as pending example text.  A nested list
      that follows it yields one ``Example`` per child list item, with the
      cleaned child item as the translation.  An italic node that is still
      pending when the children are exhausted becomes an example without a
      translation.

    Examples are appended to ``sense.examples``; nothing is returned.
    Italic nodes whose cleaned text is empty never produce an example.
    """
    italic_node = None
    for node in list_item.children:
        if isinstance(node, TemplateNode):
            if node.template_name in ("ux", "uxi"):
                extract_ux_template(wxr, sense, node)
            elif node.template_name in (
                "sinonim",
                "syn",
                "synonyms",
                "synonym of",
                "sinonim dari",
            ):
                extract_syn_template(wxr, word_entry, node, "synonyms")
            elif node.template_name == "antonim":
                extract_syn_template(wxr, word_entry, node, "antonyms")
        elif isinstance(node, WikiNode):
            if node.kind == NodeKind.ITALIC:
                # Remember the italic text until we know whether a nested
                # list with translations follows it.
                italic_node = node
            elif node.kind == NodeKind.LIST and italic_node is not None:
                italic_text = clean_node(wxr, None, italic_node)
                if italic_text == "":
                    continue
                for child_list_item in node.find_child(NodeKind.LIST_ITEM):
                    e_data = Example(
                        text=italic_text,
                        translation=clean_node(
                            wxr, sense, child_list_item.children
                        ),
                    )
                    calculate_bold_offsets(
                        wxr,
                        italic_node,
                        e_data.text,
                        e_data,
                        "bold_text_offsets",
                    )
                    calculate_bold_offsets(
                        wxr,
                        child_list_item,
                        e_data.translation,
                        e_data,
                        "bold_translation_offsets",
                    )
                    sense.examples.append(e_data)
                # The pending italic text has been consumed by this list.
                italic_node = None

    if italic_node is not None:
        italic_text = clean_node(wxr, None, italic_node)
        # An italic node can clean to "" (the list branch above skips it
        # without clearing it); do not record an example with no text,
        # matching the guard used in extract_ux_template.
        if italic_text != "":
            e_data = Example(text=italic_text)
            calculate_bold_offsets(
                wxr, italic_node, e_data.text, e_data, "bold_text_offsets"
            )
            sense.examples.append(e_data)
def extract_ux_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Build an ``Example`` from an expanded usage-example template.

    The template is converted back to wikitext and re-parsed with
    ``expand_all=True``.  In the expanded tree:

    * ``<i>`` tags whose class contains ``e-example`` or
      ``e-transliteration`` fill ``text`` and ``roman``;
    * ``<span>`` tags whose class contains ``e-translation`` or
      ``e-literally`` fill ``translation`` and ``literal_meaning``;
    * ``<span>`` tags whose class contains ``qualifier-content`` add a
      non-empty entry to ``raw_tags``.

    ``ref`` is taken from the template's ``ref`` parameter.  The example is
    appended to ``sense.examples`` only when its text is non-empty.  Finally
    every direct link child of the expanded tree is run through
    ``clean_node`` with ``sense`` as the target (presumably to record
    category links on the sense -- confirm against ``clean_node``).
    """
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)
    example = Example(text="")

    def store(tag, attr_name: str, offsets_name: str) -> None:
        # Assign the cleaned text, then compute bold offsets from the value
        # as stored on the model.
        setattr(example, attr_name, clean_node(wxr, None, tag))
        calculate_bold_offsets(
            wxr, tag, getattr(example, attr_name), example, offsets_name
        )

    for italic_tag in expanded.find_html_recursively("i"):
        css_class = italic_tag.attrs.get("class", "")
        if "e-example" in css_class:
            store(italic_tag, "text", "bold_text_offsets")
        elif "e-transliteration" in css_class:
            store(italic_tag, "roman", "bold_roman_offsets")

    for span in expanded.find_html_recursively("span"):
        css_class = span.attrs.get("class", "")
        if "e-translation" in css_class:
            store(span, "translation", "bold_translation_offsets")
        elif "e-literally" in css_class:
            store(span, "literal_meaning", "bold_literal_offsets")
        elif "qualifier-content" in css_class:
            qualifier = clean_node(wxr, None, span)
            if qualifier != "":
                example.raw_tags.append(qualifier)

    ref_arg = t_node.template_parameters.get("ref", "")
    example.ref = clean_node(wxr, None, ref_arg)
    if example.text != "":
        sense.examples.append(example)

    # Links are processed whether or not an example was recorded.
    for link in expanded.find_child(NodeKind.LINK):
        clean_node(wxr, sense, link)