Coverage for src/wiktextract/extractor/vi/example.py: 50%
82 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1from wikitextprocessor import NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..ruby import extract_ruby
6from ..share import calculate_bold_offsets
7from .linkage import (
8 GLOSS_LIST_LINKAGE_TEMPLATES,
9 extract_gloss_list_linkage_template,
10)
11from .models import Example, Sense, WordEntry
12from .tags import translate_raw_tags
def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sense: Sense,
    list_item: WikiNode,
    ref: str = "",
):
    """Extract usage examples from one list item and add them to ``sense``.

    The children of ``list_item`` are scanned in order:

    * An italic node that contains bold text is treated as a plain example.
      Its cleaned text becomes the example text and everything after it in
      the list item becomes the translation. Scanning stops after the first
      such example.
    * ``ux``-style templates are handed to ``extract_ux_template``.
    * ``quote-*``/``RQ:*`` templates are handed to ``extract_quote_template``;
      the string it returns replaces ``ref`` for the rest of this list item.
    * Templates listed in ``GLOSS_LIST_LINKAGE_TEMPLATES`` are handed to
      ``extract_gloss_list_linkage_template``.
    * Nested lists are processed recursively, passing the current ``ref``.

    Args:
        wxr: Extraction context.
        word_entry: Entry that receives linkage data from linkage templates.
        sense: Sense whose ``examples`` list is extended.
        list_item: The list item node to scan.
        ref: Source text collected from an earlier quotation template. When
            non-empty it is stored on plain italic examples found here.
    """
    for index, node in enumerate(list_item.children):
        if (
            isinstance(node, WikiNode)
            and node.kind == NodeKind.ITALIC
            and node.contain_node(NodeKind.BOLD)
        ):
            e_text = clean_node(wxr, None, node)
            if e_text != "":
                e_data = Example(text=e_text)
                if ref != "":
                    # Keep the source gathered from a preceding quotation
                    # template instead of silently dropping it.
                    e_data.ref = ref
                calculate_bold_offsets(
                    wxr, node, e_text, e_data, "bold_text_offsets"
                )
                # Whatever follows the italic example is its translation;
                # strip the dash separators and surrounding whitespace.
                e_data.translation = clean_node(
                    wxr, None, list_item.children[index + 1 :]
                ).strip("—- \n")
                sense.examples.append(e_data)
                break
        elif isinstance(node, TemplateNode):
            if node.template_name in ("ux", "usex", "ux2", "uxi"):
                extract_ux_template(wxr, sense, node)
            elif node.template_name.startswith(("quote-", "RQ:")):
                ref = extract_quote_template(wxr, sense, node)
            elif node.template_name in GLOSS_LIST_LINKAGE_TEMPLATES:
                extract_gloss_list_linkage_template(
                    wxr,
                    word_entry,
                    node,
                    GLOSS_LIST_LINKAGE_TEMPLATES[node.template_name],
                    " ".join(word_entry.senses[-1].glosses)
                    if len(word_entry.senses) > 0
                    else "",
                )
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for child_list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_example_list_item(
                    wxr, word_entry, sense, child_list_item, ref
                )
def extract_ux_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
):
    """Build an ``Example`` from an expanded ``ux``-style template.

    The template is expanded and its HTML is searched for ``<i>`` and
    ``<span>`` tags whose ``class`` attribute marks the example text,
    transliteration, translation, literal meaning and qualifiers. The
    template's ``ref`` parameter is stored as the example reference. The
    example is appended to ``sense.examples`` only when its text is not
    empty; in that case raw tags are translated and the top-level link
    nodes of the expansion are cleaned against ``sense``.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    root = wxr.wtp.parse(wikitext, expand_all=True)
    example = Example(text="")

    def store(tag, field: str, offsets_field: str) -> None:
        # Save the cleaned tag text on the example and record bold offsets.
        setattr(example, field, clean_node(wxr, None, tag))
        calculate_bold_offsets(
            wxr, tag, getattr(example, field), example, offsets_field
        )

    for italic in root.find_html_recursively("i"):
        css = italic.attrs.get("class", "")
        if "e-example" in css:
            store(italic, "text", "bold_text_offsets")
        elif "e-transliteration" in css:
            store(italic, "roman", "bold_roman_offsets")

    for span in root.find_html_recursively("span"):
        css = span.attrs.get("class", "")
        if "e-translation" in css:
            store(span, "translation", "bold_translation_offsets")
        elif "e-literally" in css:
            store(span, "literal_meaning", "bold_literal_offsets")
        elif "qualifier-content" in css:
            qualifier = clean_node(wxr, None, span)
            if qualifier != "":
                example.raw_tags.append(qualifier)

    ref_arg = t_node.template_parameters.get("ref", "")
    example.ref = clean_node(wxr, None, ref_arg)

    if example.text == "":
        # Nothing usable was found; do not record an empty example.
        return
    translate_raw_tags(example)
    sense.examples.append(example)
    for link in root.find_child(NodeKind.LINK):
        clean_node(wxr, sense, link)
def extract_quote_template(
    wxr: WiktextractContext,
    sense: Sense,
    t_node: TemplateNode,
) -> str:
    """Extract a quotation example from a ``quote-*``/``RQ:*`` template.

    If the template has none of the ``text``, ``passage`` or ``7``
    parameters it carries no quoted passage: the cleaned template text is
    returned so the caller can use it as the reference, and nothing is
    added to ``sense.examples``.

    Otherwise the template is expanded and an ``Example`` is filled from
    the tagged HTML (source, quotation with ruby, translation and
    transliteration). The example is appended to ``sense.examples`` when
    its text is not empty, the expansion is cleaned against ``sense``, and
    an empty string is returned.

    Returns:
        The reference text when the template has no passage, else ``""``.
    """
    ref = ""
    if all(
        arg not in t_node.template_parameters for arg in ["text", "passage", 7]
    ):
        ref = clean_node(wxr, sense, t_node)
    else:
        expanded_node = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(t_node), expand_all=True
        )
        example = Example(text="")
        for span_tag in expanded_node.find_html_recursively("span"):
            span_class = span_tag.attrs.get("class", "")
            if "cited-source" == span_class:
                example.ref = clean_node(wxr, None, span_tag)
            elif "e-quotation" in span_class:
                example.ruby, node_without_ruby = extract_ruby(wxr, span_tag)
                example.text = clean_node(wxr, None, node_without_ruby)
                calculate_bold_offsets(
                    wxr, span_tag, example.text, example, "bold_text_offsets"
                )
            elif "e-translation" in span_class:
                example.translation = clean_node(wxr, None, span_tag)
                # Use the same offsets field name as extract_ux_template;
                # the previous "bold_translation_text" matched no other
                # offsets field used in this module.
                calculate_bold_offsets(
                    wxr,
                    span_tag,
                    example.translation,
                    example,
                    "bold_translation_offsets",
                )
        # Only the first transliteration tag is used.
        for i_tag in expanded_node.find_html_recursively(
            "i", attr_name="class", attr_value="e-transliteration"
        ):
            example.roman = clean_node(wxr, None, i_tag)
            calculate_bold_offsets(
                wxr, i_tag, example.roman, example, "bold_roman_offsets"
            )
            break
        if example.text != "":
            sense.examples.append(example)
        # Runs even when no example text was found.
        clean_node(wxr, sense, expanded_node)

    return ref