Coverage for src/wiktextract/extractor/ku/example.py: 86%
65 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import calculate_bold_offsets
6from .linkage import LINKAGE_TEMPLATES, extract_nyms_template
7from .models import Example, Sense, WordEntry
def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sense: Sense,
    list_item: WikiNode,
) -> None:
    """Collect example data from one list item and store it on ``sense``.

    Each child of ``list_item`` is dispatched by type:

    * template nodes go to the matching template extractor (quotation,
      usage example, audio, or linkage templates);
    * nested lists are walked recursively, item by item;
    * italic nodes become a plain ``Example`` built from their cleaned text.

    Other children are ignored.
    """
    for child in list_item.children:
        if isinstance(child, TemplateNode):
            name = child.template_name
            if name == "jêder" or name.startswith("jêder-"):
                extract_jêder_template(wxr, sense, child)
            elif name in ["mk", "mînak", "ux", "nimûne", "nim"]:
                extract_nimûne_template(wxr, sense, child)
            elif name in ["deng", "audio"] and len(sense.examples) > 0:
                # Imported here rather than at module level, as in the
                # original code (presumably to avoid an import cycle).
                from .sound import extract_deng_template

                # The audio template is attached to the most recently
                # added example; its categories are copied to the sense.
                latest_example = sense.examples[-1]
                extract_deng_template(wxr, latest_example, child)
                sense.categories.extend(latest_example.categories)
            elif name in LINKAGE_TEMPLATES:
                extract_nyms_template(wxr, word_entry, child)
            # A template node never falls through to the list/italic
            # handling below.
            continue

        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_example_list_item(wxr, word_entry, sense, nested_item)
        elif child.kind == NodeKind.ITALIC:
            example = Example(text=clean_node(wxr, None, child))
            calculate_bold_offsets(
                wxr, child, example.text, example, "bold_text_offsets"
            )
            # Examples whose cleaned text is empty are dropped.
            if example.text != "":
                sense.examples.append(example)
def extract_jêder_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Build an ``Example`` from a "jêder" (quotation) template.

    The example text, romanization and translation are read from the
    template arguments ``jêgirtin``, ``tr`` and ``werger``. The reference
    is read from ``<span class="jeder">`` elements in the expanded
    template. The example is appended to ``sense.examples`` only when its
    text is not empty.
    """
    # https://ku.wiktionary.org/wiki/Şablon:jêder
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)

    raw_text = t_node.template_parameters.get("jêgirtin", "")
    raw_roman = t_node.template_parameters.get("tr", "")
    raw_translation = t_node.template_parameters.get("werger", "")
    example = Example(
        text=clean_node(wxr, None, raw_text),
        roman=clean_node(wxr, None, raw_roman),
        translation=clean_node(wxr, None, raw_translation),
    )

    # (raw template argument, Example field holding its cleaned value,
    #  Example field receiving the bold offsets)
    offset_jobs = (
        (raw_text, "text", "bold_text_offsets"),
        (raw_roman, "roman", "bold_roman_offsets"),
        (raw_translation, "translation", "bold_translation_offsets"),
    )
    for raw_value, value_field, offsets_field in offset_jobs:
        # The raw argument is re-parsed (without expansion) so that the
        # offsets are computed from a node tree.
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(raw_value)),
            getattr(example, value_field),
            example,
            offsets_field,
        )

    # If several matching spans exist, the last one wins.
    for ref_span in expanded.find_html(
        "span", attr_name="class", attr_value="jeder"
    ):
        example.ref = clean_node(wxr, None, ref_span).strip("— ()")

    if example.text != "":
        sense.examples.append(example)
    # Always run clean_node with the sense as target, even when no example
    # was stored. NOTE(review): presumably this records categories emitted
    # by the template on the sense — confirm against clean_node.
    clean_node(wxr, sense, expanded)
def extract_nimûne_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Build an ``Example`` from a "nimûne" (usage example) template.

    The template is expanded and the result is scanned for HTML tags:
    ``<i>`` tags whose class contains ``e-example`` or
    ``e-transliteration`` provide the text and romanization, and
    ``<span>`` tags whose class contains ``e-translation`` provide the
    translation. The example is appended to ``sense.examples`` only when
    its text is not empty.
    """
    # https://ku.wiktionary.org/wiki/Şablon:nimûne
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)
    example = Example(text="")

    # (class marker, Example value field, Example bold-offsets field);
    # checked in this order, the first matching marker wins.
    italic_targets = (
        ("e-example", "text", "bold_text_offsets"),
        ("e-transliteration", "roman", "bold_roman_offsets"),
    )
    for italic_tag in expanded.find_html_recursively("i"):
        tag_class = italic_tag.attrs.get("class", "")
        for marker, value_field, offsets_field in italic_targets:
            if marker not in tag_class:
                continue
            setattr(example, value_field, clean_node(wxr, None, italic_tag))
            calculate_bold_offsets(
                wxr,
                italic_tag,
                getattr(example, value_field),
                example,
                offsets_field,
            )
            break

    for span in expanded.find_html_recursively("span"):
        if "e-translation" not in span.attrs.get("class", ""):
            continue
        example.translation = clean_node(wxr, None, span)
        calculate_bold_offsets(
            wxr,
            span,
            example.translation,
            example,
            "bold_translation_offsets",
        )

    if example.text != "":
        sense.examples.append(example)
    # Always run clean_node with the sense as target, even when no example
    # was stored. NOTE(review): presumably this records categories emitted
    # by the template on the sense — confirm against clean_node.
    clean_node(wxr, sense, expanded)
def extract_example_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Extract examples from an example section into the first sense.

    Every list item of every list directly under ``level_node`` is passed
    to ``extract_example_list_item`` with ``word_entry.senses[0]`` as the
    target sense. Nothing happens when ``word_entry`` has no senses.
    """
    if len(word_entry.senses) == 0:
        # No sense to attach the examples to.
        return

    for example_list in level_node.find_child(NodeKind.LIST):
        for item in example_list.find_child(NodeKind.LIST_ITEM):
            extract_example_list_item(
                wxr, word_entry, word_entry.senses[0], item
            )