Coverage for src / wiktextract / extractor / ku / sound.py: 89%
66 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Hyphenation, Sound, WordEntry
7from .tags import translate_raw_tags
def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Feed every list item of a sound section to the list-item extractor.

    Each ``LIST`` node yielded by ``level_node.find_child`` is scanned for
    ``LIST_ITEM`` nodes, and every item is passed to
    ``extract_sound_list_item`` together with ``wxr`` and ``word_entry``.
    """
    # Lazy generator: items are produced and handled one at a time, in the
    # same order as a pair of nested loops would visit them.
    sound_items = (
        item
        for sound_list in level_node.find_child(NodeKind.LIST)
        for item in sound_list.find_child(NodeKind.LIST_ITEM)
    )
    for item in sound_items:
        extract_sound_list_item(wxr, word_entry, item)
def extract_sound_list_item(
    wxr: WiktextractContext, word_entry: WordEntry, list_item: WikiNode
) -> None:
    """Dispatch the children of one list item to the matching extractor.

    Template children are routed by template name:

    * ``ku-IPA`` / ``IPA-ku`` -> ``extract_ku_ipa_template``
    * ``deng`` / ``sound`` -> ``extract_deng_template``
    * ``ku-kîte`` -> ``extract_ku_kîte``
    * ``kîte`` -> ``extract_kîte_template`` (also receives the raw tags
      collected so far in this list item)
    * any other name ending in ``"."`` -> its cleaned text, minus a trailing
      ``":"``, is collected as a raw tag when non-empty

    Non-template ``LIST`` children are processed recursively, one call per
    nested ``LIST_ITEM``; each recursive call starts with its own empty
    raw-tag list.
    """
    pending_tags = []
    for child in list_item.children:
        if not isinstance(child, TemplateNode):
            # Only nested lists are of interest among non-template children.
            if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
                for nested_item in child.find_child(NodeKind.LIST_ITEM):
                    extract_sound_list_item(wxr, word_entry, nested_item)
            continue

        name = child.template_name
        if name in ("ku-IPA", "IPA-ku"):
            extract_ku_ipa_template(wxr, word_entry, child)
        elif name in ("deng", "sound"):
            extract_deng_template(wxr, word_entry, child)
        elif name == "ku-kîte":
            extract_ku_kîte(wxr, word_entry, child)
        elif name == "kîte":
            extract_kîte_template(wxr, word_entry, child, pending_tags)
        elif name.endswith("."):
            label = clean_node(wxr, None, child).removesuffix(":")
            if label != "":
                pending_tags.append(label)
def extract_ku_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Collect IPA strings from an expanded IPA template.

    The template is converted back to wikitext and re-parsed with
    ``expand_all=True``. Every ``<span class="IPA">`` found in the result
    whose cleaned text is non-empty is appended to ``word_entry.sounds`` as
    a ``Sound``.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)
    ipa_spans = expanded.find_html("span", attr_name="class", attr_value="IPA")
    for ipa_span in ipa_spans:
        candidate = Sound(ipa=clean_node(wxr, None, ipa_span))
        if candidate.ipa == "":
            continue
        word_entry.sounds.append(candidate)
    # Final pass over the whole expansion with word_entry as the target
    # (presumably so clean_node can record page-level data such as
    # categories on the entry; confirm against clean_node).
    clean_node(wxr, word_entry, expanded)
def extract_deng_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Build a ``Sound`` from a ``deng``/``sound`` template's parameters.

    Parameters read from the template:

    * ``ipa`` - cleaned text stored as the sound's IPA.
    * ``4``, falling back to ``dever`` - comma-separated raw tags; empty
      pieces are dropped.
    * ``2`` - the audio file name.

    The sound is appended to ``word_entry.sounds`` only when the file name
    is non-empty; in that case the file URL fields are filled in and the raw
    tags are translated first.
    """
    params = t_node.template_parameters
    sound = Sound(ipa=clean_node(wxr, None, params.get("ipa", "")))

    tag_source = params.get(4, params.get("dever", ""))
    tag_text = clean_node(wxr, None, tag_source)
    stripped_pieces = (piece.strip() for piece in tag_text.split(","))
    sound.raw_tags.extend(piece for piece in stripped_pieces if piece != "")

    audio_file = clean_node(wxr, None, params.get(2, ""))
    if audio_file != "":
        set_sound_file_url_fields(wxr, audio_file, sound)
        translate_raw_tags(sound)
        word_entry.sounds.append(sound)
    # Runs regardless of whether a sound was stored; word_entry is the
    # target here (presumably so clean_node can record page-level data such
    # as categories; confirm against clean_node).
    clean_node(wxr, word_entry, t_node)
def extract_ku_kîte(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Extract a hyphenation from an expanded ``ku-kîte`` template.

    The template is re-parsed with ``expand_all=True``. The first string
    child containing ``":"`` marks the start of the hyphenation: everything
    after that colon, plus all following children, is cleaned and stripped.
    A non-empty result is split on ``"·"`` and stored as a ``Hyphenation``
    on ``word_entry``. Only the first such string child is considered.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    children = wxr.wtp.parse(wikitext, expand_all=True).children
    for position, child in enumerate(children):
        if not isinstance(child, str) or ":" not in child:
            continue
        # Keep only what follows the first colon in this string.
        _, _, after_colon = child.partition(":")
        remainder = [after_colon] + children[position + 1 :]
        text = clean_node(wxr, None, remainder).strip()
        if text != "":
            word_entry.hyphenations.append(Hyphenation(parts=text.split("·")))
        # Stop at the first colon-bearing string even when nothing was kept.
        break
def extract_kîte_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Extract hyphenations from an expanded ``kîte`` template.

    The template is re-parsed with ``expand_all=True``. Its first positional
    parameter is cleaned and used as the ``lang`` attribute value to look
    for; every matching ``<span>`` whose cleaned text is non-empty is split
    on ``"‧"`` and appended to ``word_entry.hyphenations``.

    Args:
        wxr: Extraction context providing the wikitext parser.
        word_entry: Entry that receives the ``Hyphenation`` objects.
        t_node: The ``kîte`` template node.
        raw_tags: Raw tags collected by the caller so far. They are attached
            to every hyphenation created here and then translated via
            ``translate_raw_tags``. The list itself is not modified.
    """
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    lang_code = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    for span in expanded_node.find_html(
        "span", attr_name="lang", attr_value=lang_code
    ):
        h_str = clean_node(wxr, None, span)
        if h_str == "":
            continue
        # Pass a snapshot: the caller keeps appending to raw_tags after this
        # call, and whether Hyphenation copies its input is not visible
        # here. A copy guarantees stored entries never alias that list.
        h_data = Hyphenation(parts=h_str.split("‧"), raw_tags=list(raw_tags))
        translate_raw_tags(h_data)
        word_entry.hyphenations.append(h_data)