Coverage for src/wiktextract/extractor/id/sound.py: 66%
58 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Hyphenation, Sound, WordEntry
7from .tags import translate_raw_tags
def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Run the list-item extractor over every list item of a section.

    For each ``LIST`` node returned by ``level_node.find_child`` and each
    ``LIST_ITEM`` node returned by that list's ``find_child``, call
    ``extract_sound_list_item`` with ``word_entry`` as the target.
    """
    # Flattened lazily, so items are still visited list by list, in order.
    list_items = (
        item
        for sound_list in level_node.find_child(NodeKind.LIST)
        for item in sound_list.find_child(NodeKind.LIST_ITEM)
    )
    for item in list_items:
        extract_sound_list_item(wxr, word_entry, item)
def extract_sound_list_item(
    wxr: WiktextractContext, word_entry: WordEntry, list_item: WikiNode
) -> None:
    """Extract pronunciation data from one list item into ``word_entry``.

    The children of ``list_item`` are visited in order:

    * nested lists are processed recursively, one list item at a time
      (each nested item starts with its own empty raw-tag list);
    * ``IPA``, ``audio``, ``ejaan:id`` and ``rhymes`` templates are handed
      to their dedicated extractors together with the raw tags collected
      so far;
    * the cleaned text of an ``a`` template, with surrounding parentheses
      stripped, is collected as a raw tag;
    * plain text starting with ``Hifenasi:`` is split on ``‧`` and stored
      as a hyphenation, unless no non-empty part remains.
    """
    raw_tags = []
    for node in list_item.children:
        if isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for child_list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_sound_list_item(wxr, word_entry, child_list_item)
        elif isinstance(node, TemplateNode):
            if node.template_name == "IPA":
                extract_ipa_template(wxr, word_entry, node, raw_tags)
            elif node.template_name == "audio":
                extract_audio_template(wxr, word_entry, node, raw_tags)
            elif node.template_name == "ejaan:id":
                extract_ejaan_id_template(wxr, word_entry, node, raw_tags)
            elif node.template_name == "a":
                raw_tag = clean_node(wxr, None, node).strip("()")
                if raw_tag != "":
                    raw_tags.append(raw_tag)
            elif node.template_name == "rhymes":
                extract_rhymes_template(wxr, word_entry, node, raw_tags)
        elif isinstance(node, str) and node.strip().startswith("Hifenasi:"):
            hyphenation_text = node.strip().removeprefix("Hifenasi:").strip()
            # "".split("‧") returns [""], so a bare "Hifenasi:" label (for
            # example when the syllables live in a following node) would
            # otherwise be stored as a hyphenation with one empty part.
            parts = [
                part for part in hyphenation_text.split("‧") if part != ""
            ]
            if parts:
                word_entry.hyphenations.append(Hyphenation(parts=parts))
def extract_ipa_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Store the first positional argument of an ``IPA`` template as a sound.

    The argument is cleaned with ``clean_node``; when the resulting ``ipa``
    value is empty nothing is added. Otherwise the sound's raw tags are run
    through ``translate_raw_tags`` and the sound is appended to
    ``word_entry.sounds``.
    """
    ipa_text = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    ipa_sound = Sound(ipa=ipa_text, raw_tags=raw_tags)
    if ipa_sound.ipa == "":
        return
    translate_raw_tags(ipa_sound)
    word_entry.sounds.append(ipa_sound)
def extract_audio_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Store the audio file named by an ``audio`` template as a sound.

    Positional argument 2 is read as the file name and positional
    argument 3 as an optional extra raw tag. A sound is appended to
    ``word_entry.sounds`` only when the file name is non-empty. The whole
    template is passed to ``clean_node`` together with ``word_entry`` in
    every case.
    """
    params = t_node.template_parameters
    audio_file = clean_node(wxr, None, params.get(2, ""))
    audio_sound = Sound(raw_tags=raw_tags)
    if audio_file != "":
        set_sound_file_url_fields(wxr, audio_file, audio_sound)
        label = clean_node(wxr, None, params.get(3, ""))
        if label != "":
            audio_sound.raw_tags.append(label)
        translate_raw_tags(audio_sound)
        word_entry.sounds.append(audio_sound)
    # NOTE(review): the return value is discarded; presumably this call lets
    # clean_node record template-derived data on word_entry - confirm
    # against clean_node.
    clean_node(wxr, word_entry, t_node)
def extract_ejaan_id_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Store the cleaned output of an ``ejaan:id`` template as an IPA sound.

    The whole template node is passed to ``clean_node`` and the result is
    used as the ``ipa`` value. Nothing is added when that value is empty;
    otherwise raw tags are translated and the sound is appended to
    ``word_entry.sounds``.
    """
    pronunciation = clean_node(wxr, None, t_node)
    ejaan_sound = Sound(ipa=pronunciation, raw_tags=raw_tags)
    if ejaan_sound.ipa == "":
        return
    translate_raw_tags(ejaan_sound)
    word_entry.sounds.append(ejaan_sound)
def extract_rhymes_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add one rhyme sound per link produced by a ``rhymes`` template.

    The template node is converted back to wikitext and re-parsed with
    ``expand_all=True``. Every ``LINK`` node returned by the parsed
    result's ``find_child`` whose cleaned text is non-empty becomes a
    ``Sound`` with ``rhymes`` set, which is appended to
    ``word_entry.sounds``.
    """
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    for link_node in expanded_node.find_child(NodeKind.LINK):
        rhyme = clean_node(wxr, None, link_node)
        if rhyme == "":
            continue
        sound = Sound(rhymes=rhyme, raw_tags=raw_tags)
        # Translate raw tags just like the IPA, audio and ejaan:id
        # extractors do, so rhyme entries are not the only sounds left
        # with untranslated raw tags.
        translate_raw_tags(sound)
        word_entry.sounds.append(sound)