Coverage for src/wiktextract/extractor/id/sound.py: 66%
58 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Sound, WordEntry
7from .tags import translate_raw_tags
def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Process every list item found in the lists of a sound section.

    Each ``LIST`` child of ``level_node`` is scanned for ``LIST_ITEM``
    children, and every item is handed to ``extract_sound_list_item``,
    which stores whatever it extracts on ``word_entry``.
    """
    # Lazily flatten "lists -> items" so items are still visited in
    # document order, one at a time.
    sound_items = (
        item
        for sound_list in level_node.find_child(NodeKind.LIST)
        for item in sound_list.find_child(NodeKind.LIST_ITEM)
    )
    for item in sound_items:
        extract_sound_list_item(wxr, word_entry, item)
def extract_sound_list_item(
    wxr: WiktextractContext, word_entry: WordEntry, list_item: WikiNode
) -> None:
    """Extract sound data from one list item and its nested lists.

    The children of ``list_item`` are visited in order:

    * a nested ``LIST`` node is processed recursively, item by item
      (each nested item starts with its own empty raw-tag list);
    * an ``a`` template contributes its cleaned text, with surrounding
      parentheses stripped, as a raw tag for the templates that follow it
      in this item;
    * ``IPA``, ``audio``, ``ejaan:id`` and ``rhymes`` templates are passed
      to their dedicated extractor together with the raw tags collected
      so far;
    * a plain string starting with ``Hifenasi:`` sets
      ``word_entry.hyphenation`` to the text after that prefix.
    """
    # Templates that are delegated to a dedicated extractor function.
    template_handlers = {
        "IPA": extract_ipa_template,
        "audio": extract_audio_template,
        "ejaan:id": extract_ejaan_id_template,
        "rhymes": extract_rhymes_template,
    }
    hyphenation_prefix = "Hifenasi:"
    raw_tags = []

    for child in list_item.children:
        # Nested list: recurse into each of its items.
        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_sound_list_item(wxr, word_entry, nested_item)
            continue

        if isinstance(child, TemplateNode):
            name = child.template_name
            if name == "a":
                tag_text = clean_node(wxr, None, child).strip("()")
                if tag_text != "":
                    raw_tags.append(tag_text)
            elif name in template_handlers:
                template_handlers[name](wxr, word_entry, child, raw_tags)
            continue

        if isinstance(child, str):
            text = child.strip()
            if text.startswith(hyphenation_prefix):
                word_entry.hyphenation = text.removeprefix(
                    hyphenation_prefix
                ).strip()
def extract_ipa_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add a sound with the IPA text from an ``IPA`` template.

    The IPA string is the cleaned value of the template's first positional
    parameter. Nothing is added to ``word_entry.sounds`` when it is empty.
    """
    ipa_arg = t_node.template_parameters.get(1, "")
    sound = Sound(ipa=clean_node(wxr, None, ipa_arg), raw_tags=raw_tags)
    if sound.ipa == "":
        return
    translate_raw_tags(sound)
    word_entry.sounds.append(sound)
def extract_audio_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add a sound describing the audio file of an ``audio`` template.

    The file name is the cleaned second positional parameter; the cleaned
    third positional parameter, when non-empty, is appended to the sound's
    raw tags. A sound is only added to ``word_entry.sounds`` when the file
    name is non-empty.
    """
    params = t_node.template_parameters
    filename = clean_node(wxr, None, params.get(2, ""))
    sound = Sound(raw_tags=raw_tags)
    has_file = filename != ""
    if has_file:
        set_sound_file_url_fields(wxr, filename, sound)
        extra_tag = clean_node(wxr, None, params.get(3, ""))
        if extra_tag != "":
            sound.raw_tags.append(extra_tag)
        translate_raw_tags(sound)
        word_entry.sounds.append(sound)
    # Runs regardless of the file name, with the entry passed instead of
    # None. NOTE(review): presumably lets clean_node record template side
    # data (e.g. categories) on the entry; confirm against clean_node.
    clean_node(wxr, word_entry, t_node)
def extract_ejaan_id_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add a sound whose IPA is the cleaned text of an ``ejaan:id`` template.

    Unlike the ``IPA`` template, the whole template node is cleaned rather
    than a single parameter. Nothing is added to ``word_entry.sounds`` when
    the resulting text is empty.
    """
    ipa_text = clean_node(wxr, None, t_node)
    sound = Sound(ipa=ipa_text, raw_tags=raw_tags)
    if sound.ipa == "":
        return
    translate_raw_tags(sound)
    word_entry.sounds.append(sound)
def extract_rhymes_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add one sound per rhyme link produced by a ``rhymes`` template.

    The template is converted back to wikitext and re-parsed with
    ``expand_all=True``; every ``LINK`` child of the resulting tree whose
    cleaned text is non-empty becomes a ``Sound`` with that text as its
    ``rhymes`` value and ``raw_tags`` as its raw tags.
    """
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    for link_node in expanded_node.find_child(NodeKind.LINK):
        rhyme = clean_node(wxr, None, link_node)
        if rhyme == "":
            continue
        sound = Sound(rhymes=rhyme, raw_tags=raw_tags)
        # Translate raw tags before storing the sound, as the IPA, audio
        # and ejaan:id extractors in this module do.
        translate_raw_tags(sound)
        word_entry.sounds.append(sound)