Coverage for src/wiktextract/extractor/cs/sound.py: 80%
66 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Hyphenation, Sound, WordEntry
7from .tags import translate_raw_tags
def extract_sound_section(
    wxr: WiktextractContext, base_data: WordEntry, level_node: LevelNode
):
    """Collect pronunciation data from the list items under ``level_node``.

    Each list item is scanned left to right.  ``Příznak2`` templates and
    italic nodes contribute raw tags; an ``IPA``, ``IPA2`` or ``Audio``
    template is handed to its extractor together with the raw tags gathered
    so far, after which the pending tags are reset.  Results are appended to
    ``base_data`` by the individual extractors.
    """
    # Templates that produce sounds and therefore consume the pending tags.
    sound_extractors = {
        "IPA": extract_ipa_template,
        "IPA2": extract_ipa2_template,
        "Audio": extract_audio_template,
    }
    for list_node in level_node.find_child(NodeKind.LIST):
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            pending_tags = []
            for child in list_item.children:
                if isinstance(child, TemplateNode):
                    handler = sound_extractors.get(child.template_name)
                    if handler is not None:
                        handler(wxr, base_data, child, pending_tags)
                        # NOTE(review): the list is emptied in place right
                        # after being passed on; this presumably relies on
                        # Sound not keeping a reference to it - confirm.
                        pending_tags.clear()
                    elif child.template_name == "Příznak2":
                        pending_tags.extend(
                            extract_příznak2_template(wxr, child)
                        )
                    continue
                if isinstance(child, WikiNode) and child.kind == NodeKind.ITALIC:
                    label = clean_node(wxr, None, child)
                    if label != "":
                        pending_tags.append(label)
def extract_ipa_template(
    wxr: WiktextractContext,
    base_data: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
):
    """Expand an ``IPA`` template and append one ``Sound`` per transcription.

    The template is expanded, every ``<span class="IPA">`` in the result is
    cleaned to text and split on commas, and each non-empty piece becomes a
    ``Sound`` carrying ``raw_tags``.  The expanded tree is finally passed to
    ``clean_node`` together with ``base_data``.
    """
    # https://cs.wiktionary.org/wiki/Šablona:IPA
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_node = wxr.wtp.parse(wikitext, expand_all=True)
    for ipa_span in expanded_node.find_html(
        "span", attr_name="class", attr_value="IPA"
    ):
        span_text = clean_node(wxr, None, ipa_span)
        # Strip each comma-separated piece and skip the empty ones.
        for ipa in filter(None, map(str.strip, span_text.split(","))):
            sound = Sound(ipa=ipa, raw_tags=raw_tags)
            translate_raw_tags(sound)
            base_data.sounds.append(sound)
    clean_node(wxr, base_data, expanded_node)
def extract_ipa2_template(
    wxr: WiktextractContext,
    base_data: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
):
    """Append a ``Sound`` built from the first argument of an ``IPA2`` template.

    The cleaned argument is wrapped in square brackets; nothing is added when
    it is empty.
    """
    # https://cs.wiktionary.org/wiki/Šablona:IPA2
    first_arg = t_node.template_parameters.get(1, "")
    ipa = clean_node(wxr, None, first_arg)
    if ipa == "":
        return
    sound = Sound(ipa=f"[{ipa}]", raw_tags=raw_tags)
    translate_raw_tags(sound)
    base_data.sounds.append(sound)
def extract_audio_template(
    wxr: WiktextractContext,
    base_data: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
):
    """Append a ``Sound`` for the file named by an ``Audio`` template.

    The first template argument is cleaned and, when non-empty, passed to
    ``set_sound_file_url_fields`` to fill in the new ``Sound`` before it is
    appended to ``base_data.sounds``.
    """
    # https://cs.wiktionary.org/wiki/Šablona:Audio
    first_arg = t_node.template_parameters.get(1, "")
    filename = clean_node(wxr, None, first_arg)
    if filename == "":
        return
    sound = Sound(raw_tags=raw_tags)
    set_sound_file_url_fields(wxr, filename, sound)
    translate_raw_tags(sound)
    base_data.sounds.append(sound)
def extract_příznak2_template(
    wxr: WiktextractContext, t_node: TemplateNode
) -> list[str]:
    """Return the comma-separated labels of a ``Příznak2`` template.

    The template is cleaned to text, surrounding parentheses and spaces are
    removed, and the remainder is split on commas; empty pieces are dropped.
    """
    label_text = clean_node(wxr, None, t_node).strip("() ")
    return [
        label
        for label in map(str.strip, label_text.split(","))
        if label != ""
    ]
def extract_hyphenation_section(
    wxr: WiktextractContext, base_data: WordEntry, level_node: LevelNode
):
    """Append a ``Hyphenation`` for every list item under ``level_node``.

    The text of each list item is split on ``-``; the stripped, non-empty
    pieces form the ``parts`` of the hyphenation.  Items that yield no parts
    are skipped.
    """
    for list_node in level_node.find_child(NodeKind.LIST):
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            item_text = clean_node(wxr, None, list_item.children)
            stripped = (piece.strip() for piece in item_text.split("-"))
            parts = [piece for piece in stripped if piece]
            if parts:
                base_data.hyphenations.append(Hyphenation(parts=parts))