Coverage for src/wiktextract/extractor/pl/sound.py: 73%
48 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Sound, WordEntry
7from .tags import translate_raw_tags
# Template names that `process_sound_template` treats as tag markers: the
# cleaned text of such a template is appended to the pending raw tags instead
# of producing a sound entry.
# NOTE(review): presumably accent ("RP", "amer") and grammatical-number
# ("lp", "lm") labels -- confirm against the Polish Wiktionary templates.
SOUND_TAG_TEMPLATES = frozenset(["RP", "amer", "lp", "lm"])
def extract_sound_section(
    wxr: WiktextractContext,
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Feed every template of a sound section to `process_sound_template`.

    If the section contains list items (at any depth), only templates that
    are direct children of those list items are processed, and each list
    item gets its own pending raw-tag list. If it contains no list item at
    all, every template found anywhere under the section is processed.

    Args:
        wxr: Extraction context, forwarded unchanged.
        base_data: Entry object forwarded to `process_sound_template`,
            which appends the extracted sounds to it.
        level_node: The section node to scan.
    """
    has_list = False
    for list_item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        has_list = True
        # One list per list item: tag templates append to it and the sound
        # templates that follow in the same item read from it.
        raw_tags: list[str] = []
        for template_node in list_item.find_child(NodeKind.TEMPLATE):
            process_sound_template(wxr, base_data, template_node, raw_tags)
    if not has_list:
        # could have preformatted node, can't use `find_child()`
        # NOTE(review): each call receives a brand-new list, so a tag
        # template seen on this path appends to a list nobody reads and its
        # tag never reaches a following sound -- confirm this is intended.
        for template_node in level_node.find_child_recursively(
            NodeKind.TEMPLATE
        ):
            process_sound_template(wxr, base_data, template_node, [])
def process_sound_template(
    wxr: WiktextractContext,
    base_data: WordEntry,
    template_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Convert one sound-section template into sound data or a pending tag.

    Dispatch is on the template name, first match wins:

    * name starts with ``IPA``, ``AS`` or ``SAMPA``: the cleaned first
      parameter becomes ``Sound.ipa``. Names starting with ``AS`` add the
      tag ``Slavic-alphabet``; the exact name ``SAMPA`` adds ``SAMPA``.
    * name starts with ``audio``: the first parameter is handed to
      `set_sound_file_url_fields` and ``raw_tags`` is emptied afterwards.
    * name is in ``SOUND_TAG_TEMPLATES``: the cleaned template text is
      appended to ``raw_tags``; no sound is created.
    * name is ``pinyin`` or ``zhuyin``: the first parameter becomes
      ``Sound.zh_pron`` with the tag ``Pinyin`` or ``Bopomofo``.

    Any other template is ignored. In the sound-producing branches nothing
    is added when the value is empty or not a plain string.

    Args:
        wxr: Extraction context passed to the helper functions.
        base_data: Entry whose ``sounds`` list receives new ``Sound``
            objects.
        template_node: The template to process.
        raw_tags: Pending raw tags. Mutated in place: tag templates append
            to it and a successfully processed audio template clears it.
    """
    name = template_node.template_name
    if name.startswith(("IPA", "AS", "SAMPA")):
        ipa = clean_node(
            wxr, None, template_node.template_parameters.get(1, "")
        )
        if isinstance(ipa, str) and ipa:
            sound = Sound(ipa=ipa, raw_tags=raw_tags)
            if name.startswith("AS"):
                sound.tags.append("Slavic-alphabet")
            elif name == "SAMPA":
                sound.tags.append("SAMPA")
            translate_raw_tags(sound)
            base_data.sounds.append(sound)
    elif name.startswith("audio"):
        audio_file = template_node.template_parameters.get(1, "")
        if isinstance(audio_file, str) and audio_file:
            sound = Sound(raw_tags=raw_tags)
            set_sound_file_url_fields(wxr, audio_file, sound)
            translate_raw_tags(sound)
            base_data.sounds.append(sound)
            # Pending tags are dropped only after an audio entry was
            # actually stored; the IPA branch above leaves them in place.
            # NOTE(review): assumes `Sound` does not keep a reference to
            # this very list object -- confirm against the model.
            raw_tags.clear()
    elif name in SOUND_TAG_TEMPLATES:
        raw_tags.append(clean_node(wxr, None, template_node))
    elif name in ("pinyin", "zhuyin"):
        # Unlike the IPA branch, the parameter is used as-is, without
        # `clean_node`.
        zh_pron = template_node.template_parameters.get(1, "")
        if isinstance(zh_pron, str) and zh_pron:
            sound = Sound(zh_pron=zh_pron, raw_tags=raw_tags)
            if name == "pinyin":
                sound.tags.append("Pinyin")
            elif name == "zhuyin":
                sound.tags.append("Bopomofo")
            translate_raw_tags(sound)
            base_data.sounds.append(sound)