Coverage for src/wiktextract/extractor/de/pronunciation.py: 93%
57 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor.parser import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Sound, WordEntry
7from .tags import translate_raw_tags
def extract_pronunciation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Add the sounds found in a pronunciation section to ``word_entry``.

    Every list item of every list that is a direct child of ``level_node``
    is handed to ``extract_pron_list_item``. The returned sounds are
    appended to ``word_entry.sounds`` and each sound's categories are
    copied into ``word_entry.categories``.
    """
    for pron_list in level_node.find_child(NodeKind.LIST):
        for item in pron_list.find_child(NodeKind.LIST_ITEM):
            item_sounds = extract_pron_list_item(wxr, item)
            word_entry.sounds.extend(item_sounds)
            for item_sound in item_sounds:
                word_entry.categories.extend(item_sound.categories)
def extract_pron_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Sound]:
    """Build ``Sound`` objects from one pronunciation list item.

    Only template, italic and nested-list children are inspected:

    * italic text ending in ``:`` is kept (without the colon) as a raw tag;
    * nested lists are processed recursively, item by item;
    * ``Lautschrift`` yields a sound with ``ipa`` set, ``Reim`` one with
      ``rhymes`` set (both from the first template argument, skipped when
      it cleans to an empty string), and ``Audio`` is delegated to
      ``extract_audio_template``.

    Every raw tag seen in this item is added to every returned sound,
    including sounds from nested lists, before ``translate_raw_tags`` is
    called on it.
    """
    labels = []
    collected = []
    wanted_kinds = NodeKind.TEMPLATE | NodeKind.ITALIC | NodeKind.LIST
    for node in list_item.find_child(wanted_kinds):
        if node.kind == NodeKind.ITALIC:
            italic_text = clean_node(wxr, None, node)
            if italic_text.endswith(":"):
                labels.append(italic_text.removesuffix(":"))
            continue

        if node.kind == NodeKind.LIST:
            for child_item in node.find_child(NodeKind.LIST_ITEM):
                collected.extend(extract_pron_list_item(wxr, child_item))
            continue

        if node.kind != NodeKind.TEMPLATE:
            continue

        template_name = node.template_name
        if template_name == "Audio":
            audio_sound = extract_audio_template(wxr, node)
            if audio_sound is not None:
                collected.append(audio_sound)
        elif template_name in ("Lautschrift", "Reim"):
            first_arg = clean_node(
                wxr, None, node.template_parameters.get(1, "")
            )
            if first_arg != "":
                if template_name == "Lautschrift":
                    new_sound = Sound(ipa=first_arg)
                else:
                    new_sound = Sound(rhymes=first_arg)
                collected.append(new_sound)
                # Second pass over the whole template with the sound as
                # the data target (presumably to pick up categories --
                # confirm against clean_node).
                clean_node(wxr, new_sound, node)

    for item_sound in collected:
        item_sound.raw_tags.extend(labels)
        translate_raw_tags(item_sound)
    return collected
def extract_audio_template(
    wxr: WiktextractContext, t_node: TemplateNode
) -> Sound | None:
    """Turn an ``Audio`` template into a ``Sound``.

    Returns ``None`` when the first template argument (the file name) is
    blank after cleaning. Otherwise the file/URL fields are filled in by
    ``set_sound_file_url_fields``, the template is expanded, and for every
    link in the expansion whose text contains ``(`` the text after the
    first ``(`` (with ``)`` stripped from both ends) is stored as a raw tag.
    """
    # https://de.wiktionary.org/wiki/Vorlage:Audio
    filename = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    if not filename.strip():
        return None

    sound = Sound()
    set_sound_file_url_fields(wxr, filename, sound)

    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)
    for link in expanded.find_child(NodeKind.LINK):
        link_text = clean_node(wxr, None, link)
        _, open_paren, after_paren = link_text.partition("(")
        if open_paren:
            sound.raw_tags.append(after_paren.strip(")"))

    # Clean the expanded tree with the sound as the data target
    # (presumably to collect categories -- confirm against clean_node).
    clean_node(wxr, sound, expanded)
    return sound