Coverage for src/wiktextract/extractor/de/pronunciation.py: 92%
57 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1from wikitextprocessor.parser import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Sound, WordEntry
7from .tags import translate_raw_tags
def extract_pronunciation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Gather the sounds found in a pronunciation section.

    Walks every list item of every list directly under ``level_node``,
    extracts its sounds with ``extract_pron_list_item`` and stores them on
    ``word_entry``: the sounds go to ``word_entry.sounds`` and the
    categories carried by each sound go to ``word_entry.categories``.
    """
    for pron_list in level_node.find_child(NodeKind.LIST):
        for item in pron_list.find_child(NodeKind.LIST_ITEM):
            found = extract_pron_list_item(wxr, item)
            word_entry.sounds.extend(found)
            for found_sound in found:
                word_entry.categories.extend(found_sound.categories)
def extract_pron_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Sound]:
    """Build the ``Sound`` entries described by one pronunciation list item.

    Only template, italic and nested-list children of ``list_item`` are
    inspected:

    * italic text ending in ":" (except "auth:") is queued as a raw tag;
    * nested lists are processed recursively, item by item;
    * ``Lautschrift`` templates produce an IPA sound that receives the
      queued raw tags, and the queue is emptied once a non-empty IPA was
      produced;
    * ``Audio`` templates are delegated to ``extract_audio_template``;
    * ``Reim`` templates produce a rhyme sound from their first argument.

    Returns the sounds in the order they were encountered.
    """
    pending_tags = []
    results = []
    wanted_kinds = NodeKind.TEMPLATE | NodeKind.ITALIC | NodeKind.LIST
    for child in list_item.find_child(wanted_kinds):
        kind = child.kind
        if kind == NodeKind.ITALIC:
            label = clean_node(wxr, None, child)
            if label.endswith(":") and label != "auth:":
                pending_tags.append(label.removesuffix(":"))
            continue
        if kind == NodeKind.LIST:
            for sub_item in child.find_child(NodeKind.LIST_ITEM):
                results.extend(extract_pron_list_item(wxr, sub_item))
            continue
        if kind != NodeKind.TEMPLATE:
            continue

        template_name = child.template_name
        if template_name == "Lautschrift":
            ipa_sound = Sound(ipa="", raw_tags=pending_tags)
            # The sound is handed to clean_node as its second argument
            # while the template is rendered to text.
            ipa_sound.ipa = clean_node(wxr, ipa_sound, child)
            if ipa_sound.ipa == "":
                # Nothing usable: keep the queued tags for a later template.
                continue
            translate_raw_tags(ipa_sound)
            results.append(ipa_sound)
            pending_tags.clear()
        elif template_name == "Audio":
            audio_sound = extract_audio_template(wxr, child)
            if audio_sound is not None:
                results.append(audio_sound)
        elif template_name == "Reim":
            rhyme_arg = child.template_parameters.get(1, "")
            rhyme_text = clean_node(wxr, None, rhyme_arg)
            if rhyme_text != "":
                rhyme_sound = Sound(rhymes=rhyme_text)
                results.append(rhyme_sound)
                # Return value is discarded; presumably this call exists to
                # record categories on the sound - TODO confirm.
                clean_node(wxr, rhyme_sound, child)

    return results
def extract_audio_template(
    wxr: WiktextractContext, t_node: TemplateNode
) -> Sound | None:
    """Turn an ``Audio`` template into a ``Sound``.

    The first template argument is the audio file name; when it is blank
    after cleaning, ``None`` is returned. Otherwise the file URL fields are
    filled in, the template is expanded, and for every link in the
    expansion whose text contains "(" the text after that parenthesis
    (with ")" characters stripped from both ends) is added as a raw tag.
    Raw tags are translated before the sound is returned.
    """
    # https://de.wiktionary.org/wiki/Vorlage:Audio
    file_arg = t_node.template_parameters.get(1, "")
    filename = clean_node(wxr, None, file_arg)
    if not filename.strip():
        return None

    sound = Sound()
    set_sound_file_url_fields(wxr, filename, sound)

    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)
    for link in expanded.find_child(NodeKind.LINK):
        link_text = clean_node(wxr, None, link)
        paren_at = link_text.find("(")
        if paren_at == -1:
            continue
        tag_text = link_text[paren_at + 1 :].strip(")")
        sound.raw_tags.append(tag_text)

    # Return value is discarded; presumably run so that data from the
    # expanded template is recorded on the sound - TODO confirm.
    clean_node(wxr, sound, expanded)
    translate_raw_tags(sound)
    return sound