Coverage for src/wiktextract/extractor/pt/pronunciation.py: 94%
40 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
1from wikitextprocessor.parser import (
2 LEVEL_KIND_FLAGS,
3 LevelNode,
4 NodeKind,
5 WikiNode,
6)
8from ...page import clean_node
9from ...wxr_context import WiktextractContext
10from .models import Sound, WordEntry
11from .tags import translate_raw_tags
def extract_pronunciation_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
) -> None:
    """Collect sounds from a pronunciation section and attach them to entries.

    The section heading is used as a raw tag for every sound found directly
    in this section, unless the heading is empty or the generic "Pronúncia".
    Nested sub-sections are processed recursively by separate calls; each
    call builds its tags from its own heading only, so a parent heading is
    not propagated to sounds found in child sections.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        page_data: Word entries extracted so far. Sounds are appended to
            every entry whose ``lang_code`` equals that of the last entry.
        level_node: The heading node of the pronunciation (sub-)section.
    """
    heading = clean_node(wxr, None, level_node.largs)
    section_tags = [] if heading in ("", "Pronúncia") else [heading]

    # Flatten the sounds of every list item of every list directly under
    # this heading. ``page_data[-1]`` is only touched when an item exists.
    collected = [
        sound
        for list_node in level_node.find_child(NodeKind.LIST)
        for item in list_node.find_child(NodeKind.LIST_ITEM)
        for sound in extract_pronunciation_list_item(
            wxr, item, page_data[-1].lang_code, section_tags
        )
    ]

    # Sub-sections are handled before this section's own sounds are
    # attached, so their sounds come first in each entry's ``sounds`` list.
    for subsection in level_node.find_child(LEVEL_KIND_FLAGS):
        extract_pronunciation_section(wxr, page_data, subsection)

    for entry in page_data:
        if entry.lang_code != page_data[-1].lang_code:
            continue
        entry.sounds.extend(collected)
def extract_pronunciation_list_item(
    wxr: WiktextractContext,
    list_item: WikiNode,
    lang_code: str,
    parent_raw_tags: list[str],
) -> list[Sound]:
    """Extract sounds from one list item, descending into nested lists.

    A string child containing ``":"`` splits the item: the cleaned text of
    the children before that string becomes a raw tag, and the text after
    the first colon plus the remaining non-list children becomes the
    pronunciation value. Nested lists are processed recursively with the
    tags accumulated so far.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        list_item: The list item node whose children are scanned.
        lang_code: Language code of the entry; ``"zh"`` stores the value in
            ``zh_pron``, any other code stores it in ``ipa``.
        parent_raw_tags: Tags inherited from the enclosing section or list
            item. The list is copied and never modified.

    Returns:
        The sounds found in this item and in its nested lists, in document
        order.
    """
    tags = parent_raw_tags.copy()
    found = []
    children = list_item.children
    for pos, child in enumerate(children):
        if isinstance(child, str):
            if ":" not in child:
                continue
            # NOTE(review): only the nodes *before* this string form the
            # tag; any text preceding the colon inside the string itself is
            # discarded. Confirm this is intended for plain-text labels.
            label = clean_node(wxr, None, children[:pos])
            if label != "":
                tags.append(label)
            _, _, after_colon = child.partition(":")
            # Nested lists are handled by the branch below, so keep them
            # out of the pronunciation text.
            trailing = [
                n
                for n in children[pos + 1 :]
                if not isinstance(n, WikiNode) or n.kind != NodeKind.LIST
            ]
            value = clean_node(wxr, None, [after_colon, *trailing])
            if value == "":
                continue
            sound = Sound(raw_tags=tags)
            if lang_code != "zh":
                sound.ipa = value
            else:
                sound.zh_pron = value
            # Presumably converts raw tags into normalized tags; see the
            # sibling ``tags`` module to confirm.
            translate_raw_tags(sound)
            found.append(sound)
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            # Nested items inherit every tag gathered so far in this item.
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                found.extend(
                    extract_pronunciation_list_item(
                        wxr, nested_item, lang_code, tags
                    )
                )

    return found