Coverage for src/wiktextract/extractor/pt/pronunciation.py: 97%
38 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor.parser import (
2 LEVEL_KIND_FLAGS,
3 LevelNode,
4 NodeKind,
5 WikiNode,
6)
8from ...page import clean_node
9from ...wxr_context import WiktextractContext
10from .models import Sound, WordEntry
11from .tags import translate_raw_tags
def extract_pronunciation_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
) -> None:
    """Collect the sounds of a pronunciation section and attach them.

    The cleaned section heading becomes a raw tag for the sounds found in
    this section, unless it is empty or the generic "Pronúncia" heading.
    Sounds are gathered from the list items directly under ``level_node``.
    Nested sections are handled recursively, each starting with its own
    empty tag list.

    Args:
        wxr: Extraction context forwarded to ``clean_node``.
        page_data: Entries extracted so far. Every entry whose
            ``lang_code`` equals that of the last entry receives the
            sounds; it is modified in place.
        level_node: The pronunciation section (or subsection) node.
    """
    heading = clean_node(wxr, None, level_node.largs)
    section_tags = [] if heading in ("", "Pronúncia") else [heading]

    collected = []
    for list_node in level_node.find_child(NodeKind.LIST):
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            # ``section_tags`` is shared by all items of this section and
            # may be extended in place by the item extractor.
            collected.extend(
                extract_pronunciation_list_item(wxr, item, section_tags)
            )

    # Subsections attach their own sounds to ``page_data`` before the
    # sounds collected above are attached below.
    for subsection in level_node.find_child(LEVEL_KIND_FLAGS):
        extract_pronunciation_section(wxr, page_data, subsection)

    for entry in page_data:
        if entry.lang_code != page_data[-1].lang_code:
            continue
        for sound in collected:
            # The same Sound object is translated and appended once per
            # matching entry, so matching entries share the instances.
            translate_raw_tags(sound)
            entry.sounds.append(sound)
def extract_pronunciation_list_item(
    wxr: WiktextractContext, list_item: WikiNode, raw_tags: list[str]
) -> list[Sound]:
    """Extract sounds from one pronunciation list item and its sublists.

    Each string child containing ":" splits the item into a label (the
    cleaned text of the children before that string) and a value (the text
    after the first ":" plus all following children except nested lists).
    A non-empty value yields a ``Sound`` whose ``ipa`` field holds it.
    An empty value with a non-empty label appends the label to
    ``raw_tags`` instead. Nested lists are processed recursively with the
    same ``raw_tags`` list.

    Args:
        wxr: Extraction context forwarded to ``clean_node``.
        list_item: The list item node to examine.
        raw_tags: Tags passed to each created ``Sound``. This list is
            mutated in place when a label-only item is found.

    Returns:
        The sounds found in this item and in its nested list items, in
        document order.
    """
    found = []
    children = list_item.children
    for pos, child in enumerate(children):
        if isinstance(child, str) and ":" in child:
            label = clean_node(wxr, None, children[:pos])
            # Text after the first ":" followed by the remaining siblings;
            # nested lists are left out because they are handled on their
            # own turn of this loop.
            value_nodes = [child.partition(":")[2]]
            value_nodes += [
                n
                for n in children[pos + 1 :]
                if not isinstance(n, WikiNode) or n.kind != NodeKind.LIST
            ]
            value = clean_node(wxr, None, value_nodes)
            if value == "":
                if label != "":
                    # NOTE(review): appending to the shared list keeps this
                    # label visible to every later item that receives the
                    # same ``raw_tags`` object, not only to the nested list
                    # below - confirm this is intended.
                    raw_tags.append(label)
                continue
            # Whether Sound copies ``raw_tags`` is decided by the model and
            # is not visible from here.
            sound = Sound(ipa=value, raw_tags=raw_tags)
            if label == "X-SAMPA":
                sound.tags.append("X-SAMPA")
            found.append(sound)
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                found.extend(
                    extract_pronunciation_list_item(
                        wxr, nested_item, raw_tags
                    )
                )

    return found