Coverage for src/wiktextract/extractor/it/sound.py: 84%
80 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Hyphenation, Sound, WordEntry
9def extract_hyphenation_section(
10 wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
11) -> None:
12 # https://it.wiktionary.org/wiki/Aiuto:Sillabazione
13 hyphenations = []
14 for list_node in level_node.find_child(NodeKind.LIST):
15 match list_node.sarg:
16 case ";":
17 for list_item in list_node.find_child(NodeKind.LIST_ITEM): 17 ↛ 14line 17 didn't jump to line 14 because the loop on line 17 didn't complete
18 h_str = clean_node(wxr, None, list_item.children)
19 if h_str != "": 19 ↛ 17line 19 didn't jump to line 17 because the condition on line 19 was always true
20 hyphenations.append(
21 Hyphenation(
22 parts=list(map(str.strip, h_str.split("|")))
23 )
24 )
25 break
26 case "*": 26 ↛ 14line 26 didn't jump to line 14 because the pattern on line 26 always matched
27 for list_item in list_node.find_child(NodeKind.LIST_ITEM):
28 h_data = Hyphenation()
29 for node in list_item.find_child(
30 NodeKind.ITALIC | NodeKind.BOLD
31 ):
32 match node.kind:
33 case NodeKind.ITALIC:
34 h_data.sense = clean_node(
35 wxr, None, node
36 ).strip("()")
37 case NodeKind.BOLD: 37 ↛ 29line 37 didn't jump to line 29 because the pattern on line 37 always matched
38 h_str = clean_node(wxr, None, node)
39 h_data.parts = list(
40 map(str.strip, h_str.split("|"))
41 )
42 if len(h_data.parts) > 0: 42 ↛ 27line 42 didn't jump to line 27 because the condition on line 42 was always true
43 hyphenations.append(h_data)
45 # no list
46 for node in level_node.find_child(NodeKind.BOLD):
47 h_str = clean_node(wxr, None, node)
48 if h_str != "": 48 ↛ 46line 48 didn't jump to line 46 because the condition on line 48 was always true
49 hyphenations.append(
50 Hyphenation(parts=list(map(str.strip, h_str.split("|"))))
51 )
53 for data in page_data:
54 if data.lang_code == page_data[-1].lang_code: 54 ↛ 53line 54 didn't jump to line 53 because the condition on line 54 was always true
55 data.hyphenations.extend(hyphenations)
def extract_pronunciation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Collect sounds from a pronunciation section and add them to entries.

    List items found in the section's lists are handled by
    ``extract_sound_list_item``; templates that are direct children of the
    section are handled by ``extract_sound_template`` with an empty sense
    and no raw tags. The collected sounds are appended to every entry in
    ``page_data`` whose ``lang_code`` equals that of the last entry.
    """
    # https://it.wiktionary.org/wiki/Aiuto:Pronuncia
    collected = []

    # sounds written as list items
    for sound_list in level_node.find_child(NodeKind.LIST):
        for item in sound_list.find_child(NodeKind.LIST_ITEM):
            extract_sound_list_item(wxr, item, collected)

    # sounds written as bare templates, without a list
    for template in level_node.find_child(NodeKind.TEMPLATE):
        extract_sound_template(wxr, template, collected, "", [])

    if not page_data:
        return
    current_lang_code = page_data[-1].lang_code
    for entry in page_data:
        if entry.lang_code != current_lang_code:
            continue
        entry.sounds.extend(collected)
def extract_sound_list_item(
    wxr: WiktextractContext, list_item: WikiNode, sounds: list[Sound]
) -> None:
    """Extract sounds from the italic and template children of a list item.

    Italic text (with surrounding parentheses stripped) becomes the sense
    passed along with the templates that follow it. A template whose
    lower-cased name is ``glossa`` contributes its cleaned text (also
    stripped of parentheses) as a raw tag; any other template is handed to
    ``extract_sound_template`` together with the sense and raw tags
    gathered so far.
    """
    current_sense = ""
    gloss_tags = []
    wanted_kinds = NodeKind.ITALIC | NodeKind.TEMPLATE
    for child in list_item.find_child(wanted_kinds):
        child_kind = child.kind
        if child_kind == NodeKind.ITALIC:
            current_sense = clean_node(wxr, None, child).strip("()")
        elif child_kind == NodeKind.TEMPLATE:
            if child.template_name.lower() == "glossa":
                gloss_text = clean_node(wxr, None, child)
                gloss_tags.append(gloss_text.strip("()"))
            else:
                extract_sound_template(
                    wxr, child, sounds, current_sense, gloss_tags
                )
def extract_sound_template(
    wxr: WiktextractContext,
    t_node: TemplateNode,
    sounds: list[Sound],
    sense: str,
    raw_tags: list[str],
) -> None:
    """Turn one pronunciation template into entries in ``sounds``.

    ``IPA`` / ``SAMPA``: positional arguments 1 to 4 are read in order,
    stopping at the first one that is absent. Each non-empty cleaned value
    becomes a new ``Sound`` carrying ``sense`` and ``raw_tags``; ``SAMPA``
    sounds additionally get the "SAMPA" tag.

    ``Audio`` / ``audio``: argument 1 is the sound file and argument 2 an
    optional raw tag. Nothing happens when the file name is empty. If
    ``sounds`` already has items, the file (and raw tag) is attached to the
    last one; otherwise a new ``Sound`` is created and appended.

    Templates with any other name are ignored.
    """
    name = t_node.template_name
    if name in ("IPA", "SAMPA"):
        # https://it.wiktionary.org/wiki/Template:IPA
        # https://it.wiktionary.org/wiki/Template:SAMPA
        params = t_node.template_parameters
        is_sampa = name == "SAMPA"
        for position in range(1, 5):
            if position not in params:
                break
            transcription = clean_node(wxr, None, params[position])
            if transcription == "":
                continue
            new_sound = Sound(
                ipa=transcription, sense=sense, raw_tags=raw_tags
            )
            if is_sampa:
                new_sound.tags.append("SAMPA")
            sounds.append(new_sound)
    elif name in ("Audio", "audio"):
        # https://it.wiktionary.org/wiki/Template:Audio
        params = t_node.template_parameters
        file_name = clean_node(wxr, None, params.get(1, ""))
        file_tag = clean_node(wxr, None, params.get(2, ""))
        if file_name == "":
            return
        has_previous = len(sounds) > 0
        if has_previous:
            target = sounds[-1]
        else:
            target = Sound(sense=sense, raw_tags=raw_tags)
        set_sound_file_url_fields(wxr, file_name, target)
        if file_tag != "":
            target.raw_tags.append(file_tag)
        # a freshly created sound joins the list only after it is filled in
        if not has_previous:
            sounds.append(target)