Coverage for src/wiktextract/extractor/it/sound.py: 84%
79 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Hyphenation, Sound, WordEntry
def extract_hyphenation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Collect hyphenations found under ``level_node``.

    The gathered ``Hyphenation`` objects are appended to the
    ``hyphenations`` list of every entry in ``page_data`` whose
    ``lang_code`` equals the ``lang_code`` of the last entry.
    """
    # https://it.wiktionary.org/wiki/Aiuto:Sillabazione
    found = []
    for list_node in level_node.find_child(NodeKind.LIST):
        marker = list_node.sarg
        if marker == ";":
            # Only the first list item that yields non-empty text is kept.
            for item in list_node.find_child(NodeKind.LIST_ITEM):
                text = clean_node(wxr, None, item.children)
                if text == "":
                    continue
                found.append(Hyphenation(hyphenation=text))
                break
        elif marker == "*":
            for item in list_node.find_child(NodeKind.LIST_ITEM):
                entry = Hyphenation()
                styled_children = item.find_child(
                    NodeKind.ITALIC | NodeKind.BOLD
                )
                for child in styled_children:
                    if child.kind == NodeKind.ITALIC:
                        # Italic text is stored as the sense, with any
                        # leading/trailing parentheses removed.
                        sense_text = clean_node(wxr, None, child)
                        entry.sense = sense_text.strip("()")
                    elif child.kind == NodeKind.BOLD:
                        entry.hyphenation = clean_node(wxr, None, child)
                # Items that never produced a hyphenation are dropped.
                if entry.hyphenation == "":
                    continue
                found.append(entry)

    # no list
    for bold_node in level_node.find_child(NodeKind.BOLD):
        text = clean_node(wxr, None, bold_node)
        if text == "":
            continue
        found.append(Hyphenation(hyphenation=text))

    if len(page_data) == 0:
        return
    target_lang = page_data[-1].lang_code
    for word_entry in page_data:
        if word_entry.lang_code == target_lang:
            word_entry.hyphenations.extend(found)
def extract_pronunciation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Collect sounds found under ``level_node``.

    List items are handled by ``extract_sound_list_item`` and templates
    that are direct children of the section by ``extract_sound_template``.
    The result is appended to the ``sounds`` list of every entry in
    ``page_data`` whose ``lang_code`` equals that of the last entry.
    """
    # https://it.wiktionary.org/wiki/Aiuto:Pronuncia
    collected = []
    for list_node in level_node.find_child(NodeKind.LIST):
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            extract_sound_list_item(wxr, item, collected)

    # no list
    for template in level_node.find_child(NodeKind.TEMPLATE):
        # Outside a list there is no sense text and no raw tags to pass on.
        extract_sound_template(wxr, template, collected, "", [])

    if len(page_data) == 0:
        return
    target_lang = page_data[-1].lang_code
    for word_entry in page_data:
        if word_entry.lang_code == target_lang:
            word_entry.sounds.extend(collected)
def extract_sound_list_item(
    wxr: WiktextractContext, list_item: WikiNode, sounds: list[Sound]
) -> None:
    """Process the italic and template children of one list item.

    Italic text becomes the current sense, ``glossa`` templates
    (name compared case-insensitively) are accumulated as raw tags, and
    every other template is forwarded to ``extract_sound_template``
    together with the sense and raw tags seen so far.
    """
    sense = ""
    raw_tags = []
    for child in list_item.find_child(NodeKind.ITALIC | NodeKind.TEMPLATE):
        if child.kind == NodeKind.ITALIC:
            # Drop any leading/trailing parentheses around the sense text.
            sense = clean_node(wxr, None, child).strip("()")
            continue
        if child.kind != NodeKind.TEMPLATE:
            continue
        if child.template_name.lower() != "glossa":
            extract_sound_template(wxr, child, sounds, sense, raw_tags)
            continue
        tag_text = clean_node(wxr, None, child)
        raw_tags.append(tag_text.strip("()"))
def extract_sound_template(
    wxr: WiktextractContext,
    t_node: TemplateNode,
    sounds: list[Sound],
    sense: str,
    raw_tags: list[str],
) -> None:
    """Turn an IPA/SAMPA or Audio template into ``Sound`` data.

    ``IPA``/``SAMPA``: each consecutive positional argument 1..4 that is
    present and non-empty after cleaning yields a new ``Sound`` appended
    to ``sounds``; SAMPA sounds get the ``"SAMPA"`` tag.

    ``Audio``/``audio``: argument 1 is the sound file and argument 2 an
    optional raw tag. The file is attached to the last item of ``sounds``
    when that list is non-empty; otherwise a new ``Sound`` is created
    from ``sense`` and ``raw_tags`` and appended.

    Any other template name is ignored.
    """
    name = t_node.template_name
    params = t_node.template_parameters

    if name in ("IPA", "SAMPA"):
        # https://it.wiktionary.org/wiki/Template:IPA
        # https://it.wiktionary.org/wiki/Template:SAMPA
        is_sampa = name.lower() == "sampa"
        for position in range(1, 5):
            # Stop at the first missing positional argument.
            if position not in params:
                break
            ipa = clean_node(wxr, None, params.get(position, ""))
            if ipa == "":
                continue
            new_sound = Sound(ipa=ipa, sense=sense, raw_tags=raw_tags)
            if is_sampa:
                new_sound.tags.append("SAMPA")
            sounds.append(new_sound)
        return

    if name in ("Audio", "audio"):
        # https://it.wiktionary.org/wiki/Template:Audio
        sound_file = clean_node(wxr, None, params.get(1, ""))
        raw_tag = clean_node(wxr, None, params.get(2, ""))
        if sound_file == "":
            return
        # Reuse the most recent sound when one exists; otherwise build a
        # fresh one, which is appended only after it has been filled in.
        is_new = len(sounds) == 0
        if is_new:
            target = Sound(sense=sense, raw_tags=raw_tags)
        else:
            target = sounds[-1]
        set_sound_file_url_fields(wxr, sound_file, target)
        if raw_tag != "":
            target.raw_tags.append(raw_tag)
        if is_new:
            sounds.append(target)