Coverage for src / wiktextract / extractor / es / pronunciation.py: 88%
84 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
1from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Hyphenation, Sound, WordEntry
7from .tags import translate_raw_tags
# Maps a "pron-graf" table row header to the `Sound` model field that
# receives the row's value; both spellings feed the same field.
PRON_GRAF_HEADER_MAP = dict.fromkeys(("rimas", "rima"), "rhymes")
def process_pron_graf_template(
    wxr: WiktextractContext, word_entry: WordEntry, template_node: TemplateNode
) -> None:
    """Extract pronunciation data from a ``pron-graf`` template.

    The template is expanded and re-parsed; the first table of the expansion
    is walked row by row. Only rows with exactly two cells (header, value)
    are considered, and the header text decides how the value cell is
    turned into entries of ``word_entry.sounds`` or
    ``word_entry.hyphenations``. Nothing is collected when the expansion
    contains no table.
    """
    # https://es.wiktionary.org/wiki/Plantilla:pron-graf
    # The template can produce sound data even without any parameter, and
    # it expands to a two-column table.
    link_fields = {"variantes": "alternative", "homófonos": "homophone"}
    text_fields = {
        "transliteraciones": "roman",
        "transcripciones silábicas": "syllabic",
    }

    wikitext = wxr.wtp.node_to_wikitext(template_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)
    first_table = next(iter(expanded.find_child(NodeKind.TABLE)), None)
    if first_table is None:
        return

    for row in first_table.find_child(NodeKind.TABLE_ROW):
        cells = list(row.find_child(NodeKind.TABLE_CELL))
        if len(cells) != 2:
            continue
        header_cell, value_cell = cells
        header = clean_node(wxr, None, header_cell)
        value = clean_node(wxr, None, value_cell)

        if header.endswith(" (AFI)"):  # IPA
            process_pron_graf_ipa_cell(wxr, word_entry, value_cell, header)
        elif header == "silabación" and value != "":
            parts = value.split("-")
            word_entry.hyphenations.append(Hyphenation(parts=parts))
        elif header in PRON_GRAF_HEADER_MAP:
            mapped = Sound()
            setattr(mapped, PRON_GRAF_HEADER_MAP[header], value)
            word_entry.sounds.append(mapped)
        elif header.endswith(" alternativas") or header in link_fields:
            # Any "... alternativas" header is stored as an alternative form.
            process_pron_graf_link_cell(
                wxr,
                word_entry,
                value_cell,
                header,
                link_fields.get(header, "alternative"),
            )
        elif header in text_fields:
            process_pron_graf_text_cell(
                wxr, word_entry, value_cell, text_fields[header]
            )
        elif header != "" and value not in ("", "falta agregar"):
            # Unknown header: keep the value and record the header as a tag.
            word_entry.sounds.append(Sound(other=value, raw_tags=[header]))

    # Second pass with the word entry passed to clean_node (the per-cell
    # calls above pass None instead).
    clean_node(wxr, word_entry, expanded)
def process_pron_graf_ipa_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    header_text: str,
) -> None:
    """Collect IPA strings and audio files from an "(AFI)" table cell.

    Text children are accumulated into ``Sound.ipa``; ``<phonos>`` elements
    supply the audio file and ``<small>`` raw tags; a ``<br>`` element ends
    the current pronunciation. Each non-empty ``Sound`` is appended to
    ``word_entry.sounds``. When the header does not start with
    "pronunciación", the header (minus its " (AFI)" suffix) is added as a
    raw tag.
    """
    header_is_location = not header_text.startswith("pronunciación")
    location_tag = header_text.removesuffix(" (AFI)")
    blank = Sound()  # reference value for "nothing collected yet"

    def finish(done: Sound) -> None:
        # Shared finalisation for the <br> case and the end of the cell.
        if header_is_location:
            done.raw_tags.append(location_tag)
        translate_raw_tags(done)
        word_entry.sounds.append(done)

    current = Sound()
    for child in cell_node.children:
        if isinstance(child, str):
            text = child.strip()
            if text:
                current.ipa += text
        elif isinstance(child, HTMLNode):
            if child.tag == "phonos":
                set_sound_file_url_fields(
                    wxr, child.attrs.get("file", ""), current
                )
                current.raw_tags.extend(
                    clean_node(wxr, None, small)
                    for small in child.find_html("small")
                )
            elif child.tag == "br" and current != blank:
                finish(current)
                current = Sound()

    if current != blank:
        finish(current)
def process_pron_graf_link_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    header_text: str,
    field_name: str,
) -> None:
    """Create one ``Sound`` per link found directly inside the cell.

    The cleaned link text is stored in the ``field_name`` attribute. If the
    child immediately after the link is a ``<ref>`` element, its content
    becomes the sound's note. Sounds from a "variantes" row are flagged as
    not sharing the same pronunciation.
    """
    siblings = cell_node.children
    for position, link in cell_node.find_child(NodeKind.LINK, with_index=True):
        entry = Sound()
        setattr(entry, field_name, clean_node(wxr, None, link))

        next_position = position + 1
        follower = (
            siblings[next_position] if next_position < len(siblings) else None
        )
        if isinstance(follower, HTMLNode) and follower.tag == "ref":
            # A "ref" tag directly after the link holds the note text.
            entry.note = clean_node(wxr, None, follower.children)

        if header_text == "variantes":
            entry.not_same_pronunciation = True
        word_entry.sounds.append(entry)
def process_pron_graf_text_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    field_name: str,
) -> None:
    """Create ``Sound`` entries from the plain-text values of a table cell.

    Each non-blank text child becomes the ``field_name`` value of the
    current sound. A text child that starts with ", " begins a new value:
    the sound collected so far is stored and a fresh one is started. A
    ``<ref>`` element sets the note of the current sound.

    Only sounds whose ``field_name`` value is non-empty are appended to
    ``word_entry.sounds``; this holds at a ", " separator as well as at the
    end of the cell, so a leading separator never produces an empty entry.
    """
    sound = Sound()
    for node in cell_node.children:
        if isinstance(node, str) and len(node.strip()) > 0:
            text = node.strip()
            if text.startswith(", "):
                text = text.removeprefix(", ")
                # Store the previous value only if there is one; the
                # separator may be the first text in the cell.
                if len(getattr(sound, field_name)) > 0:
                    word_entry.sounds.append(sound)
                sound = Sound()
            setattr(sound, field_name, text.strip())
        elif isinstance(node, HTMLNode) and node.tag == "ref":
            sound.note = clean_node(wxr, None, node.children)
    if len(getattr(sound, field_name)) > 0:
        word_entry.sounds.append(sound)