Coverage for src/wiktextract/extractor/es/pronunciation.py: 90%
86 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Hyphenation, Sound, WordEntry
7from .tags import translate_raw_tags
# Maps the text of a row header in the expanded "pron-graf" table to the
# name of the `Sound` model field that receives the text of that row.
PRON_GRAF_HEADER_MAP: dict[str, str] = {
    "rimas": "rhymes",
    "rima": "rhymes",
}
def process_pron_graf_template(
    wxr: WiktextractContext, word_entry: WordEntry, template_node: TemplateNode
) -> None:
    """Extract pronunciation data from a "pron-graf" template.

    The template is expanded and re-parsed, and the first table found in the
    expansion is read row by row. Only rows with exactly two cells are used;
    they are treated as a (header, value) pair and dispatched on the cleaned
    header text:

    * header ending in " (AFI)": handled by `process_pron_graf_ipa_cell`
    * "silabación" with a non-empty value: a `Hyphenation` whose parts are
      the value split on "-"
    * header listed in `PRON_GRAF_HEADER_MAP`: a `Sound` with the mapped
      field set to the value text
    * header ending in " alternativas", or "variantes": link cell stored in
      the "alternative" field
    * "homófonos": link cell stored in the "homophone" field
    * "transliteraciones": text cell stored in the "roman" field
    * "transcripciones silábicas": text cell stored in the "syllabic" field
    * anything else: kept untranslated in `word_entry.extra_sounds`

    Nothing is done when the expansion contains no table.

    Template page: https://es.wiktionary.org/wiki/Plantilla:pron-graf
    """
    # This template can create sound data without any parameter, so it is
    # expanded instead of reading its arguments. It expands to a table with
    # two columns.
    expanded = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(template_node), expand_all=True
    )
    first_table = next(iter(expanded.find_child(NodeKind.TABLE)), None)
    if first_table is None:
        return

    untranslated: dict[str, str] = {}  # header text -> value text
    for row in first_table.find_child(NodeKind.TABLE_ROW):
        cells = list(row.find_child(NodeKind.TABLE_CELL))
        if len(cells) != 2:
            continue
        header_cell, value_cell = cells
        header = clean_node(wxr, None, header_cell)
        value = clean_node(wxr, None, value_cell)

        if header.endswith(" (AFI)"):  # IPA
            process_pron_graf_ipa_cell(wxr, word_entry, value_cell, header)
        elif header == "silabación" and value != "":
            word_entry.hyphenations.append(
                Hyphenation(parts=value.split("-"))
            )
        elif header in PRON_GRAF_HEADER_MAP:
            mapped_sound = Sound()
            setattr(mapped_sound, PRON_GRAF_HEADER_MAP[header], value)
            word_entry.sounds.append(mapped_sound)
        elif header.endswith(" alternativas") or header == "variantes":
            process_pron_graf_link_cell(
                wxr, word_entry, value_cell, header, "alternative"
            )
        elif header == "homófonos":
            process_pron_graf_link_cell(
                wxr, word_entry, value_cell, header, "homophone"
            )
        elif header == "transliteraciones":
            process_pron_graf_text_cell(wxr, word_entry, value_cell, "roman")
        elif header == "transcripciones silábicas":
            process_pron_graf_text_cell(
                wxr, word_entry, value_cell, "syllabic"
            )
        else:
            # An empty "silabación" row also ends up here.
            untranslated[header] = value

    if untranslated:
        word_entry.extra_sounds = untranslated
    # NOTE(review): this pass hands the entry to clean_node, presumably so
    # page-level data from the expansion is recorded on it - confirm against
    # clean_node.
    clean_node(wxr, word_entry, expanded)
def _append_pron_graf_ipa_sound(
    word_entry: WordEntry, sound: Sound, header_text: str
) -> None:
    """Tag a completed IPA sound and append it to `word_entry.sounds`."""
    if not header_text.startswith("pronunciación"):  # location
        # The header without its " (AFI)" suffix is kept as a raw tag.
        sound.raw_tags.append(header_text.removesuffix(" (AFI)"))
    translate_raw_tags(sound)
    word_entry.sounds.append(sound)


def process_pron_graf_ipa_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    header_text: str,
) -> None:
    """Read the value cell of an IPA row of the "pron-graf" table.

    The direct children of `cell_node` are scanned in order:

    * non-blank strings are stripped and concatenated into `Sound.ipa`
    * a "phonos" HTML node supplies the sound file (its "file" attribute is
      passed to `set_sound_file_url_fields`); the text of each "small" tag
      inside it is added as a raw tag
    * a "br" HTML node ends the current sound, provided something has been
      collected for it

    Whatever is still pending after the last child is appended as well.

    Args:
        wxr: Extraction context passed on to the helper functions.
        word_entry: Entry whose `sounds` list receives the results.
        cell_node: The value cell of the table row.
        header_text: Cleaned header text of the row; unless it starts with
            "pronunciación", it is added (without the " (AFI)" suffix) as a
            raw tag of every sound from this cell.
    """
    sound = Sound()
    for node in cell_node.children:
        if isinstance(node, str):
            ipa_text = node.strip()
            if ipa_text:
                sound.ipa += ipa_text
        elif isinstance(node, HTMLNode) and node.tag == "phonos":
            sound_file = node.attrs.get("file", "")
            set_sound_file_url_fields(wxr, sound_file, sound)
            for small_tag in node.find_html("small"):
                sound.raw_tags.append(clean_node(wxr, None, small_tag))
        elif (
            isinstance(node, HTMLNode) and node.tag == "br" and sound != Sound()
        ):
            # `sound` is rebound to a fresh object right away, so the
            # finished one can be stored as-is without copying it.
            _append_pron_graf_ipa_sound(word_entry, sound, header_text)
            sound = Sound()
    if sound != Sound():
        # Data not followed by a closing "br".
        _append_pron_graf_ipa_sound(word_entry, sound, header_text)
def process_pron_graf_link_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    header_text: str,
    field_name: str,
) -> None:
    """Create one `Sound` per link found directly inside `cell_node`.

    The cleaned link text is stored in the `Sound` attribute named by
    `field_name`. When the child right after the link is a "ref" HTML node,
    its cleaned content becomes the sound's note. Sounds coming from a
    "variantes" row are flagged with `not_same_pronunciation`.

    Args:
        wxr: Extraction context passed to `clean_node`.
        word_entry: Entry whose `sounds` list receives the results.
        cell_node: The value cell of the table row.
        header_text: Cleaned header text of the row.
        field_name: Name of the `Sound` attribute to fill.
    """
    siblings = cell_node.children
    is_variant_row = header_text == "variantes"
    for position, link in cell_node.find_child(NodeKind.LINK, with_index=True):
        entry = Sound()
        setattr(entry, field_name, clean_node(wxr, None, link))

        following = siblings[position + 1 :][:1]
        if (
            following
            and isinstance(following[0], HTMLNode)
            and following[0].tag == "ref"
        ):
            # A "ref" tag next to the link holds the note text.
            entry.note = clean_node(wxr, None, following[0].children)

        if is_variant_row:
            entry.not_same_pronunciation = True
        word_entry.sounds.append(entry)
def process_pron_graf_text_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    field_name: str,
) -> None:
    """Read a comma-separated text cell of the "pron-graf" table.

    The direct children of `cell_node` are scanned in order. A non-blank
    string sets the `Sound` attribute named by `field_name`; a string that
    begins with a comma separator first closes the current sound and starts
    a new one. A "ref" HTML node supplies the note of the current sound.
    Only sounds whose `field_name` attribute is non-empty are appended to
    `word_entry.sounds`.

    Args:
        wxr: Extraction context passed to `clean_node`.
        word_entry: Entry whose `sounds` list receives the results.
        cell_node: The value cell of the table row.
        field_name: Name of the `Sound` attribute to fill.
    """
    sound = Sound()
    for node in cell_node.children:
        if isinstance(node, str):
            text = node.strip()
            if not text:
                continue
            # Stripping turns a lone ", " separator into ",", so that form
            # has to be recognised too; otherwise the "," would be stored
            # as the value and overwrite the pending one.
            if text == "," or text.startswith(", "):
                # Same guard as after the loop: never store an empty sound
                # (e.g. when the cell starts with a separator).
                if getattr(sound, field_name):
                    word_entry.sounds.append(sound)
                sound = Sound()
                text = text.removeprefix(",").strip()
            if text:
                setattr(sound, field_name, text)
        elif isinstance(node, HTMLNode) and node.tag == "ref":
            sound.note = clean_node(wxr, None, node.children)
    if getattr(sound, field_name):
        word_entry.sounds.append(sound)
144 word_entry.sounds.append(sound)