Coverage for src/wiktextract/extractor/es/pronunciation.py: 90%
86 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import create_audio_url_dict
6from .models import Sound, WordEntry
# Maps a pron-graf table row header (Spanish) to the Sound model field that
# stores the row's value. Both spellings of the rhyme header share one field.
PRON_GRAF_HEADER_MAP: dict[str, str] = dict.fromkeys(
    ("rimas", "rima"), "rhymes"
)
def process_pron_graf_template(
    wxr: WiktextractContext, word_entry: WordEntry, template_node: TemplateNode
) -> None:
    """Extract pronunciation data from a ``pron-graf`` template.

    See https://es.wiktionary.org/wiki/Plantilla:pron-graf . The template can
    produce sound data even when called without parameters, and it expands to
    a two-column table (header cell, value cell). Each two-cell row is routed
    by its header text: IPA rows, hyphenation, rhymes, alternative forms,
    homophones, transliterations and syllabic transcriptions are stored on
    ``word_entry``; rows with any other header are gathered, untranslated,
    into ``word_entry.extra_sounds``.

    Returns early, leaving ``word_entry`` untouched, when the expansion
    contains no table.
    """
    wikitext = wxr.wtp.node_to_wikitext(template_node)
    root = wxr.wtp.parse(wikitext, expand_all=True)
    tables = list(root.find_child(NodeKind.TABLE))
    if not tables:
        return

    untranslated = {}  # header text -> value text, kept in Spanish
    for row in tables[0].find_child(NodeKind.TABLE_ROW):
        cells = list(row.find_child(NodeKind.TABLE_CELL))
        if len(cells) != 2:
            # only "header | value" rows carry data
            continue
        header_cell, value_cell = cells
        header = clean_node(wxr, None, header_cell)
        value = clean_node(wxr, None, value_cell)

        if header.endswith(" (AFI)"):  # IPA
            process_pron_graf_ipa_cell(wxr, word_entry, value_cell, header)
            continue
        if header == "silabación":
            word_entry.hyphenation = value
            continue
        if header in PRON_GRAF_HEADER_MAP:
            mapped = Sound()
            setattr(mapped, PRON_GRAF_HEADER_MAP[header], value)
            word_entry.sounds.append(mapped)
            continue
        if header == "variantes" or header.endswith(" alternativas"):
            process_pron_graf_link_cell(
                wxr, word_entry, value_cell, header, "alternative"
            )
            continue
        if header == "homófonos":
            process_pron_graf_link_cell(
                wxr, word_entry, value_cell, header, "homophone"
            )
            continue
        if header == "transliteraciones":
            process_pron_graf_text_cell(wxr, word_entry, value_cell, "roman")
            continue
        if header == "transcripciones silábicas":
            process_pron_graf_text_cell(
                wxr, word_entry, value_cell, "syllabic"
            )
            continue
        untranslated[header] = value

    if untranslated:
        word_entry.extra_sounds = untranslated
    # Final pass over the whole expansion with word_entry attached.
    # NOTE(review): presumably this lets clean_node record page-level data
    # (e.g. categories) on word_entry - confirm against clean_node.
    clean_node(wxr, word_entry, root)
def process_pron_graf_ipa_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    header_text: str,
) -> None:
    """Turn the value cell of an "... (AFI)" row into ``Sound`` entries.

    The cell's children are read in order: non-blank strings are appended to
    the current sound's ``ipa``; a ``<phonos>`` element supplies audio URLs
    (from its ``file`` attribute) and raw tags (from nested ``<small>``
    elements); a ``<br>`` element closes the current sound and starts a new
    one. A sound is only stored on ``word_entry.sounds`` if it differs from a
    blank ``Sound()``. When the header does not start with "pronunciación",
    the header text minus its " (AFI)" suffix is added as a raw tag, since
    such headers name a location.
    """
    header_is_location = not header_text.startswith("pronunciación")
    location_tag = header_text.removesuffix(" (AFI)")

    current = Sound()
    for child in cell_node.children:
        if isinstance(child, str):
            text = child.strip()
            if len(text) > 0:
                current.ipa += text
            continue
        if not isinstance(child, HTMLNode):
            continue

        if child.tag == "phonos":
            audio_file = child.attrs.get("file", "")
            for key, url in create_audio_url_dict(audio_file).items():
                # keep only the URL kinds the Sound model declares
                if hasattr(current, key):
                    setattr(current, key, url)
            for small_node in child.find_html("small"):
                current.raw_tags.append(clean_node(wxr, None, small_node))
        elif child.tag == "br" and current != Sound():
            # line break separates pronunciations inside one cell
            if header_is_location:
                current.raw_tags.append(location_tag)
            word_entry.sounds.append(current.model_copy(deep=True))
            current = Sound()

    # store whatever is pending after the last child
    if current != Sound():
        if header_is_location:
            current.raw_tags.append(location_tag)
        word_entry.sounds.append(current)
def process_pron_graf_link_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    header_text: str,
    field_name: str,
) -> None:
    """Create one ``Sound`` per link found directly inside ``cell_node``.

    The cleaned link text is stored in the ``Sound`` attribute named by
    ``field_name``. If the node immediately after the link is a ``<ref>``
    element, its cleaned content becomes the sound's ``note``. Rows whose
    header is exactly "variantes" are additionally flagged with
    ``not_same_pronunciation = True``. Every sound is appended to
    ``word_entry.sounds``.
    """
    siblings = cell_node.children
    for position, link in cell_node.find_child(NodeKind.LINK, with_index=True):
        entry = Sound()
        setattr(entry, field_name, clean_node(wxr, None, link))

        after = position + 1
        following = siblings[after] if after < len(siblings) else None
        if isinstance(following, HTMLNode) and following.tag == "ref":
            # a "ref" tag right after the link holds the note text
            entry.note = clean_node(wxr, None, following.children)

        if header_text == "variantes":
            entry.not_same_pronunciation = True
        word_entry.sounds.append(entry)
def process_pron_graf_text_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    field_name: str,
) -> None:
    """Create ``Sound`` entries from the plain-text children of ``cell_node``.

    Each non-blank string child is stripped and stored in the ``Sound``
    attribute named by ``field_name``. A string that (after stripping) starts
    with ", " marks the start of a new value: the sound built so far is
    appended to ``word_entry.sounds`` and a fresh one is started. A ``<ref>``
    element sets the ``note`` of the sound currently being built. The last
    sound is appended only if its ``field_name`` attribute is non-empty.
    """
    current = Sound()
    for child in cell_node.children:
        if isinstance(child, HTMLNode):
            if child.tag == "ref":
                current.note = clean_node(wxr, None, child.children)
            continue
        if not isinstance(child, str):
            continue

        text = child.strip()
        if len(text) == 0:
            continue
        if text.startswith(", "):
            # separator: store the sound built so far, then begin a new one
            word_entry.sounds.append(current.model_copy(deep=True))
            current = Sound()
            text = text.removeprefix(", ")
        setattr(current, field_name, text.strip())

    if len(getattr(current, field_name)) > 0:
        word_entry.sounds.append(current)