Coverage for src / wiktextract / extractor / th / sound.py: 57%
397 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
1import re
2from dataclasses import dataclass
4from wikitextprocessor import (
5 HTMLNode,
6 LevelNode,
7 NodeKind,
8 TemplateNode,
9 WikiNode,
10)
12from ...page import clean_node
13from ...wxr_context import WiktextractContext
14from ..share import set_sound_file_url_fields
15from .models import Hyphenation, Sound, WordEntry
16from .tags import translate_raw_tags
def extract_sound_section(
    wxr: WiktextractContext, base_data: WordEntry, level_node: LevelNode
):
    """Extract pronunciation data from a sound section.

    Templates may appear either as direct children of the section or
    nested inside bullet-list items; both locations are handled.
    """
    for t_node in level_node.find_child(NodeKind.TEMPLATE):
        if t_node.template_name == "zh-forms":
            # Imported lazily to avoid a circular import with page.py.
            from .page import extract_zh_forms

            extract_zh_forms(wxr, base_data, t_node)
        else:
            extract_sound_template(wxr, base_data, t_node)
    for list_node in level_node.find_child(NodeKind.LIST):
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            for template in item.find_child(NodeKind.TEMPLATE):
                extract_sound_template(wxr, base_data, template)
def extract_sound_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Route one pronunciation template to its dedicated extractor.

    The order of the checks matters: language-specific templates such as
    "th-pron" must be matched before the generic "*-ipa"/"*-pron" suffix
    fallback.
    """
    name = t_node.template_name
    lower_name = name.lower()
    if name in ("ja-pron", "ja-IPA"):
        extract_ja_pron_template(wxr, base_data, t_node)
    elif name == "th-pron":
        extract_th_pron_template(wxr, base_data, t_node)
    elif name == "lo-pron":
        extract_lo_pron_template(wxr, base_data, t_node)
    elif name == "zh-pron":
        extract_zh_pron_template(wxr, base_data, t_node)
    elif lower_name == "ipa" or lower_name.endswith(("-ipa", "-pron")):
        extract_ipa_template(wxr, base_data, t_node)
    elif name == "X-SAMPA":
        extract_x_sampa_template(wxr, base_data, t_node)
    elif name == "enPR":
        extract_enpr_template(wxr, base_data, t_node)
    elif name in ("audio", "Audio", "เสียง"):
        extract_audio_template(wxr, base_data, t_node)
    elif name in ("rhymes", "rhyme"):
        extract_rhymes_template(wxr, base_data, t_node)
    elif name in ("homophones", "homophone", "hmp"):
        extract_homophones_template(wxr, base_data, t_node)
    elif name in ("hyphenation", "hyph"):
        extract_hyphenation_template(wxr, base_data, t_node)
def extract_ipa_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Extract sounds from a generic IPA/pronunciation template."""
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    # Children that are not part of any list are gathered and processed
    # together as one pseudo list item.
    top_level_nodes = []
    for node in expanded_node.children:
        if isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_ipa_list_item(wxr, base_data, list_item)
        else:
            top_level_nodes.append(node)
    if len(top_level_nodes) > 0:
        pseudo_item = WikiNode(NodeKind.ROOT, 0)
        pseudo_item.children = top_level_nodes
        extract_ipa_list_item(wxr, base_data, pseudo_item)
    clean_node(wxr, base_data, expanded_node)
def extract_ipa_list_item(
    wxr: WiktextractContext, base_data: WordEntry, list_item: WikiNode
):
    """Extract IPA and romanization sounds from one expanded list item."""
    raw_tags = []
    # Italic text holds qualifier labels, e.g. Template:vi-ipa location data.
    for italic_node in list_item.find_child(NodeKind.ITALIC):
        label = clean_node(wxr, None, italic_node)
        if label != "":
            raw_tags.append(label)
    for span_tag in list_item.find_html_recursively("span"):
        classes = span_tag.attrs.get("class", "").split()
        if "qualifier-content" in classes or "ib-content" in classes:
            for label in clean_node(wxr, None, span_tag).split(","):
                label = label.strip()
                if label != "":
                    raw_tags.append(label)
        elif "IPA" in classes:
            sound = Sound(
                ipa=clean_node(wxr, None, span_tag), raw_tags=raw_tags
            )
            if sound.ipa != "":
                translate_raw_tags(sound)
                base_data.sounds.append(sound)
        elif "Latn" in classes:
            sound = Sound(
                roman=clean_node(wxr, None, span_tag), raw_tags=raw_tags
            )
            if sound.roman != "":
                translate_raw_tags(sound)
                base_data.sounds.append(sound)
def extract_ja_pron_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Extract Japanese pronunciation data from ja-pron / ja-IPA templates."""
    # Thai labels for Japanese pitch-accent patterns mapped to English tags.
    accent_names = {
        "นากาดากะ": "Nakadaka",
        "เฮบัง": "Heiban",
        "อาตามาดากะ": "Atamadaka",
        "โอดากะ": "Odaka",
    }
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    for li_tag in expanded_node.find_html_recursively("li"):
        sound = Sound()
        for span_tag in li_tag.find_html("span"):
            classes = span_tag.attrs.get("class", "").split()
            if "usage-label-accent" in classes:
                label = clean_node(wxr, None, span_tag).strip("() ")
                if label != "":
                    sound.raw_tags.append(label)
            elif "IPA" in classes:
                sound.ipa = clean_node(wxr, None, span_tag)
            elif "Latn" in classes:
                sound.roman = clean_node(wxr, None, span_tag)
            elif span_tag.attrs.get("lang", "") == "ja":
                sound.other = clean_node(wxr, None, span_tag)
        for link_node in li_tag.find_child(NodeKind.LINK):
            link_text = clean_node(wxr, None, link_node)
            if link_text in accent_names:
                sound.tags.append(accent_names[link_text])
        if sound.ipa != "" or sound.other != "":
            translate_raw_tags(sound)
            base_data.sounds.append(sound)
    # Optional audio file given via the "a" or "audio" parameter.
    audio_file = t_node.template_parameters.get(
        "a", t_node.template_parameters.get("audio", "")
    ).strip()
    if audio_file != "":
        audio_sound = Sound()
        set_sound_file_url_fields(wxr, audio_file, audio_sound)
        base_data.sounds.append(audio_sound)
    clean_node(wxr, base_data, expanded_node)
def extract_x_sampa_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Extract an X-SAMPA transcription from the first template argument."""
    ipa_text = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    if ipa_text != "":
        base_data.sounds.append(Sound(ipa=ipa_text, tags=["X-SAMPA"]))
def extract_enpr_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Extract an enPR (AHD) transcription from the first template argument."""
    enpr_text = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    if enpr_text != "":
        base_data.sounds.append(Sound(enpr=enpr_text))
def extract_audio_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Extract an audio file (arg 2) and its qualifier labels (arg "a")."""
    filename = clean_node(wxr, None, t_node.template_parameters.get(2, ""))
    if filename != "":
        sound = Sound()
        set_sound_file_url_fields(wxr, filename, sound)
        labels = clean_node(wxr, None, t_node.template_parameters.get("a", ""))
        for raw_tag in labels.split(","):
            raw_tag = raw_tag.strip()
            if raw_tag != "":
                sound.raw_tags.append(raw_tag)
        translate_raw_tags(sound)
        base_data.sounds.append(sound)
    clean_node(wxr, base_data, t_node)
def extract_th_pron_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Extract Thai pronunciation data from an expanded th-pron table."""
    # https://th.wiktionary.org/wiki/แม่แบบ:th-pron

    @dataclass
    class TableHeader:
        raw_tags: list[str]
        rowspan: int

    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    for table_tag in expanded_node.find_html("table"):
        row_headers: list[TableHeader] = []
        for tr_tag in table_tag.find_html("tr"):
            field = "other"
            # Keep only headers whose rowspan still covers this row,
            # consuming one row of each surviving span.
            row_headers = [h for h in row_headers if h.rowspan > 1]
            for carried in row_headers:
                carried.rowspan -= 1
            for th_tag in tr_tag.find_html("th"):
                header_str = clean_node(wxr, None, th_tag)
                if header_str.startswith("(มาตรฐาน) สัทอักษรสากล"):
                    field = "ipa"
                elif header_str.startswith("คำพ้องเสียง"):
                    field = "homophone"
                elif header_str == "ไฟล์เสียง":
                    field = "audio"
                elif header_str != "":
                    rowspan_str = th_tag.attrs.get("rowspan", "1")
                    rowspan = (
                        int(rowspan_str)
                        if re.fullmatch(r"\d+", rowspan_str)
                        else 1
                    )
                    header = TableHeader([], rowspan)
                    for line in header_str.splitlines():
                        for raw_tag in line.strip("{}\n ").split(";"):
                            raw_tag = raw_tag.strip()
                            if raw_tag != "":
                                header.raw_tags.append(raw_tag)
                    row_headers.append(header)

            for td_tag in tr_tag.find_html("td"):
                if field == "audio":
                    for link_node in td_tag.find_child(NodeKind.LINK):
                        filename = clean_node(wxr, None, link_node.largs[0])
                        if filename != "":
                            sound = Sound()
                            set_sound_file_url_fields(wxr, filename, sound)
                            base_data.sounds.append(sound)
                elif field == "homophone":
                    for span_tag in td_tag.find_html_recursively(
                        "span", attr_name="lang", attr_value="th"
                    ):
                        word = clean_node(wxr, None, span_tag)
                        if word != "":
                            base_data.sounds.append(Sound(homophone=word))
                else:
                    raw_tags = []
                    for html_node in td_tag.find_child_recursively(
                        NodeKind.HTML
                    ):
                        if html_node.tag == "small":
                            node_str = clean_node(wxr, None, html_node)
                            # Inline labels are wrapped in square brackets.
                            if node_str.startswith("[") and node_str.endswith(
                                "]"
                            ):
                                for raw_tag in node_str.strip("[]").split(","):
                                    raw_tag = raw_tag.strip()
                                    if raw_tag != "":
                                        raw_tags.append(raw_tag)
                        elif html_node.tag == "span":
                            node_str = clean_node(wxr, None, html_node)
                            span_lang = html_node.attrs.get("lang", "")
                            span_class = html_node.attrs.get("class", "")
                            if node_str != "" and (
                                span_lang == "th"
                                or span_class in ["IPA", "tr"]
                            ):
                                sound = Sound(raw_tags=raw_tags)
                                for header in row_headers:
                                    sound.raw_tags.extend(header.raw_tags)
                                translate_raw_tags(sound)
                                if "romanization" in sound.tags:
                                    # Once a romanization row is detected the
                                    # remaining spans of this row also go to
                                    # the "roman" field.
                                    field = "roman"
                                setattr(sound, field, node_str)
                                base_data.sounds.append(sound)
    clean_node(wxr, base_data, expanded_node)
def extract_lo_pron_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Extract Lao pronunciation, hyphenation and rhyme data."""
    # https://th.wiktionary.org/wiki/แม่แบบ:lo-pron
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    for list_node in expanded_node.find_child(NodeKind.LIST):
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            field = "other"
            raw_tag = ""
            for node in list_item.children:
                if isinstance(node, HTMLNode) and node.tag == "span":
                    span_class = node.attrs.get("class", "")
                    if "qualifier-content" in span_class:
                        raw_tag = clean_node(wxr, None, node)
                    elif span_class == "IPA":
                        ipa = clean_node(wxr, None, node)
                        if ipa != "":
                            sound = Sound(ipa=ipa)
                            if raw_tag != "":
                                sound.raw_tags.append(raw_tag)
                                translate_raw_tags(sound)
                            base_data.sounds.append(sound)
                    elif (
                        node.attrs.get("lang", "") == "lo"
                        and field == "hyphenation"
                    ):
                        span_str = clean_node(wxr, None, node)
                        if span_str != "":
                            base_data.hyphenations.append(
                                Hyphenation(parts=span_str.split("-"))
                            )
                elif isinstance(node, WikiNode) and node.kind == NodeKind.LINK:
                    link_str = clean_node(wxr, None, node)
                    if link_str == "สัทอักษรสากล":
                        field = "ipa"
                    elif link_str != "" and field == "rhymes":
                        base_data.sounds.append(Sound(rhymes=link_str))
                elif isinstance(node, str) and node.strip().endswith(":"):
                    # Plain-text labels switch the target field for the
                    # nodes that follow them.
                    stripped = node.strip()
                    if stripped == "การแบ่งพยางค์:":
                        field = "hyphenation"
                    elif stripped == "สัมผัส:":
                        field = "rhymes"
    clean_node(wxr, base_data, expanded_node)
def extract_zh_pron_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Extract Chinese pronunciation data from an expanded zh-pron template."""
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    seen_lists: set = set()
    sounds: list = []
    for list_node in expanded_node.find_child_recursively(NodeKind.LIST):
        # Nested lists already visited by extract_zh_pron_list_item are
        # recorded in seen_lists and skipped here.
        if list_node in seen_lists:
            continue
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            sounds.extend(
                extract_zh_pron_list_item(wxr, list_item, [], seen_lists)
            )
    for sound in sounds:
        translate_raw_tags(sound)
    base_data.sounds.extend(sounds)
    clean_node(wxr, base_data, expanded_node)
def extract_zh_pron_list_item(
    wxr: WiktextractContext,
    list_item_node: WikiNode,
    raw_tags: list[str],
    seen_lists: set[WikiNode],
) -> list[Sound]:
    """Recursively extract sounds from one zh-pron list item.

    ``raw_tags`` carries qualifier labels from parent list levels;
    ``seen_lists`` records nested lists so the caller does not visit
    them a second time.
    """
    tags = raw_tags[:]
    sounds: list[Sound] = []
    first_small = True
    for child in list_item_node.children:
        if not isinstance(child, WikiNode):
            continue
        if child.kind == NodeKind.LINK:
            target = clean_node(wxr, None, child.largs)
            text = clean_node(wxr, None, child)
            if target.startswith(("File:", "ไฟล์:")):
                filename = target.removeprefix("File:").removeprefix("ไฟล์:")
                audio = Sound(raw_tags=tags)
                set_sound_file_url_fields(wxr, filename, audio)
                sounds.append(audio)
            elif text != "":
                tags.append(text.strip("()"))
        elif isinstance(child, HTMLNode):
            if child.tag == "small":
                if first_small:
                    # The first <small> is a label; drop <sup> footnote
                    # markers before reading it.
                    label = clean_node(
                        wxr,
                        None,
                        [
                            n
                            for n in child.children
                            if not (isinstance(n, HTMLNode) and n.tag == "sup")
                        ],
                    ).rstrip(":")
                    tags.extend(split_zh_pron_raw_tag(label))
                elif len(sounds) > 0:
                    # Later <small> labels qualify the preceding sound.
                    sounds[-1].raw_tags.extend(
                        split_zh_pron_raw_tag(clean_node(wxr, None, child))
                    )
                first_small = False
            elif child.tag == "span":
                sounds.extend(extract_zh_pron_span(wxr, child, tags))
            elif (
                child.tag == "table"
                and len(tags) > 0
                and tags[-1] == "คำพ้องเสียง"
            ):
                sounds.extend(
                    extract_zh_pron_homophones_table(wxr, child, tags)
                )
        elif child.kind == NodeKind.LIST:
            seen_lists.add(child)
            for sub_item in child.find_child(NodeKind.LIST_ITEM):
                sounds.extend(
                    extract_zh_pron_list_item(wxr, sub_item, tags, seen_lists)
                )
    return sounds
def split_zh_pron_raw_tag(raw_tag_text: str) -> list[str]:
    """Split a zh-pron qualifier string into individual raw tags.

    Text without parentheses is split on commas, colons, semicolons and
    " and ", with an "incl. " prefix stripped from each piece.
    Parenthesized groups are split recursively; text outside the
    parentheses is processed first and its tags come before the
    parenthesized ones.
    """
    if "(" not in raw_tag_text:
        pieces = (
            part.strip().removeprefix("incl. ").strip()
            for part in re.split(r",|:|;| and ", raw_tag_text)
        )
        return [piece for piece in pieces if piece != ""]
    result: list[str] = []
    spans: list[tuple[int, int]] = []
    for match in re.finditer(r"\([^()]+\)", raw_tag_text):
        spans.append((match.start(), match.end()))
        result.extend(
            split_zh_pron_raw_tag(
                raw_tag_text[match.start() + 1 : match.end() - 1]
            )
        )
    # Reassemble the text that was not inside any parentheses.
    leftover = ""
    last_end = 0
    for start, end in spans:
        leftover += raw_tag_text[last_end:start]
        last_end = end
    leftover += raw_tag_text[last_end:]
    if leftover != raw_tag_text:
        result = split_zh_pron_raw_tag(leftover) + result
    else:
        # "(" present but no complete "(...)" group matched.
        result.append(leftover)
    return result
def extract_zh_pron_span(
    wxr: WiktextractContext, span_tag: HTMLNode, raw_tags: list[str]
) -> list[Sound]:
    """Build Sound objects from one pronunciation <span> element."""
    sounds: list[Sound] = []
    small_tags: list[str] = []
    pron_nodes = []
    roman = ""
    phonetic_pron = ""
    for index, child in enumerate(span_tag.children):
        if isinstance(child, HTMLNode) and child.tag == "small":
            small_tags = split_zh_pron_raw_tag(clean_node(wxr, None, child))
        elif (
            isinstance(child, HTMLNode)
            and child.tag == "span"
            and "-Latn" in child.attrs.get("lang", "")
        ):
            roman = clean_node(wxr, None, child).strip("() ")
        elif isinstance(child, str) and child.strip() == "[Phonetic:":
            # Everything after this marker is the phonetic variant.
            phonetic_pron = clean_node(
                wxr, None, span_tag.children[index + 1 :]
            ).strip("] ")
            break
        else:
            pron_nodes.append(child)
    is_ipa = "IPA" in span_tag.attrs.get("class", "")
    for zh_pron in split_zh_pron(clean_node(wxr, None, pron_nodes)):
        zh_pron = zh_pron.strip("[]: ")
        if len(zh_pron) > 0:
            if is_ipa:
                sounds.append(
                    Sound(ipa=zh_pron, roman=roman, raw_tags=raw_tags)
                )
            else:
                sounds.append(
                    Sound(zh_pron=zh_pron, roman=roman, raw_tags=raw_tags)
                )
    if len(sounds) > 0:
        sounds[-1].raw_tags.extend(small_tags)
    if phonetic_pron != "":
        sounds.append(
            Sound(
                zh_pron=phonetic_pron,
                roman=roman,
                raw_tags=raw_tags + ["Phonetic"],
            )
        )
    return sounds
def split_zh_pron(zh_pron: str) -> list[str]:
    """Split a pronunciation string on separators outside parentheses.

    Separators are ",", ";", "→" and "/" ("/" only when the whole string
    does not start with "/", so IPA-style "/.../" stays intact).
    Separators inside parentheses never split.  Each returned segment is
    stripped of surrounding whitespace.
    """
    depth = 0
    pron_list: list[str] = []
    pron = ""
    for c in zh_pron:
        is_separator = c in (",", ";", "→") or (
            c == "/" and not zh_pron.startswith("/")
        )
        if is_separator and depth == 0 and pron.strip() != "":
            pron_list.append(pron.strip())
            pron = ""
        else:
            if c == "(":
                depth += 1
            elif c == ")":
                depth -= 1
            # Separators that don't split (inside parentheses, or with an
            # empty buffer) are kept as part of the segment.
            pron += c
    if pron.strip() != "":
        # Fix: strip the final segment like all earlier ones; the original
        # appended it with surrounding whitespace intact.
        pron_list.append(pron.strip())
    return pron_list
def extract_zh_pron_homophones_table(
    wxr: WiktextractContext, table: HTMLNode, raw_tags: list[str]
) -> list[Sound]:
    """Extract homophone words from a zh-pron homophone table."""
    script_tag_map = {
        "Hant": "Traditional-Chinese",
        "Hans": "Simplified-Chinese",
    }
    sounds: list[Sound] = []
    for td_tag in table.find_html_recursively("td"):
        for span_tag in td_tag.find_html("span"):
            span_class = span_tag.attrs.get("class", "")
            span_lang = span_tag.attrs.get("lang", "")
            word = clean_node(wxr, None, span_tag)
            if (
                word not in ["", "/"]
                and span_lang != ""
                and span_class in ["Hant", "Hans", "Hani"]
            ):
                sound = Sound(homophone=word, raw_tags=raw_tags)
                if span_class in script_tag_map:
                    sound.tags.append(script_tag_map[span_class])
                sounds.append(sound)
    return sounds
def extract_rhymes_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Extract rhyme links from an expanded rhymes template."""
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    for link_node in expanded_node.find_child(NodeKind.LINK):
        rhyme = clean_node(wxr, base_data, link_node)
        if rhyme != "":
            base_data.sounds.append(Sound(rhymes=rhyme))
def extract_homophones_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Extract homophones, their romanizations and qualifier labels."""
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    lang_code = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    homophones: list[Sound] = []
    for top_span in expanded_node.find_html(
        "span", attr_name="class", attr_value="homophones"
    ):
        for span_tag in top_span.find_html("span"):
            span_lang = span_tag.attrs.get("lang", "")
            span_classes = span_tag.attrs.get("class", "").split()
            if "tr" in span_classes and len(homophones) > 0:
                # A romanization span belongs to the preceding homophone.
                homophones[-1].roman = clean_node(wxr, None, span_tag)
            elif span_lang == lang_code:
                word = clean_node(wxr, None, span_tag)
                if word != "":
                    homophones.append(Sound(homophone=word))
            elif "qualifier-content" in span_classes and len(homophones) > 0:
                label = clean_node(wxr, None, span_tag)
                if label != "":
                    homophones[-1].raw_tags.append(label)
                    translate_raw_tags(homophones[-1])
    base_data.sounds.extend(homophones)
    for link_node in expanded_node.find_child(NodeKind.LINK):
        clean_node(wxr, base_data, link_node)
def extract_hyphenation_template(
    wxr: WiktextractContext, base_data: WordEntry, t_node: TemplateNode
):
    """Extract hyphenation parts separated by "‧" from the template."""
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    lang_code = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    for span_tag in expanded_node.find_html(
        "span", attr_name="lang", attr_value=lang_code
    ):
        parts = [
            part.strip()
            for part in clean_node(wxr, None, span_tag).split("‧")
            if part.strip() != ""
        ]
        if len(parts) > 0:
            base_data.hyphenations.append(Hyphenation(parts=parts))