Coverage for src/wiktextract/extractor/ko/sound.py: 91%
91 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Sound, WordEntry
# Names of the pronunciation templates that extract_sound_template() has a
# dedicated extractor for.
SOUND_TEMPLATES = frozenset(("발음 듣기", "IPA", "ko-IPA", "ja-pron"))
def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Process every template found anywhere below a pronunciation section.

    Each template node under ``level_node`` (searched recursively) is passed
    to ``extract_sound_template`` together with ``word_entry``.
    """
    templates = level_node.find_child_recursively(NodeKind.TEMPLATE)
    for template in templates:
        extract_sound_template(wxr, word_entry, template)
def extract_sound_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Route a template node to the extractor registered for its name.

    Templates whose name has no extractor are silently ignored.
    """
    extractors = {
        "발음 듣기": extract_listen_pronunciation_template,
        "IPA": extract_ipa_template,
        "ko-IPA": extract_ko_ipa_template,
        "ja-pron": extract_ja_pron_template,
    }
    extractor = extractors.get(node.template_name)
    if extractor is not None:
        extractor(wxr, word_entry, node)
def extract_listen_pronunciation_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract audio files and their labels from a "발음 듣기" template.

    Positional parameters 1-8 are read as (file, label) pairs:

    * an odd parameter is an audio file name; a new ``Sound`` is filled in
      by ``set_sound_file_url_fields`` and appended to ``word_entry.sounds``;
    * the following even parameter is a label, stored in ``raw_tags`` of the
      sound created from the file parameter of the same pair.

    Scanning stops at the first positional parameter that is absent. Empty
    values are skipped. A label whose file parameter was empty is dropped
    instead of being attached to an unrelated sound that happens to be last
    in ``word_entry.sounds`` (from a previous pair or a previous template).
    """
    # https://ko.wiktionary.org/wiki/틀:발음_듣기
    pair_has_sound = False  # did the current pair's file parameter add a sound?
    for key in range(1, 9):
        if key not in node.template_parameters:
            break
        value = clean_node(wxr, None, node.template_parameters[key])
        if key % 2 == 1:
            # Start of a new (file, label) pair.
            pair_has_sound = value != ""
            if pair_has_sound:
                sound = Sound()
                set_sound_file_url_fields(wxr, value, sound)
                word_entry.sounds.append(sound)
        elif value != "" and pair_has_sound:
            # Nothing is appended between the file and its label, so the
            # last sound is the one created for this pair.
            word_entry.sounds[-1].raw_tags.append(value)
def extract_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract IPA transcriptions and their labels from an "IPA" template.

    Positional parameters 1-4 are read as (transcription, label) pairs:

    * an odd parameter becomes the ``ipa`` value of a new ``Sound`` appended
      to ``word_entry.sounds``;
    * the following even parameter is a label, stored in ``raw_tags`` of the
      sound created from the transcription of the same pair.

    Scanning stops at the first positional parameter that is absent. Empty
    values are skipped. A label whose transcription was empty is dropped
    instead of being attached to an unrelated sound that happens to be last
    in ``word_entry.sounds`` (from a previous pair or a previous template).
    """
    # https://ko.wiktionary.org/wiki/틀:IPA
    pair_has_sound = False  # did the current pair's IPA parameter add a sound?
    for key in range(1, 5):
        if key not in node.template_parameters:
            break
        value = clean_node(wxr, None, node.template_parameters[key])
        if key % 2 == 1:
            # Start of a new (transcription, label) pair.
            pair_has_sound = value != ""
            if pair_has_sound:
                word_entry.sounds.append(Sound(ipa=value))
        elif value != "" and pair_has_sound:
            # Nothing is appended between the transcription and its label,
            # so the last sound is the one created for this pair.
            word_entry.sounds[-1].raw_tags.append(value)
def extract_ko_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract pronunciations from the expanded output of a "ko-IPA" template.

    The template is expanded and re-parsed, then three parts of the result
    are read:

    * ``<ul>``/``<li>`` items: the first ``<i>`` tag gives a raw tag, a span
      with class ``IPA`` gives ``ipa`` and a span with class ``Kore`` gives
      ``hangul``; the item is kept when either value is non-empty;
    * ``<table>``/``<tr>`` rows: the spans of the first ``<th>`` give raw
      tags and the first ``<td class="IPA">`` gives ``roman``; the row is
      kept when ``roman`` is non-empty;
    * top-level link nodes are passed to ``clean_node`` together with
      ``word_entry`` (presumably so category links get recorded on the
      entry; confirm against ``clean_node``).
    """
    # https://ko.wiktionary.org/wiki/틀:ko-IPA
    wikitext = wxr.wtp.node_to_wikitext(node)
    root = wxr.wtp.parse(wikitext, expand_all=True)

    # Pronunciation list items.
    for ul in root.find_html("ul"):
        for li in ul.find_html("li"):
            entry = Sound()
            # Only the first <i> tag is used as a label.
            label = next(iter(li.find_html("i")), None)
            if label is not None:
                entry.raw_tags.append(clean_node(wxr, None, label))
            for span in li.find_html("span"):
                css_class = span.attrs.get("class", "")
                if css_class == "IPA":
                    entry.ipa = clean_node(wxr, None, span)
                elif css_class == "Kore":
                    entry.hangul = clean_node(wxr, None, span)
            if not (entry.hangul == "" and entry.ipa == ""):
                word_entry.sounds.append(entry)

    # Romanization table rows.
    for table in root.find_html("table"):
        for row in table.find_html("tr"):
            entry = Sound()
            # Only the first header cell contributes tags.
            header = next(iter(row.find_html("th")), None)
            if header is not None:
                for span in header.find_html("span"):
                    entry.raw_tags.append(clean_node(wxr, None, span))
            # Only the first matching data cell is used.
            cell = next(
                iter(row.find_html("td", attr_name="class", attr_value="IPA")),
                None,
            )
            if cell is not None:
                entry.roman = clean_node(wxr, None, cell)
            if entry.roman != "":
                word_entry.sounds.append(entry)

    for link in root.find_child(NodeKind.LINK):
        clean_node(wxr, word_entry, link)
def extract_ja_pron_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract pronunciations from the expanded output of a "ja-pron" template.

    The template is expanded and re-parsed. For every ``<li>`` of every
    ``<ul>``, spans are read by their ``class`` attribute:

    * ``usage-label-accent``: text with surrounding parentheses stripped,
      appended to ``raw_tags``;
    * ``Jpan``: stored in ``other``;
    * ``Latn``: stored in ``roman``;
    * ``IPA``: stored in ``ipa``.

    An item is appended to ``word_entry.sounds`` when ``ipa`` or ``roman`` is
    non-empty. Finally the whole expanded tree is passed to ``clean_node``
    together with ``word_entry`` (presumably so category links get recorded
    on the entry; confirm against ``clean_node``).
    """
    # https://ko.wiktionary.org/wiki/틀:ja-pron
    wikitext = wxr.wtp.node_to_wikitext(node)
    root = wxr.wtp.parse(wikitext, expand_all=True)

    # span class -> Sound attribute that receives the span's text
    field_by_class = {"Jpan": "other", "Latn": "roman", "IPA": "ipa"}

    for ul in root.find_html("ul"):
        for li in ul.find_html("li"):
            entry = Sound()
            for span in li.find_html("span"):
                css_class = span.attrs.get("class", "")
                if css_class == "usage-label-accent":
                    accent = clean_node(wxr, None, span).strip("()")
                    entry.raw_tags.append(accent)
                elif css_class in field_by_class:
                    setattr(
                        entry,
                        field_by_class[css_class],
                        clean_node(wxr, None, span),
                    )
            if not (entry.ipa == "" and entry.roman == ""):
                word_entry.sounds.append(entry)
    clean_node(wxr, word_entry, root)