Coverage for src/wiktextract/extractor/ko/sound.py: 91%
100 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..share import set_sound_file_url_fields
6from .models import Sound, WordEntry
7from .tags import translate_raw_tags
# Names of the templates that extract_sound_template() knows how to handle.
SOUND_TEMPLATES = frozenset({"발음 듣기", "IPA", "ko-IPA", "ja-pron"})
def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Process every template found anywhere below a pronunciation section.

    Each template node returned by a recursive search of ``level_node`` is
    handed to ``extract_sound_template``, which adds any sounds it finds to
    ``word_entry``.
    """
    templates = level_node.find_child_recursively(NodeKind.TEMPLATE)
    for template_node in templates:
        extract_sound_template(wxr, word_entry, template_node)
def extract_sound_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Route a template node to the extractor matching its template name.

    Templates whose name has no registered extractor are ignored.
    """
    extractors = {
        "발음 듣기": extract_listen_pronunciation_template,
        "IPA": extract_ipa_template,
        "ko-IPA": extract_ko_ipa_template,
        "ja-pron": extract_ja_pron_template,
    }
    extractor = extractors.get(node.template_name)
    if extractor is None:
        return
    extractor(wxr, word_entry, node)
def extract_listen_pronunciation_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract audio entries from a "발음 듣기" template.

    Positional arguments 1-8 are read in order and treated as pairs: an
    odd-numbered argument creates a new ``Sound`` (its cleaned value is
    passed to ``set_sound_file_url_fields``), and the following
    even-numbered argument is added to that sound as a raw tag. Reading
    stops at the first positional argument that is absent.

    A label is only attached to the sound created by *this* template's
    preceding file argument. If that file argument was empty, the label is
    dropped instead of being attached to whatever sound happens to be last
    in ``word_entry.sounds``.
    """
    # https://ko.wiktionary.org/wiki/틀:발음_듣기
    current_sound = None  # sound created by the latest non-empty file arg
    for key in range(1, 9):
        if key not in node.template_parameters:
            break
        value = clean_node(wxr, None, node.template_parameters[key])
        if key % 2 == 1:
            if value == "":
                # No file for this pair: its label must not leak onto an
                # earlier, unrelated sound.
                current_sound = None
                continue
            current_sound = Sound()
            set_sound_file_url_fields(wxr, value, current_sound)
            word_entry.sounds.append(current_sound)
        elif value != "" and current_sound is not None:
            current_sound.raw_tags.append(value)
            translate_raw_tags(current_sound)
def extract_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract IPA transcriptions from an "IPA" template.

    Positional arguments 1-4 are read in order and treated as pairs: an
    odd-numbered argument creates a new ``Sound`` whose ``ipa`` is the
    cleaned value, and the following even-numbered argument is split on
    commas into raw tags for that sound. Reading stops at the first
    positional argument that is absent.

    Labels are only attached to the sound created by *this* template's
    preceding IPA argument. If that argument was empty, the labels are
    dropped instead of being attached to whatever sound happens to be last
    in ``word_entry.sounds``.
    """
    # https://ko.wiktionary.org/wiki/틀:IPA
    current_sound = None  # sound created by the latest non-empty IPA arg
    for key in range(1, 5):
        if key not in node.template_parameters:
            break
        value = clean_node(wxr, None, node.template_parameters[key])
        if key % 2 == 1:
            if value == "":
                # No transcription for this pair: its labels must not leak
                # onto an earlier, unrelated sound.
                current_sound = None
                continue
            current_sound = Sound(ipa=value)
            word_entry.sounds.append(current_sound)
        elif value != "" and current_sound is not None:
            stripped_tags = (raw_tag.strip() for raw_tag in value.split(","))
            current_sound.raw_tags.extend(
                raw_tag for raw_tag in stripped_tags if raw_tag != ""
            )
            translate_raw_tags(current_sound)
def extract_ko_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Collect sounds from the expanded output of a "ko-IPA" template.

    The template is converted back to wikitext, re-parsed with full
    expansion, and then read in three passes over the top-level nodes:

    * ``<ul>``/``<li>`` items: the first ``<i>`` becomes a raw tag, a span
      with class "IPA" fills ``ipa`` and a span with class "Kore" fills
      ``hangul``; the item is kept when either field is non-empty.
    * ``<table>``/``<tr>`` rows: every span in the first ``<th>`` becomes a
      raw tag and the first ``<td class="IPA">`` fills ``roman``; the row is
      kept when ``roman`` is non-empty.
    * link nodes: each is passed to ``clean_node`` together with
      ``word_entry`` (presumably so categories get recorded - confirm
      against ``clean_node``).
    """
    # https://ko.wiktionary.org/wiki/틀:ko-IPA
    wikitext = wxr.wtp.node_to_wikitext(node)
    root = wxr.wtp.parse(wikitext, expand_all=True)

    for list_tag in root.find_html("ul"):
        for item_tag in list_tag.find_html("li"):
            entry = Sound()
            # Only the first <i> element is used as a label.
            label_tag = next(iter(item_tag.find_html("i")), None)
            if label_tag is not None:
                entry.raw_tags.append(clean_node(wxr, None, label_tag))
            for span in item_tag.find_html("span"):
                css_class = span.attrs.get("class", "")
                if css_class == "IPA":
                    entry.ipa = clean_node(wxr, None, span)
                elif css_class == "Kore":
                    entry.hangul = clean_node(wxr, None, span)
            if entry.hangul == "" and entry.ipa == "":
                continue
            translate_raw_tags(entry)
            word_entry.sounds.append(entry)

    for table_tag in root.find_html("table"):
        for row_tag in table_tag.find_html("tr"):
            entry = Sound()
            # Only the first header cell contributes labels.
            header_tag = next(iter(row_tag.find_html("th")), None)
            if header_tag is not None:
                for span in header_tag.find_html("span"):
                    entry.raw_tags.append(clean_node(wxr, None, span))
            # Only the first matching data cell is used.
            ipa_cells = row_tag.find_html(
                "td", attr_name="class", attr_value="IPA"
            )
            cell_tag = next(iter(ipa_cells), None)
            if cell_tag is not None:
                entry.roman = clean_node(wxr, None, cell_tag)
            if entry.roman == "":
                continue
            translate_raw_tags(entry)
            word_entry.sounds.append(entry)

    for link in root.find_child(NodeKind.LINK):
        clean_node(wxr, word_entry, link)
def extract_ja_pron_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Collect sounds from the expanded output of a "ja-pron" template.

    The template is converted back to wikitext and re-parsed with full
    expansion. For every ``<li>`` inside a top-level ``<ul>``, spans are
    read by their ``class`` attribute: "usage-label-accent" adds a raw tag
    (with surrounding parentheses stripped), "Jpan" fills ``other``, "Latn"
    fills ``roman`` and "IPA" fills ``ipa``. An item is kept when ``ipa`` or
    ``roman`` is non-empty. Finally the whole expanded tree is passed to
    ``clean_node`` together with ``word_entry``.
    """
    # https://ko.wiktionary.org/wiki/틀:ja-pron
    wikitext = wxr.wtp.node_to_wikitext(node)
    root = wxr.wtp.parse(wikitext, expand_all=True)

    # Span class -> Sound attribute that receives the span's cleaned text.
    field_by_class = {"Jpan": "other", "Latn": "roman", "IPA": "ipa"}

    for list_tag in root.find_html("ul"):
        for item_tag in list_tag.find_html("li"):
            entry = Sound()
            for span in item_tag.find_html("span"):
                css_class = span.attrs.get("class", "")
                if css_class == "usage-label-accent":
                    label = clean_node(wxr, None, span).strip("()")
                    entry.raw_tags.append(label)
                elif css_class in field_by_class:
                    setattr(
                        entry,
                        field_by_class[css_class],
                        clean_node(wxr, None, span),
                    )
            if entry.ipa == "" and entry.roman == "":
                continue
            translate_raw_tags(entry)
            word_entry.sounds.append(entry)

    clean_node(wxr, word_entry, root)