Coverage for src/wiktextract/extractor/ko/sound.py: 91%

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from ..share import set_sound_file_url_fields

6from .models import Sound, WordEntry

# Names of the templates that extract_sound_template() has an extractor for.
SOUND_TEMPLATES = frozenset({"발음 듣기", "IPA", "ko-IPA", "ja-pron"})

def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Run sound-template extraction on every template under a section.

    Each template node returned by the recursive search of ``level_node`` is
    passed, together with ``wxr`` and ``word_entry``, to
    ``extract_sound_template``.
    """
    template_nodes = level_node.find_child_recursively(NodeKind.TEMPLATE)
    for template_node in template_nodes:
        extract_sound_template(wxr, word_entry, template_node)

def extract_sound_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Dispatch a template node to the extractor registered for its name.

    Templates whose name has no extractor in the table below are ignored.
    """
    # Built at call time because the handlers are defined further down.
    handlers = {
        "발음 듣기": extract_listen_pronunciation_template,
        "IPA": extract_ipa_template,
        "ko-IPA": extract_ko_ipa_template,
        "ja-pron": extract_ja_pron_template,
    }
    handler = handlers.get(node.template_name)
    if handler is not None:
        handler(wxr, word_entry, node)

def extract_listen_pronunciation_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract sounds and their labels from a "발음 듣기" template.

    Positional parameters 1-8 are read in order and handled as pairs: the
    cleaned odd-numbered value is handed to ``set_sound_file_url_fields`` to
    fill a new ``Sound`` that is appended to ``word_entry.sounds``, and the
    cleaned even-numbered value that follows is appended to that sound's
    ``raw_tags``.  Reading stops at the first missing parameter; parameters
    that clean to an empty string are skipped.
    """
    # https://ko.wiktionary.org/wiki/틀:발음_듣기
    params = node.template_parameters
    # Sound created by the odd-numbered parameter of the current pair.  It is
    # reset at the start of every pair so that a label following an empty
    # odd parameter is dropped instead of being attached to an unrelated
    # sound that an earlier pair or an earlier template added.
    pair_sound = None
    for key in range(1, 9):
        if key not in params:
            break
        starts_pair = key % 2 == 1
        if starts_pair:
            pair_sound = None
        value = clean_node(wxr, None, params[key])
        if value == "":
            continue
        if starts_pair:
            pair_sound = Sound()
            set_sound_file_url_fields(wxr, value, pair_sound)
            word_entry.sounds.append(pair_sound)
        elif pair_sound is not None:
            pair_sound.raw_tags.append(value)

def extract_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract IPA strings and their labels from an "IPA" template.

    Positional parameters 1-4 are read in order and handled as pairs: the
    cleaned odd-numbered value becomes the ``ipa`` of a new ``Sound`` that is
    appended to ``word_entry.sounds``, and the cleaned even-numbered value
    that follows is appended to that sound's ``raw_tags``.  Reading stops at
    the first missing parameter; parameters that clean to an empty string
    are skipped.
    """
    # https://ko.wiktionary.org/wiki/틀:IPA
    params = node.template_parameters
    # Sound created by the odd-numbered parameter of the current pair.  It is
    # reset at the start of every pair so that a label following an empty
    # IPA parameter is dropped instead of being attached to an unrelated
    # sound that an earlier pair or an earlier template added.
    pair_sound = None
    for key in range(1, 5):
        if key not in params:
            break
        starts_pair = key % 2 == 1
        if starts_pair:
            pair_sound = None
        value = clean_node(wxr, None, params[key])
        if value == "":
            continue
        if starts_pair:
            pair_sound = Sound(ipa=value)
            word_entry.sounds.append(pair_sound)
        elif pair_sound is not None:
            pair_sound.raw_tags.append(value)

def extract_ko_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract sounds from the expanded output of a "ko-IPA" template.

    The template is converted back to wikitext, parsed with full expansion,
    and the result is read in three passes:

    * each ``<li>`` found under a ``<ul>`` yields a sound whose raw tag is
      the text of the first ``<i>`` and whose ``ipa`` / ``hangul`` come from
      ``<span>`` elements whose class is exactly ``IPA`` / ``Kore``; the
      sound is kept only if one of those two fields is non-empty;
    * each ``<tr>`` found under a ``<table>`` yields a sound whose raw tags
      are the ``<span>`` texts of the first ``<th>`` and whose ``roman`` is
      the text of the first ``<td class="IPA">``; the sound is kept only if
      ``roman`` is non-empty;
    * every direct child link node is passed to ``clean_node`` together with
      ``word_entry``.
    """
    # https://ko.wiktionary.org/wiki/틀:ko-IPA
    wikitext = wxr.wtp.node_to_wikitext(node)
    root = wxr.wtp.parse(wikitext, expand_all=True)
    # <span> class -> Sound attribute receiving the span's cleaned text.
    span_fields = {"IPA": "ipa", "Kore": "hangul"}

    for list_tag in root.find_html("ul"):
        for item_tag in list_tag.find_html("li"):
            entry = Sound()
            # Only the first <i> of the item is used as a tag.
            label_tag = next(iter(item_tag.find_html("i")), None)
            if label_tag is not None:
                entry.raw_tags.append(clean_node(wxr, None, label_tag))
            for span in item_tag.find_html("span"):
                field = span_fields.get(span.attrs.get("class", ""))
                if field is not None:
                    setattr(entry, field, clean_node(wxr, None, span))
            if entry.hangul == "" and entry.ipa == "":
                continue
            word_entry.sounds.append(entry)

    for table_tag in root.find_html("table"):
        for row_tag in table_tag.find_html("tr"):
            entry = Sound()
            # Only the first <th> of the row contributes tags.
            header_tag = next(iter(row_tag.find_html("th")), None)
            if header_tag is not None:
                entry.raw_tags.extend(
                    clean_node(wxr, None, span)
                    for span in header_tag.find_html("span")
                )
            cell_tags = row_tag.find_html(
                "td", attr_name="class", attr_value="IPA"
            )
            cell_tag = next(iter(cell_tags), None)
            if cell_tag is not None:
                entry.roman = clean_node(wxr, None, cell_tag)
            if entry.roman != "":
                word_entry.sounds.append(entry)

    # NOTE(review): passing word_entry here presumably lets clean_node record
    # data carried by the links (e.g. categories) on the entry -- confirm.
    for link in root.find_child(NodeKind.LINK):
        clean_node(wxr, word_entry, link)

105

106

def extract_ja_pron_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract sounds from the expanded output of a "ja-pron" template.

    The template is converted back to wikitext and parsed with full
    expansion.  Each ``<li>`` found under a ``<ul>`` yields one sound built
    from its ``<span>`` elements, selected by exact class value:

    * ``usage-label-accent``: text, with surrounding parentheses stripped,
      is appended to ``raw_tags``;
    * ``Jpan``: text is stored in ``other``;
    * ``Latn``: text is stored in ``roman``;
    * ``IPA``: text is stored in ``ipa``.

    A sound is kept only if its ``ipa`` or ``roman`` is non-empty.  Finally
    the whole expanded tree is passed to ``clean_node`` together with
    ``word_entry``.
    """
    # https://ko.wiktionary.org/wiki/틀:ja-pron
    wikitext = wxr.wtp.node_to_wikitext(node)
    root = wxr.wtp.parse(wikitext, expand_all=True)
    # <span> class -> Sound attribute receiving the span's cleaned text.
    span_fields = {"Jpan": "other", "Latn": "roman", "IPA": "ipa"}

    for list_tag in root.find_html("ul"):
        for item_tag in list_tag.find_html("li"):
            entry = Sound()
            for span in item_tag.find_html("span"):
                css_class = span.attrs.get("class", "")
                if css_class == "usage-label-accent":
                    label = clean_node(wxr, None, span)
                    entry.raw_tags.append(label.strip("()"))
                elif css_class in span_fields:
                    text = clean_node(wxr, None, span)
                    setattr(entry, span_fields[css_class], text)
            if entry.ipa == "" and entry.roman == "":
                continue
            word_entry.sounds.append(entry)

    # NOTE(review): passing word_entry here presumably lets clean_node record
    # data found in the expanded tree (e.g. categories) on the entry --
    # confirm.
    clean_node(wxr, word_entry, root)