Coverage for src/wiktextract/extractor/ko/sound.py: 91%

91 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Sound, WordEntry 

7 

# Names of the pronunciation templates this module has an extractor for.
SOUND_TEMPLATES = frozenset({"발음 듣기", "IPA", "ko-IPA", "ja-pron"})

9 

10 

def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract pronunciation data from a sound section.

    Every template node found recursively under ``level_node`` is handed to
    ``extract_sound_template``, which adds what it recognises to
    ``word_entry``.
    """
    templates = level_node.find_child_recursively(NodeKind.TEMPLATE)
    for template in templates:
        extract_sound_template(wxr, word_entry, template)

16 

17 

def extract_sound_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Route a pronunciation template to the extractor written for it.

    Templates whose name has no extractor below are silently ignored.
    """
    # Built at call time so the extractors defined further down the module
    # are already bound when the table is created.
    extractors = {
        "발음 듣기": extract_listen_pronunciation_template,
        "IPA": extract_ipa_template,
        "ko-IPA": extract_ko_ipa_template,
        "ja-pron": extract_ja_pron_template,
    }
    extractor = extractors.get(node.template_name)
    if extractor is not None:
        extractor(wxr, word_entry, node)

29 

30 

def extract_listen_pronunciation_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract audio files from a ``발음 듣기`` template.

    Positional arguments 1-8 are read as consecutive pairs: an odd argument
    is passed to ``set_sound_file_url_fields`` to fill a new ``Sound``, and
    the even argument that follows is stored as a raw tag of that sound.
    Reading stops at the first missing positional argument.

    A tag is only attached to the sound created by its own pair. If the
    odd argument of a pair is empty, no sound is created and the tag is
    dropped rather than being attached to an unrelated sound that an
    earlier pair (or an earlier template) appended to ``word_entry``.
    """
    # https://ko.wiktionary.org/wiki/틀:발음_듣기
    pair_sound = None  # sound created by the current (file, tag) pair
    for key in range(1, 9):
        if key not in node.template_parameters:
            break
        starts_pair = key % 2 == 1
        if starts_pair:
            # A new pair begins: its tag must not leak onto the old sound.
            pair_sound = None
        value = clean_node(wxr, None, node.template_parameters[key])
        if value == "":
            continue
        if starts_pair:
            pair_sound = Sound()
            set_sound_file_url_fields(wxr, value, pair_sound)
            word_entry.sounds.append(pair_sound)
        elif pair_sound is not None:
            pair_sound.raw_tags.append(value)

47 

48 

def extract_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract IPA transcriptions from an ``IPA`` template.

    Positional arguments 1-4 are read as consecutive pairs: an odd argument
    becomes the ``ipa`` value of a new ``Sound``, and the even argument that
    follows is stored as a raw tag of that sound. Reading stops at the
    first missing positional argument.

    A tag is only attached to the sound created by its own pair. If the
    odd argument of a pair is empty, no sound is created and the tag is
    dropped rather than being attached to an unrelated sound that an
    earlier pair (or an earlier template) appended to ``word_entry``.
    """
    # https://ko.wiktionary.org/wiki/틀:IPA
    pair_sound = None  # sound created by the current (ipa, tag) pair
    for key in range(1, 5):
        if key not in node.template_parameters:
            break
        starts_pair = key % 2 == 1
        if starts_pair:
            # A new pair begins: its tag must not leak onto the old sound.
            pair_sound = None
        value = clean_node(wxr, None, node.template_parameters[key])
        if value == "":
            continue
        if starts_pair:
            pair_sound = Sound(ipa=value)
            word_entry.sounds.append(pair_sound)
        elif pair_sound is not None:
            pair_sound.raw_tags.append(value)

64 

65 

def extract_ko_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract pronunciations from the expanded ``ko-IPA`` template.

    The template is expanded and re-parsed, then read in three passes:

    1. each ``<li>`` of the top-level ``<ul>`` elements gives a sound with
       ``ipa`` and/or ``hangul``;
    2. each ``<tr>`` of the top-level ``<table>`` elements gives a sound
       with ``roman``;
    3. every top-level link node is cleaned against ``word_entry``.
    """
    # https://ko.wiktionary.org/wiki/틀:ko-IPA
    wikitext = wxr.wtp.node_to_wikitext(node)
    root = wxr.wtp.parse(wikitext, expand_all=True)

    for list_tag in root.find_html("ul"):
        for item_tag in list_tag.find_html("li"):
            item_sound = _ko_ipa_sound_from_list_item(wxr, item_tag)
            if item_sound.hangul == "" and item_sound.ipa == "":
                continue  # the item carried no pronunciation
            word_entry.sounds.append(item_sound)

    for table_tag in root.find_html("table"):
        for row_tag in table_tag.find_html("tr"):
            row_sound = _ko_ipa_sound_from_table_row(wxr, row_tag)
            if row_sound.roman == "":
                continue  # the row carried no romanization
            word_entry.sounds.append(row_sound)

    # NOTE(review): presumably this records categories linked by the
    # template on the entry - confirm against clean_node.
    for link in root.find_child(NodeKind.LINK):
        clean_node(wxr, word_entry, link)


def _ko_ipa_sound_from_list_item(wxr: WiktextractContext, item_tag) -> Sound:
    """Build a sound from one ``<li>``: first ``<i>`` as tag, spans as data."""
    result = Sound()
    for italic_tag in item_tag.find_html("i"):
        result.raw_tags.append(clean_node(wxr, None, italic_tag))
        break  # only the first <i> is used
    for span in item_tag.find_html("span"):
        css_class = span.attrs.get("class", "")
        if css_class == "IPA":
            result.ipa = clean_node(wxr, None, span)
        elif css_class == "Kore":
            result.hangul = clean_node(wxr, None, span)
    return result


def _ko_ipa_sound_from_table_row(wxr: WiktextractContext, row_tag) -> Sound:
    """Build a sound from one ``<tr>``: header spans as tags, IPA cell as roman."""
    result = Sound()
    for header_tag in row_tag.find_html("th"):
        for span in header_tag.find_html("span"):
            result.raw_tags.append(clean_node(wxr, None, span))
        break  # only the first <th> is used
    for cell_tag in row_tag.find_html(
        "td", attr_name="class", attr_value="IPA"
    ):
        result.roman = clean_node(wxr, None, cell_tag)
        break  # only the first matching <td> is used
    return result

105 

106 

def extract_ja_pron_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract pronunciations from the expanded ``ja-pron`` template.

    The template is expanded and re-parsed. Each ``<li>`` of the top-level
    ``<ul>`` elements gives one sound, filled from its ``<span>`` children
    by CSS class:

    - ``usage-label-accent``: raw tag, with surrounding parentheses removed
    - ``Jpan``: ``other``
    - ``Latn``: ``roman``
    - ``IPA``: ``ipa``

    A sound is kept only if it has an ``ipa`` or a ``roman`` value.
    """
    # https://ko.wiktionary.org/wiki/틀:ja-pron
    wikitext = wxr.wtp.node_to_wikitext(node)
    root = wxr.wtp.parse(wikitext, expand_all=True)

    for list_tag in root.find_html("ul"):
        for item_tag in list_tag.find_html("li"):
            entry = Sound()
            for span in item_tag.find_html("span"):
                css_class = span.attrs.get("class", "")
                if css_class == "usage-label-accent":
                    label = clean_node(wxr, None, span)
                    entry.raw_tags.append(label.strip("()"))
                elif css_class == "Jpan":
                    entry.other = clean_node(wxr, None, span)
                elif css_class == "Latn":
                    entry.roman = clean_node(wxr, None, span)
                elif css_class == "IPA":
                    entry.ipa = clean_node(wxr, None, span)
            if entry.ipa == "" and entry.roman == "":
                continue  # nothing usable in this list item
            word_entry.sounds.append(entry)

    # NOTE(review): presumably this records categories emitted by the
    # template on the entry - confirm against clean_node.
    clean_node(wxr, word_entry, root)