Coverage for src/wiktextract/extractor/ko/sound.py: 91%

100 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Sound, WordEntry 

7from .tags import translate_raw_tags 

8 

# Names of the templates that extract_sound_template() knows how to handle.
SOUND_TEMPLATES = frozenset({"발음 듣기", "IPA", "ko-IPA", "ja-pron"})

10 

11 

def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract pronunciation data from every template in a section.

    All template nodes below ``level_node`` (searched recursively) are handed
    to ``extract_sound_template``, which adds any sounds it recognises to
    ``word_entry``.
    """
    templates = level_node.find_child_recursively(NodeKind.TEMPLATE)
    for template_node in templates:
        extract_sound_template(wxr, word_entry, template_node)

17 

18 

def extract_sound_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Route a template node to the extractor that matches its name.

    Templates whose name is not one of the four handled names are ignored.
    """
    # Built per call so the handler names are resolved at call time.
    handlers = {
        "발음 듣기": extract_listen_pronunciation_template,
        "IPA": extract_ipa_template,
        "ko-IPA": extract_ko_ipa_template,
        "ja-pron": extract_ja_pron_template,
    }
    handler = handlers.get(node.template_name)
    if handler is None:
        return
    handler(wxr, word_entry, node)

30 

31 

def extract_listen_pronunciation_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract audio files and their labels from a "발음 듣기" template.

    Positional parameters 1-8 are read as pairs: an odd parameter is an audio
    file name (a new ``Sound`` is created and filled in through
    ``set_sound_file_url_fields``), and the even parameter that follows is a
    raw tag for that sound. Reading stops at the first missing parameter.

    A label is attached only to the sound created from the file parameter of
    the same pair. If that file parameter was empty, the label is dropped
    instead of being appended to an unrelated sound already present in
    ``word_entry.sounds``.
    """
    # https://ko.wiktionary.org/wiki/틀:발음_듣기
    pair_has_sound = False
    for key in range(1, 9):
        if key not in node.template_parameters:
            break
        value = clean_node(wxr, None, node.template_parameters[key])
        if key % 2 == 1:
            # File parameter: starts a new pair.
            pair_has_sound = value != ""
            if pair_has_sound:
                sound = Sound()
                set_sound_file_url_fields(wxr, value, sound)
                word_entry.sounds.append(sound)
        elif pair_has_sound and value != "":
            # Label parameter: the sound of this pair is the last one added.
            word_entry.sounds[-1].raw_tags.append(value)
            translate_raw_tags(word_entry.sounds[-1])

49 

50 

def extract_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract IPA transcriptions and their tags from an "IPA" template.

    Positional parameters 1-4 are read as pairs: an odd parameter is an IPA
    string (a new ``Sound`` is appended to ``word_entry.sounds``), and the
    even parameter that follows is a comma-separated list of raw tags for
    that sound. Reading stops at the first missing parameter.

    Tags are attached only to the sound created from the IPA parameter of the
    same pair. If that IPA parameter was empty, the tags are dropped instead
    of being appended to an unrelated sound already present in
    ``word_entry.sounds``.
    """
    # https://ko.wiktionary.org/wiki/틀:IPA
    pair_has_sound = False
    for key in range(1, 5):
        if key not in node.template_parameters:
            break
        value = clean_node(wxr, None, node.template_parameters[key])
        if key % 2 == 1:
            # IPA parameter: starts a new pair.
            pair_has_sound = value != ""
            if pair_has_sound:
                word_entry.sounds.append(Sound(ipa=value))
        elif pair_has_sound and value != "":
            # Tag parameter: the sound of this pair is the last one added.
            sound = word_entry.sounds[-1]
            stripped_tags = (raw_tag.strip() for raw_tag in value.split(","))
            sound.raw_tags.extend(tag for tag in stripped_tags if tag != "")
            translate_raw_tags(sound)

70 

71 

def extract_ko_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract sounds from the expanded output of a "ko-IPA" template.

    The template is expanded and re-parsed, then two kinds of HTML are read:

    * ``<ul>``/``<li>`` items: the first ``<i>`` becomes a raw tag, a span
      with class ``IPA`` fills ``ipa`` and one with class ``Kore`` fills
      ``hangul``. An item is kept if either field is non-empty.
    * ``<table>``/``<tr>`` rows: spans in the first ``<th>`` become raw tags
      and the first ``<td class="IPA">`` fills ``roman``. A row is kept only
      if ``roman`` is non-empty.

    Kept sounds are passed through ``translate_raw_tags`` and appended to
    ``word_entry.sounds``.
    """
    # https://ko.wiktionary.org/wiki/틀:ko-IPA
    wikitext = wxr.wtp.node_to_wikitext(node)
    root = wxr.wtp.parse(wikitext, expand_all=True)

    for list_tag in root.find_html("ul"):
        for item_tag in list_tag.find_html("li"):
            entry = Sound()
            # Only the first <i> element in the item is used as a label.
            label_tag = next(iter(item_tag.find_html("i")), None)
            if label_tag is not None:
                entry.raw_tags.append(clean_node(wxr, None, label_tag))
            for span in item_tag.find_html("span"):
                css_class = span.attrs.get("class", "")
                if css_class == "IPA":
                    entry.ipa = clean_node(wxr, None, span)
                elif css_class == "Kore":
                    entry.hangul = clean_node(wxr, None, span)
            if entry.hangul == "" and entry.ipa == "":
                continue
            translate_raw_tags(entry)
            word_entry.sounds.append(entry)

    for table_tag in root.find_html("table"):
        for row_tag in table_tag.find_html("tr"):
            entry = Sound()
            # Only the first header cell of the row contributes raw tags.
            header_tag = next(iter(row_tag.find_html("th")), None)
            if header_tag is not None:
                for span in header_tag.find_html("span"):
                    entry.raw_tags.append(clean_node(wxr, None, span))
            # Only the first cell with class "IPA" is read.
            roman_cells = row_tag.find_html(
                "td", attr_name="class", attr_value="IPA"
            )
            roman_tag = next(iter(roman_cells), None)
            if roman_tag is not None:
                entry.roman = clean_node(wxr, None, roman_tag)
            if entry.roman == "":
                continue
            translate_raw_tags(entry)
            word_entry.sounds.append(entry)

    # Direct child links are cleaned with word_entry passed in; presumably
    # this lets clean_node record category links on the entry. TODO confirm.
    for link in root.find_child(NodeKind.LINK):
        clean_node(wxr, word_entry, link)

113 

114 

def extract_ja_pron_template(
    wxr: WiktextractContext, word_entry: WordEntry, node: TemplateNode
) -> None:
    """Extract sounds from the expanded output of a "ja-pron" template.

    The template is expanded and re-parsed. For every ``<li>`` inside a
    ``<ul>``, spans are read by their ``class`` attribute:
    ``usage-label-accent`` adds a raw tag (surrounding parentheses stripped),
    ``Jpan`` fills ``other``, ``Latn`` fills ``roman`` and ``IPA`` fills
    ``ipa``. An item is kept only if ``ipa`` or ``roman`` is non-empty; kept
    sounds go through ``translate_raw_tags`` and are appended to
    ``word_entry.sounds``.
    """
    # https://ko.wiktionary.org/wiki/틀:ja-pron
    wikitext = wxr.wtp.node_to_wikitext(node)
    root = wxr.wtp.parse(wikitext, expand_all=True)

    # Span class -> name of the Sound field that stores the span's text.
    field_by_class = {"Jpan": "other", "Latn": "roman", "IPA": "ipa"}

    for list_tag in root.find_html("ul"):
        for item_tag in list_tag.find_html("li"):
            entry = Sound()
            for span in item_tag.find_html("span"):
                css_class = span.attrs.get("class", "")
                if css_class == "usage-label-accent":
                    label = clean_node(wxr, None, span).strip("()")
                    entry.raw_tags.append(label)
                elif css_class in field_by_class:
                    setattr(
                        entry,
                        field_by_class[css_class],
                        clean_node(wxr, None, span),
                    )
            if entry.ipa == "" and entry.roman == "":
                continue
            translate_raw_tags(entry)
            word_entry.sounds.append(entry)

    # The whole expansion is cleaned with word_entry passed in; presumably
    # this lets clean_node record category links on the entry. TODO confirm.
    clean_node(wxr, word_entry, root)