Coverage for src / wiktextract / extractor / ku / sound.py: 89%

66 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-12 08:09 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Hyphenation, Sound, WordEntry 

7from .tags import translate_raw_tags 

8 

9 

def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract sound data from every list item of a pronunciation section.

    Walks each ``LIST`` child of ``level_node``, then each ``LIST_ITEM``
    child of that list, and hands the item to ``extract_sound_list_item``,
    which stores its results on ``word_entry``.
    """
    # Flatten "lists -> list items" lazily so items are still visited in
    # document order, one list at a time.
    sound_items = (
        item
        for sound_list in level_node.find_child(NodeKind.LIST)
        for item in sound_list.find_child(NodeKind.LIST_ITEM)
    )
    for item in sound_items:
        extract_sound_list_item(wxr, word_entry, item)

16 

17 

def extract_sound_list_item(
    wxr: WiktextractContext, word_entry: WordEntry, list_item: WikiNode
) -> None:
    """Dispatch the children of one sound list item to the matching extractor.

    Template children are routed by template name:

    * ``ku-IPA`` / ``IPA-ku`` -> ``extract_ku_ipa_template``
    * ``deng`` / ``sound``    -> ``extract_deng_template``
    * ``ku-kîte``             -> ``extract_ku_kîte``
    * ``kîte``                -> ``extract_kîte_template`` (receives the raw
      tags collected so far in this list item)
    * any name ending in ``.`` -> its cleaned text, minus a trailing ``:``,
      is remembered as a raw tag for later ``kîte`` templates

    Nested ``LIST`` children are processed recursively, item by item; the
    raw tags gathered here are not forwarded to the nested items.
    """
    collected_raw_tags = []
    for child in list_item.children:
        if isinstance(child, TemplateNode):
            name = child.template_name
            if name in ("ku-IPA", "IPA-ku"):
                extract_ku_ipa_template(wxr, word_entry, child)
            elif name in ("deng", "sound"):
                extract_deng_template(wxr, word_entry, child)
            elif name == "ku-kîte":
                extract_ku_kîte(wxr, word_entry, child)
            elif name == "kîte":
                extract_kîte_template(
                    wxr, word_entry, child, collected_raw_tags
                )
            elif name.endswith("."):
                label = clean_node(wxr, None, child).removesuffix(":")
                if label != "":
                    collected_raw_tags.append(label)
            # A template is never treated as a nested list, whether or not
            # its name matched one of the branches above.
            continue

        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_sound_list_item(wxr, word_entry, nested_item)

39 

40 

def extract_ku_ipa_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Collect IPA transcriptions produced by a ``ku-IPA``/``IPA-ku`` template.

    The template is converted back to wikitext and re-parsed with full
    expansion. Every ``<span class="IPA">`` in the result yields one
    ``Sound`` appended to ``word_entry.sounds``, unless its cleaned text is
    empty.
    """
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)

    ipa_spans = expanded.find_html(
        "span", attr_name="class", attr_value="IPA"
    )
    for ipa_span in ipa_spans:
        sound = Sound(ipa=clean_node(wxr, None, ipa_span))
        if sound.ipa == "":
            continue
        word_entry.sounds.append(sound)

    # Second pass over the whole expansion with the entry attached; the
    # returned text is discarded.
    # NOTE(review): presumably this lets clean_node record side data such as
    # categories on word_entry - confirm against clean_node.
    clean_node(wxr, word_entry, expanded)

54 

55 

def extract_deng_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Build a ``Sound`` from a ``deng``/``sound`` audio template.

    Template arguments read:

    * ``ipa``  - IPA text for the sound.
    * ``4``, falling back to ``dever`` - comma-separated raw tags.
    * ``2``    - audio file name.

    The sound is appended to ``word_entry.sounds`` only when a file name is
    present; otherwise it is discarded, even if it carries IPA or raw tags.
    """
    params = t_node.template_parameters

    sound = Sound(ipa=clean_node(wxr, None, params.get("ipa", "")))

    tags_arg = params.get(4, params.get("dever", ""))
    tags_text = clean_node(wxr, None, tags_arg)
    stripped_tags = (piece.strip() for piece in tags_text.split(","))
    sound.raw_tags.extend(tag for tag in stripped_tags if tag != "")

    audio_file = clean_node(wxr, None, params.get(2, ""))
    if audio_file != "":
        set_sound_file_url_fields(wxr, audio_file, sound)
        translate_raw_tags(sound)
        word_entry.sounds.append(sound)

    # Always run the template through clean_node with the entry attached,
    # whether or not a sound was stored; the returned text is discarded.
    # NOTE(review): presumably for category collection - confirm.
    clean_node(wxr, word_entry, t_node)

79 

80 

def extract_ku_kîte(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Extract a hyphenation from the expanded ``ku-kîte`` template.

    The expansion is scanned for the first plain-string child containing a
    ``:``. Everything after that first colon, plus all following sibling
    nodes, is cleaned to text and split on ``·`` into hyphenation parts. Only
    that first colon-bearing string is considered; nothing is stored when the
    cleaned text is empty.
    """
    expanded = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    siblings = expanded.children
    for position, child in enumerate(siblings):
        if not (isinstance(child, str) and ":" in child):
            continue

        # Keep only the text after the first colon of the label string.
        _, _, after_colon = child.partition(":")
        remainder = [after_colon] + siblings[position + 1 :]
        hyphenation_text = clean_node(wxr, None, remainder).strip()
        if hyphenation_text != "":
            word_entry.hyphenations.append(
                Hyphenation(parts=hyphenation_text.split("·"))
            )
        # Stop at the first colon-bearing string, whether or not it produced
        # a hyphenation.
        break

100 

101 

def extract_kîte_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Extract hyphenations from the expanded ``kîte`` template.

    The template's first positional argument is read as a language code.
    Every ``<span lang="<code>">`` in the fully expanded template whose
    cleaned text is non-empty becomes one ``Hyphenation``, with its parts
    split on ``‧``.

    Args:
        wxr: Extraction context used for parsing and cleaning nodes.
        word_entry: Entry whose ``hyphenations`` list receives the results.
        t_node: The ``kîte`` template node.
        raw_tags: Raw tags collected by the caller for this list item. They
            are attached to each hyphenation and then passed through
            ``translate_raw_tags``. This list itself is never modified.
    """
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    lang_code = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    for span in expanded_node.find_html(
        "span", attr_name="lang", attr_value=lang_code
    ):
        h_str = clean_node(wxr, None, span)
        if h_str != "":
            # Give each hyphenation its own copy of the tags. The caller
            # keeps appending to `raw_tags` for later templates, and
            # translate_raw_tags works on the model's tag list.
            # NOTE(review): if Hyphenation already copies list inputs on
            # construction, this copy is redundant but harmless - confirm.
            h_data = Hyphenation(
                parts=h_str.split("‧"), raw_tags=list(raw_tags)
            )
            translate_raw_tags(h_data)
            word_entry.hyphenations.append(h_data)