Coverage for src/wiktextract/extractor/id/sound.py: 66%

58 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Hyphenation, Sound, WordEntry 

7from .tags import translate_raw_tags 

8 

9 

def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract sound data from every list item of a sound section.

    Only lists that are direct children of ``level_node`` are visited;
    each of their list items is handed to ``extract_sound_list_item``,
    which stores its results on ``word_entry``.
    """
    # Lazy generator: items are produced one at a time, so the traversal
    # order is the same as two nested ``for`` loops.
    sound_items = (
        item
        for sound_list in level_node.find_child(NodeKind.LIST)
        for item in sound_list.find_child(NodeKind.LIST_ITEM)
    )
    for item in sound_items:
        extract_sound_list_item(wxr, word_entry, item)

16 

17 

def extract_sound_list_item(
    wxr: WiktextractContext, word_entry: WordEntry, list_item: WikiNode
) -> None:
    """Extract sound data from a single list item of a sound section.

    The children of ``list_item`` are handled in document order:

    * a nested list is processed recursively, one list item at a time
      (each nested item starts with its own empty raw-tag list);
    * ``IPA``, ``audio``, ``ejaan:id`` and ``rhymes`` templates are passed
      to their dedicated extractor together with the raw tags collected
      so far in this list item;
    * an ``a`` template contributes its cleaned text, with surrounding
      parentheses stripped, as a raw tag for the templates after it;
    * a plain string starting with ``Hifenasi:`` is split on "‧" and
      stored as a ``Hyphenation`` on ``word_entry``.

    Results are appended to ``word_entry``; nothing is returned.
    """
    raw_tags: list[str] = []
    for node in list_item.children:
        if isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for child_list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_sound_list_item(wxr, word_entry, child_list_item)
        elif isinstance(node, TemplateNode):
            if node.template_name == "IPA":
                extract_ipa_template(wxr, word_entry, node, raw_tags)
            elif node.template_name == "audio":
                extract_audio_template(wxr, word_entry, node, raw_tags)
            elif node.template_name == "ejaan:id":
                extract_ejaan_id_template(wxr, word_entry, node, raw_tags)
            elif node.template_name == "a":
                raw_tag = clean_node(wxr, None, node).strip("()")
                if raw_tag != "":
                    raw_tags.append(raw_tag)
            elif node.template_name == "rhymes":
                extract_rhymes_template(wxr, word_entry, node, raw_tags)
        elif isinstance(node, str) and node.strip().startswith("Hifenasi:"):
            hyphenation_text = node.strip().removeprefix("Hifenasi:")
            # "".split("‧") returns [""], so a bare "Hifenasi:" label (with
            # the syllables living in a sibling node, or missing) would
            # otherwise be stored as a hyphenation with one empty part.
            # Strip every part and keep only the non-empty ones.
            parts = [
                part
                for part in (
                    piece.strip() for piece in hyphenation_text.split("‧")
                )
                if part
            ]
            if parts:
                word_entry.hyphenations.append(Hyphenation(parts=parts))

48 

49 

def extract_ipa_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add a ``Sound`` built from the first argument of an ``IPA`` template.

    The first positional template argument is cleaned to text and used
    as the IPA value; ``raw_tags`` are attached and translated. Nothing
    is added to ``word_entry`` when the cleaned value is empty.
    """
    ipa_arg = t_node.template_parameters.get(1, "")
    sound = Sound(ipa=clean_node(wxr, None, ipa_arg), raw_tags=raw_tags)
    if sound.ipa == "":
        return
    translate_raw_tags(sound)
    word_entry.sounds.append(sound)

63 

64 

def extract_audio_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add a ``Sound`` for the audio file named by an ``audio`` template.

    The second positional argument is the file name and the third an
    optional extra raw tag for this recording. A sound is appended to
    ``word_entry`` only when the file name is non-empty.
    """
    params = t_node.template_parameters
    audio_file = clean_node(wxr, None, params.get(2, ""))
    sound = Sound(raw_tags=raw_tags)
    if audio_file:
        set_sound_file_url_fields(wxr, audio_file, sound)
        extra_tag = clean_node(wxr, None, params.get(3, ""))
        if extra_tag:
            sound.raw_tags.append(extra_tag)
        translate_raw_tags(sound)
        word_entry.sounds.append(sound)
    # Always clean the whole template once more, this time passing
    # word_entry; presumably this lets clean_node record page-level data
    # (e.g. categories) on the entry -- confirm against clean_node.
    clean_node(wxr, word_entry, t_node)

81 

82 

def extract_ejaan_id_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add a ``Sound`` whose IPA is the cleaned text of an ``ejaan:id`` template.

    The whole template node is cleaned to text and used as the IPA
    value; ``raw_tags`` are attached and translated. Nothing is added to
    ``word_entry`` when the cleaned text is empty.
    """
    ipa_text = clean_node(wxr, None, t_node)
    sound = Sound(ipa=ipa_text, raw_tags=raw_tags)
    if sound.ipa == "":
        return
    translate_raw_tags(sound)
    word_entry.sounds.append(sound)

93 

94 

def extract_rhymes_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add one ``Sound`` per rhyme link produced by a ``rhymes`` template.

    The template is converted back to wikitext, fully expanded and
    re-parsed; every link that is a direct child of the expanded root
    becomes a ``Sound`` whose ``rhymes`` value is the cleaned link text.
    Links whose cleaned text is empty are skipped.
    """
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    for link_node in expanded_node.find_child(NodeKind.LINK):
        rhyme = clean_node(wxr, None, link_node)
        if rhyme == "":
            continue
        sound = Sound(rhymes=rhyme, raw_tags=raw_tags)
        # Translate raw tags like the IPA, audio and ejaan:id extractors
        # do, so qualifiers preceding a rhymes template are not left
        # untranslated.
        translate_raw_tags(sound)
        word_entry.sounds.append(sound)