Coverage for src/wiktextract/extractor/id/sound.py: 66%

58 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Sound, WordEntry 

7from .tags import translate_raw_tags 

8 

9 

def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Process every list item found in a pronunciation section.

    Iterates over the LIST children of ``level_node`` and, for each of
    their LIST_ITEM children, delegates to ``extract_sound_list_item``,
    which stores whatever it finds on ``word_entry``.
    """
    # The generator is lazy, so lists and items are visited in the same
    # interleaved order as a pair of nested loops would visit them.
    sound_items = (
        item
        for sound_list in level_node.find_child(NodeKind.LIST)
        for item in sound_list.find_child(NodeKind.LIST_ITEM)
    )
    for item in sound_items:
        extract_sound_list_item(wxr, word_entry, item)

16 

17 

def extract_sound_list_item(
    wxr: WiktextractContext, word_entry: WordEntry, list_item: WikiNode
) -> None:
    """Extract pronunciation data from one list item.

    Children of ``list_item`` are handled in document order:

    * nested lists are processed recursively, item by item;
    * ``IPA``, ``audio``, ``ejaan:id`` and ``rhymes`` templates are passed
      to their dedicated extractor together with the qualifiers seen so far;
    * an ``a`` template contributes its cleaned text (surrounding
      parentheses stripped) as a qualifier for the templates after it;
    * a text node starting with ``Hifenasi:`` sets
      ``word_entry.hyphenation`` to the remainder of that text.
    """
    # All of these extractors share the (wxr, word_entry, node, raw_tags)
    # call shape, so they can be selected by template name.
    template_handlers = {
        "IPA": extract_ipa_template,
        "audio": extract_audio_template,
        "ejaan:id": extract_ejaan_id_template,
        "rhymes": extract_rhymes_template,
    }
    raw_tags = []
    for child in list_item.children:
        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            # Each nested item starts with its own empty qualifier list.
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_sound_list_item(wxr, word_entry, nested_item)
            continue

        if isinstance(child, TemplateNode):
            name = child.template_name
            if name == "a":
                qualifier = clean_node(wxr, None, child).strip("()")
                if qualifier != "":
                    raw_tags.append(qualifier)
            elif name in template_handlers:
                template_handlers[name](wxr, word_entry, child, raw_tags)
            continue

        if isinstance(child, str):
            text = child.strip()
            if text.startswith("Hifenasi:"):
                word_entry.hyphenation = text.removeprefix(
                    "Hifenasi:"
                ).strip()

43 

44 

def extract_ipa_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Record the IPA transcription carried by an ``IPA`` template.

    The transcription is the cleaned value of template parameter ``1``.
    When it is empty nothing is recorded; otherwise a ``Sound`` holding
    the transcription and ``raw_tags`` is run through
    ``translate_raw_tags`` and appended to ``word_entry.sounds``.
    """
    ipa_arg = t_node.template_parameters.get(1, "")
    sound = Sound(ipa=clean_node(wxr, None, ipa_arg), raw_tags=raw_tags)
    if sound.ipa == "":
        return
    translate_raw_tags(sound)
    word_entry.sounds.append(sound)

58 

59 

def extract_audio_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Record the audio file referenced by an ``audio`` template.

    Template parameter ``2`` is the file name and parameter ``3`` an
    optional extra qualifier. When a file name is present, the sound's URL
    fields are filled in by ``set_sound_file_url_fields``, the qualifier
    (if any) is added to the sound's raw tags, the tags are translated and
    the sound is appended to ``word_entry.sounds``. The template is always
    cleaned against ``word_entry`` at the end, file name or not.
    """
    params = t_node.template_parameters
    audio_file = clean_node(wxr, None, params.get(2, ""))
    sound = Sound(raw_tags=raw_tags)
    if audio_file != "":
        set_sound_file_url_fields(wxr, audio_file, sound)
        extra_tag = clean_node(wxr, None, params.get(3, ""))
        if extra_tag != "":
            # NOTE(review): this is only local to `sound` if the Sound
            # model copies the list it is given — confirm in .models.
            sound.raw_tags.append(extra_tag)
        translate_raw_tags(sound)
        word_entry.sounds.append(sound)
    # Unlike the calls above, this one passes word_entry rather than None;
    # presumably so data emitted by the template (e.g. categories) lands
    # on the entry — confirm against clean_node.
    clean_node(wxr, word_entry, t_node)

76 

77 

def extract_ejaan_id_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Record the pronunciation produced by an ``ejaan:id`` template.

    The cleaned text of the whole template is stored as the sound's
    ``ipa`` value. An empty result is ignored; otherwise the sound's raw
    tags are translated and it is appended to ``word_entry.sounds``.
    """
    rendered = clean_node(wxr, None, t_node)
    sound = Sound(ipa=rendered, raw_tags=raw_tags)
    if sound.ipa == "":
        return
    translate_raw_tags(sound)
    word_entry.sounds.append(sound)

88 

89 

def extract_rhymes_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Record one rhyme per link produced by a ``rhymes`` template.

    The template is converted back to wikitext, re-parsed with all
    templates expanded, and every top-level LINK node of the result whose
    cleaned text is non-empty becomes a ``Sound`` with that text as
    ``rhymes``. Each sound carries ``raw_tags`` and is appended to
    ``word_entry.sounds``.
    """
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    for link_node in expanded_node.find_child(NodeKind.LINK):
        rhyme = clean_node(wxr, None, link_node)
        if rhyme == "":
            continue
        sound = Sound(rhymes=rhyme, raw_tags=raw_tags)
        # Translate qualifiers just as the IPA, audio and ejaan:id
        # extractors do, so rhyme entries do not keep raw tags that the
        # other sound kinds would have had translated.
        translate_raw_tags(sound)
        word_entry.sounds.append(sound)
100 rhyme = clean_node(wxr, None, link_node) 

101 if rhyme != "": 

102 word_entry.sounds.append(Sound(rhymes=rhyme, raw_tags=raw_tags))