Coverage for src/wiktextract/extractor/id/sound.py: 66%

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from ..share import set_sound_file_url_fields

6from .models import Sound, WordEntry

7from .tags import translate_raw_tags

def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Process every list item of a pronunciation section.

    Each LIST child of ``level_node`` is scanned for LIST_ITEM children,
    and every item is handed to ``extract_sound_list_item`` in document
    order. Results are written to ``word_entry`` by that function.
    """
    sound_items = (
        item
        for sound_list in level_node.find_child(NodeKind.LIST)
        for item in sound_list.find_child(NodeKind.LIST_ITEM)
    )
    for item in sound_items:
        extract_sound_list_item(wxr, word_entry, item)

def extract_sound_list_item(
    wxr: WiktextractContext, word_entry: WordEntry, list_item: WikiNode
) -> None:
    """Extract pronunciation data from a single list item.

    The children of ``list_item`` are visited in order:

    * a nested list is processed recursively, item by item (each nested
      item starts with its own empty raw-tag list);
    * ``IPA``, ``audio``, ``ejaan:id`` and ``rhymes`` templates are passed
      to their dedicated extractor together with the raw tags collected
      so far in this item;
    * an ``a`` template contributes its cleaned text, with surrounding
      parentheses stripped, as a raw tag;
    * a text node starting with ``Hifenasi:`` has the remainder stored in
      ``word_entry.hyphenation``.
    """
    template_handlers = {
        "IPA": extract_ipa_template,
        "audio": extract_audio_template,
        "ejaan:id": extract_ejaan_id_template,
        "rhymes": extract_rhymes_template,
    }
    qualifiers = []
    for child in list_item.children:
        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_sound_list_item(wxr, word_entry, nested_item)
            continue

        if isinstance(child, TemplateNode):
            name = child.template_name
            handler = template_handlers.get(name)
            if handler is not None:
                handler(wxr, word_entry, child, qualifiers)
            elif name == "a":
                # Qualifier template: its text becomes a raw tag.
                qualifier = clean_node(wxr, None, child).strip("()")
                if qualifier != "":
                    qualifiers.append(qualifier)
            continue

        if isinstance(child, str):
            text = child.strip()
            if text.startswith("Hifenasi:"):
                word_entry.hyphenation = text.removeprefix(
                    "Hifenasi:"
                ).strip()

def extract_ipa_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add a ``Sound`` built from an ``IPA`` template to ``word_entry``.

    The IPA string is the cleaned first positional template argument. A
    template whose resulting ``ipa`` value is empty adds nothing;
    otherwise ``translate_raw_tags`` is applied to the sound before it is
    appended to ``word_entry.sounds``.
    """
    ipa_arg = t_node.template_parameters.get(1, "")
    sound = Sound(ipa=clean_node(wxr, None, ipa_arg), raw_tags=raw_tags)
    if sound.ipa == "":
        return
    translate_raw_tags(sound)
    word_entry.sounds.append(sound)

def extract_audio_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add a ``Sound`` built from an ``audio`` template to ``word_entry``.

    The audio file name is the cleaned second positional argument. When it
    is non-empty, the file URL fields are filled in through
    ``set_sound_file_url_fields``, the cleaned third positional argument
    (if non-empty) is added to the sound's raw tags, ``translate_raw_tags``
    is applied and the sound is appended to ``word_entry.sounds``.

    Whether or not a sound was added, the template itself is finally run
    through ``clean_node`` with ``word_entry`` as the target.
    """
    params = t_node.template_parameters
    audio_file = clean_node(wxr, None, params.get(2, ""))
    sound = Sound(raw_tags=raw_tags)
    if audio_file != "":
        set_sound_file_url_fields(wxr, audio_file, sound)
        label = clean_node(wxr, None, params.get(3, ""))
        if label != "":
            sound.raw_tags.append(label)
        translate_raw_tags(sound)
        word_entry.sounds.append(sound)
    # NOTE(review): presumably done to record the template's categories on
    # word_entry - confirm against clean_node.
    clean_node(wxr, word_entry, t_node)

def extract_ejaan_id_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add a ``Sound`` built from an ``ejaan:id`` template to ``word_entry``.

    The ``ipa`` value is the cleaned text of the whole template node. A
    template whose resulting ``ipa`` value is empty adds nothing;
    otherwise ``translate_raw_tags`` is applied to the sound before it is
    appended to ``word_entry.sounds``.
    """
    pronunciation = clean_node(wxr, None, t_node)
    sound = Sound(ipa=pronunciation, raw_tags=raw_tags)
    if sound.ipa == "":
        return
    translate_raw_tags(sound)
    word_entry.sounds.append(sound)

def extract_rhymes_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Add one ``Sound`` per rhyme link of a ``rhymes`` template.

    The template is converted back to wikitext and re-parsed with
    ``expand_all=True``; every LINK child of the parsed result whose
    cleaned text is non-empty yields a ``Sound`` with that text as
    ``rhymes`` and the given ``raw_tags``. ``translate_raw_tags`` is
    applied to each sound before it is appended to ``word_entry.sounds``.
    """
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    for link_node in expanded_node.find_child(NodeKind.LINK):
        rhyme = clean_node(wxr, None, link_node)
        if rhyme != "":
            sound = Sound(rhymes=rhyme, raw_tags=raw_tags)
            # The IPA, audio and ejaan:id handlers all translate raw tags
            # before appending; do the same here so rhyme entries are not
            # the only sounds left with untranslated qualifiers.
            translate_raw_tags(sound)
            word_entry.sounds.append(sound)