Coverage for src/wiktextract/extractor/it/sound.py: 84%

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from ..share import set_sound_file_url_fields

6from .models import Hyphenation, Sound, WordEntry

def extract_hyphenation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Collect hyphenations from a section and add them to matching entries.

    Three layouts are handled:

    * ``;`` lists: the cleaned text of the first list item that is not
      empty becomes one hyphenation; remaining items of that list are
      skipped.
    * ``*`` lists: for every list item, italic children set the sense
      (leading/trailing parentheses stripped) and bold children set the
      hyphenation; the item is kept only if a hyphenation was found.
    * No list: every bold node that is a direct child of the section with
      non-empty cleaned text becomes a hyphenation.

    The collected hyphenations are appended to every entry in
    ``page_data`` whose ``lang_code`` equals that of the last entry.
    """
    # https://it.wiktionary.org/wiki/Aiuto:Sillabazione
    found = []
    for list_node in level_node.find_child(NodeKind.LIST):
        marker = list_node.sarg
        if marker == ";":
            for item in list_node.find_child(NodeKind.LIST_ITEM):
                text = clean_node(wxr, None, item.children)
                if text == "":
                    continue
                found.append(Hyphenation(hyphenation=text))
                # only the first non-empty item of a ";" list is used
                break
        elif marker == "*":
            for item in list_node.find_child(NodeKind.LIST_ITEM):
                entry = Hyphenation()
                wanted = NodeKind.ITALIC | NodeKind.BOLD
                for child in item.find_child(wanted):
                    if child.kind == NodeKind.ITALIC:
                        sense_text = clean_node(wxr, None, child)
                        entry.sense = sense_text.strip("()")
                    elif child.kind == NodeKind.BOLD:
                        entry.hyphenation = clean_node(wxr, None, child)
                if entry.hyphenation != "":
                    found.append(entry)

    # no list
    for bold_node in level_node.find_child(NodeKind.BOLD):
        text = clean_node(wxr, None, bold_node)
        if text != "":
            found.append(Hyphenation(hyphenation=text))

    if page_data:
        # attach to every entry sharing the language of the latest entry
        current_lang = page_data[-1].lang_code
        for word_entry in page_data:
            if word_entry.lang_code == current_lang:
                word_entry.hyphenations.extend(found)

def extract_pronunciation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Collect sounds from a pronunciation section and add them to entries.

    Every list item of every list directly under ``level_node`` is passed
    to ``extract_sound_list_item``. Templates that are direct children of
    the section (the "no list" layout) are passed to
    ``extract_sound_template`` with an empty sense and no raw tags. The
    collected sounds are appended to every entry in ``page_data`` whose
    ``lang_code`` equals that of the last entry.
    """
    # https://it.wiktionary.org/wiki/Aiuto:Pronuncia
    collected = []
    for list_node in level_node.find_child(NodeKind.LIST):
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            extract_sound_list_item(wxr, item, collected)

    # no list
    for template in level_node.find_child(NodeKind.TEMPLATE):
        extract_sound_template(wxr, template, collected, "", [])

    if page_data:
        # attach to every entry sharing the language of the latest entry
        current_lang = page_data[-1].lang_code
        for word_entry in page_data:
            if word_entry.lang_code == current_lang:
                word_entry.sounds.extend(collected)

def extract_sound_list_item(
    wxr: WiktextractContext, list_item: WikiNode, sounds: list[Sound]
) -> None:
    """Extract sounds from one list item of a pronunciation section.

    Italic and template children are visited in document order. Italic
    text (leading/trailing parentheses stripped) replaces the current
    sense. A "glossa" template (name compared case-insensitively)
    contributes its cleaned text, parentheses stripped, as a raw tag. Any
    other template is handed to ``extract_sound_template`` together with
    the sense and raw tags gathered so far; results go into ``sounds``.
    """
    current_sense = ""
    tags_so_far = []
    wanted = NodeKind.ITALIC | NodeKind.TEMPLATE
    for child in list_item.find_child(wanted):
        if child.kind == NodeKind.ITALIC:
            current_sense = clean_node(wxr, None, child).strip("()")
            continue
        if child.kind != NodeKind.TEMPLATE:
            continue
        if child.template_name.lower() == "glossa":
            gloss = clean_node(wxr, None, child)
            tags_so_far.append(gloss.strip("()"))
        else:
            extract_sound_template(
                wxr, child, sounds, current_sense, tags_so_far
            )

def extract_sound_template(
    wxr: WiktextractContext,
    t_node: TemplateNode,
    sounds: list[Sound],
    sense: str,
    raw_tags: list[str],
) -> None:
    """Turn one pronunciation template into ``Sound`` data.

    * ``IPA`` / ``SAMPA``: positional arguments 1-4 are read in order,
      stopping at the first one that is absent. Each non-empty cleaned
      value yields a new ``Sound`` appended to ``sounds``; ``SAMPA``
      values additionally get the "SAMPA" tag.
    * ``Audio`` / ``audio``: argument 1 is the sound file and argument 2
      an optional raw tag. If ``sounds`` is not empty the file (and raw
      tag) are attached to its last element; otherwise a new ``Sound`` is
      created and appended. Nothing happens when the file name is empty.

    Templates with any other name are ignored.

    NOTE(review): every ``Sound`` built here receives the same
    ``raw_tags`` list object; whether ``Sound`` copies it on construction
    is not visible from this module - confirm before relying on it.
    """
    name = t_node.template_name
    params = t_node.template_parameters
    if name in ("IPA", "SAMPA"):
        # https://it.wiktionary.org/wiki/Template:IPA
        # https://it.wiktionary.org/wiki/Template:SAMPA
        is_sampa = name.lower() == "sampa"
        for position in range(1, 5):
            if position not in params:
                break
            ipa = clean_node(wxr, None, params.get(position, ""))
            if ipa == "":
                continue
            new_sound = Sound(ipa=ipa, sense=sense, raw_tags=raw_tags)
            if is_sampa:
                new_sound.tags.append("SAMPA")
            sounds.append(new_sound)
    elif name in ("Audio", "audio"):
        # https://it.wiktionary.org/wiki/Template:Audio
        sound_file = clean_node(wxr, None, params.get(1, ""))
        raw_tag = clean_node(wxr, None, params.get(2, ""))
        if sound_file == "":
            return
        reuse_last = len(sounds) > 0
        if reuse_last:
            target = sounds[-1]
        else:
            target = Sound(sense=sense, raw_tags=raw_tags)
        set_sound_file_url_fields(wxr, sound_file, target)
        if raw_tag != "":
            target.raw_tags.append(raw_tag)
        if not reuse_last:
            # a freshly created sound is only stored once fully populated
            sounds.append(target)

123 sounds.append(sound)