Coverage for src/wiktextract/extractor/it/sound.py: 84%

80 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Hyphenation, Sound, WordEntry 

7 

8 

def _split_hyphenation(h_str: str) -> list[str]:
    """Split ``a | b | c`` hyphenation text into whitespace-stripped parts."""
    return [part.strip() for part in h_str.split("|")]


def extract_hyphenation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Collect hyphenation data from a section and attach it to entries.

    Three layouts are handled:

    * lists whose ``sarg`` is ``;`` - the first list item with non-empty
      text supplies a single hyphenation;
    * lists whose ``sarg`` is ``*`` - every list item may supply one
      hyphenation, with italic text used as the sense (surrounding
      parentheses stripped) and bold text as the ``|``-separated parts;
    * bold nodes that are direct children of the section (no list).

    The collected hyphenations are appended to every entry in ``page_data``
    that has the same ``lang_code`` as the last entry.  Nothing is returned.
    """
    # https://it.wiktionary.org/wiki/Aiuto:Sillabazione
    hyphenations: list[Hyphenation] = []
    for list_node in level_node.find_child(NodeKind.LIST):
        match list_node.sarg:
            case ";":
                # Only the first non-empty list item is used.
                for list_item in list_node.find_child(NodeKind.LIST_ITEM):
                    h_str = clean_node(wxr, None, list_item.children)
                    if h_str != "":
                        hyphenations.append(
                            Hyphenation(parts=_split_hyphenation(h_str))
                        )
                        break
            case "*":
                for list_item in list_node.find_child(NodeKind.LIST_ITEM):
                    h_data = Hyphenation()
                    for node in list_item.find_child(
                        NodeKind.ITALIC | NodeKind.BOLD
                    ):
                        match node.kind:
                            case NodeKind.ITALIC:
                                h_data.sense = clean_node(
                                    wxr, None, node
                                ).strip("()")
                            case NodeKind.BOLD:
                                h_str = clean_node(wxr, None, node)
                                # "".split("|") is [""], which would pass
                                # the length check below and store a blank
                                # hyphenation; skip empty text like the
                                # other two layouts do.
                                if h_str != "":
                                    h_data.parts = _split_hyphenation(h_str)
                    # Assumes a fresh Hyphenation() has empty `parts`
                    # (defined in .models) - items without bold text are
                    # dropped here.
                    if len(h_data.parts) > 0:
                        hyphenations.append(h_data)

    # no list
    for node in level_node.find_child(NodeKind.BOLD):
        h_str = clean_node(wxr, None, node)
        if h_str != "":
            hyphenations.append(Hyphenation(parts=_split_hyphenation(h_str)))

    for data in page_data:
        if data.lang_code == page_data[-1].lang_code:
            data.hyphenations.extend(hyphenations)

56 

57 

def extract_pronunciation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Gather sounds from a pronunciation section and attach them to entries.

    List items found in the section's lists are handed to
    ``extract_sound_list_item``; templates that are direct children of the
    section are handed to ``extract_sound_template`` with an empty sense and
    no raw tags.  The gathered sounds are then appended to every entry in
    ``page_data`` sharing the ``lang_code`` of the last entry.
    """
    # https://it.wiktionary.org/wiki/Aiuto:Pronuncia
    collected: list[Sound] = []

    for sound_list in level_node.find_child(NodeKind.LIST):
        for item in sound_list.find_child(NodeKind.LIST_ITEM):
            extract_sound_list_item(wxr, item, collected)

    # templates placed directly in the section, outside of any list
    for template in level_node.find_child(NodeKind.TEMPLATE):
        extract_sound_template(wxr, template, collected, "", [])

    for entry in page_data:
        if entry.lang_code != page_data[-1].lang_code:
            continue
        entry.sounds.extend(collected)

74 

75 

def extract_sound_list_item(
    wxr: WiktextractContext, list_item: WikiNode, sounds: list[Sound]
) -> None:
    """Process the italic and template children of one sound list item.

    Children are visited in order.  Italic text becomes the current sense
    (surrounding parentheses stripped), a ``glossa`` template (name compared
    case-insensitively) contributes a raw tag, and any other template is
    passed to ``extract_sound_template`` together with the sense and raw
    tags seen so far.  Results are appended to ``sounds``.
    """
    current_sense = ""
    tags_so_far: list[str] = []
    wanted = NodeKind.ITALIC | NodeKind.TEMPLATE
    for child in list_item.find_child(wanted):
        if child.kind == NodeKind.ITALIC:
            italic_text = clean_node(wxr, None, child)
            current_sense = italic_text.strip("()")
        elif child.kind == NodeKind.TEMPLATE:
            if child.template_name.lower() != "glossa":
                extract_sound_template(
                    wxr, child, sounds, current_sense, tags_so_far
                )
            else:
                gloss_text = clean_node(wxr, None, child)
                tags_so_far.append(gloss_text.strip("()"))

90 

91 

def extract_sound_template(
    wxr: WiktextractContext,
    t_node: TemplateNode,
    sounds: list[Sound],
    sense: str,
    raw_tags: list[str],
) -> None:
    """Turn one pronunciation template into ``Sound`` data.

    * ``IPA`` / ``SAMPA``: positional arguments 1-4 are read in order,
      stopping at the first missing one; each non-empty value becomes a new
      ``Sound`` (tagged ``"SAMPA"`` for the SAMPA template).
    * ``Audio`` / ``audio``: argument 1 is the sound file and argument 2 an
      optional raw tag.  When ``sounds`` is non-empty the file is attached
      to its last element, otherwise a new ``Sound`` is created and
      appended.

    Any other template name is ignored.  ``sounds`` is modified in place.
    """
    name = t_node.template_name
    if name in ("IPA", "SAMPA"):
        # https://it.wiktionary.org/wiki/Template:IPA
        # https://it.wiktionary.org/wiki/Template:SAMPA
        params = t_node.template_parameters
        is_sampa = name.lower() == "sampa"
        for position in range(1, 5):
            if position not in params:
                break
            ipa = clean_node(wxr, None, params.get(position, ""))
            if ipa == "":
                continue
            entry = Sound(ipa=ipa, sense=sense, raw_tags=raw_tags)
            if is_sampa:
                entry.tags.append("SAMPA")
            sounds.append(entry)
    elif name in ("Audio", "audio"):
        # https://it.wiktionary.org/wiki/Template:Audio
        params = t_node.template_parameters
        sound_file = clean_node(wxr, None, params.get(1, ""))
        raw_tag = clean_node(wxr, None, params.get(2, ""))
        if sound_file == "":
            return
        # Reuse the most recent sound when there is one; otherwise start a
        # new one that is appended once it has been filled in.
        is_new = len(sounds) == 0
        target = Sound(sense=sense, raw_tags=raw_tags) if is_new else sounds[-1]
        set_sound_file_url_fields(wxr, sound_file, target)
        if raw_tag != "":
            target.raw_tags.append(raw_tag)
        if is_new:
            sounds.append(target)