Coverage for src/wiktextract/extractor/it/sound.py: 84%

79 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Hyphenation, Sound, WordEntry 

7 

8 

def extract_hyphenation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Gather hyphenation data below `level_node` and store it on entries.

    Three layouts are handled:

    * ``;`` lists: the first list item whose cleaned text is non-empty
      supplies a single hyphenation; the remaining items are ignored.
    * ``*`` lists: for every list item, italic children set the sense
      (leading/trailing parentheses stripped) and bold children set the
      hyphenation; the item is kept only if a hyphenation was found.
    * bold nodes that are direct children of the section (no list).

    The collected items are appended to the `hyphenations` of every entry
    in `page_data` whose `lang_code` equals that of the last entry.
    """
    # https://it.wiktionary.org/wiki/Aiuto:Sillabazione
    collected = []
    for list_node in level_node.find_child(NodeKind.LIST):
        marker = list_node.sarg
        if marker == ";":
            for item in list_node.find_child(NodeKind.LIST_ITEM):
                text = clean_node(wxr, None, item.children)
                if text == "":
                    continue
                collected.append(Hyphenation(hyphenation=text))
                # only the first non-empty item is used
                break
        elif marker == "*":
            for item in list_node.find_child(NodeKind.LIST_ITEM):
                entry = Hyphenation()
                for child in item.find_child(NodeKind.ITALIC | NodeKind.BOLD):
                    if child.kind == NodeKind.ITALIC:
                        sense_text = clean_node(wxr, None, child)
                        entry.sense = sense_text.strip("()")
                    elif child.kind == NodeKind.BOLD:
                        entry.hyphenation = clean_node(wxr, None, child)
                if entry.hyphenation != "":
                    collected.append(entry)

    # no list
    for bold_node in level_node.find_child(NodeKind.BOLD):
        text = clean_node(wxr, None, bold_node)
        if text == "":
            continue
        collected.append(Hyphenation(hyphenation=text))

    for word_entry in page_data:
        if word_entry.lang_code != page_data[-1].lang_code:
            continue
        word_entry.hyphenations.extend(collected)

47 

48 

def extract_pronunciation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Gather sound data below `level_node` and store it on entries.

    Every list item of every list directly under the section is passed to
    `extract_sound_list_item`; templates that are direct children of the
    section are passed to `extract_sound_template` with an empty sense and
    no raw tags. The collected sounds are appended to the `sounds` of every
    entry in `page_data` whose `lang_code` equals that of the last entry.
    """
    # https://it.wiktionary.org/wiki/Aiuto:Pronuncia
    collected = []
    all_items = (
        item
        for list_node in level_node.find_child(NodeKind.LIST)
        for item in list_node.find_child(NodeKind.LIST_ITEM)
    )
    for item in all_items:
        extract_sound_list_item(wxr, item, collected)

    # no list
    for template in level_node.find_child(NodeKind.TEMPLATE):
        extract_sound_template(wxr, template, collected, "", [])

    for word_entry in page_data:
        if word_entry.lang_code != page_data[-1].lang_code:
            continue
        word_entry.sounds.extend(collected)

65 

66 

def extract_sound_list_item(
    wxr: WiktextractContext, list_item: WikiNode, sounds: list[Sound]
) -> None:
    """Process the italic and template children of one list item.

    Italic text (leading/trailing parentheses stripped) becomes the current
    sense. A template named "glossa" (compared case-insensitively) adds its
    cleaned text, with parentheses stripped, to the raw tags. Any other
    template is forwarded to `extract_sound_template` together with the
    sense and raw tags seen so far.
    """
    current_sense = ""
    tags_so_far = []
    wanted_kinds = NodeKind.ITALIC | NodeKind.TEMPLATE
    for child in list_item.find_child(wanted_kinds):
        if child.kind == NodeKind.ITALIC:
            current_sense = clean_node(wxr, None, child).strip("()")
            continue
        if child.kind != NodeKind.TEMPLATE:
            continue
        if child.template_name.lower() != "glossa":
            extract_sound_template(
                wxr, child, sounds, current_sense, tags_so_far
            )
        else:
            gloss_text = clean_node(wxr, None, child)
            tags_so_far.append(gloss_text.strip("()"))

81 

82 

def extract_sound_template(
    wxr: WiktextractContext,
    t_node: TemplateNode,
    sounds: list[Sound],
    sense: str,
    raw_tags: list[str],
) -> None:
    """Turn one pronunciation template into `Sound` data.

    * "IPA" / "SAMPA": positional arguments 1 to 4 are read in order,
      stopping at the first missing one; each non-empty value becomes a new
      `Sound` appended to `sounds` ("SAMPA" templates also get that tag).
    * "Audio" / "audio": argument 1 is the sound file and argument 2 an
      optional raw tag. The file is attached to the last item of `sounds`
      when one exists, otherwise to a newly appended `Sound`.

    Templates with any other name are ignored.
    """
    name = t_node.template_name
    if name in ("IPA", "SAMPA"):
        # https://it.wiktionary.org/wiki/Template:IPA
        # https://it.wiktionary.org/wiki/Template:SAMPA
        for position in range(1, 5):
            if position not in t_node.template_parameters:
                break
            ipa_text = clean_node(
                wxr, None, t_node.template_parameters.get(position, "")
            )
            if ipa_text == "":
                continue
            new_sound = Sound(ipa=ipa_text, sense=sense, raw_tags=raw_tags)
            if name.lower() == "sampa":
                new_sound.tags.append("SAMPA")
            sounds.append(new_sound)
        return

    if name not in ("Audio", "audio"):
        return

    # https://it.wiktionary.org/wiki/Template:Audio
    file_name = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    extra_tag = clean_node(wxr, None, t_node.template_parameters.get(2, ""))
    if file_name == "":
        return

    # Reuse the most recent sound if there is one, otherwise start a new one.
    needs_new_sound = len(sounds) == 0
    if needs_new_sound:
        target = Sound(sense=sense, raw_tags=raw_tags)
    else:
        target = sounds[-1]
    set_sound_file_url_fields(wxr, file_name, target)
    if extra_tag != "":
        target.raw_tags.append(extra_tag)
    if needs_new_sound:
        sounds.append(target)