Coverage for src/wiktextract/extractor/cs/sound.py: 80%

66 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-12 08:27 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Hyphenation, Sound, WordEntry 

7from .tags import translate_raw_tags 

8 

9 

def extract_sound_section(
    wxr: WiktextractContext, base_data: WordEntry, level_node: LevelNode
):
    """Walk the list items of a pronunciation section and record sounds.

    Within each list item, italic nodes and ``Příznak2`` templates
    accumulate raw tags. The accumulated tags are handed to the next
    ``IPA``, ``IPA2`` or ``Audio`` template and cleared afterwards, so each
    label only qualifies the sound template that follows it.
    """
    # Templates that produce sounds; all extractors share one signature.
    sound_extractors = {
        "IPA": extract_ipa_template,
        "IPA2": extract_ipa2_template,
        "Audio": extract_audio_template,
    }
    for list_node in level_node.find_child(NodeKind.LIST):
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            pending_tags = []
            for child in list_item.children:
                if isinstance(child, TemplateNode):
                    name = child.template_name
                    if name in sound_extractors:
                        extractor = sound_extractors[name]
                        extractor(wxr, base_data, child, pending_tags)
                        pending_tags.clear()
                    elif name == "Příznak2":
                        pending_tags.extend(
                            extract_příznak2_template(wxr, child)
                        )
                    # Template nodes are never treated as italic labels.
                    continue
                if (
                    isinstance(child, WikiNode)
                    and child.kind == NodeKind.ITALIC
                ):
                    label = clean_node(wxr, None, child)
                    if label != "":
                        pending_tags.append(label)

35 

36 

def extract_ipa_template(
    wxr: WiktextractContext,
    base_data: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
):
    """Append one ``Sound`` per IPA transcription rendered by ``IPA``.

    The template is expanded, and the text of every ``<span class="IPA">``
    in the result is split on commas. Each non-empty piece becomes a
    ``Sound`` carrying ``raw_tags``, which is passed through
    ``translate_raw_tags`` and appended to ``base_data.sounds``.
    """
    # https://cs.wiktionary.org/wiki/Šablona:IPA
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    root = wxr.wtp.parse(wikitext, expand_all=True)
    ipa_spans = root.find_html("span", attr_name="class", attr_value="IPA")
    for ipa_span in ipa_spans:
        span_text = clean_node(wxr, None, ipa_span)
        for piece in map(str.strip, span_text.split(",")):
            if piece == "":
                continue
            sound = Sound(ipa=piece, raw_tags=raw_tags)
            translate_raw_tags(sound)
            base_data.sounds.append(sound)
    # Second pass over the whole expansion with base_data as the target;
    # presumably this collects category links - confirm against clean_node.
    clean_node(wxr, base_data, root)

58 

59 

def extract_ipa2_template(
    wxr: WiktextractContext,
    base_data: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
):
    """Append a ``Sound`` built from the first argument of ``IPA2``.

    The cleaned first positional parameter is wrapped in square brackets
    and stored as the IPA string. Nothing is appended when it is empty.
    """
    # https://cs.wiktionary.org/wiki/Šablona:IPA2
    first_arg = t_node.template_parameters.get(1, "")
    transcription = clean_node(wxr, None, first_arg)
    if transcription == "":
        return
    sound = Sound(ipa=f"[{transcription}]", raw_tags=raw_tags)
    translate_raw_tags(sound)
    base_data.sounds.append(sound)

72 

73 

def extract_audio_template(
    wxr: WiktextractContext,
    base_data: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
):
    """Append a ``Sound`` for the file named by the ``Audio`` template.

    The cleaned first positional parameter is taken as the file name and
    handed to ``set_sound_file_url_fields`` together with a new ``Sound``
    carrying ``raw_tags``. Nothing is appended when the name is empty.
    """
    # https://cs.wiktionary.org/wiki/Šablona:Audio
    first_arg = t_node.template_parameters.get(1, "")
    filename = clean_node(wxr, None, first_arg)
    if filename == "":
        return
    sound = Sound(raw_tags=raw_tags)
    set_sound_file_url_fields(wxr, filename, sound)
    translate_raw_tags(sound)
    base_data.sounds.append(sound)

87 

88 

def extract_příznak2_template(
    wxr: WiktextractContext, t_node: TemplateNode
) -> list[str]:
    """Return the labels rendered by a ``Příznak2`` template.

    The cleaned template text has leading and trailing parentheses and
    spaces removed, then is split on commas. Empty pieces are dropped.
    """
    label_text = clean_node(wxr, None, t_node).strip("() ")
    return [
        label for label in map(str.strip, label_text.split(",")) if label != ""
    ]

99 

100 

def extract_hyphenation_section(
    wxr: WiktextractContext, base_data: WordEntry, level_node: LevelNode
):
    """Record one ``Hyphenation`` per list item of a hyphenation section.

    Each list item's cleaned text is split on ``-``. The stripped,
    non-empty pieces become the ``parts`` of a ``Hyphenation`` appended to
    ``base_data.hyphenations``. Items with no pieces are skipped.
    """
    for list_node in level_node.find_child(NodeKind.LIST):
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            item_text = clean_node(wxr, None, list_item.children)
            stripped = (piece.strip() for piece in item_text.split("-"))
            parts = [piece for piece in stripped if piece]
            if parts:
                base_data.hyphenations.append(Hyphenation(parts=parts))