Coverage for src/wiktextract/extractor/es/pronunciation.py: 90%

86 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Hyphenation, Sound, WordEntry 

7from .tags import translate_raw_tags 

8 

# Maps a `pron-graf` table row header to the `Sound` model field that
# stores the row's value; both Spanish spellings land in the same field.
PRON_GRAF_HEADER_MAP: dict[str, str] = dict.fromkeys(
    ("rimas", "rima"), "rhymes"
)

14 

15 

def process_pron_graf_template(
    wxr: WiktextractContext, word_entry: WordEntry, template_node: TemplateNode
) -> None:
    """Extract pronunciation data from a Spanish `pron-graf` template.

    The template is fully expanded and the first table in the result is
    read row by row. Only rows with exactly two cells (header, value) are
    considered; the header text selects how the value cell is handled.
    Results are stored on `word_entry` (`sounds`, `hyphenations`,
    `extra_sounds`).

    Args:
        wxr: Extraction context used for parsing and text cleaning.
        word_entry: Entry that receives the extracted data.
        template_node: The `pron-graf` template node to process.
    """
    # https://es.wiktionary.org/wiki/Plantilla:pron-graf
    # The template can create sound data without any parameter, so the
    # expanded two-column table is read instead of the template arguments.
    wikitext = wxr.wtp.node_to_wikitext(template_node)
    expanded_node = wxr.wtp.parse(wikitext, expand_all=True)
    table = next(iter(expanded_node.find_child(NodeKind.TABLE)), None)
    if table is None:
        return

    untranslated = {}  # rows whose header has no dedicated handling
    for row in table.find_child(NodeKind.TABLE_ROW):
        cells = list(row.find_child(NodeKind.TABLE_CELL))
        if len(cells) != 2:
            continue
        header_cell, value_cell = cells
        header = clean_node(wxr, None, header_cell)
        value = clean_node(wxr, None, value_cell)

        if header.endswith(" (AFI)"):  # IPA
            process_pron_graf_ipa_cell(wxr, word_entry, value_cell, header)
            continue
        if header == "silabación" and value != "":
            parts = value.split("-")
            word_entry.hyphenations.append(Hyphenation(parts=parts))
            continue
        if header in PRON_GRAF_HEADER_MAP:
            mapped_sound = Sound()
            setattr(mapped_sound, PRON_GRAF_HEADER_MAP[header], value)
            word_entry.sounds.append(mapped_sound)
            continue
        if header.endswith(" alternativas") or header == "variantes":
            process_pron_graf_link_cell(
                wxr, word_entry, value_cell, header, "alternative"
            )
            continue
        if header == "homófonos":
            process_pron_graf_link_cell(
                wxr, word_entry, value_cell, header, "homophone"
            )
            continue
        if header == "transliteraciones":
            process_pron_graf_text_cell(wxr, word_entry, value_cell, "roman")
            continue
        if header == "transcripciones silábicas":
            process_pron_graf_text_cell(
                wxr, word_entry, value_cell, "syllabic"
            )
            continue
        # Anything else is kept verbatim under its original header.
        untranslated[header] = value

    if untranslated:
        word_entry.extra_sounds = untranslated
    # NOTE(review): presumably run so clean_node can record page-level data
    # from the expansion on word_entry; confirm against clean_node.
    clean_node(wxr, word_entry, expanded_node)

67 

68 

def process_pron_graf_ipa_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    header_text: str,
) -> None:
    """Collect IPA pronunciations from the value cell of an "(AFI)" row.

    Text children are accumulated into the IPA string, `phonos` tags supply
    the audio file and `small`-tag labels, and a `br` tag ends the current
    pronunciation. Each non-empty pronunciation is appended to
    `word_entry.sounds`.

    Args:
        wxr: Extraction context used for text cleaning and file URLs.
        word_entry: Entry whose `sounds` list receives the results.
        cell_node: The table cell holding the pronunciation data.
        header_text: Cleaned row header, ending with " (AFI)".
    """
    # A header that does not start with "pronunciación" is recorded as a
    # raw tag (the original code marks it as a location).
    header_is_tag = not header_text.startswith("pronunciación")

    def finalize(pending: Sound) -> None:
        """Attach the header tag if needed, then translate raw tags."""
        if header_is_tag:
            pending.raw_tags.append(header_text.removesuffix(" (AFI)"))
        translate_raw_tags(pending)

    current = Sound()
    for child in cell_node.children:
        if isinstance(child, str):
            stripped = child.strip()
            if len(stripped) > 0:
                current.ipa += stripped
            continue
        if not isinstance(child, HTMLNode):
            continue
        if child.tag == "phonos":
            audio_file = child.attrs.get("file", "")
            set_sound_file_url_fields(wxr, audio_file, current)
            for small_tag in child.find_html("small"):
                current.raw_tags.append(clean_node(wxr, None, small_tag))
        elif child.tag == "br" and current != Sound():
            # A line break separates pronunciations; store what has been
            # gathered so far and start a fresh one.
            finalize(current)
            word_entry.sounds.append(current.model_copy(deep=True))
            current = Sound()

    # Store the trailing pronunciation when no final line break closed it.
    if current != Sound():
        finalize(current)
        word_entry.sounds.append(current)

98 

99 

def process_pron_graf_link_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    header_text: str,
    field_name: str,
) -> None:
    """Create one `Sound` per link found directly inside a table cell.

    The cleaned link text is stored in the `Sound` field named by
    `field_name`. A `ref` tag immediately following the link becomes the
    sound's note, and rows headed "variantes" are flagged as not sharing
    the same pronunciation.

    Args:
        wxr: Extraction context used for text cleaning.
        word_entry: Entry whose `sounds` list receives the results.
        cell_node: The table cell containing the links.
        header_text: Cleaned row header text.
        field_name: Name of the `Sound` attribute to fill with link text.
    """
    is_variant_row = header_text == "variantes"
    for position, link in cell_node.find_child(
        NodeKind.LINK, with_index=True
    ):
        entry = Sound()
        setattr(entry, field_name, clean_node(wxr, None, link))

        siblings = cell_node.children
        follower = (
            siblings[position + 1] if position + 1 < len(siblings) else None
        )
        if isinstance(follower, HTMLNode) and follower.tag == "ref":
            # The "ref" tag right after a link holds its note text.
            entry.note = clean_node(wxr, None, follower.children)

        if is_variant_row:
            entry.not_same_pronunciation = True
        word_entry.sounds.append(entry)

124 

125 

def process_pron_graf_text_cell(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    cell_node: WikiNode,
    field_name: str,
) -> None:
    """Create `Sound` items from the plain-text values of a table cell.

    Non-blank text children are stored in the `Sound` field named by
    `field_name`. A text child starting with ",&nbsp;" begins a new value,
    and a `ref` tag supplies the note of the value being built. Only
    sounds whose `field_name` value is non-empty are appended to
    `word_entry.sounds`.

    Args:
        wxr: Extraction context used for text cleaning.
        word_entry: Entry whose `sounds` list receives the results.
        cell_node: The table cell containing the text values.
        field_name: Name of the `Sound` attribute to fill.
    """
    sound = Sound()
    for node in cell_node.children:
        if isinstance(node, str) and len(node.strip()) > 0:
            text = node.strip()
            if text.startswith(",&nbsp;"):
                text = text.removeprefix(",&nbsp;")
                # The separator closes the previous value. Skip the append
                # when nothing was collected (e.g. the cell starts with the
                # separator), matching the guard on the final append.
                if len(getattr(sound, field_name)) > 0:
                    word_entry.sounds.append(sound)
                sound = Sound()
            setattr(sound, field_name, text.strip())
        elif isinstance(node, HTMLNode) and node.tag == "ref":
            sound.note = clean_node(wxr, None, node.children)
    if len(getattr(sound, field_name)) > 0:
        word_entry.sounds.append(sound)