Coverage for src/wiktextract/extractor/es/linkage.py: 88%

79 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-18 10:14 +0000

1from wikitextprocessor.parser import ( 

2 LEVEL_KIND_FLAGS, 

3 LevelNode, 

4 NodeKind, 

5 TemplateNode, 

6 WikiNode, 

7) 

8 

9from ...page import clean_node 

10from ...wxr_context import WiktextractContext 

11from .models import Form, Linkage, WordEntry 

12from .section_titles import LINKAGE_TITLES 

13 

14 

def extract_linkage_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
    linkage_type: str,
):
    """Collect linkage words from the list items of a linkage section.

    For every list item below ``level_node``, links and ``l`` templates that
    appear before the first colon are cleaned into words; everything after
    that colon is cleaned into the sense text shared by those words.  The
    resulting ``Linkage`` objects are appended to the ``linkage_type``
    attribute of every entry in ``page_data`` whose ``lang_code`` and
    ``etymology_text`` equal those of the last entry.
    """
    collected = []
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        terms = []
        gloss_nodes = []
        seen_colon = False
        for child in item.children:
            if seen_colon:
                # Everything after the colon belongs to the sense text.
                gloss_nodes.append(child)
                continue
            if isinstance(child, str):
                if ":" in child:
                    seen_colon = True
                    # Keep only the text that follows the first colon.
                    gloss_nodes.append(child[child.index(":") + 1 :])
                continue
            if (
                isinstance(child, WikiNode) and child.kind == NodeKind.LINK
            ) or (
                isinstance(child, TemplateNode) and child.template_name == "l"
            ):
                terms.append(clean_node(wxr, None, child))
        gloss = clean_node(wxr, None, gloss_nodes)
        # Words that cleaned to an empty string are dropped.
        collected.extend(
            Linkage(word=term, sense=gloss) for term in terms if term
        )

    if not page_data:
        return
    newest = page_data[-1]
    for entry in page_data:
        same_language = entry.lang_code == newest.lang_code
        if same_language and entry.etymology_text == newest.etymology_text:
            getattr(entry, linkage_type).extend(collected)

46 

47 

def process_linkage_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
):
    """Append the words of a linkage template to ``word_entry``.

    The template name, minus one trailing ``"s"``, is looked up in
    ``LINKAGE_TITLES`` to find the ``word_entry`` attribute that receives the
    data.  Positional parameters ``1`` .. ``40`` are read in order, stopping
    at the first missing index; each one becomes a ``Linkage``.  When
    ``word_entry`` already has senses, the linkage is tied to the last one
    (its ``sense_index`` and its glosses joined by spaces).  The optional
    ``nota<N>`` / ``alt<N>`` parameters, and for the first word also the
    unnumbered ``nota`` / ``alt``, fill in the note and alternative spelling.

    Templates whose name has no entry in ``LINKAGE_TITLES`` are ignored.
    """
    # https://es.wiktionary.org/wiki/Plantilla:sinónimo
    linkage_type = LINKAGE_TITLES.get(t_node.template_name.removesuffix("s"))
    if linkage_type is None:
        # No target field is known for this template.  Without this guard
        # getattr() below would be called with None as the attribute name
        # and raise TypeError.
        return
    for index in range(1, 41):
        if index not in t_node.template_parameters:
            break
        linkage_data = Linkage(
            word=clean_node(wxr, None, t_node.template_parameters[index])
        )
        if word_entry.senses:
            last_sense = word_entry.senses[-1]
            linkage_data.sense_index = last_sense.sense_index
            linkage_data.sense = " ".join(last_sense.glosses)
        getattr(word_entry, linkage_type).append(linkage_data)
        process_linkage_template_parameter(
            wxr, linkage_data, t_node, f"nota{index}"
        )
        process_linkage_template_parameter(
            wxr, linkage_data, t_node, f"alt{index}"
        )
        if index == 1:
            # The unnumbered parameters apply to the first word and are
            # processed last, so they win over "nota1" / "alt1".
            process_linkage_template_parameter(
                wxr, linkage_data, t_node, "nota"
            )
            process_linkage_template_parameter(wxr, linkage_data, t_node, "alt")

74 

75 

def process_linkage_template_parameter(
    wxr: WiktextractContext,
    linkage_data: Linkage,
    template_node: TemplateNode,
    param: str,
) -> None:
    """Copy one optional template parameter onto ``linkage_data``.

    Does nothing when ``param`` is absent from the template.  Otherwise the
    cleaned value is stored in ``note`` for names starting with ``"nota"``
    or in ``alternative_spelling`` for names starting with ``"alt"``.
    """
    arguments = template_node.template_parameters
    if param not in arguments:
        return
    cleaned = clean_node(wxr, None, arguments[param])
    # First matching prefix decides the destination attribute.
    for prefix, attribute in (
        ("nota", "note"),
        ("alt", "alternative_spelling"),
    ):
        if param.startswith(prefix):
            setattr(linkage_data, attribute, cleaned)
            break

88 

89 

def extract_alt_form_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Add the alternative forms listed in a section to ``word_entry.forms``.

    Each direct link child of ``level_node`` yields one form tagged
    ``"alt-of"``.  If the section has no link children at all, its
    non-heading content is cleaned to text, one trailing ``"."`` is removed,
    and every non-empty comma-separated piece becomes a form instead.
    """
    link_total = 0
    for link in level_node.find_child(NodeKind.LINK):
        link_total += 1
        text = clean_node(wxr, None, link)
        if text == "":
            continue
        word_entry.forms.append(Form(form=text, tags=["alt-of"]))
    if link_total > 0:
        return

    # No links at all: fall back to the plain text of the section.
    content_nodes = list(
        level_node.invert_find_child(LEVEL_KIND_FLAGS, include_empty_str=True)
    )
    plain_text = clean_node(wxr, None, content_nodes).removesuffix(".")
    word_entry.forms.extend(
        Form(form=piece, tags=["alt-of"])
        for piece in map(str.strip, plain_text.split(","))
        if piece != ""
    )

113 

114 

def extract_additional_information_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Process the linkage templates that are direct children of a section.

    Every ``cognados``, ``derivad`` or ``morfología`` template found among
    ``level_node.children`` is passed to ``extract_cognados_template``.
    """
    handled_templates = ("cognados", "derivad", "morfología")
    for child in level_node.children:
        if not isinstance(child, TemplateNode):
            continue
        if child.template_name in handled_templates:
            extract_cognados_template(wxr, word_entry, child)

125 

126 

def extract_cognados_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Expand a template and store the words found in its ``<span>`` tags.

    The template is converted back to wikitext and parsed with full
    expansion.  Each ``span`` element whose cleaned text is non-empty becomes
    a ``Linkage``.  The list is added to ``cognates``, ``derived`` or
    ``morphologies`` of ``word_entry`` depending on whether the template is
    ``cognados``, ``derivad`` or ``morfología``; other names store nothing.
    """
    # https://es.wiktionary.org/wiki/Plantilla:cognados
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_root = wxr.wtp.parse(wikitext, expand_all=True)
    span_texts = (
        clean_node(wxr, None, span)
        for span in expanded_root.find_html_recursively("span")
    )
    linkages = [Linkage(word=text) for text in span_texts if text != ""]

    # Template name -> WordEntry attribute that receives the linkages.
    destination = {
        "cognados": "cognates",
        "derivad": "derived",
        "morfología": "morphologies",
    }.get(t_node.template_name)
    if destination is not None:
        getattr(word_entry, destination).extend(linkages)