Coverage for src/wiktextract/extractor/es/linkage.py: 88%

79 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1from wikitextprocessor.parser import ( 

2 LEVEL_KIND_FLAGS, 

3 LevelNode, 

4 NodeKind, 

5 TemplateNode, 

6 WikiNode, 

7) 

8 

9from ...page import clean_node 

10from ...wxr_context import WiktextractContext 

11from .models import Form, Linkage, WordEntry 

12from .section_titles import LINKAGE_TITLES 

13 

14 

def extract_linkage_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
    linkage_type: str,
):
    """Collect linkage words from the list items below ``level_node``.

    Within each list item, link nodes and ``l`` templates that appear before
    the first ":" found in a plain-text child become words; the remainder of
    that text child and every following child are cleaned into one sense
    string shared by all words of the item.

    The resulting ``Linkage`` objects are appended to the attribute named
    ``linkage_type`` on every entry of ``page_data`` whose ``lang_code`` and
    ``etymology_text`` equal those of the last entry.
    """
    collected = []
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        terms = []
        gloss_parts = []
        seen_colon = False
        for child in item.children:
            if seen_colon:
                gloss_parts.append(child)
                continue
            if isinstance(child, str):
                # Only the text after the first colon belongs to the sense.
                _, colon, tail = child.partition(":")
                if colon:
                    seen_colon = True
                    gloss_parts.append(tail)
                continue
            is_link = (
                isinstance(child, WikiNode) and child.kind == NodeKind.LINK
            )
            if is_link or (
                isinstance(child, TemplateNode) and child.template_name == "l"
            ):
                terms.append(clean_node(wxr, None, child))
        gloss = clean_node(wxr, None, gloss_parts)
        # Words that cleaned to an empty string are dropped.
        collected.extend(
            Linkage(word=term, sense=gloss) for term in terms if term
        )

    for entry in page_data:
        latest = page_data[-1]
        # Coverage note: per the coverage report for this module, the tests
        # never hit an entry that fails this match.
        if (
            entry.lang_code != latest.lang_code
            or entry.etymology_text != latest.etymology_text
        ):
            continue
        getattr(entry, linkage_type).extend(collected)

46 

47 

def process_linkage_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Add the words of a linkage template to ``word_entry``.

    The target attribute is looked up in ``LINKAGE_TITLES`` using the
    template name with one trailing "s" removed. Positional parameters
    1..40 are read in order until the first missing index; each becomes a
    ``Linkage``. When ``word_entry`` already has senses, the linkage copies
    the last sense's ``sense_index`` and its glosses joined by spaces.
    Optional ``nota<N>``/``alt<N>`` parameters (and the unnumbered ``nota``/
    ``alt`` for the first word) fill in the note and alternative spelling.

    Templates whose name has no entry in ``LINKAGE_TITLES`` are ignored.
    """
    # https://es.wiktionary.org/wiki/Plantilla:sinónimo
    linkage_type = LINKAGE_TITLES.get(t_node.template_name.removesuffix("s"))
    if linkage_type is None:
        # Without this guard getattr(word_entry, None) raises TypeError.
        return
    # Coverage note: per the coverage report for this module, the tests
    # never run this loop through all 40 indexes; it always ends via break.
    for index in range(1, 41):
        if index not in t_node.template_parameters:
            break
        linkage_data = Linkage(
            word=clean_node(wxr, None, t_node.template_parameters[index])
        )
        # Coverage note: the report shows this branch was never taken in
        # tests (senses was always empty there).
        if word_entry.senses:
            last_sense = word_entry.senses[-1]
            linkage_data.sense_index = last_sense.sense_index
            linkage_data.sense = " ".join(last_sense.glosses)
        getattr(word_entry, linkage_type).append(linkage_data)
        process_linkage_template_parameter(
            wxr, linkage_data, t_node, f"nota{index}"
        )
        process_linkage_template_parameter(
            wxr, linkage_data, t_node, f"alt{index}"
        )
        if index == 1:
            # The unnumbered parameters apply to the first word only.
            # NOTE(review): the nesting of the bare "alt" call was recovered
            # from whitespace-flattened text; confirm it belongs in this
            # branch alongside the bare "nota" call.
            process_linkage_template_parameter(
                wxr, linkage_data, t_node, "nota"
            )
            process_linkage_template_parameter(wxr, linkage_data, t_node, "alt")

74 

75 

def process_linkage_template_parameter(
    wxr: WiktextractContext,
    linkage_data: Linkage,
    template_node: TemplateNode,
    param: str,
) -> None:
    """Copy one optional template parameter onto ``linkage_data``.

    Does nothing when ``param`` is absent from the template. Otherwise the
    cleaned value is stored in ``note`` for names starting with "nota" and
    in ``alternative_spelling`` for names starting with "alt".
    """
    parameters = template_node.template_parameters
    if param not in parameters:
        return
    cleaned = clean_node(wxr, None, parameters[param])
    if param.startswith("nota"):
        linkage_data.note = cleaned
        return
    # Coverage note: per the coverage report for this module, every
    # non-"nota" name seen in tests started with "alt".
    if param.startswith("alt"):
        linkage_data.alternative_spelling = cleaned

88 

89 

def extract_alt_form_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Append "alt-of" forms found in ``level_node`` to ``word_entry.forms``.

    Each direct link child of the section yields one form. When the section
    has no link children, the cleaned text of its non-level children (minus
    one trailing ".") is split on commas and each non-empty piece becomes a
    form instead.
    """
    found_link = False
    for link in level_node.find_child(NodeKind.LINK):
        found_link = True
        text = clean_node(wxr, None, link)
        # Coverage note: the coverage report for this module shows the
        # cleaned link text was never empty in tests.
        if text:
            word_entry.forms.append(Form(form=text, tags=["alt-of"]))
    if found_link:
        return

    # Fallback: treat the section body as a comma-separated word list.
    body_nodes = list(level_node.invert_find_child(LEVEL_KIND_FLAGS))
    body_text = clean_node(wxr, None, body_nodes).removesuffix(".")
    pieces = (piece.strip() for piece in body_text.split(","))
    word_entry.forms.extend(
        Form(form=piece, tags=["alt-of"]) for piece in pieces if piece
    )

107 

108 

def extract_additional_information_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Process the linkage-like templates that are direct children of
    ``level_node``.

    Only templates named "cognados", "derivad" or "morfología" are handled;
    each one is passed to ``extract_cognados_template``.
    """
    handled_names = ("cognados", "derivad", "morfología")
    for child in level_node.children:
        if not isinstance(child, TemplateNode):
            continue
        if child.template_name in handled_names:
            extract_cognados_template(wxr, word_entry, child)

119 

120 

def extract_cognados_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Expand a template and store the text of its ``<span>`` tags as linkages.

    The template is converted back to wikitext and re-parsed with full
    expansion; every non-empty cleaned ``<span>`` becomes a ``Linkage``. The
    list is appended to ``cognates``, ``derived`` or ``morphologies``
    according to the template name; other names store nothing.
    """
    # https://es.wiktionary.org/wiki/Plantilla:cognados
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_root = wxr.wtp.parse(wikitext, expand_all=True)
    linkages = [
        Linkage(word=text)
        for span in expanded_root.find_html_recursively("span")
        if (text := clean_node(wxr, None, span)) != ""
    ]

    # Coverage note: per the coverage report for this module, tests only
    # exercised the "derivad" case.
    target_by_template = {
        "cognados": "cognates",
        "derivad": "derived",
        "morfología": "morphologies",
    }
    target_attr = target_by_template.get(t_node.template_name)
    if target_attr is not None:
        getattr(word_entry, target_attr).extend(linkages)