Coverage for src/wiktextract/extractor/es/pos.py: 86%

145 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1import re 

2 

3from wikitextprocessor.parser import ( 

4 LEVEL_KIND_FLAGS, 

5 LevelNode, 

6 NodeKind, 

7 TemplateNode, 

8 WikiNode, 

9) 

10 

11from ...page import clean_node 

12from ...wxr_context import WiktextractContext 

13from .example import process_ejemplo_template 

14from .inflection import process_inflect_template 

15from .linkage import process_linkage_template 

16from .models import AltForm, Form, Sense, WordEntry 

17from .section_titles import LINKAGE_TITLES 

18from .tags import ALL_TAGS, translate_raw_tags 

19 

20 

def extract_pos_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    section_title: str,
) -> None:
    """Fill ``word_entry`` from a part-of-speech section.

    Words of ``section_title`` found in ``ALL_TAGS`` are added to the entry
    tags. Lists under ``level_node`` are turned into senses and templates are
    routed to their handlers. If the section contains no list at all, the
    cleaned text of its non-heading children is used as a single gloss.
    """
    # Section-title words that translate to tags (a string or list of them).
    for title_word in section_title.split():
        translated = ALL_TAGS.get(title_word)
        if isinstance(translated, str):
            candidates = [translated]
        elif isinstance(translated, list):
            candidates = translated
        else:
            candidates = []
        for tag in candidates:
            if tag not in word_entry.tags:
                word_entry.tags.append(tag)

    # Templates that annotate the most recently added sense.
    sense_template_handlers = {
        "ejemplo": process_ejemplo_template,
        "uso": process_uso_template,
        "ámbito": process_ambito_template,
    }

    has_list = False
    for child in level_node.children:
        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            has_list = True
            marker = child.sarg
            if marker == ";":
                # Top-level gloss list: each item starts a fresh sense.
                for item in child.find_child(NodeKind.LIST_ITEM):
                    extract_gloss_list_item(wxr, word_entry, item, Sense())
            elif re.fullmatch(r":+;", marker) is not None:  # nested gloss
                # Use the latest sense whose gloss count equals the number
                # of leading ":" as the parent; fall back to an empty sense.
                wanted_gloss_count = len(marker) - 1
                parent = next(
                    (
                        candidate
                        for candidate in reversed(word_entry.senses)
                        if len(candidate.glosses) == wanted_gloss_count
                    ),
                    Sense(),
                )
                for item in child.find_child(NodeKind.LIST_ITEM):
                    nested = parent.model_copy(deep=True)
                    nested.sense_index = ""
                    extract_gloss_list_item(wxr, word_entry, item, nested)
            elif marker == ":" and word_entry.senses:
                # Plain ":" items extend whichever sense is last right now.
                for item in child.find_child(NodeKind.LIST_ITEM):
                    extract_gloss_list_item(
                        wxr, word_entry, item, word_entry.senses[-1]
                    )
        elif isinstance(child, TemplateNode):
            name = child.template_name
            if name.startswith("inflect."):
                process_inflect_template(wxr, word_entry, child)
            elif name in ("es.sust", "es.adj", "es.v"):
                extract_pos_header_template(wxr, word_entry, child)
            elif name.removesuffix("s") in LINKAGE_TITLES:
                process_linkage_template(wxr, word_entry, child)
            elif word_entry.senses:
                handler = sense_template_handlers.get(name)
                if handler is not None:
                    handler(wxr, word_entry.senses[-1], child)

    if has_list:
        return

    # No list in the section: treat its non-heading content as one gloss.
    sense = Sense()
    body_nodes = list(level_node.invert_find_child(LEVEL_KIND_FLAGS))
    gloss = clean_node(wxr, sense, body_nodes)
    if gloss != "":
        sense.glosses.append(gloss)
        word_entry.senses.append(sense)

82 

83 

def extract_gloss_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: Sense,
) -> None:
    """Extract gloss data from one list item into ``sense``.

    For items whose marker ends with ";", the item children provide the
    sense index and a comma-separated list of raw tags, and the sense is
    appended to ``word_entry.senses`` once a non-empty gloss is found.
    Lists nested in the item body are processed recursively, each nested
    item starting from a deep copy of ``sense``.
    """
    starts_new_sense = list_item.sarg.endswith(";")

    if starts_new_sense:
        tag_text = clean_node(wxr, sense, list_item.children)
        for position, child in enumerate(list_item.children):
            if not isinstance(child, str) or sense.sense_index != "":
                continue
            index_match = re.search(r"[\d.a-z]+", child)
            if index_match is None:
                continue
            sense.sense_index = index_match.group(0)
            # Only what follows the index string is tag text.
            tag_text = clean_node(
                wxr, sense, list_item.children[position + 1 :]
            )
            break
        for piece in tag_text.split(","):
            piece = piece.strip()
            if piece != "":
                sense.raw_tags.append(piece)

    if list_item.definition is not None:
        body_nodes = list_item.definition
    else:
        body_nodes = list_item.children

    # One pass: gather gloss nodes and set nested lists aside for later.
    gloss_nodes = []
    nested_lists = []
    for child in body_nodes:
        if isinstance(
            child, TemplateNode
        ) and child.template_name.startswith(("f.", "forma ", "plural")):
            process_forma_template(wxr, sense, child)
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            nested_lists.append(child)
            continue
        gloss_nodes.append(child)

    gloss_text = clean_node(wxr, sense, gloss_nodes)
    if gloss_text != "":
        sense.glosses.append(gloss_text)
        translate_raw_tags(sense)
        if starts_new_sense:
            word_entry.senses.append(sense)

    # Nested items inherit the finished sense but get their own index.
    for nested_list in nested_lists:
        for nested_item in nested_list.find_child(NodeKind.LIST_ITEM):
            nested_sense = sense.model_copy(deep=True)
            nested_sense.sense_index = ""
            extract_gloss_list_item(
                wxr, word_entry, nested_item, nested_sense
            )

139 

140 

def process_forma_template(
    wxr: WiktextractContext, sense: Sense, template: TemplateNode
) -> None:
    """Record the form-of target named by a "forma"-style template.

    The first positional argument is cleaned and stored in
    ``sense.form_of``. When a ``pronominal`` or ``pronom`` argument is
    present, the same word with "se" appended is stored as well. The sense
    is tagged "form-of". Nothing happens if the first argument cleans to
    an empty string.
    """
    # https://es.wiktionary.org/wiki/Plantilla:forma_verbo
    params = template.template_parameters
    lemma = clean_node(wxr, None, params.get(1, ""))
    if lemma == "":
        return

    targets = [lemma]
    if "pronominal" in params or "pronom" in params:
        targets.append(lemma + "se")
    for target in targets:
        sense.form_of.append(AltForm(word=target))

    if "form-of" not in sense.tags:
        sense.tags.append("form-of")

155 

156 

def process_uso_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Add usage tags from a ``uso`` template to ``sense``.

    Each positional argument is cleaned and looked up in ``USO_TAGS``.
    Known values contribute their translated tag(s) to ``sense.tags``;
    unknown values are kept in ``sense.raw_tags``. Named arguments are
    ignored. Tags and raw tags already present on the sense are not added
    again, and arguments that clean to an empty string are skipped.
    """
    # https://es.wiktionary.org/wiki/Plantilla:uso
    from .tags import USO_TAGS

    for arg_name, arg_value in t_node.template_parameters.items():
        if not isinstance(arg_name, int):
            continue
        usage = clean_node(wxr, None, arg_value)
        if usage == "":
            # An empty argument carries no information; don't store "".
            continue
        if usage not in USO_TAGS:
            if usage not in sense.raw_tags:
                sense.raw_tags.append(usage)
            continue

        tr_tags = USO_TAGS[usage]
        if isinstance(tr_tags, str):
            tr_tags = [tr_tags]
        elif not isinstance(tr_tags, list):
            continue
        for tag in tr_tags:
            # The sense may already carry the tag (e.g. from another
            # argument mapping to it); keep the list free of duplicates.
            if tag not in sense.tags:
                sense.tags.append(tag)

    clean_node(wxr, sense, t_node)  # save category links

176 

177 

def process_ambito_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Add location tags from an ``ámbito`` template to ``sense``.

    Each positional argument is cleaned and looked up in ``AMBITO_TAGS``;
    the translated tag(s) of known values are added to ``sense.tags``
    unless already present. Named arguments and values missing from
    ``AMBITO_TAGS`` are ignored.
    """
    # https://es.wiktionary.org/wiki/Plantilla:ámbito
    # location data
    from .tags import AMBITO_TAGS

    for arg_name, arg_value in t_node.template_parameters.items():
        if not isinstance(arg_name, int):
            continue
        place = clean_node(wxr, None, arg_value)
        if place not in AMBITO_TAGS:
            continue

        tr_tags = AMBITO_TAGS[place]
        if isinstance(tr_tags, str):
            tr_tags = [tr_tags]
        elif not isinstance(tr_tags, list):
            continue
        for tag in tr_tags:
            # Several arguments can map to the same tag; add it only once.
            if tag not in sense.tags:
                sense.tags.append(tag)

    clean_node(wxr, sense, t_node)  # save category links

196 

197 

def extract_pos_header_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Collect forms and raw tags from an expanded POS header template.

    The template is expanded and its top-level nodes are scanned in order:
    a string ending with ":" sets the label applied to the links that
    follow, every non-empty link becomes a ``Form`` on ``word_entry``, and
    every non-empty italic node becomes a raw tag of ``word_entry``.
    """
    # https://es.wiktionary.org/wiki/Plantilla:es.sust
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)

    label = ""  # most recent "xxx:" label, shared by the links after it
    for child in expanded.children:
        if isinstance(child, str):
            if child.strip().endswith(":"):
                label = clean_node(wxr, None, child).strip(": ¦()")
            continue
        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LINK:
            form = Form(form=clean_node(wxr, None, child))
            if form.form == "":
                continue
            if label != "":
                form.raw_tags.extend(label.split())
                translate_raw_tags(form)
            word_entry.forms.append(form)
        elif child.kind == NodeKind.ITALIC:
            italic_text = clean_node(wxr, None, child)
            if italic_text != "":
                word_entry.raw_tags.append(italic_text)