Coverage for src/wiktextract/extractor/es/pos.py: 86%

145 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-13 10:14 +0000

1import re 

2 

3from wikitextprocessor.parser import ( 

4 LEVEL_KIND_FLAGS, 

5 LevelNode, 

6 NodeKind, 

7 TemplateNode, 

8 WikiNode, 

9) 

10 

11from ...page import clean_node 

12from ...wxr_context import WiktextractContext 

13from .example import process_ejemplo_template 

14from .inflection import process_inflect_template 

15from .linkage import process_linkage_template 

16from .models import AltForm, Form, Sense, WordEntry 

17from .section_titles import LINKAGE_TITLES 

18from .tags import ALL_TAGS, translate_raw_tags 

19 

20 

def extract_pos_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    section_title: str,
) -> None:
    """Fill ``word_entry`` from the children of a part-of-speech section.

    Each whitespace-separated word of ``section_title`` that is a key of
    ``ALL_TAGS`` contributes its translated tag(s) to ``word_entry.tags``
    (duplicates are skipped).  List children are turned into senses and
    template children are dispatched to their processors.  When the section
    holds no list node at all, the cleaned text of the children whose kind is
    not in ``LEVEL_KIND_FLAGS`` is stored as the single gloss of a new sense.
    """
    for title_word in section_title.split():
        if title_word not in ALL_TAGS:
            continue
        translated = ALL_TAGS[title_word]
        if isinstance(translated, str):
            candidates = [translated]
        elif isinstance(translated, list):
            candidates = translated
        else:
            candidates = []
        for candidate in candidates:
            if candidate not in word_entry.tags:
                word_entry.tags.append(candidate)

    found_list = False
    for child in level_node.children:
        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            found_list = True
            _extract_pos_list(wxr, word_entry, child)
        elif isinstance(child, TemplateNode):
            _extract_pos_template(wxr, word_entry, child)

    if found_list:
        return

    # No list in the section: fall back to the plain section text.
    fallback_sense = Sense()
    text_nodes = list(
        level_node.invert_find_child(LEVEL_KIND_FLAGS, include_empty_str=True)
    )
    fallback_gloss = clean_node(wxr, fallback_sense, text_nodes)
    if fallback_gloss != "":
        fallback_sense.glosses.append(fallback_gloss)
        word_entry.senses.append(fallback_sense)


def _extract_pos_list(
    wxr: WiktextractContext, word_entry: WordEntry, list_node: WikiNode
) -> None:
    """Route the items of one list node according to the list marker."""
    marker = list_node.sarg
    if marker == ";":
        # Top-level gloss: every item starts from a fresh sense.
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            extract_gloss_list_item(wxr, word_entry, item, Sense())
    elif re.fullmatch(r":+;", marker) is not None:  # nested gloss
        # The number of leading colons is the gloss count the parent sense
        # must have; search from the most recently added sense backwards.
        wanted_gloss_count = len(marker) - 1
        parent = next(
            (
                candidate
                for candidate in reversed(word_entry.senses)
                if len(candidate.glosses) == wanted_gloss_count
            ),
            None,
        )
        if parent is None:
            parent = Sense()
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            nested_sense = parent.model_copy(deep=True)
            nested_sense.sense_index = ""
            extract_gloss_list_item(wxr, word_entry, item, nested_sense)
    elif marker == ":" and len(word_entry.senses) > 0:
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            # Look up the last sense for every item: handling an item may
            # append new senses through its nested lists.
            extract_gloss_list_item(
                wxr, word_entry, item, word_entry.senses[-1]
            )


def _extract_pos_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Dispatch a template found directly under the POS section."""
    name = t_node.template_name
    if name.startswith("inflect."):
        process_inflect_template(wxr, word_entry, t_node)
        return
    if name in ("es.sust", "es.adj", "es.v"):
        extract_pos_header_template(wxr, word_entry, t_node)
        return
    if name.removesuffix("s") in LINKAGE_TITLES:
        process_linkage_template(wxr, word_entry, t_node)
        return

    # The remaining templates attach data to the most recent sense.
    if len(word_entry.senses) == 0:
        return
    last_sense = word_entry.senses[-1]
    if name == "ejemplo":
        process_ejemplo_template(wxr, last_sense, t_node)
    elif name == "uso":
        process_uso_template(wxr, last_sense, t_node)
    elif name == "ámbito":
        process_ambito_template(wxr, last_sense, t_node)

88 

89 

def extract_gloss_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: Sense,
) -> None:
    """Extract one gloss list item into ``sense`` and recurse into sub-lists.

    When the item marker ends with ``;`` the item's children are read as a
    sense index followed by comma-separated raw tags, and ``sense`` is
    appended to ``word_entry.senses`` once a non-empty gloss was found.
    The gloss text comes from ``list_item.definition`` when present,
    otherwise from ``list_item.children``; nested list nodes are left out of
    the gloss and processed recursively with a deep copy of ``sense``.
    """
    starts_new_sense = list_item.sarg.endswith(";")

    if starts_new_sense:
        tags_text = clean_node(wxr, sense, list_item.children)
        # An index is only searched for when the sense does not have one yet.
        if sense.sense_index == "":
            for position, child in enumerate(list_item.children):
                if not isinstance(child, str):
                    continue
                index_match = re.search(r"[\d.a-z]+", child)
                if index_match is None:
                    continue
                sense.sense_index = index_match.group(0)
                # Only what follows the index string holds the raw tags.
                tags_text = clean_node(
                    wxr, sense, list_item.children[position + 1 :]
                )
                break
        for piece in tags_text.split(","):
            piece = piece.strip()
            if piece != "":
                sense.raw_tags.append(piece)

    if list_item.definition is not None:
        body_nodes = list_item.definition
    else:
        body_nodes = list_item.children

    gloss_nodes = []
    for child in body_nodes:
        if isinstance(
            child, TemplateNode
        ) and child.template_name.startswith(("f.", "forma ", "plural")):
            # Form-of templates feed sense.form_of and stay in the gloss.
            process_forma_template(wxr, sense, child)
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            continue  # nested lists are handled after the gloss
        gloss_nodes.append(child)

    gloss_text = clean_node(wxr, sense, gloss_nodes)
    if gloss_text != "":
        sense.glosses.append(gloss_text)
        translate_raw_tags(sense)
        if starts_new_sense:
            word_entry.senses.append(sense)

    for child in body_nodes:
        if not (isinstance(child, WikiNode) and child.kind == NodeKind.LIST):
            continue
        for sub_item in child.find_child(NodeKind.LIST_ITEM):
            sub_sense = sense.model_copy(deep=True)
            sub_sense.sense_index = ""
            extract_gloss_list_item(wxr, word_entry, sub_item, sub_sense)

145 

146 

def process_forma_template(
    wxr: WiktextractContext, sense: Sense, template: TemplateNode
) -> None:
    """Record the lemma named by a form-of template on ``sense``.

    The cleaned first positional parameter is added to ``sense.form_of``;
    if the template also carries a ``pronominal`` or ``pronom`` parameter,
    the same word with ``se`` appended is added as well.  The ``form-of``
    tag is added once.  Nothing happens when the first parameter is empty.
    """
    # https://es.wiktionary.org/wiki/Plantilla:forma_verbo
    params = template.template_parameters
    lemma = clean_node(wxr, None, params.get(1, ""))
    if lemma == "":
        return

    sense.form_of.append(AltForm(word=lemma))
    if any(key in params for key in ("pronominal", "pronom")):
        sense.form_of.append(AltForm(word=lemma + "se"))
    if "form-of" not in sense.tags:
        sense.tags.append("form-of")

161 

162 

def process_uso_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Add the usage labels of a ``uso`` template to ``sense``.

    Every positional parameter is cleaned and looked up in ``USO_TAGS``:
    a match contributes its translated tag(s) to ``sense.tags``, any other
    non-empty value is kept in ``sense.raw_tags``.  Named parameters are
    ignored.  The whole template is cleaned once more with ``sense`` as the
    target so that its category links are saved.
    """
    # https://es.wiktionary.org/wiki/Plantilla:uso
    from .tags import USO_TAGS

    for arg_name, arg_value in t_node.template_parameters.items():
        if not isinstance(arg_name, int):
            continue
        usage = clean_node(wxr, None, arg_value)
        if usage == "":
            # An empty positional argument (e.g. "{{uso|}}") carries no
            # label; do not store an empty raw tag.
            continue
        if usage in USO_TAGS:
            tr_tags = USO_TAGS[usage]
            if isinstance(tr_tags, str):
                sense.tags.append(tr_tags)
            elif isinstance(tr_tags, list):
                sense.tags.extend(tr_tags)
        else:
            sense.raw_tags.append(usage)

    clean_node(wxr, sense, t_node)  # save category links

182 

183 

def process_ambito_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Add the location labels of an ``ámbito`` template to ``sense``.

    Every positional parameter is cleaned and looked up in ``AMBITO_TAGS``:
    a match contributes its translated tag(s) to ``sense.tags``.  A
    non-empty value without a translation is kept in ``sense.raw_tags``
    instead of being dropped, which is how ``process_uso_template`` treats
    unknown values.  Named parameters are ignored.  The whole template is
    cleaned once more with ``sense`` as the target so that its category
    links are saved.
    """
    # https://es.wiktionary.org/wiki/Plantilla:ámbito
    # location data
    from .tags import AMBITO_TAGS

    for arg_name, arg_value in t_node.template_parameters.items():
        if not isinstance(arg_name, int):
            continue
        place = clean_node(wxr, None, arg_value)
        if place in AMBITO_TAGS:
            tr_tags = AMBITO_TAGS[place]
            if isinstance(tr_tags, str):
                sense.tags.append(tr_tags)
            elif isinstance(tr_tags, list):
                sense.tags.extend(tr_tags)
        elif place != "":
            # Keep untranslated locations rather than losing them silently.
            sense.raw_tags.append(place)

    clean_node(wxr, sense, t_node)  # save category links

202 

203 

def extract_pos_header_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Collect forms and raw tags from an expanded POS header template.

    The template is converted back to wikitext, expanded and re-parsed.
    Walking the top-level nodes of the result: a string ending with ``:``
    becomes the current label, each link becomes a ``Form`` carrying the
    words of the current label as raw tags, and the text of each italic
    node is added to ``word_entry.raw_tags``.
    """
    # https://es.wiktionary.org/wiki/Plantilla:es.sust
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_root = wxr.wtp.parse(template_wikitext, expand_all=True)

    current_label = ""
    for child in expanded_root.children:
        if isinstance(child, str):
            if child.strip().endswith(":"):
                current_label = clean_node(wxr, None, child).strip(": ¦()")
            continue
        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LINK:
            form = Form(form=clean_node(wxr, None, child))
            if form.form == "":
                continue
            if current_label != "":
                form.raw_tags.extend(current_label.split())
                translate_raw_tags(form)
            word_entry.forms.append(form)
        elif child.kind == NodeKind.ITALIC:
            italic_text = clean_node(wxr, None, child)
            if italic_text != "":
                word_entry.raw_tags.append(italic_text)