Coverage for src/wiktextract/extractor/es/gloss.py: 73%
72 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1import re
3from wikitextprocessor.parser import (
4 NodeKind,
5 TemplateNode,
6 WikiNode,
7 WikiNodeChildrenList,
8)
10from ...page import clean_node
11from ...wxr_context import WiktextractContext
12from .models import AltForm, Sense, WordEntry
13from .sense_data import process_sense_data_list
14from .tags import translate_raw_tags
def extract_gloss(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    list_node: WikiNode,
) -> None:
    """Create one ``Sense`` per list item and append it to the last entry.

    For every ``LIST_ITEM`` child of ``list_node`` that has a non-empty
    ``definition``:

    * nested ``LIST`` nodes found in the definition are set aside and passed
      to ``process_sense_data_list`` after the sense has been stored;
    * templates whose name starts with ``"f."`` or ``"forma "`` are handed to
      ``process_forma_template``;
    * the remaining definition nodes are cleaned into the gloss text;
    * the cleaned text of ``list_item.children`` is parsed as an optional
      leading numeric sense index followed by a list of raw tags.

    Args:
        wxr: Extraction context passed through to ``clean_node``.
        page_data: Word entries collected so far; the new senses are appended
            to ``page_data[-1].senses``.
        list_node: The list node whose items hold the glosses.
    """
    for list_item in list_node.find_child(NodeKind.LIST_ITEM):
        if not list_item.definition:
            continue

        gloss_data = Sense()
        definition: WikiNodeChildrenList = []
        nested_lists: WikiNodeChildrenList = []

        for node in list_item.definition:
            if isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
                nested_lists.append(node)
                continue
            definition.append(node)
            if isinstance(node, TemplateNode) and node.template_name.startswith(
                ("f.", "forma ")
            ):
                process_forma_template(wxr, gloss_data, node)

        gloss = clean_node(wxr, gloss_data, definition)
        if gloss:
            gloss_data.glosses.append(gloss)

        gloss_note = clean_node(wxr, gloss_data, list_item.children)
        index_match = re.match(r"\d+", gloss_note)
        if index_match is not None:
            gloss_data.sense_index = index_match.group(0)
            tag_string = gloss_note[index_match.end() :].strip()
        else:
            tag_string = gloss_note.strip()

        # Split tags on commas or on the standalone conjunction "y".
        # The word boundaries matter: a bare "y" alternative would also cut
        # through any tag that merely contains the letter (e.g. "Paraguay").
        for tag in re.split(r",|\by\b", tag_string):
            # NOTE(review): "Main" is presumably residue of template
            # expansion in the cleaned text - confirm against real pages.
            tag = (
                tag.strip()
                .removesuffix(".")
                .removesuffix("Main")
                .removeprefix("Main")
            )
            if tag:
                gloss_data.raw_tags.append(tag)

        translate_raw_tags(gloss_data)
        page_data[-1].senses.append(gloss_data)

        # Nested lists are processed only after the sense has been appended.
        # ``nested_lists`` holds LIST nodes exclusively, so no further type
        # check is needed here.
        for nested_list in nested_lists:
            process_sense_data_list(wxr, page_data[-1], nested_list)
def process_uso_template(
    wxr: WiktextractContext, sense: Sense, template: TemplateNode
) -> None:
    """Turn the positional arguments of a "uso" template into sense tags.

    Each integer-keyed argument is cleaned to text and looked up in
    ``USO_TAGS``. A string mapping is appended to ``sense.tags``, a list
    mapping is extended onto it, and a value missing from ``USO_TAGS`` is
    stored unchanged in ``sense.raw_tags``. Named arguments are ignored.
    """
    # https://es.wiktionary.org/wiki/Plantilla:uso
    from .tags import USO_TAGS

    for key, raw_value in template.template_parameters.items():
        if not isinstance(key, int):
            continue
        label = clean_node(wxr, None, raw_value)
        if label not in USO_TAGS:
            sense.raw_tags.append(label)
            continue
        mapped = USO_TAGS[label]
        if isinstance(mapped, str):
            sense.tags.append(mapped)
        elif isinstance(mapped, list):
            sense.tags.extend(mapped)

    clean_node(wxr, sense, template)  # save category links
def process_ambito_template(
    wxr: WiktextractContext, sense: Sense, template: TemplateNode
) -> None:
    """Turn the positional arguments of an "ámbito" template into sense tags.

    Each integer-keyed argument is cleaned to text and looked up in
    ``AMBITO_TAGS``. A string mapping is appended to ``sense.tags`` and a
    list mapping is extended onto it. Values missing from ``AMBITO_TAGS``
    and named arguments are skipped.
    """
    # https://es.wiktionary.org/wiki/Plantilla:ámbito
    # location data
    from .tags import AMBITO_TAGS

    for key, raw_value in template.template_parameters.items():
        if not isinstance(key, int):
            continue
        place = clean_node(wxr, None, raw_value)
        if place not in AMBITO_TAGS:
            continue
        mapped = AMBITO_TAGS[place]
        if isinstance(mapped, str):
            sense.tags.append(mapped)
        elif isinstance(mapped, list):
            sense.tags.extend(mapped)

    clean_node(wxr, sense, template)  # save category links
def process_forma_template(
    wxr: WiktextractContext, sense: Sense, template: TemplateNode
) -> None:
    """Record the word(s) a "forma" template points to in ``sense.form_of``.

    The cleaned text of positional argument 1 is added as an ``AltForm``.
    When the template also carries a ``pronominal`` or ``pronom`` argument,
    a second ``AltForm`` with ``"se"`` appended to the word is added.
    Nothing is recorded when argument 1 cleans to an empty string.
    """
    # https://es.wiktionary.org/wiki/Plantilla:forma_verbo
    params = template.template_parameters
    lemma = clean_node(wxr, None, params.get(1, ""))
    if lemma == "":
        return

    sense.form_of.append(AltForm(word=lemma))
    if any(name in params for name in ("pronominal", "pronom")):
        sense.form_of.append(AltForm(word=lemma + "se"))