Coverage for src/wiktextract/extractor/es/etymology.py: 97%
24 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from typing import cast
3from wikitextprocessor.parser import (
4 LEVEL_KIND_FLAGS,
5 NodeKind,
6 TemplateNode,
7 WikiNode,
8)
10from ...page import clean_node
11from ...wxr_context import WiktextractContext
12from .models import TemplateData, WordEntry
def process_etymology_block(
    wxr: WiktextractContext,
    entry: WordEntry,
    level_node: WikiNode,
) -> None:
    """
    Collect etymology templates and etymology text from an etymology section.

    Template documentation:
    https://es.wiktionary.org/wiki/Plantilla:etimología
    https://es.wiktionary.org/wiki/Plantilla:etimología2

    Every template under ``level_node`` whose name contains "etim" is
    recorded in ``entry.etymology_templates``, unless its expansion is one
    of the known placeholder notices listed below.

    An etymology template without arguments means the word has no etymology
    info yet. A template whose only argument is "leng" (language) likewise
    carries no info; it merely marks the word as coming from a language
    other than Spanish. In both cases ``entry.etymology_text`` is left
    untouched; it is only set once some parameter other than "leng" has
    been seen on a kept template.
    """
    # Expansions that only ask the reader to contribute ("Please fill in
    # this etymology, thank you...") or are similar notices.
    placeholder_expansions = (
        "Si puedes, incorpórala: ver cómo.",
        "Préstamo no adaptado.",
        "Este lema en este idioma es ampliable. "
        "Retira este aviso si la mayor parte de las acepciones ya están incluidas.",  # noqa:E501
    )

    has_etymology_info = False

    for node in level_node.find_child_recursively(NodeKind.TEMPLATE):
        # Runtime no-op: cast() only tells the type-checker that this node
        # is a TemplateNode, which is what provides .template_name.
        t_node = cast(TemplateNode, node)
        if "etim" not in t_node.template_name:
            # Only the main etymology templates (and maybe Plantilla:etim)
            # are of interest; skip everything else.
            continue

        entry.etymology_templates = entry.etymology_templates or []

        template_data = TemplateData(
            name=t_node.template_name,
            expansion=clean_node(wxr, None, t_node),
        )
        if template_data.expansion in placeholder_expansions:
            continue

        cleaned_args = {}
        for key, value in t_node.template_parameters.items():
            if isinstance(value, str):
                cleaned_args[str(key)] = value
            else:
                cleaned_args[str(key)] = clean_node(wxr, None, value)
            # Any parameter other than "leng" counts as real etymology
            # information.
            if key != "leng":
                has_etymology_info = True

        # Store the arguments unless there are none, or "leng" is the only
        # one.
        if cleaned_args and (
            len(cleaned_args) != 1 or "leng" not in cleaned_args
        ):
            template_data.args = cleaned_args

        entry.etymology_templates.append(template_data)

    if not has_etymology_info:
        return

    # Text is built from the children yielded by
    # invert_find_child(LEVEL_KIND_FLAGS) -- presumably everything in this
    # section except nested sub-section nodes; confirm against
    # wikitextprocessor.
    entry.etymology_text = clean_node(
        wxr, None, list(level_node.invert_find_child(LEVEL_KIND_FLAGS))
    )