Coverage for src/wiktextract/extractor/en/info_templates.py: 84%
64 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1"""Handle specific templates, starting with `Template:+obj`.
2Each template should have a properly annotated function
3associated with its name in info_templates, and it should
4handle the given node based on where it is called from
5(`location`), like head or sense.
6"""
8import re
9from typing import Callable, Optional, Union
11from wikitextprocessor import TemplateNode, WikiNode
12from wikitextprocessor.core import TemplateArgs
14from ...clean import clean_template_args, clean_value
15from ...wxr_context import WiktextractContext
16from .form_descriptions import decode_tags
17from .type_utils import PlusObjTemplateData, TemplateData
# A node handed to an info-template handler: raw text or a parsed node.
InfoNode = Union[str, WikiNode]

# What every info-template handler returns:
#   [0] template data field contents, or None;
#   [1] template output, or None if it should not be expressed (like
#       `+obj` in heads).  Handlers return the original WikiNode here if
#       nothing special needs to happen.
InfoReturnTuple = tuple[Optional[TemplateData], Optional[InfoNode]]

# Handler signature:
#   (context, the node being checked, location from where this is called)
InfoTemplateFunc = Callable[
    [WiktextractContext, InfoNode, str],
    InfoReturnTuple,
]
# Matches `[with X]` or `[with X = Y]` in the expanded `+obj` text.
# Group 1 is X (no brackets or `=`), group 3 is Y (no brackets) when the
# optional ` = Y` part is present, otherwise None.
PLUSOBJ_RE = re.compile(
    r"\[with ([^][=]+)( = ([^][]+))?\]"
)
def plusobj_func(
    wxr: WiktextractContext, node: InfoNode, loc: str
) -> InfoReturnTuple:
    """Extract structured data from an expanded `Template:+obj`.

    The expansion is expected to look like
    `[with infinitive or ergative = meaning]`.  The part before ` = ` is
    split on commas and " or "; each piece is run through `decode_tags`.
    Pieces whose tagset contains "error-unknown-tag" are stored as plain
    words, the others contribute their tags.

    Returns `(None, None)` (after logging an error) when `node` is not a
    `TemplateNode` or the expansion does not match `PLUSOBJ_RE`.
    Otherwise returns the template data plus either None (when `loc` is
    "head") or the cleaned expansion text.
    """
    if not isinstance(node, TemplateNode):
        wxr.wtp.error(
            "INFO-TEMPLATES: plusobj_func: node is not a TemplateNode",
            sortid="info_templates/45",
        )
        return None, None

    wikitext = wxr.wtp.node_to_wikitext(node)
    text = clean_value(wxr, wxr.wtp.expand(wikitext))
    match = PLUSOBJ_RE.search(text)
    if match is None:
        wxr.wtp.error(
            f"INFO-TEMPLATES: `Template:+obj` expansion does not "
            f"match regex: {text}",
            sortid="info_templates/78",
        )
        return None, None

    tag_spec, meaning = match.group(1), match.group(3)

    # Collect into plain lists first; only non-empty results end up in
    # the output dict.
    words: list[str] = []
    tags: list[str] = []
    for alternative in re.split(r",| or ", tag_spec):
        tagsets, _ = decode_tags(alternative)
        for tagset in tagsets:
            if "error-unknown-tag" in tagset:
                words.extend(alternative.split())
            else:
                tags.extend(tagset)

    # Key order (words, tags, meaning) is kept stable for serialisation.
    extra_data: PlusObjTemplateData = {}
    if words:
        extra_data["words"] = words
    if tags:
        extra_data["tags"] = sorted(set(tags))
    if meaning:
        extra_data["meaning"] = meaning

    template_data: TemplateData = {
        "args": clean_template_args(wxr, node.template_parameters),
        "name": "+obj",
        "extra_data": extra_data,
        "expansion": text,
    }

    # In a head the expansion is not expressed; anywhere else (e.g. in a
    # sense) the text is kept.
    return template_data, (None if loc == "head" else text)
# Registry of handled templates: template name -> handler function.
# `parse_info_template_node` dispatches through this table.
INFO_TEMPLATE_FUNCS: dict[str, InfoTemplateFunc] = {"+obj": plusobj_func}
def parse_info_template_node(
    wxr: WiktextractContext, node: Union[str, WikiNode], loc: str
) -> InfoReturnTuple:
    """Dispatch `node` to its registered info-template handler.

    Returns `(None, None)` unless `node` is a `TemplateNode` whose
    `template_name` is a key of `INFO_TEMPLATE_FUNCS`; otherwise returns
    whatever the registered handler returns for `(wxr, node, loc)`.
    """
    is_template = isinstance(node, WikiNode) and isinstance(
        node, TemplateNode
    )
    if not is_template:
        return None, None

    handler = INFO_TEMPLATE_FUNCS.get(node.template_name)
    if handler is None:
        return None, None
    return handler(wxr, node, loc)
def parse_info_template_arguments(
    wxr: WiktextractContext, name: str, args: TemplateArgs, loc: str
) -> InfoReturnTuple:
    """Rebuild a template call from its name and arguments and dispatch it.

    The arguments are serialised back into `{{name|...}}` wikitext, which
    is parsed once; the first child of the parse result is handed to
    `parse_info_template_node` together with `loc`.

    Returns `(None, None)` when the parse result has no children or its
    first child is not a `WikiNode`; otherwise returns the result of
    `parse_info_template_node`.
    """

    def arg_order(item: tuple[str, object]) -> tuple[int, int, str]:
        # Positional keys come first in numeric order, so "2" precedes
        # "10" (a plain string sort would swap them); all other keys
        # follow alphabetically.
        key = item[0]
        if key.isdecimal():
            return 0, int(key), key
        return 1, 0, key

    items = [(str(k), v) for k, v in args.items()]
    parts = ["{{" + name]
    for k, v in sorted(items, key=arg_order):
        # Numeric keys are written positionally, so emission order is
        # what assigns them their position.
        parts.append(f"|{v}" if k.isnumeric() else f"|{k}={v}")
    parts.append("}}")
    templ_s = "".join(parts)

    root = wxr.wtp.parse(templ_s)
    if not root.children:
        return None, None
    tnode = root.children[0]
    if not isinstance(tnode, WikiNode):
        return None, None

    # Forward the caller's location instead of a fixed "sense".
    return parse_info_template_node(wxr, tnode, loc)