Coverage for src/wiktextract/extractor/nl/linkage.py: 83%
91 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
1import re
3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from .models import Linkage, WordEntry
def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Collect linkage entries from the direct children of a section.

    The children of ``level_node`` are visited in order.  Some templates only
    update the running context (``sense``, ``sense_index``, ``raw_tags``)
    that is attached to the entries found after them; others are delegated to
    a template-specific extractor.  Bare links become ``Linkage`` entries and
    lists are handled item by item.  Every entry is appended to the list
    attribute of ``word_entry`` whose name is ``linkage_type``.
    """
    sense_index = 0
    sense = ""
    raw_tags = []
    for node in level_node.children:
        if isinstance(node, TemplateNode):
            template_name = node.template_name
            if template_name == "intens":
                # https://nl.wiktionary.org/wiki/Sjabloon:intens
                # Nothing below resets this tag, so it stays in effect for
                # every bare link that follows in the section.
                raw_tags = ["intensivering"]
                # Clean the argument like the other branches do: the raw
                # parameter value is not guaranteed to be a plain string,
                # so calling ``.strip()`` on it directly could fail.
                s_index_str = clean_node(
                    wxr, None, node.template_parameters.get(2, "")
                ).strip()
                if re.fullmatch(r"\d+", s_index_str):
                    sense_index = int(s_index_str)
            elif template_name == "L-top":
                second_arg = clean_node(
                    wxr, None, node.template_parameters.get(2, "")
                )
                # A "[n]" marker gives the sense number; the text after it
                # is the sense.  Without a marker the whole argument is
                # taken as the sense and the index is left unchanged.
                m = re.search(r"\[(\d+)\]", second_arg)
                if m is not None:
                    sense_index = int(m.group(1))
                    sense = second_arg[m.end() :].strip()
                else:
                    sense = second_arg
            elif template_name == "L-bottom":
                # End of an "L-top" group: drop the sense context.
                sense = ""
                sense_index = 0
            elif template_name.startswith("nld-"):
                extract_nld_template(wxr, word_entry, node, linkage_type)
            elif template_name in ["expr", "fras"]:
                extract_expr_template(wxr, word_entry, node, linkage_type)
        elif isinstance(node, WikiNode):
            if node.kind == NodeKind.LINK:
                word = clean_node(wxr, None, node)
                if word != "":
                    getattr(word_entry, linkage_type).append(
                        Linkage(
                            word=word,
                            sense=sense,
                            sense_index=sense_index,
                            raw_tags=raw_tags,
                        )
                    )
            elif node.kind == NodeKind.LIST:
                # List items receive the current sense context but not
                # ``raw_tags``.
                for list_item in node.find_child(NodeKind.LIST_ITEM):
                    extract_linkage_list_item(
                        wxr,
                        word_entry,
                        list_item,
                        linkage_type,
                        sense,
                        sense_index,
                    )
def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    linkage_type: str,
    sense: str,
    sense_index: int,
) -> None:
    """Extract linkage entries from the children of one list item.

    ``sense`` and ``sense_index`` are the starting context supplied by the
    caller; plain-text children of the item may change them while the item
    is being scanned.  Entries are appended to the list attribute of
    ``word_entry`` whose name is ``linkage_type``.
    """
    for node in list_item.children:
        if isinstance(node, str):
            m = re.search(r"\[(\d+)\]", node)
            if m is not None:
                # "[n]" in the text sets the sense number for later links.
                sense_index = int(m.group(1))
            elif node.strip().startswith("="):
                # Text of the form "= ..." is a sense; it is also written
                # back onto the most recently added entry, if there is one.
                sense = node.strip().removeprefix("=").strip()
                linkage_list = getattr(word_entry, linkage_type)
                if len(linkage_list) > 0:
                    linkage_list[-1].sense = sense
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LINK:
            word = clean_node(wxr, None, node)
            if word != "":
                getattr(word_entry, linkage_type).append(
                    Linkage(word=word, sense=sense, sense_index=sense_index)
                )
        elif isinstance(node, TemplateNode) and node.template_name == "expr":
            extract_expr_template(wxr, word_entry, node, linkage_type)
def extract_nld_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    linkage_type: str,
) -> None:
    """Extract linkage entries from an expanded ``nld-*`` template.

    The template is expanded and re-parsed.  The sense number comes from the
    first template argument when it is purely numeric (otherwise 0), the
    sense text from a link inside the first italic node of the expansion,
    and one entry is added for every non-empty link that is a direct child
    of a list item.
    """
    # https://nl.wiktionary.org/wiki/Sjabloon:nld-rashonden
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)

    index_text = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    sense_index = int(index_text) if re.fullmatch(r"\d+", index_text) else 0

    # Only the first italic node is looked at; if it holds several links
    # the last one wins.
    sense = ""
    italic_nodes = iter(expanded.find_child_recursively(NodeKind.ITALIC))
    first_italic = next(italic_nodes, None)
    if first_italic is not None:
        for sense_link in first_italic.find_child(NodeKind.LINK):
            sense = clean_node(wxr, None, sense_link)

    for item in expanded.find_child_recursively(NodeKind.LIST_ITEM):
        for word_link in item.find_child(NodeKind.LINK):
            word = clean_node(wxr, None, word_link)
            if word == "":
                continue
            target = getattr(word_entry, linkage_type)
            target.append(
                Linkage(word=word, sense_index=sense_index, sense=sense)
            )
def extract_expr_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    linkage_type: str,
) -> None:
    """Add one linkage entry built from an ``expr`` or ``fras`` template.

    For ``expr`` the word is argument 1 and the sense argument 2; for any
    other template name handled here they are arguments 2 and 3.  The sense
    number is read from the ``n`` argument when it is purely numeric, and is
    overridden by a leading number (optionally in square brackets) on the
    word itself, which is then removed from the word.  Nothing is added when
    the remaining word is empty.
    """
    # https://nl.wiktionary.org/wiki/Sjabloon:expr
    # https://nl.wiktionary.org/wiki/Sjabloon:fras
    params = t_node.template_parameters

    n_text = clean_node(wxr, None, params.get("n", ""))
    sense_index = 0
    if re.fullmatch(r"\d+", n_text) is not None:
        sense_index = int(n_text)

    if t_node.template_name == "expr":
        word_arg, sense_arg = 1, 2
    else:
        word_arg, sense_arg = 2, 3
    sense = clean_node(wxr, None, params.get(sense_arg, ""))
    word = clean_node(wxr, None, params.get(word_arg, ""))

    # Some pages put the sense number in front of the word instead of
    # using the "n" argument.
    number_prefix = re.match(r"\[?(\d+)\]?", word)
    if number_prefix is not None:
        sense_index = int(number_prefix.group(1))
        word = word[number_prefix.end() :].strip()

    if word == "":
        return
    getattr(word_entry, linkage_type).append(
        Linkage(word=word, sense=sense, sense_index=sense_index)
    )
def extract_fixed_preposition_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Add every non-empty list item of the section to ``derived``.

    Each list item found anywhere below ``level_node`` is reduced to its
    cleaned text and stored on ``word_entry.derived`` as a ``Linkage``
    tagged "prepositional".
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        text = clean_node(wxr, None, item.children)
        if not text:
            continue
        word_entry.derived.append(Linkage(word=text, tags=["prepositional"]))