Coverage for src/wiktextract/extractor/nl/etymology.py: 97%
24 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1import re
3from wikitextprocessor import LevelNode, NodeKind
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from .models import Etymology
def extract_etymology_section(
    wxr: WiktextractContext, level_node: LevelNode
) -> list[Etymology]:
    """Build ``Etymology`` entries from the list items of a section.

    Every ``LIST`` child of ``level_node`` is visited, and each of its
    ``LIST_ITEM`` children is cleaned to plain text with ``clean_node``.
    A leading ``[X]`` marker (one uppercase ASCII letter in square
    brackets) is stored as the entry's ``index`` and removed from the
    text. Items whose final text is empty are skipped.

    Args:
        wxr: Context object forwarded to ``clean_node``.
        level_node: Section node whose list children are scanned.

    Returns:
        The entries collected so far. Collection stops at the first list
        item containing a template named ``((``; that item and everything
        after it are ignored.
    """
    results = []
    for bullet_list in level_node.find_child(NodeKind.LIST):
        for item in bullet_list.find_child(NodeKind.LIST_ITEM):
            # A "((" template ends extraction for the whole section, before
            # the item is cleaned.
            # NOTE(review): presumably it opens a non-etymology block --
            # confirm against the nl edition's templates.
            if any(
                template.template_name == "(("
                for template in item.find_child(NodeKind.TEMPLATE)
            ):
                return results

            entry = Etymology()
            category_data = {}
            text = clean_node(wxr, category_data, item.children)

            # Only text that had an index marker removed is stripped here;
            # otherwise the text from clean_node is stored unchanged.
            marker = re.match(r"\[([A-Z])\]", text)
            if marker is not None:
                entry.index = marker.group(1)
                text = text[marker.end() :].strip()

            entry.text = text
            entry.categories = category_data.get("categories", [])
            if entry.text:
                results.append(entry)
    return results