Coverage for src/wiktextract/extractor/nl/etymology.py: 97%

1import re

3from wikitextprocessor import LevelNode, NodeKind

5from ...page import clean_node

6from ...wxr_context import WiktextractContext

7from .models import Etymology

def extract_etymology_section(
    wxr: WiktextractContext, level_node: LevelNode
) -> list[Etymology]:
    """Build the list of etymology entries found in a section node.

    Every ``LIST_ITEM`` under every ``LIST`` child of ``level_node`` is
    cleaned to plain text with ``clean_node`` and stored in an
    ``Etymology`` object.  A leading marker made of one uppercase ASCII
    letter in square brackets (for example ``[A]``) is removed from the
    text and its letter is stored in the ``index`` field.

    Extraction stops as soon as a list item directly contains a template
    named ``((``; the entries gathered before that item are returned.
    NOTE(review): ``((`` presumably opens a multi-column block that is not
    etymology text -- confirm against the Dutch Wiktionary templates.

    Args:
        wxr: Context object forwarded to ``clean_node``.
        level_node: Section node whose list children are scanned.

    Returns:
        The entries with non-empty text, in document order.
    """
    entries = []
    for list_node in level_node.find_child(NodeKind.LIST):
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            # Checked before any cleaning so the stopping item itself is
            # never turned into an entry.
            if any(
                template.template_name == "(("
                for template in item.find_child(NodeKind.TEMPLATE)
            ):
                return entries

            entry = Etymology()
            collected = {}
            text = clean_node(wxr, collected, item.children)

            marker = re.match(r"\[([A-Z])\]", text)
            if marker is not None:
                entry.index = marker.group(1)
                # Only the remainder after the marker is stripped; text
                # without a marker is kept exactly as clean_node returned it.
                text = text[marker.end() :].strip()

            entry.text = text
            entry.categories = collected.get("categories", [])
            # Items that end up with no text are skipped.
            if entry.text:
                entries.append(entry)
    return entries