Coverage for src/wiktextract/extractor/nl/etymology.py: 97%

24 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1import re 

2 

3from wikitextprocessor import LevelNode, NodeKind 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Etymology 

8 

9 

def extract_etymology_section(
    wxr: WiktextractContext, level_node: LevelNode
) -> list[Etymology]:
    """Build ``Etymology`` entries from the list items of a section.

    Every LIST_ITEM returned by ``find_child`` under each LIST child of
    ``level_node`` is converted to text with ``clean_node``.  If that text
    begins with a bracketed single uppercase ASCII letter such as ``[A]``,
    the letter is stored in the entry's ``index`` and the marker is cut
    from the text.  Entries whose final text is empty are not returned.

    Processing stops at the first list item in which ``find_child`` yields
    a template named ``((``; that item is not processed and the entries
    collected up to that point are returned.

    Args:
        wxr: Context object passed through to ``clean_node``.
        level_node: Section node whose LIST children are scanned.

    Returns:
        The collected entries, in the order their list items were visited.
    """
    entries = []
    for list_node in level_node.find_child(NodeKind.LIST):
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            # A "((" template ends the extraction for the whole section,
            # not just for this item.
            if any(
                template.template_name == "(("
                for template in item.find_child(NodeKind.TEMPLATE)
            ):
                return entries

            entry = Etymology()
            category_data = {}
            text = clean_node(wxr, category_data, item.children)

            # Split a leading "[X]" marker off into the entry's index.
            marker = re.match(r"\[([A-Z])\]", text)
            if marker is not None:
                entry.index = marker.group(1)
                text = text[marker.end() :].strip()

            entry.text = text
            # Use whatever was stored under "categories" in the dict that
            # was handed to clean_node; fall back to an empty list.
            entry.categories = category_data.get("categories", [])

            if not entry.text:
                continue
            entries.append(entry)
    return entries