Coverage for src/wiktextract/extractor/nl/descendant.py: 98%

31 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from mediawiki_langcodes import name_to_code 

2from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

3 

4from ...page import clean_node 

5from ...wxr_context import WiktextractContext 

6from .models import Descendant, WordEntry 

7 

8 

def extract_descendant_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Append the descendants found under ``level_node`` to ``word_entry``.

    Every LIST child of ``level_node`` is scanned, and each of its LIST_ITEM
    children is handed to ``extract_descendant_list_item``.  The results are
    gathered in document order and only then added to
    ``word_entry.descendants``.
    """
    # https://nl.wiktionary.org/wiki/WikiWoordenboek:Overerving_en_ontlening
    collected = [
        descendant
        for list_node in level_node.find_child(NodeKind.LIST)
        for list_item in list_node.find_child(NodeKind.LIST_ITEM)
        for descendant in extract_descendant_list_item(wxr, list_item)
    ]
    word_entry.descendants.extend(collected)

18 

19 

def extract_descendant_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Descendant]:
    """Build ``Descendant`` entries from one list item and its nested lists.

    The children of ``list_item`` are scanned in order.  The first string
    child containing ":" marks the end of the language label: all children
    before it are cleaned with ``clean_node`` and stripped of "→" and spaces
    to form the language name, which is then passed to ``name_to_code`` with
    "nl".  The language code stays "unknown" when that lookup returns "".

    After that marker, each "Q" template and each LINK node produces one
    ``Descendant``.  Entries extracted recursively from nested list items are
    appended to the ``descendants`` of every entry created for this item.

    Returns the entries created directly from this list item.
    """
    results = []
    colon_seen = False
    code = "unknown"
    name = "unknown"

    for position, child in enumerate(list_item.children):
        if not colon_seen:
            # Still inside the language label; only a string holding ":"
            # ends it, everything else is skipped.
            if isinstance(child, str) and ":" in child:
                colon_seen = True
                label_nodes = list_item.children[:position]
                name = clean_node(wxr, None, label_nodes).strip("→ ")
                looked_up_code = name_to_code(name, "nl")
                if looked_up_code != "":
                    code = looked_up_code
            continue

        is_word_node = (
            isinstance(child, TemplateNode) and child.template_name == "Q"
        ) or (isinstance(child, WikiNode) and child.kind == NodeKind.LINK)
        if is_word_node:
            word = clean_node(wxr, None, child)
            results.append(Descendant(lang=name, lang_code=code, word=word))

    for sub_list in list_item.find_child(NodeKind.LIST):
        for sub_item in sub_list.find_child(NodeKind.LIST_ITEM):
            nested = extract_descendant_list_item(wxr, sub_item)
            # Each entry of this item receives the same nested entries.
            for parent in results:
                parent.descendants.extend(nested)

    return results