Coverage for src/wiktextract/extractor/nl/descendant.py: 98%
31 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from mediawiki_langcodes import name_to_code
2from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
4from ...page import clean_node
5from ...wxr_context import WiktextractContext
6from .models import Descendant, WordEntry
def extract_descendant_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Collect the descendants listed in a section and add them to the entry.

    Every list item of every list that is a direct child of ``level_node`` is
    handed to ``extract_descendant_list_item``; the combined results are
    appended to ``word_entry.descendants`` in document order.
    """
    # https://nl.wiktionary.org/wiki/WikiWoordenboek:Overerving_en_ontlening
    # Gather everything first so ``word_entry`` is only touched once all
    # list items have been processed.
    collected = [
        descendant
        for list_node in level_node.find_child(NodeKind.LIST)
        for list_item in list_node.find_child(NodeKind.LIST_ITEM)
        for descendant in extract_descendant_list_item(wxr, list_item)
    ]
    word_entry.descendants.extend(collected)
def extract_descendant_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Descendant]:
    """Build ``Descendant`` entries from one item of a descendant list.

    The first string child containing ":" splits the item: what comes before
    it is read as the language name, and every ``Q`` template or link node
    after it becomes one ``Descendant``. Descendants found in nested lists
    are attached to every entry produced by this item.

    Returns the entries of this item; the list is empty when no colon is
    found or no ``Q`` template / link follows it.
    """
    desc_list = []
    before_colon = True
    lang_code = "unknown"
    lang_name = "unknown"
    for index, node in enumerate(list_item.children):
        if before_colon and isinstance(node, str) and ":" in node:
            before_colon = False
            # The language name normally comes from the nodes preceding the
            # colon text; a leading borrowing arrow and spaces are stripped.
            lang_name = clean_node(
                wxr, None, list_item.children[:index]
            ).strip("→ ")
            if lang_name == "":
                # Nothing usable before this node: the name may be plain
                # text in the same string as the colon ("→ Engels: ...").
                # Only used as a fallback so existing results are unchanged.
                lang_name = clean_node(
                    wxr, None, node.partition(":")[0]
                ).strip("→ ")
            new_lang_code = name_to_code(lang_name, "nl")
            # An empty lookup result keeps the "unknown" placeholder code.
            if new_lang_code != "":
                lang_code = new_lang_code
        elif not before_colon and (
            (isinstance(node, TemplateNode) and node.template_name == "Q")
            or (isinstance(node, WikiNode) and node.kind == NodeKind.LINK)
        ):
            desc_list.append(
                Descendant(
                    lang=lang_name,
                    lang_code=lang_code,
                    word=clean_node(wxr, None, node),
                )
            )

    # Flatten the descendants of all nested list items, in document order.
    nested_descendants = [
        child
        for nested_list in list_item.find_child(NodeKind.LIST)
        for nested_list_item in nested_list.find_child(NodeKind.LIST_ITEM)
        for child in extract_descendant_list_item(wxr, nested_list_item)
    ]
    # Every word on this item receives the same nested entries (the
    # ``Descendant`` objects are shared between parents, not copied).
    for parent_data in desc_list:
        parent_data.descendants.extend(nested_descendants)

    return desc_list