Coverage for src / wiktextract / extractor / pl / etymology.py: 82%
37 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 08:09 +0000
1import re
2from collections import defaultdict
4from wikitextprocessor.parser import NodeKind, WikiNode
6from ...page import clean_node
7from ...wxr_context import WiktextractContext
8from .models import WordEntry
def extract_etymology_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
):
    """Collect etymology texts from a section and attach them to entries.

    Every list item found under ``level_node`` is converted to text with
    ``clean_node`` and filed under the most recently seen sense-index
    marker: a parenthesised run of digits, whitespace, commas, hyphens and
    dots such as ``(1.1)``. The marker carries over to following list
    items until another one is found; before any marker the key is ``""``.
    If no list item produced text, the whole section is converted instead.

    Entries in ``page_data`` whose ``lang_code`` equals
    ``base_data.lang_code`` receive the texts of every key that is ``""``
    or accepted by ``match_sense_index``. ``base_data`` receives the texts
    stored under ``""`` (an empty list if there are none).
    """
    # Imported here rather than at module level, as in the original code
    # (presumably to avoid an import cycle with .page - confirm).
    from .page import match_sense_index

    texts_by_index = defaultdict(list)
    current_index = ""
    found_list_text = False

    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        kept_nodes = []
        for child in item.children:
            if not isinstance(child, str):
                kept_nodes.append(child)
                continue
            # ",-." inside the class is a range covering exactly the three
            # characters ",", "-" and ".".
            marker = re.search(r"\(([\d\s,-.]+)\)", child)
            if marker is None:
                kept_nodes.append(child)
                continue
            current_index = marker.group(1)
            # Only what follows the marker is kept; anything before it in
            # the same string is dropped along with the marker itself.
            tail = child[marker.end() :]
            if tail:
                kept_nodes.append(tail)

        item_text = clean_node(wxr, None, kept_nodes)
        if item_text:
            texts_by_index[current_index].append(item_text)
            found_list_text = True

    if not found_list_text:
        # No list item yielded text: fall back to the section as a whole.
        section_text = clean_node(wxr, None, level_node.children)
        if section_text:
            texts_by_index[current_index].append(section_text)

    for entry in page_data:
        if entry.lang_code != base_data.lang_code:
            continue
        for index, texts in texts_by_index.items():
            # NOTE(review): this assigns rather than extends, so when more
            # than one key matches an entry the last matching key (in
            # insertion order) wins - confirm that this is intended.
            if index == "" or match_sense_index(index, entry):
                entry.etymology_texts = texts

    # .get() is used so the defaultdict does not gain an empty "" key.
    base_data.etymology_texts = texts_by_index.get("", [])