Coverage for src/wiktextract/extractor/pl/etymology.py: 82%

1import re

2from collections import defaultdict

4from wikitextprocessor.parser import NodeKind, WikiNode

6from ...page import clean_node

7from ...wxr_context import WiktextractContext

8from .models import WordEntry

def extract_etymology_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Collect etymology texts from a section and attach them to entries.

    Every list item found recursively below ``level_node`` is cleaned into
    one text.  A string child that contains a parenthesised marker made of
    digits, whitespace, ``,``, ``-`` or ``.`` sets the current sense index;
    only the part of that string after the marker is kept.  The index stays
    in effect for the following list items until another marker replaces
    it, and is the empty string before any marker has been seen.

    When no list item produced non-empty text, the whole section is cleaned
    as a single text and stored under the current sense index instead.

    Args:
        wxr: Context object forwarded to ``clean_node``.
        page_data: Entries collected so far; those whose ``lang_code``
            equals ``base_data.lang_code`` may get ``etymology_texts`` set.
        base_data: Receives the texts stored under the empty sense index,
            or an empty list when there are none.
        level_node: The etymology section node to read.
    """
    # Imported here rather than at module level, as in the original.
    # NOTE(review): presumably this avoids a circular import - confirm.
    from .page import match_sense_index

    texts_by_index = defaultdict(list)
    found_list_text = False
    current_index = ""
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        kept_nodes = []
        for child in item.children:
            if not isinstance(child, str):
                kept_nodes.append(child)
                continue
            # Inside the character class, `,-.` is a range from "," to ".",
            # which covers exactly the three characters ",", "-" and ".".
            marker = re.search(r"\(([\d\s,-.]+)\)", child)
            if marker is None:
                kept_nodes.append(child)
                continue
            current_index = marker.group(1)
            # Everything up to and including the marker is discarded.
            tail = child[marker.end() :]
            if tail != "":
                kept_nodes.append(tail)
        item_text = clean_node(wxr, None, kept_nodes)
        if item_text:
            texts_by_index[current_index].append(item_text)
            found_list_text = True

    if not found_list_text:
        # No usable list item: fall back to the text of the whole section.
        section_text = clean_node(wxr, None, level_node.children)
        if section_text:
            texts_by_index[current_index].append(section_text)

    for entry in page_data:
        if entry.lang_code != base_data.lang_code:
            continue
        for index, texts in texts_by_index.items():
            # NOTE(review): a later matching index replaces an earlier
            # assignment instead of extending it, and the same list object
            # is shared by every entry it is assigned to - confirm intended.
            if index == "" or match_sense_index(index, entry):
                entry.etymology_texts = texts

    # `.get` is used so that no "" key is created in the defaultdict.
    base_data.etymology_texts = texts_by_index.get("", [])