Coverage for src/wiktextract/extractor/pl/example.py: 88%

59 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1import re 

2from collections import defaultdict 

3 

4from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode 

5 

6from ...page import clean_node 

7from ...wxr_context import WiktextractContext 

8from .models import Example, Sense, WordEntry 

9 

10 

def extract_example_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Attach the examples found under ``level_node`` to senses in ``page_data``.

    Every list item below ``level_node`` is handed to
    ``process_example_list_item``, which groups the parsed examples by sense
    index (``""`` when the item carries no index).  The groups are then
    distributed over the senses of all entries whose ``lang_code`` equals
    ``base_data.lang_code``:

    * a sense receives the group stored under its own ``sense_index`` (the
      group is consumed, so it is handed out at most once), and
    * every such sense additionally receives the un-indexed group.

    Indexed groups that matched no sense are appended to the last entry of
    ``page_data`` as new ``Sense`` objects tagged ``"no-gloss"``; a deep copy
    of ``base_data`` is appended first when ``page_data`` is empty or its last
    entry has a different language code.
    """
    examples = defaultdict(list)
    for list_item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        process_example_list_item(wxr, list_item, examples)

    for entry in page_data:
        if entry.lang_code == base_data.lang_code:
            for sense in entry.senses:
                # pop() never triggers the default factory, so a missing
                # index simply yields None here.
                matched = examples.pop(sense.sense_index, None)
                if matched is not None:
                    sense.examples.extend(matched)
                # Looked up on every iteration on purpose: the un-indexed
                # group may have just been consumed by the pop above.
                sense.examples.extend(examples[""])

    # The un-indexed group must not become a "no-gloss" sense of its own.
    examples.pop("", None)

    if not page_data or page_data[-1].lang_code != base_data.lang_code:
        page_data.append(base_data.model_copy(deep=True))

    last_entry = page_data[-1]
    for leftover_index, leftover_examples in examples.items():
        last_entry.senses.append(
            Sense(
                tags=["no-gloss"],
                examples=leftover_examples,
                sense_index=leftover_index,
            )
        )

41 

42 

def process_example_list_item(
    wxr: WiktextractContext,
    list_item: WikiNode,
    examples: dict[str, list[Example]],
) -> None:
    """Parse one example list item and file it in ``examples``.

    The children of ``list_item`` are scanned in order:

    * a string containing ``(<digits>.<digits>)`` sets the sense index and
      marks the following child as the start of the example text;
    * any other string containing ``→`` ends the scan; the children after it
      form the translation;
    * an italic node supplies the example text, a ``<ref>`` HTML node the
      reference.

    Within the translation part, a ``dosł`` template splits off the literal
    meaning (everything after the template).  When no italic node provided
    the text, the children between the sense index and the arrow are used
    instead.  A trailing parenthesised part of the text is moved to
    ``roman``.  The example is appended under its sense index (``""`` if none
    was found) only when its text is non-empty.
    """
    children = list_item.children
    example = Example()
    sense_index = ""
    text_begin = 0
    # Index of the first child after the "→" string; 0 means no arrow seen.
    after_arrow = 0

    for position, child in enumerate(children):
        if isinstance(child, str):
            index_match = re.search(r"\(\d+\.\d+\)", child)
            if index_match is not None:
                sense_index = index_match.group(0).strip("()")
                text_begin = position + 1
            elif "→" in child:
                after_arrow = position + 1
                break
        elif isinstance(child, WikiNode) and child.kind == NodeKind.ITALIC:
            example.text = clean_node(wxr, None, child)
        elif isinstance(child, HTMLNode) and child.tag == "ref":
            example.ref = clean_node(wxr, None, child.children)

    if after_arrow != 0:
        # Defaults to "no literal-meaning template": translation runs to the
        # end of the list item.
        literal_at = len(children)
        for offset, child in enumerate(children[after_arrow:]):
            if isinstance(child, TemplateNode) and child.template_name == "dosł":
                literal_at = after_arrow + offset
                literal_nodes = children[literal_at + 1 :]
                example.literal_meaning = clean_node(
                    wxr, None, literal_nodes
                ).strip("() ")
                break

        translation_nodes = children[after_arrow:literal_at]
        example.translation = clean_node(wxr, None, translation_nodes).strip(
            "() "
        )

        if not example.text:
            # No italic node: fall back to the raw nodes before the arrow
            # (the slice includes the arrow string itself, hence the strip).
            text_nodes = children[text_begin:after_arrow]
            example.text = clean_node(wxr, None, text_nodes).strip("→ ")

    full_text = example.text
    if "(" in full_text and full_text.endswith(")"):
        paren_at = full_text.rfind("(")
        example.roman = full_text[paren_at:].strip("() ")
        example.text = full_text[:paren_at].strip()

    if example.text:
        examples[sense_index].append(example)