Coverage for src/wiktextract/extractor/pl/example.py: 88%

63 statements  

« prev     ^ index     » next       coverage.py v7.9.0, created at 2025-06-13 07:43 +0000

1import re 

2from collections import defaultdict 

3 

4from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode 

5 

6from ...page import clean_node 

7from ...wxr_context import WiktextractContext 

8from ..share import calculate_bold_offsets 

9from .models import Example, Sense, WordEntry 

10 

11 

def extract_example_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Collect the examples under ``level_node`` and attach them to senses.

    Every list item found recursively below ``level_node`` is handed to
    ``process_example_list_item``, which groups the resulting examples by
    sense index (the empty string is the key for examples without one).

    For each entry in ``page_data`` whose ``lang_code`` equals that of
    ``base_data``, every sense receives the examples stored under its own
    ``sense_index`` (that group is then removed) plus all examples stored
    under the empty key.

    Groups that are still left afterwards are turned into new senses tagged
    ``"no-gloss"`` and appended to the last entry of ``page_data``; a deep
    copy of ``base_data`` is appended first when ``page_data`` is empty or
    its last entry has a different ``lang_code``.
    """
    examples = defaultdict(list)
    for list_item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        process_example_list_item(wxr, list_item, examples)

    target_lang = base_data.lang_code
    for entry in page_data:
        if entry.lang_code == target_lang:
            for sense in entry.senses:
                # pop() with a default never triggers the defaultdict factory.
                matched = examples.pop(sense.sense_index, None)
                if matched is not None:
                    sense.examples.extend(matched)
                # Indexing (not .get) on purpose: like the lookup it replaces,
                # this creates the "" key when it is missing.
                sense.examples.extend(examples[""])

    # Examples without a sense index never become a sense of their own.
    examples.pop("", None)

    if not page_data or page_data[-1].lang_code != target_lang:
        page_data.append(base_data.model_copy(deep=True))

    last_entry_senses = page_data[-1].senses
    for leftover_index, leftover_examples in examples.items():
        last_entry_senses.append(
            Sense(
                tags=["no-gloss"],
                examples=leftover_examples,
                sense_index=leftover_index,
            )
        )

42 

43 

def process_example_list_item(
    wxr: WiktextractContext,
    list_item: WikiNode,
    examples: dict[str, list[Example]],
) -> None:
    """Build one ``Example`` from ``list_item`` and file it by sense index.

    The children of ``list_item`` are scanned in order:

    * a string child containing ``(N.N)`` sets the sense index and marks the
      following child as the start of the example text;
    * any other string child containing ``→`` marks the following child as
      the start of the translation and ends the scan;
    * an italic node provides the example text and its bold offsets;
    * a ``<ref>`` HTML node provides the reference.

    When an arrow was found, everything after it up to a ``dosł`` template
    (or the end of the children) becomes the translation, and whatever
    follows that template becomes the literal meaning.  If no italic node
    supplied the text, the children between the sense index and the
    translation start are used instead.

    A trailing parenthesised part of the text is moved to ``roman``.  The
    example is appended to ``examples[sense_index]`` only when its text is
    not empty.
    """
    children = list_item.children
    example = Example()
    sense_key = ""
    text_begin = 0
    # Index of the first child after the "→" string; 0 means "no arrow seen".
    arrow_end = 0

    for pos, child in enumerate(children):
        if isinstance(child, str):
            index_match = re.search(r"\(\d+\.\d+\)", child)
            if index_match is not None:
                sense_key = index_match.group(0).strip("()")
                text_begin = pos + 1
            elif "→" in child:
                arrow_end = pos + 1
                break
        elif isinstance(child, WikiNode) and child.kind == NodeKind.ITALIC:
            example.text = clean_node(wxr, None, child)
            calculate_bold_offsets(
                wxr, child, example.text, example, "bold_text_offsets"
            )
        elif isinstance(child, HTMLNode) and child.tag == "ref":
            example.ref = clean_node(wxr, None, child.children)

    if arrow_end != 0:
        child_count = len(children)
        # Position of the first "dosł" template after the arrow, or the
        # child count when there is none.
        literal_pos = next(
            (
                pos
                for pos in range(arrow_end, child_count)
                if isinstance(children[pos], TemplateNode)
                and children[pos].template_name == "dosł"
            ),
            child_count,
        )
        if literal_pos != child_count:
            example.literal_meaning = clean_node(
                wxr, None, children[literal_pos + 1 :]
            ).strip("() ")

        translation_span = slice(arrow_end, literal_pos)
        example.translation = clean_node(
            wxr, None, children[translation_span]
        ).strip("() ")
        translation_tree = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(children[translation_span])
        )
        calculate_bold_offsets(
            wxr,
            translation_tree,
            example.translation,
            example,
            "bold_translation_offsets",
        )

        if not example.text:
            # No italic node gave us the text: fall back to the raw children
            # between the sense index and the translation.
            text_span = slice(text_begin, arrow_end)
            example.text = clean_node(
                wxr, None, children[text_span]
            ).strip("→ ")
            text_tree = wxr.wtp.parse(
                wxr.wtp.node_to_wikitext(children[text_span])
            )
            calculate_bold_offsets(
                wxr,
                text_tree,
                example.text,
                example,
                "bold_text_offsets",
            )

    full_text = example.text
    if "(" in full_text and full_text.endswith(")"):
        paren_pos = full_text.rindex("(")
        example.roman = full_text[paren_pos:].strip("() ")
        example.text = full_text[:paren_pos].strip()

    if example.text:
        examples[sense_key].append(example)