Coverage for src/wiktextract/extractor/pt/example.py: 90%

62 statements  

« prev     ^ index     » next       coverage.py v7.9.0, created at 2025-06-13 07:43 +0000

1import re 

2 

3from wikitextprocessor import ( 

4 HTMLNode, 

5 NodeKind, 

6 TemplateNode, 

7 WikiNode, 

8) 

9 

10from ...page import clean_node 

11from ...wxr_context import WiktextractContext 

12from ..share import calculate_bold_offsets 

13from .models import Example, Sense 

14 

15 

16def extract_example_list_item( 

17 wxr: WiktextractContext, 

18 sense: Sense, 

19 list_item: WikiNode, 

20) -> None: 

21 example = Example() 

22 ref_nodes = [] 

23 

24 for index, node in enumerate(list_item.children): 

25 if ( 

26 isinstance(node, WikiNode) 

27 and node.kind == NodeKind.ITALIC 

28 and example.text == "" 

29 ): 

30 example.text = clean_node(wxr, None, node) 

31 calculate_bold_offsets( 

32 wxr, node, example.text, example, "bold_text_offsets" 

33 ) 

34 elif isinstance(node, HTMLNode) and node.tag == "small": 

35 example.translation = clean_node(wxr, None, node) 

36 if example.translation.startswith( 36 ↛ 39line 36 didn't jump to line 39 because the condition on line 36 was never true

37 "(" 

38 ) and example.translation.endswith(")"): 

39 example.translation = example.translation.strip("()") 

40 elif isinstance(node, TemplateNode): 

41 match node.template_name: 

42 case "OESP": 

43 example.ref = clean_node(wxr, sense, node).strip("()") 

44 case "tradex": 44 ↛ 58line 44 didn't jump to line 58 because the pattern on line 44 always matched

45 second_arg = node.template_parameters.get(2, "") 

46 example.text = clean_node(wxr, None, second_arg) 

47 calculate_bold_offsets( 

48 wxr, 

49 wxr.wtp.parse(wxr.wtp.node_to_wikitext(second_arg)), 

50 example.text, 

51 example, 

52 "bold_text_offsets", 

53 ) 

54 example.translation = clean_node( 

55 wxr, None, node.template_parameters.get(3, "") 

56 ) 

57 clean_node(wxr, sense, node) 

58 case "Ex.": 

59 example.text = clean_node( 

60 wxr, sense, node.template_parameters.get(1, "") 

61 ) 

62 elif isinstance(node, WikiNode) and node.kind == NodeKind.BOLD: 

63 bold_str = clean_node(wxr, None, node) 

64 if re.fullmatch(r"\d+", bold_str) is not None: 

65 list_item_str = clean_node( 

66 wxr, None, list(list_item.invert_find_child(NodeKind.LIST)) 

67 ) 

68 if list_item_str.endswith(":"): 68 ↛ 24line 68 didn't jump to line 24 because the condition on line 68 was always true

69 ref_nodes.clear() 

70 example.ref = list_item_str 

71 for child_list in list_item.find_child(NodeKind.LIST): 

72 for child_list_item in child_list.find_child( 

73 NodeKind.LIST_ITEM 

74 ): 

75 example.text = clean_node( 

76 wxr, None, child_list_item.children 

77 ) 

78 calculate_bold_offsets( 

79 wxr, 

80 child_list_item, 

81 example.text, 

82 example, 

83 "bold_text_offsets", 

84 ) 

85 break 

86 elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST: 

87 ref_nodes.clear() 

88 for child_list_item in node.find_child(NodeKind.LIST_ITEM): 

89 ref_nodes.append(child_list_item.children) 

90 else: 

91 ref_nodes.append(node) 

92 

93 if example.text != "": 

94 if example.ref == "": 

95 example.ref = clean_node(wxr, sense, ref_nodes).strip(":() \n") 

96 sense.examples.append(example) 

97 else: 

98 extract_example_text_list(wxr, sense, list_item) 

99 

100 

def extract_example_text_list(
    wxr: WiktextractContext,
    sense: Sense,
    list_item: WikiNode,
) -> None:
    """Turn the plain text of ``list_item`` into an example on ``sense``.

    The text is taken from the children of ``list_item`` that are not
    nested lists. If it contains a ``"-"``, the part before the first one
    becomes the example text and the part after it the translation, both
    stripped of surrounding whitespace. Nothing is appended when the
    resulting text is empty.

    Args:
        wxr: Extraction context used for cleaning and re-parsing nodes.
        sense: Sense whose ``examples`` list receives the result.
        list_item: The list item node holding the example.
    """
    own_nodes = list(list_item.invert_find_child(NodeKind.LIST))
    example = Example(text=clean_node(wxr, sense, own_nodes))

    # NOTE(review): the split happens at the first "-" wherever it occurs,
    # so a hyphen inside a word would also act as the separator - confirm
    # this is intended.
    before, dash, after = example.text.partition("-")
    if dash:
        example.translation = after.strip()
        example.text = before.strip()

    if not example.text:
        return

    # Offsets are computed against the (possibly shortened) example text,
    # using a fresh parse of the item's own nodes.
    calculate_bold_offsets(
        wxr,
        wxr.wtp.parse(wxr.wtp.node_to_wikitext(own_nodes)),
        example.text,
        example,
        "bold_text_offsets",
    )
    sense.examples.append(example)