Coverage for src/wiktextract/extractor/pt/example.py: 90%

62 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-13 10:14 +0000

1import re 

2 

3from wikitextprocessor import ( 

4 HTMLNode, 

5 NodeKind, 

6 TemplateNode, 

7 WikiNode, 

8) 

9 

10from ...page import clean_node 

11from ...wxr_context import WiktextractContext 

12from ..share import calculate_bold_offsets 

13from .models import Example, Sense 

14 

15 

16def extract_example_list_item( 

17 wxr: WiktextractContext, 

18 sense: Sense, 

19 list_item: WikiNode, 

20) -> None: 

21 example = Example() 

22 ref_nodes = [] 

23 

24 for index, node in enumerate(list_item.children): 

25 if ( 

26 isinstance(node, WikiNode) 

27 and node.kind == NodeKind.ITALIC 

28 and example.text == "" 

29 ): 

30 example.text = clean_node(wxr, None, node) 

31 calculate_bold_offsets( 

32 wxr, node, example.text, example, "bold_text_offsets" 

33 ) 

34 elif isinstance(node, HTMLNode) and node.tag == "small": 

35 example.translation = clean_node(wxr, None, node) 

36 if example.translation.startswith( 36 ↛ 39line 36 didn't jump to line 39 because the condition on line 36 was never true

37 "(" 

38 ) and example.translation.endswith(")"): 

39 example.translation = example.translation.strip("()") 

40 elif isinstance(node, TemplateNode): 

41 match node.template_name: 

42 case "OESP": 

43 example.ref = clean_node(wxr, sense, node).strip("()") 

44 case "tradex": 44 ↛ 58line 44 didn't jump to line 58 because the pattern on line 44 always matched

45 second_arg = node.template_parameters.get(2, "") 

46 example.text = clean_node(wxr, None, second_arg) 

47 calculate_bold_offsets( 

48 wxr, 

49 wxr.wtp.parse(wxr.wtp.node_to_wikitext(second_arg)), 

50 example.text, 

51 example, 

52 "bold_text_offsets", 

53 ) 

54 example.translation = clean_node( 

55 wxr, None, node.template_parameters.get(3, "") 

56 ) 

57 clean_node(wxr, sense, node) 

58 case "Ex.": 

59 example.text = clean_node( 

60 wxr, sense, node.template_parameters.get(1, "") 

61 ) 

62 elif isinstance(node, WikiNode) and node.kind == NodeKind.BOLD: 

63 bold_str = clean_node(wxr, None, node) 

64 if re.fullmatch(r"\d+", bold_str) is not None: 

65 list_item_str = clean_node( 

66 wxr, 

67 None, 

68 list( 

69 list_item.invert_find_child( 

70 NodeKind.LIST, include_empty_str=True 

71 ) 

72 ), 

73 ) 

74 if list_item_str.endswith(":"): 74 ↛ 24line 74 didn't jump to line 24 because the condition on line 74 was always true

75 ref_nodes.clear() 

76 example.ref = list_item_str 

77 for child_list in list_item.find_child(NodeKind.LIST): 

78 for child_list_item in child_list.find_child( 

79 NodeKind.LIST_ITEM 

80 ): 

81 example.text = clean_node( 

82 wxr, None, child_list_item.children 

83 ) 

84 calculate_bold_offsets( 

85 wxr, 

86 child_list_item, 

87 example.text, 

88 example, 

89 "bold_text_offsets", 

90 ) 

91 break 

92 elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST: 

93 ref_nodes.clear() 

94 for child_list_item in node.find_child(NodeKind.LIST_ITEM): 

95 ref_nodes.append(child_list_item.children) 

96 else: 

97 ref_nodes.append(node) 

98 

99 if example.text != "": 

100 if example.ref == "": 

101 example.ref = clean_node(wxr, sense, ref_nodes).strip(":() \n") 

102 sense.examples.append(example) 

103 else: 

104 extract_example_text_list(wxr, sense, list_item) 

105 

106 

def extract_example_text_list(
    wxr: WiktextractContext,
    sense: Sense,
    list_item: WikiNode,
) -> None:
    """Turn the text of ``list_item`` into an ``Example`` on ``sense``.

    The text comes from the children returned by
    ``invert_find_child(NodeKind.LIST, include_empty_str=True)``
    (presumably everything except nested lists - confirm against
    wikitextprocessor). If the text contains ``-``, the part before the
    first one is kept as the example text and the part after it becomes
    the translation; both are stripped of surrounding whitespace.
    Nothing is appended when the resulting text is empty.
    """
    own_nodes = list(
        list_item.invert_find_child(NodeKind.LIST, include_empty_str=True)
    )
    example = Example(text=clean_node(wxr, sense, own_nodes))

    # ``dash`` is empty when the text has no "-" at all.
    before, dash, after = example.text.partition("-")
    if dash:
        example.translation = after.strip()
        example.text = before.strip()

    if not example.text:
        return

    # The selected nodes are turned back into wikitext and parsed again so
    # that a node tree can be passed for the bold offsets.
    reparsed = wxr.wtp.parse(wxr.wtp.node_to_wikitext(own_nodes))
    calculate_bold_offsets(
        wxr, reparsed, example.text, example, "bold_text_offsets"
    )
    sense.examples.append(example)