Coverage for src/wiktextract/extractor/pt/example.py: 90%

1import re

3from wikitextprocessor import (

4 HTMLNode,

5 NodeKind,

6 TemplateNode,

7 WikiNode,

10from ...page import clean_node

11from ...wxr_context import WiktextractContext

12from ..share import calculate_bold_offsets

13from .models import Example, Sense

16def extract_example_list_item(

17 wxr: WiktextractContext,

18 sense: Sense,

19 list_item: WikiNode,

20) -> None:

21 example = Example()

22 ref_nodes = []

24 for index, node in enumerate(list_item.children):

25 if (

26 isinstance(node, WikiNode)

27 and node.kind == NodeKind.ITALIC

28 and example.text == ""

29 ):

30 example.text = clean_node(wxr, None, node)

31 calculate_bold_offsets(

32 wxr, node, example.text, example, "bold_text_offsets"

33 )

34 elif isinstance(node, HTMLNode) and node.tag == "small":

35 example.translation = clean_node(wxr, None, node)

36 if example.translation.startswith( 36 ↛ 39line 36 didn't jump to line 39 because the condition on line 36 was never true

37 "("

38 ) and example.translation.endswith(")"):

39 example.translation = example.translation.strip("()")

40 elif isinstance(node, TemplateNode):

41 match node.template_name:

42 case "OESP":

43 example.ref = clean_node(wxr, sense, node).strip("()")

44 case "tradex": 44 ↛ 58line 44 didn't jump to line 58 because the pattern on line 44 always matched

45 second_arg = node.template_parameters.get(2, "")

46 example.text = clean_node(wxr, None, second_arg)

47 calculate_bold_offsets(

48 wxr,

49 wxr.wtp.parse(wxr.wtp.node_to_wikitext(second_arg)),

50 example.text,

51 example,

52 "bold_text_offsets",

53 )

54 example.translation = clean_node(

55 wxr, None, node.template_parameters.get(3, "")

56 )

57 clean_node(wxr, sense, node)

58 case "Ex.":

59 example.text = clean_node(

60 wxr, sense, node.template_parameters.get(1, "")

61 )

62 elif isinstance(node, WikiNode) and node.kind == NodeKind.BOLD:

63 bold_str = clean_node(wxr, None, node)

64 if re.fullmatch(r"\d+", bold_str) is not None:

65 list_item_str = clean_node(

66 wxr, None, list(list_item.invert_find_child(NodeKind.LIST))

67 )

68 if list_item_str.endswith(":"): 68 ↛ 24line 68 didn't jump to line 24 because the condition on line 68 was always true

69 ref_nodes.clear()

70 example.ref = list_item_str

71 for child_list in list_item.find_child(NodeKind.LIST):

72 for child_list_item in child_list.find_child(

73 NodeKind.LIST_ITEM

74 ):

75 example.text = clean_node(

76 wxr, None, child_list_item.children

77 )

78 calculate_bold_offsets(

79 wxr,

80 child_list_item,

81 example.text,

82 example,

83 "bold_text_offsets",

84 )

85 break

86 elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:

87 ref_nodes.clear()

88 for child_list_item in node.find_child(NodeKind.LIST_ITEM):

89 ref_nodes.append(child_list_item.children)

90 else:

91 ref_nodes.append(node)

93 if example.text != "":

94 if example.ref == "":

95 example.ref = clean_node(wxr, sense, ref_nodes).strip(":() \n")

96 sense.examples.append(example)

97 else:

98 extract_example_text_list(wxr, sense, list_item)

100

def extract_example_text_list(
    wxr: WiktextractContext,
    sense: Sense,
    list_item: WikiNode,
) -> None:
    """Extract a plain ``text - translation`` example from *list_item*.

    The text of the list item (nested lists excluded) is used as the example
    text. When it contains a separator hyphen, the part after the hyphen
    becomes the translation and the part before it the example text. Nothing
    is appended to *sense* when the resulting example text is empty.

    Args:
        wxr: Extraction context passed through to the helper functions.
        sense: Sense that receives the example in ``sense.examples``.
        list_item: The list-item node to inspect.
    """
    e_nodes = list(list_item.invert_find_child(NodeKind.LIST))
    list_item_text = clean_node(wxr, sense, e_nodes)
    example = Example(text=list_item_text)
    # A separator hyphen has whitespace on at least one side. Splitting at
    # any "-" would cut hyphenated words such as "diga-me" in half and
    # report the tail as a translation.
    separator = re.search(r"\s-|-\s", example.text)
    if separator is not None:
        example.translation = example.text[separator.end() :].strip()
        example.text = example.text[: separator.start()].strip()
    if example.text:
        calculate_bold_offsets(
            wxr,
            # Re-parse the nodes so they are passed as a single tree.
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(e_nodes)),
            example.text,
            example,
            "bold_text_offsets",
        )
        sense.examples.append(example)