Coverage for src/wiktextract/extractor/it/example.py: 91%

1from wikitextprocessor import NodeKind, TemplateNode, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from ..ruby import extract_ruby

6from ..share import calculate_bold_offsets

7from .models import Example, Sense

def extract_example_list_item(
    wxr: WiktextractContext, sense: Sense, list_item: WikiNode, lang_code: str
) -> None:
    """Extract example data from one list item and add it to `sense`.

    Walks the direct children of `list_item` and collects example texts
    together with a romanization, translation and reference that are
    shared by every example found in the item.  Nothing is returned:
    examples whose text is non-empty are appended to `sense.examples`.

    Args:
        wxr: Context object; its `wtp` attribute is used to convert nodes
            to wikitext and parse them again.
        sense: Sense whose `examples` list receives the results.
        list_item: List item node whose children are inspected.
        lang_code: Language code; "zh" and "ja" switch on the
            romanization and ruby handling below.
    """
    examples = []
    # True until the first italic node of a zh/ja item has been consumed.
    before_italic = True
    # zh/ja only: nodes seen before the first italic node.
    text_nodes = []
    # Fields stored here are copied onto every example at the end.
    shared_example = Example()
    has_zh_tradsem = False
    for index, node in enumerate(list_item.children):
        if (
            isinstance(node, TemplateNode)
            and node.template_name == "zh-tradsem"
        ):
            # One template can yield several examples (see
            # extract_zh_tradsem); remember it so they are not merged later.
            examples.extend(extract_zh_tradsem(wxr, node))
            has_zh_tradsem = True
        elif isinstance(node, WikiNode):
            match node.kind:
                case NodeKind.ITALIC:
                    if lang_code in ["zh", "ja"]:
                        # Only the first italic node is used, and it is
                        # stored as the romanization, not the example text.
                        if before_italic:
                            shared_example.roman = clean_node(wxr, sense, node)
                            calculate_bold_offsets(
                                wxr,
                                node,
                                shared_example.roman,
                                shared_example,
                                "bold_roman_offsets",
                            )
                            before_italic = False
                    else:
                        # Other languages: each italic node is an example.
                        e_data = Example(text=clean_node(wxr, sense, node))
                        calculate_bold_offsets(
                            wxr, node, e_data.text, e_data, "bold_text_offsets"
                        )
                        examples.append(e_data)
                case NodeKind.LIST:
                    # Nested list items are read as the translation; each
                    # item overwrites the previous one, so the last wins.
                    for tr_list_item in node.find_child(NodeKind.LIST_ITEM):
                        shared_example.translation = clean_node(
                            wxr, sense, tr_list_item.children
                        )
                        calculate_bold_offsets(
                            wxr,
                            tr_list_item,
                            shared_example.translation,
                            shared_example,
                            "bold_translation_offsets",
                        )
                case _ if lang_code in ["zh", "ja"]:
                    # Any other wiki node ahead of the italic node is kept
                    # as part of the zh/ja example text.
                    if before_italic:
                        text_nodes.append(node)
        elif isinstance(node, str) and "-" in node:
            # A "-" inside a plain string splits the item: what precedes it
            # is example text, what follows is the translation.
            for t_node in list_item.find_child(NodeKind.TEMPLATE):
                if t_node.template_name == "Term":
                    # First "Term" template gives the reference; "(" and
                    # ")" are stripped from both ends.
                    shared_example.ref = clean_node(wxr, None, t_node).strip(
                        "()"
                    )
                    break
            # Translation = rest of this string after the first "-" plus
            # all following siblings except "Term" templates.
            tr_nodes = wxr.wtp.parse(
                wxr.wtp.node_to_wikitext(
                    [node[node.index("-") + 1 :]]
                    + [
                        n
                        for n in list_item.children[index + 1 :]
                        if not (
                            isinstance(n, TemplateNode)
                            and n.template_name == "Term"
                        )
                    ]
                )
            )
            shared_example.translation = clean_node(wxr, sense, tr_nodes)
            calculate_bold_offsets(
                wxr,
                tr_nodes,
                shared_example.translation,
                shared_example,
                "bold_translation_offsets",
            )
            if not has_zh_tradsem and len(examples) > 1:
                # Several examples were collected before the "-": discard
                # them and build a single example from everything that
                # precedes the "-".
                examples.clear()
                text_node = wxr.wtp.parse(
                    wxr.wtp.node_to_wikitext(
                        list_item.children[:index] + [node[: node.index("-")]]
                    )
                )
                e_data = Example(text=clean_node(wxr, None, text_node))
                calculate_bold_offsets(
                    wxr, text_node, e_data.text, e_data, "bold_text_offsets"
                )
                examples.append(e_data)
            # The remaining siblings were already used for the translation.
            break
        elif lang_code in ["zh", "ja"] and len(examples) == 0 and before_italic:
            # Remaining children (e.g. strings without "-") ahead of the
            # italic node also belong to the zh/ja example text.
            text_nodes.append(node)

    if lang_code in ["zh", "ja"] and len(examples) == 0 and len(text_nodes) > 0:
        # No example found yet: build one from the collected nodes, with
        # templates expanded so extract_ruby can separate ruby data.
        expanded_nodes = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(text_nodes), expand_all=True
        )
        example = Example()
        example.ruby, node_without_ruby = extract_ruby(
            wxr, expanded_nodes.children
        )
        # Spaces are removed and "(" is stripped from both ends of the text.
        example.text = (
            clean_node(wxr, sense, node_without_ruby)
            .replace(" ", "")
            .strip("(")
        )
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(node_without_ruby)),
            example.text,
            example,
            "bold_text_offsets",
        )
        examples.append(example)

    if not has_zh_tradsem and len(examples) > 1:
        # Without zh-tradsem, multiple examples are replaced by a single
        # one rebuilt from the nodes returned by
        # invert_find_child(NodeKind.LIST) — presumably every child that
        # is not a nested list; confirm against wikitextprocessor.
        examples.clear()
        text_node = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(
                list(list_item.invert_find_child(NodeKind.LIST))
            )
        )
        e_data = Example(text=clean_node(wxr, None, text_node))
        calculate_bold_offsets(
            wxr, text_node, e_data.text, e_data, "bold_text_offsets"
        )
        examples.append(e_data)

    for example in examples:
        # Copy every non-empty shared field onto the example.
        # NOTE(review): shared_example.text / bold_text_offsets are never
        # assigned in this function, so those two entries presumably never
        # override anything unless Example has non-empty defaults.
        for attr in [
            "roman",
            "bold_roman_offsets",
            "translation",
            "bold_translation_offsets",
            "ref",
            "text",
            "bold_text_offsets",
        ]:
            value = getattr(shared_example, attr)
            if len(value) > 0:
                setattr(example, attr, value)
        # Examples without text are dropped.
        if len(example.text) > 0:
            sense.examples.append(example)

154

155

def extract_zh_tradsem(
    wxr: WiktextractContext, t_node: TemplateNode
) -> list[Example]:
    """Build examples from the first two arguments of a zh-tradsem template.

    Positional argument 1 yields an example tagged "Traditional Chinese"
    and argument 2 one tagged "Simplified Chinese".  An argument whose
    cleaned text is empty once spaces are removed produces no example.

    Args:
        wxr: Context object used to clean and re-parse the argument.
        t_node: The template node to read arguments from.

    Returns:
        The created examples, argument 1 first.
    """
    # https://it.wiktionary.org/wiki/Template:zh-tradsem
    tag_by_position = {1: "Traditional Chinese", 2: "Simplified Chinese"}
    results = []
    for position, script_tag in tag_by_position.items():
        raw_arg = t_node.template_parameters.get(position, "")
        cleaned = clean_node(wxr, None, raw_arg).replace(" ", "")
        if cleaned == "":
            continue
        item = Example(text=cleaned)
        # Re-parse the argument so bold offsets are computed on nodes.
        reparsed = wxr.wtp.parse(wxr.wtp.node_to_wikitext(raw_arg))
        calculate_bold_offsets(
            wxr, reparsed, item.text, item, "bold_text_offsets"
        )
        item.tags.append(script_tag)
        results.append(item)

    return results