Coverage for src/wiktextract/extractor/it/example.py: 91%

85 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1from wikitextprocessor import NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..ruby import extract_ruby 

6from ..share import calculate_bold_offsets 

7from .models import Example, Sense 

8 

9 

def extract_example_list_item(
    wxr: WiktextractContext, sense: Sense, list_item: WikiNode, lang_code: str
) -> None:
    """Collect the examples found in one list item into ``sense.examples``.

    The children of ``list_item`` are walked in order:

    * a ``zh-tradsem`` template yields its examples through
      ``extract_zh_tradsem``;
    * an italic node is the shared romanization for ``zh``/``ja`` entries
      (first italic only) and an example text for every other language;
    * a nested list supplies the shared translation;
    * a plain string containing ``-`` ends the walk: what follows the dash
      (minus ``Term`` templates) becomes the shared translation and a
      ``Term`` template, if any, becomes the shared reference.

    For ``zh``/``ja`` entries that produced no example, the nodes seen
    before the first italic are expanded, ruby is split off and the
    remaining text becomes the example.  Fields gathered on the shared
    example are copied onto every collected example, and only examples with
    non-empty text are appended to ``sense.examples``.
    """
    is_cjk = lang_code in ("zh", "ja")
    examples = []
    leading_nodes = []
    shared_example = Example()
    before_italic = True
    has_zh_tradsem = False

    for pos, child in enumerate(list_item.children):
        if (
            isinstance(child, TemplateNode)
            and child.template_name == "zh-tradsem"
        ):
            examples.extend(extract_zh_tradsem(wxr, child))
            has_zh_tradsem = True
            continue

        if isinstance(child, WikiNode):
            kind = child.kind
            if kind == NodeKind.ITALIC:
                if not is_cjk:
                    italic_example = Example(
                        text=clean_node(wxr, sense, child)
                    )
                    calculate_bold_offsets(
                        wxr,
                        child,
                        italic_example.text,
                        italic_example,
                        "bold_text_offsets",
                    )
                    examples.append(italic_example)
                elif before_italic:
                    # Only the first italic node is used as romanization.
                    shared_example.roman = clean_node(wxr, sense, child)
                    calculate_bold_offsets(
                        wxr,
                        child,
                        shared_example.roman,
                        shared_example,
                        "bold_roman_offsets",
                    )
                    before_italic = False
            elif kind == NodeKind.LIST:
                # Each nested item overwrites the translation, so the last
                # one wins.
                for nested_item in child.find_child(NodeKind.LIST_ITEM):
                    shared_example.translation = clean_node(
                        wxr, sense, nested_item.children
                    )
                    calculate_bold_offsets(
                        wxr,
                        nested_item,
                        shared_example.translation,
                        shared_example,
                        "bold_translation_offsets",
                    )
            elif is_cjk and before_italic:
                leading_nodes.append(child)
            continue

        if isinstance(child, str) and "-" in child:
            dash_at = child.index("-")

            term_template = next(
                (
                    tmpl
                    for tmpl in list_item.find_child(NodeKind.TEMPLATE)
                    if tmpl.template_name == "Term"
                ),
                None,
            )
            if term_template is not None:
                shared_example.ref = clean_node(
                    wxr, None, term_template
                ).strip("()")

            # Translation: text after the dash plus all later siblings,
            # leaving out "Term" templates.
            following = [
                sibling
                for sibling in list_item.children[pos + 1 :]
                if not isinstance(sibling, TemplateNode)
                or sibling.template_name != "Term"
            ]
            tr_root = wxr.wtp.parse(
                wxr.wtp.node_to_wikitext([child[dash_at + 1 :]] + following)
            )
            shared_example.translation = clean_node(wxr, sense, tr_root)
            calculate_bold_offsets(
                wxr,
                tr_root,
                shared_example.translation,
                shared_example,
                "bold_translation_offsets",
            )

            if not has_zh_tradsem and len(examples) > 1:
                # Several italic fragments were collected: replace them with
                # one example built from everything before the dash.
                examples.clear()
                examples.append(
                    _merge_into_single_example(
                        wxr, list_item.children[:pos] + [child[:dash_at]]
                    )
                )
            break

        if is_cjk and not examples and before_italic:
            leading_nodes.append(child)

    if is_cjk and not examples and leading_nodes:
        expanded_root = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(leading_nodes), expand_all=True
        )
        cjk_example = Example()
        cjk_example.ruby, ruby_free_nodes = extract_ruby(
            wxr, expanded_root.children
        )
        cleaned_text = clean_node(wxr, sense, ruby_free_nodes)
        cjk_example.text = cleaned_text.replace(" ", "").strip("(")
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(ruby_free_nodes)),
            cjk_example.text,
            cjk_example,
            "bold_text_offsets",
        )
        examples.append(cjk_example)

    if not has_zh_tradsem and len(examples) > 1:
        # More than one fragment without zh-tradsem: treat every non-list
        # child of the list item as one single example.
        examples.clear()
        examples.append(
            _merge_into_single_example(
                wxr, list(list_item.invert_find_child(NodeKind.LIST))
            )
        )

    # Non-empty fields of the shared example are copied to every example.
    inherited = [
        (attr, value)
        for attr in (
            "roman",
            "bold_roman_offsets",
            "translation",
            "bold_translation_offsets",
            "ref",
            "text",
            "bold_text_offsets",
        )
        if len(value := getattr(shared_example, attr)) > 0
    ]
    for collected in examples:
        for attr, value in inherited:
            setattr(collected, attr, value)
        if len(collected.text) > 0:
            sense.examples.append(collected)


def _merge_into_single_example(wxr: WiktextractContext, nodes: list) -> Example:
    """Build one example whose text is the cleaned rendering of ``nodes``."""
    merged_root = wxr.wtp.parse(wxr.wtp.node_to_wikitext(nodes))
    merged = Example(text=clean_node(wxr, None, merged_root))
    calculate_bold_offsets(
        wxr, merged_root, merged.text, merged, "bold_text_offsets"
    )
    return merged

154 

155 

def extract_zh_tradsem(
    wxr: WiktextractContext, t_node: TemplateNode
) -> list[Example]:
    """Build examples from the first two arguments of a zh-tradsem template.

    Argument 1 is tagged "Traditional Chinese" and argument 2 "Simplified
    Chinese".  Spaces are removed from the cleaned text, and an argument
    whose cleaned text is empty produces no example.
    """
    # https://it.wiktionary.org/wiki/Template:zh-tradsem
    tagged_positions = (
        (1, "Traditional Chinese"),
        (2, "Simplified Chinese"),
    )
    results = []
    for position, script_tag in tagged_positions:
        raw_value = t_node.template_parameters.get(position, "")
        text = clean_node(wxr, None, raw_value).replace(" ", "")
        if text == "":
            continue
        entry = Example(text=text)
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(raw_value)),
            entry.text,
            entry,
            "bold_text_offsets",
        )
        entry.tags.append(script_tag)
        results.append(entry)

    return results