Coverage for src/wiktextract/extractor/it/example.py: 91%

85 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-13 10:14 +0000

1from wikitextprocessor import NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..ruby import extract_ruby 

6from ..share import calculate_bold_offsets 

7from .models import Example, Sense 

8 

9 

def extract_example_list_item(
    wxr: WiktextractContext, sense: Sense, list_item: WikiNode, lang_code: str
) -> None:
    """Collect the examples found in one list item into ``sense.examples``.

    The children of ``list_item`` are scanned in order:

    * a ``zh-tradsem`` template contributes the examples returned by
      ``extract_zh_tradsem``;
    * an italic node is stored as the romanization when ``lang_code`` is
      "zh" or "ja" (only the first italic node is used), otherwise it
      becomes an example text of its own;
    * every item of a nested list overwrites the shared translation;
    * a plain string containing "-" ends the scan: the text after the first
      "-" plus all following children except ``Term`` templates becomes the
      translation, and the first ``Term`` template of the list item, if any,
      becomes the reference.

    Romanization, translation and reference are gathered on one shared
    ``Example`` and copied onto every collected example at the end. Only
    examples with a non-empty text are appended to ``sense.examples``.
    """
    is_cjk = lang_code in ["zh", "ja"]
    collected = []
    pending_text_nodes = []
    shared = Example()
    seen_italic = False
    seen_zh_tradsem = False

    for position, child in enumerate(list_item.children):
        if isinstance(child, TemplateNode) and (
            child.template_name == "zh-tradsem"
        ):
            collected.extend(extract_zh_tradsem(wxr, child))
            seen_zh_tradsem = True
        elif isinstance(child, WikiNode):
            kind = child.kind
            if kind == NodeKind.ITALIC:
                if not is_cjk:
                    italic_example = Example(
                        text=clean_node(wxr, sense, child)
                    )
                    calculate_bold_offsets(
                        wxr,
                        child,
                        italic_example.text,
                        italic_example,
                        "bold_text_offsets",
                    )
                    collected.append(italic_example)
                elif not seen_italic:
                    # For zh/ja only the first italic node is kept, as the
                    # romanization.
                    shared.roman = clean_node(wxr, sense, child)
                    calculate_bold_offsets(
                        wxr, child, shared.roman, shared, "bold_roman_offsets"
                    )
                    seen_italic = True
            elif kind == NodeKind.LIST:
                for sub_item in child.find_child(NodeKind.LIST_ITEM):
                    shared.translation = clean_node(
                        wxr, sense, sub_item.children
                    )
                    calculate_bold_offsets(
                        wxr,
                        sub_item,
                        shared.translation,
                        shared,
                        "bold_translation_offsets",
                    )
            elif is_cjk and not seen_italic:
                # Any other node ahead of the italic part belongs to the
                # zh/ja example text assembled after the loop.
                pending_text_nodes.append(child)
        elif isinstance(child, str) and "-" in child:
            term_template = next(
                (
                    template
                    for template in list_item.find_child(NodeKind.TEMPLATE)
                    if template.template_name == "Term"
                ),
                None,
            )
            if term_template is not None:
                shared.ref = clean_node(wxr, None, term_template).strip("()")

            dash_at = child.index("-")
            trailing_nodes = [
                later
                for later in list_item.children[position + 1 :]
                if not isinstance(later, TemplateNode)
                or later.template_name != "Term"
            ]
            translation_root = wxr.wtp.parse(
                wxr.wtp.node_to_wikitext(
                    [child[dash_at + 1 :]] + trailing_nodes
                )
            )
            shared.translation = clean_node(wxr, sense, translation_root)
            calculate_bold_offsets(
                wxr,
                translation_root,
                shared.translation,
                shared,
                "bold_translation_offsets",
            )
            if not seen_zh_tradsem and len(collected) > 1:
                # Several italic pieces were collected: replace them with a
                # single example built from everything before the "-".
                collected.clear()
                collected.append(
                    _merged_text_example(
                        wxr,
                        list_item.children[:position] + [child[:dash_at]],
                    )
                )
            # The remaining children were consumed by the translation.
            break
        elif is_cjk and not collected and not seen_italic:
            pending_text_nodes.append(child)

    if is_cjk and not collected and pending_text_nodes:
        expanded_root = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(pending_text_nodes), expand_all=True
        )
        cjk_example = Example()
        cjk_example.ruby, ruby_free_nodes = extract_ruby(
            wxr, expanded_root.children
        )
        cleaned_text = clean_node(wxr, sense, ruby_free_nodes)
        cjk_example.text = cleaned_text.replace(" ", "").strip("(")
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(ruby_free_nodes)),
            cjk_example.text,
            cjk_example,
            "bold_text_offsets",
        )
        collected.append(cjk_example)

    if not seen_zh_tradsem and len(collected) > 1:
        # Without zh-tradsem a list item yields one example: rebuild it from
        # every child that is not a nested list.
        collected.clear()
        collected.append(
            _merged_text_example(
                wxr,
                list(
                    list_item.invert_find_child(
                        NodeKind.LIST, include_empty_str=True
                    )
                ),
            )
        )

    shared_attrs = (
        "roman",
        "bold_roman_offsets",
        "translation",
        "bold_translation_offsets",
        "ref",
        "text",
        "bold_text_offsets",
    )
    for item in collected:
        # Copy every non-empty shared field onto the example.
        for attr_name in shared_attrs:
            shared_value = getattr(shared, attr_name)
            if len(shared_value) > 0:
                setattr(item, attr_name, shared_value)
        if len(item.text) > 0:
            sense.examples.append(item)


def _merged_text_example(wxr: WiktextractContext, nodes: list) -> Example:
    """Build one ``Example`` whose text is the cleaned rendering of ``nodes``."""
    root = wxr.wtp.parse(wxr.wtp.node_to_wikitext(nodes))
    merged = Example(text=clean_node(wxr, None, root))
    calculate_bold_offsets(
        wxr, root, merged.text, merged, "bold_text_offsets"
    )
    return merged

158 

159 

def extract_zh_tradsem(
    wxr: WiktextractContext, t_node: TemplateNode
) -> list[Example]:
    """Return the examples held by a ``zh-tradsem`` template.

    Positional arguments 1 and 2 are read in that order. Each argument whose
    cleaned text (with spaces removed) is non-empty produces one ``Example``,
    tagged "Traditional-Chinese" for argument 1 and "Simplified-Chinese" for
    argument 2.
    """
    # https://it.wiktionary.org/wiki/Template:zh-tradsem
    tag_by_argument = (
        (1, "Traditional-Chinese"),
        (2, "Simplified-Chinese"),
    )
    results = []
    for arg_position, script_tag in tag_by_argument:
        raw_arg = t_node.template_parameters.get(arg_position, "")
        compact_text = clean_node(wxr, None, raw_arg).replace(" ", "")
        if compact_text == "":
            continue
        item = Example(text=compact_text)
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(raw_arg)),
            item.text,
            item,
            "bold_text_offsets",
        )
        item.tags.append(script_tag)
        results.append(item)

    return results