Coverage for src / wiktextract / extractor / tr / example.py: 89%

143 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-11 04:48 +0000

1import re 

2 

3from wikitextprocessor import NodeKind, TemplateNode, WikiNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from ..share import calculate_bold_offsets 

8from .linkage import ( 

9 GLOSS_LIST_LINKAGE_TEMPLATES, 

10 extract_gloss_list_linkage_template, 

11) 

12from .models import Example, WordEntry 

13 

14 

def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    example: Example,
) -> None:
    """Fill *example* from the children of a single example list item.

    Child nodes are handled as follows:

    * ``ux``/``uxi``, ``örnek``, ``kt``/``mt`` and ``AT:*`` templates are
      passed to their dedicated extractors.
    * Templates named in ``GLOSS_LIST_LINKAGE_TEMPLATES`` are forwarded to
      ``extract_gloss_list_linkage_template`` together with *word_entry*.
    * Nested lists are walked recursively into the same *example*.
    * A non-empty italic node becomes the example text when
      ``example.text`` is still empty, otherwise the translation.
    * Plain strings are collected, split by whether an italic node has
      already been seen.

    After the walk, when no template or italic node produced anything, the
    collected plain strings are parsed as a quoted example.  Otherwise,
    when ``example.ref`` is still empty, the strings that followed an
    italic node are used as the reference.
    """
    example_found = False
    italic_found = False
    strings_before_italic: list[str] = []
    strings_after_italic: list[str] = []

    for child in list_item.children:
        if isinstance(child, TemplateNode):
            t_name = child.template_name
            if t_name in ["ux", "uxi"]:
                extract_ux_template(wxr, word_entry.lang_code, child, example)
            elif t_name == "örnek":
                extract_örnek_template(
                    wxr, word_entry.lang_code, child, example
                )
            elif t_name in ("kt", "mt"):
                extract_kt_template(wxr, child, example)
            elif t_name in GLOSS_LIST_LINKAGE_TEMPLATES:
                # Linkage data goes to the word entry, not the example.
                extract_gloss_list_linkage_template(wxr, word_entry, child)
                continue
            elif t_name.startswith("AT:"):
                extract_at_template(wxr, example, child)
            else:
                continue
            example_found = True
            continue

        if isinstance(child, WikiNode):
            if child.kind == NodeKind.LIST:
                for sub_item in child.find_child(NodeKind.LIST_ITEM):
                    extract_example_list_item(
                        wxr, word_entry, sub_item, example
                    )
            elif child.kind == NodeKind.ITALIC:
                italic_text = clean_node(wxr, None, child)
                if italic_text == "":
                    continue
                if example.text == "":
                    example.text = italic_text
                    offsets_field = "bold_text_offsets"
                else:
                    example.translation = italic_text
                    offsets_field = "bold_translation_offsets"
                calculate_bold_offsets(
                    wxr, child, italic_text, example, offsets_field
                )
                italic_found = True
                example_found = True
            continue

        if isinstance(child, str):
            if italic_found:
                strings_after_italic.append(child)
            else:
                strings_before_italic.append(child)

    if not example_found and strings_before_italic:
        extract_quoted_plain_example("".join(strings_before_italic), example)
        return

    if example.ref != "" or not strings_after_italic:
        return
    # Drop the leading dash and surrounding parentheses of the source note.
    source_note = "".join(strings_after_italic).strip()
    source_note = source_note.lstrip("-–—").strip("()").strip()
    if source_note != "":
        example.ref = source_note

86 

def extract_ux_template(
    wxr: WiktextractContext,
    lang_code: str,
    t_node: TemplateNode,
    example: Example,
) -> None:
    """Copy the arguments of a ``ux``/``uxi`` template into *example*.

    Argument 1 is the language code of argument 2.  Argument 2 is stored as
    the example text when that code equals *lang_code*, otherwise as the
    translation when the code is ``"tr"``; any other code leaves it unused.
    Arguments 4 and 5 supply the reference (a non-empty argument 5 wins
    over argument 4), and a non-empty argument 3 replaces the translation.
    Bold offsets are recalculated for every field that is written.
    """
    # https://tr.wiktionary.org/wiki/Şablon:ux
    params = t_node.template_parameters
    sentence_lang = clean_node(wxr, None, params.get(1, ""))
    sentence_arg = params.get(2, "")
    sentence = clean_node(wxr, None, sentence_arg)

    in_entry_lang = sentence_lang == lang_code
    if in_entry_lang or sentence_lang == "tr":
        if in_entry_lang:
            example.text = sentence
            offsets_field = "bold_text_offsets"
        else:
            example.translation = sentence
            offsets_field = "bold_translation_offsets"
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(sentence_arg)),
            sentence,
            example,
            offsets_field,
        )

    for position in (4, 5):
        citation = clean_node(wxr, None, params.get(position, ""))
        if citation != "":
            example.ref = citation

    translation_arg = params.get(3, "")
    translation = clean_node(wxr, None, translation_arg)
    if translation == "":
        return
    example.translation = translation
    calculate_bold_offsets(
        wxr,
        wxr.wtp.parse(wxr.wtp.node_to_wikitext(translation_arg)),
        translation,
        example,
        "bold_translation_offsets",
    )

130 

131 

def extract_örnek_template(
    wxr: WiktextractContext,
    lang_code: str,
    t_node: TemplateNode,
    example: Example,
) -> None:
    """Copy the arguments of an ``örnek`` template into *example*.

    The ``dil`` argument is the language code of argument 1.  Argument 1 is
    stored as the example text when that code equals *lang_code*, otherwise
    as the translation when the code is ``"tr"``; any other code (including
    a missing ``dil``) leaves it unused.  Arguments 2 and 3 supply the
    reference (a non-empty argument 3 wins over argument 2), and a
    non-empty ``t`` argument replaces the translation.  Bold offsets are
    recalculated for every field that is written.
    """
    # https://tr.wiktionary.org/wiki/Şablon:örnek
    params = t_node.template_parameters
    sentence_lang = clean_node(wxr, None, params.get("dil", ""))
    sentence_arg = params.get(1, "")
    sentence = clean_node(wxr, None, sentence_arg)

    in_entry_lang = sentence_lang == lang_code
    if in_entry_lang or sentence_lang == "tr":
        if in_entry_lang:
            example.text = sentence
            offsets_field = "bold_text_offsets"
        else:
            example.translation = sentence
            offsets_field = "bold_translation_offsets"
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(sentence_arg)),
            sentence,
            example,
            offsets_field,
        )

    for position in (2, 3):
        citation = clean_node(wxr, None, params.get(position, ""))
        if citation != "":
            example.ref = citation

    translation_arg = params.get("t", "")
    translation = clean_node(wxr, None, translation_arg)
    if translation == "":
        return
    example.translation = translation
    calculate_bold_offsets(
        wxr,
        wxr.wtp.parse(wxr.wtp.node_to_wikitext(translation_arg)),
        translation,
        example,
        "bold_translation_offsets",
    )

177 

178 

def extract_at_template(
    wxr: WiktextractContext, example: Example, t_node: TemplateNode
) -> None:
    """Fill *example* from an ``AT:*`` citation template.

    The first of ``pasaj``/``text``/``metin`` that is present becomes the
    example text, and the first of ``anlam``/``mana``/``mânâ``/``t``/
    ``tercüme`` that is present becomes the translation.  When none of the
    passage arguments is given, the first present meaning argument is used
    as the example text instead.  Bold offsets are recalculated for every
    field that is written.

    The reference is the first line of the expanded template.  If the
    template expands to an empty string, ``example.ref`` is left unchanged
    rather than raising ``IndexError``.
    """
    # Şablon:AT:Kur'an
    passage_args = ("pasaj", "text", "metin")
    meaning_args = ("anlam", "mana", "mânâ", "t", "tercüme")
    params = t_node.template_parameters

    def first_present(names: tuple[str, ...]) -> str | None:
        # Name of the first argument in *names* that the template was given.
        for name in names:
            if name in params:
                return name
        return None

    passage_name = first_present(passage_args)
    meaning_name = first_present(meaning_args)

    if passage_name is not None:
        passage_value = params[passage_name]
        example.text = clean_node(wxr, None, passage_value)
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(passage_value)),
            example.text,
            example,
            "bold_text_offsets",
        )
        if meaning_name is not None:
            meaning_value = params[meaning_name]
            example.translation = clean_node(wxr, None, meaning_value)
            calculate_bold_offsets(
                wxr,
                wxr.wtp.parse(wxr.wtp.node_to_wikitext(meaning_value)),
                example.translation,
                example,
                "bold_translation_offsets",
            )
    elif meaning_name is not None:
        # No passage argument: the meaning argument carries the text itself.
        meaning_value = params[meaning_name]
        example.text = clean_node(wxr, None, meaning_value)
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(meaning_value)),
            example.text,
            example,
            "bold_text_offsets",
        )

    # "".splitlines() is [], so guard before taking the first line.
    expanded_lines = clean_node(wxr, None, t_node).splitlines()
    if expanded_lines:
        example.ref = expanded_lines[0]

225 

226 

# Named template arguments that extract_kt_template reads to build
# Example.ref.  Order matters: the non-empty values are joined with ", "
# in exactly this sequence.
KT_REF_FIELDS = (
    "yazar",  # author
    "başlık",  # title
    "dergi",  # magazine
    "sayı",  # issue
    "yıl",  # year
    "tarih",  # date
    "sayfa",  # page
    "yayıncı",  # publisher
)

237 

238 

def extract_kt_template(
    wxr: WiktextractContext,
    t_node: TemplateNode,
    example: Example,
) -> None:
    """Fill *example* from a ``kt`` or ``mt`` citation template.

    A non-empty ``tanıklık`` argument becomes the example text (with bold
    offsets recalculated).  The non-empty arguments named in
    ``KT_REF_FIELDS`` are joined with ``", "`` and stored as the reference,
    but only when ``example.ref`` is still empty.
    """
    # https://tr.wiktionary.org/wiki/Şablon:kt (book) and Şablon:mt (magazine).
    # tanıklık = example sentence; remaining fields = ref.
    params = t_node.template_parameters

    quote_arg = params.get("tanıklık", "")
    quote = clean_node(wxr, None, quote_arg)
    if quote != "":
        example.text = quote
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(quote_arg)),
            quote,
            example,
            "bold_text_offsets",
        )

    cleaned_fields = [
        clean_node(wxr, None, params.get(field_name, ""))
        for field_name in KT_REF_FIELDS
    ]
    citation_parts = [part for part in cleaned_fields if part != ""]
    if citation_parts and example.ref == "":
        example.ref = ", ".join(citation_parts)

264 

265 

# Matches a plain-text example of the form `"quote" - source`.
#   text: at least one character after an opening quote (" or “), ending
#         at the first closing quote (" or ”) that follows it.
#   ref:  optional remainder after the closing quote; a single leading
#         dash (-, – or —) directly after the quote is skipped.
# re.DOTALL lets "." match newlines, so both groups may span lines.
QUOTED_EXAMPLE_RE = re.compile(
    r'^\s*[\"“"]\s*(?P<text>.+?)\s*[\"”"]\s*'
    r'(?:[-–—]\s*)?(?P<ref>.+?)?\s*$',
    re.DOTALL,
)

271 

272 

def extract_quoted_plain_example(raw: str, example: Example) -> None:
    """Parse a plain quoted line into ``example.text`` and ``example.ref``.

    *raw* is stripped and matched against ``QUOTED_EXAMPLE_RE``.  Nothing
    is changed when it is empty, does not match, or the quoted text is
    empty.  Otherwise the quoted text becomes ``example.text``.  When
    ``example.ref`` is still empty, the remainder, with leading dashes and
    surrounding parentheses removed, becomes the reference if any text is
    left.
    """
    # Plain `"..." - Author` lines that use no italic/template markup.
    stripped = raw.strip()
    found = QUOTED_EXAMPLE_RE.match(stripped) if stripped != "" else None
    if found is None:
        return

    quote = (found.group("text") or "").strip()
    if quote == "":
        return
    example.text = quote

    if example.ref != "":
        return
    source = (found.group("ref") or "").strip()
    source = source.lstrip("-–—").strip("()").strip()
    if source != "":
        example.ref = source