Coverage for src / wiktextract / extractor / tr / example.py: 89%
143 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-11 04:48 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-11 04:48 +0000
1import re
3from wikitextprocessor import NodeKind, TemplateNode, WikiNode
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from ..share import calculate_bold_offsets
8from .linkage import (
9 GLOSS_LIST_LINKAGE_TEMPLATES,
10 extract_gloss_list_linkage_template,
11)
12from .models import Example, WordEntry
def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    example: Example,
) -> None:
    """Populate *example* from the children of one example list item.

    Children are handled by type:

    * Templates ``ux``/``uxi``, ``örnek``, ``kt``/``mt`` and ``AT:*`` are
      passed to their dedicated extractors; gloss-list linkage templates
      are forwarded to ``extract_gloss_list_linkage_template``.
    * Nested lists are walked recursively, writing into the same *example*.
    * The first non-empty italic node becomes ``example.text``; any later
      one becomes ``example.translation``.
    * Plain strings are collected separately depending on whether they
      occur before or after the first non-empty italic node.

    If no example template or italic node was handled, the collected plain
    strings are parsed as a quoted example.  Otherwise, when ``example.ref``
    is still empty, the strings that followed the italic text are used as
    the reference.
    """
    italic_seen = False
    example_found = False
    strings_before_italic: list[str] = []
    strings_after_italic: list[str] = []

    for child in list_item.children:
        if isinstance(child, str):
            if italic_seen:
                strings_after_italic.append(child)
            else:
                strings_before_italic.append(child)
            continue

        # TemplateNode must be tested before the generic WikiNode branch.
        if isinstance(child, TemplateNode):
            name = child.template_name
            if name in ["ux", "uxi"]:
                extract_ux_template(wxr, word_entry.lang_code, child, example)
                example_found = True
            elif name == "örnek":
                extract_örnek_template(
                    wxr, word_entry.lang_code, child, example
                )
                example_found = True
            elif name in ("kt", "mt"):
                extract_kt_template(wxr, child, example)
                example_found = True
            elif name in GLOSS_LIST_LINKAGE_TEMPLATES:
                extract_gloss_list_linkage_template(wxr, word_entry, child)
            elif name.startswith("AT:"):
                extract_at_template(wxr, example, child)
                example_found = True
            continue

        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_example_list_item(
                    wxr, word_entry, nested_item, example
                )
        elif child.kind == NodeKind.ITALIC:
            italic_text = clean_node(wxr, None, child)
            if italic_text == "":
                continue
            # First italic run is the sentence, later ones the translation.
            if example.text == "":
                example.text = italic_text
                offsets_field = "bold_text_offsets"
            else:
                example.translation = italic_text
                offsets_field = "bold_translation_offsets"
            calculate_bold_offsets(
                wxr, child, italic_text, example, offsets_field
            )
            italic_seen = True
            example_found = True

    if strings_before_italic and not example_found:
        extract_quoted_plain_example("".join(strings_before_italic), example)
        return

    if example.ref != "" or not strings_after_italic:
        return
    # Drop a leading dash separator and surrounding parentheses.
    ref_text = "".join(strings_after_italic).strip()
    ref_text = ref_text.lstrip("-–—").strip("()").strip()
    if ref_text != "":
        example.ref = ref_text
def extract_ux_template(
    wxr: WiktextractContext,
    lang_code: str,
    t_node: TemplateNode,
    example: Example,
) -> None:
    """Fill *example* from a ``ux``/``uxi`` template.

    Positional parameter 1 is read as a language code and parameter 2 as
    the sentence.  The sentence is stored as ``example.text`` when the
    language code equals *lang_code*, otherwise as ``example.translation``
    when the code is ``"tr"``.  Non-empty parameters 4 and 5 set
    ``example.ref`` (5 overrides 4), and a non-empty parameter 3 sets
    ``example.translation``, replacing any value stored earlier.
    """
    # https://tr.wiktionary.org/wiki/Şablon:ux
    params = t_node.template_parameters
    example_lang = clean_node(wxr, None, params.get(1, ""))
    sentence_arg = params.get(2, "")
    sentence = clean_node(wxr, None, sentence_arg)

    offsets_field = ""
    if example_lang == lang_code:
        example.text = sentence
        offsets_field = "bold_text_offsets"
    elif example_lang == "tr":
        example.translation = sentence
        offsets_field = "bold_translation_offsets"
    if offsets_field != "":
        # Re-parse the raw argument so bold markup can be located.
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(sentence_arg)),
            sentence,
            example,
            offsets_field,
        )

    for ref_index in (4, 5):
        ref_text = clean_node(wxr, None, params.get(ref_index, ""))
        if ref_text:
            example.ref = ref_text

    translation_arg = params.get(3, "")
    translation = clean_node(wxr, None, translation_arg)
    if not translation:
        return
    example.translation = translation
    calculate_bold_offsets(
        wxr,
        wxr.wtp.parse(wxr.wtp.node_to_wikitext(translation_arg)),
        translation,
        example,
        "bold_translation_offsets",
    )
def extract_örnek_template(
    wxr: WiktextractContext,
    lang_code: str,
    t_node: TemplateNode,
    example: Example,
) -> None:
    """Fill *example* from an ``örnek`` template.

    The ``dil`` parameter is read as a language code and positional
    parameter 1 as the sentence.  The sentence is stored as
    ``example.text`` when the language code equals *lang_code*, otherwise
    as ``example.translation`` when the code is ``"tr"``.  Non-empty
    parameters 2 and 3 set ``example.ref`` (3 overrides 2), and a non-empty
    ``t`` parameter sets ``example.translation``, replacing any value
    stored earlier.
    """
    # https://tr.wiktionary.org/wiki/Şablon:örnek
    params = t_node.template_parameters
    example_lang = clean_node(wxr, None, params.get("dil", ""))
    sentence_arg = params.get(1, "")
    sentence = clean_node(wxr, None, sentence_arg)

    offsets_field = ""
    if example_lang == lang_code:
        example.text = sentence
        offsets_field = "bold_text_offsets"
    elif example_lang == "tr":
        example.translation = sentence
        offsets_field = "bold_translation_offsets"
    if offsets_field != "":
        # Re-parse the raw argument so bold markup can be located.
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(sentence_arg)),
            sentence,
            example,
            offsets_field,
        )

    for ref_index in (2, 3):
        ref_text = clean_node(wxr, None, params.get(ref_index, ""))
        if ref_text:
            example.ref = ref_text

    translation_arg = params.get("t", "")
    translation = clean_node(wxr, None, translation_arg)
    if not translation:
        return
    example.translation = translation
    calculate_bold_offsets(
        wxr,
        wxr.wtp.parse(wxr.wtp.node_to_wikitext(translation_arg)),
        translation,
        example,
        "bold_translation_offsets",
    )
def extract_at_template(
    wxr: WiktextractContext, example: Example, t_node: TemplateNode
) -> None:
    """Fill *example* from an ``AT:`` citation template.

    The first present parameter among ``pasaj``, ``text`` and ``metin`` is
    the passage; the first present among ``anlam``, ``mana``, ``mânâ``,
    ``t`` and ``tercüme`` is the meaning.

    * Passage present: it becomes ``example.text`` and the meaning, if
      any, becomes ``example.translation``.
    * Passage absent: the meaning, if any, becomes ``example.text``.

    ``example.ref`` is set to the first line of the cleaned template
    expansion.  If the expansion is empty, ``example.ref`` is left
    unchanged instead of raising ``IndexError``.
    """
    # Şablon:AT:Kur'an
    params = t_node.template_parameters
    passage_arg = next(
        (arg for arg in ("pasaj", "text", "metin") if arg in params), None
    )
    meaning_arg = next(
        (
            arg
            for arg in ("anlam", "mana", "mânâ", "t", "tercüme")
            if arg in params
        ),
        None,
    )

    if passage_arg is not None:
        passage_value = params[passage_arg]
        example.text = clean_node(wxr, None, passage_value)
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(passage_value)),
            example.text,
            example,
            "bold_text_offsets",
        )
        if meaning_arg is not None:
            meaning_value = params[meaning_arg]
            example.translation = clean_node(wxr, None, meaning_value)
            calculate_bold_offsets(
                wxr,
                wxr.wtp.parse(wxr.wtp.node_to_wikitext(meaning_value)),
                example.translation,
                example,
                "bold_translation_offsets",
            )
    elif meaning_arg is not None:
        # Without a passage the meaning parameter carries the example text.
        meaning_value = params[meaning_arg]
        example.text = clean_node(wxr, None, meaning_value)
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(meaning_value)),
            example.text,
            example,
            "bold_text_offsets",
        )

    # "".splitlines() is [], so indexing [0] unguarded would raise.
    ref_lines = clean_node(wxr, None, t_node).splitlines()
    if ref_lines:
        example.ref = ref_lines[0]
# Named parameters of the kt/mt templates that extract_kt_template joins,
# in this order and separated by ", ", to build the example reference.
KT_REF_FIELDS = (
    "yazar",  # author
    "başlık",  # title
    "dergi",  # magazine
    "sayı",  # issue
    "yıl",  # year
    "tarih",  # date
    "sayfa",  # page
    "yayıncı",  # publisher
)
def extract_kt_template(
    wxr: WiktextractContext,
    t_node: TemplateNode,
    example: Example,
) -> None:
    """Fill *example* from a ``kt`` or ``mt`` quotation template.

    A non-empty ``tanıklık`` parameter becomes ``example.text``.  The
    non-empty values of the ``KT_REF_FIELDS`` parameters are joined with
    ``", "`` and stored as ``example.ref``, but only when no reference has
    been set yet.
    """
    # https://tr.wiktionary.org/wiki/Şablon:kt (book) and Şablon:mt (magazine).
    # tanıklık = example sentence; remaining fields = ref.
    params = t_node.template_parameters
    quote_arg = params.get("tanıklık", "")
    quote = clean_node(wxr, None, quote_arg)
    if quote:
        example.text = quote
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(quote_arg)),
            quote,
            example,
            "bold_text_offsets",
        )

    cleaned_fields = [
        clean_node(wxr, None, params.get(field_name, ""))
        for field_name in KT_REF_FIELDS
    ]
    ref_parts = [value for value in cleaned_fields if value]
    if example.ref == "" and ref_parts:
        example.ref = ", ".join(ref_parts)
# Pattern for a plain quoted example followed by an optional reference:
#   - an opening quote (ASCII " or “), then the lazily captured group "text",
#     then a closing quote (ASCII " or ”);
#   - an optional dash separator (-, – or —);
#   - the optional, lazily captured group "ref" running to the end of input.
# re.DOTALL lets "." match newlines, so either group may span several lines.
QUOTED_EXAMPLE_RE = re.compile(
    r'^\s*[\"“"]\s*(?P<text>.+?)\s*[\"”"]\s*'
    r'(?:[-–—]\s*)?(?P<ref>.+?)?\s*$',
    re.DOTALL,
)
def extract_quoted_plain_example(raw: str, example: Example) -> None:
    """Parse a plain ``"..." - Author`` line into *example*.

    Nothing is changed when *raw* is blank, does not match
    ``QUOTED_EXAMPLE_RE``, or yields an empty quoted text.  Otherwise the
    quoted text becomes ``example.text``.  If ``example.ref`` is still
    empty, the remainder after the quote is stored as the reference, with
    leading dashes and surrounding parentheses stripped.
    """
    # Plain `"..." - Author` lines that use no italic/template markup.
    stripped = raw.strip()
    matched = QUOTED_EXAMPLE_RE.match(stripped) if stripped != "" else None
    if matched is None:
        return

    quoted = (matched.group("text") or "").strip()
    if not quoted:
        return
    example.text = quoted

    # Never overwrite a reference that was already set.
    if example.ref != "":
        return
    source = matched.group("ref") or ""
    source = source.strip().lstrip("-–—").strip("()").strip()
    if source:
        example.ref = source