Coverage for src/wiktextract/extractor/en/translations.py: 88% (236 statements)
coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
# Code related to parsing translations
#
# Copyright (c) 2019-2022 Tatu Ylonen. See file LICENSE and https://ylonen.org

import copy
import re
from typing import Optional

from mediawiki_langcodes import code_to_name, name_to_code
from wikitextprocessor import MAGIC_FIRST, MAGIC_LAST

from ...datautils import data_append, data_extend, split_at_comma_semi
from ...wxr_context import WiktextractContext
from .form_descriptions import (
    classify_desc,
    decode_tags,
    nested_translations_re,
    parse_translation_desc,
    tr_note_re,
)
from .type_utils import TranslationData, WordData

# Maps language names in translations to actual language names.
# E.g., "Apache" is not a language name, but "Apachean" is.
tr_langname_map = {
    "Apache": "Apachean",
    "Lutshootseed": "Lushootseed",
    "Old Assamese": "Early Assamese",
}

# These names will be interpreted as script names or dialect names
# when used as a second-level name in translations. Some script names
# are also valid language names, but it looks like the ones that are
# also script names aren't used on the second level as language names.
# These will not be interpreted as a separate language, but will instead
# be included under the parent language with the script/dialect as a tag
# (with spaces replaced by hyphens).
script_and_dialect_names = set(
    [
        # Scripts
        "ALUPEC",
        "Adlam",
        "Arabic",  # Script for Kashmiri
        "Bengali",
        "Burmese",
        "Carakan",
        "CJKV Characters",
        "Cyrillic",
        "Devanagari",
        "Glagolitic",
        "Gurmukhi",
        "Hebrew",  # For Aramaic
        "Jawi",
        "Khmer",
        "Latin",
        "Mongolian",
        "Roman",
        "Shahmukhi",
        "Sinhalese",
        "Syriac",  # For Aramaic
        "Classical Syriac",  # For Aramaic
        "Taraškievica",
        "Thai",
        "Uyghurjin",
        # Chinese dialects/languages
        "Cantonese",  # Variant of Chinese
        "Dungan",  # Chinese
        "Gan",  # Chinese
        "Hakka",  # Chinese
        "Hokkien",  # Chinese
        "Jin",  # Chinese
        "Mandarin",  # Chinese
        "Min Bei",  # Chinese
        "Min Dong",  # Chinese
        "Min Nan",  # Chinese
        "Wu",  # Chinese
        "Xiang",  # Chinese
        "Jianghuai Mandarin",  # Chinese
        "Jilu Mandarin",  # Chinese
        "Jin Mandarin",  # Chinese
        "Northern Mandarin",  # Chinese
        "Southwestern Mandarin",  # Chinese
        "Taiwanese Mandarin",  # Chinese
        "Coastal Min",  # Chinese
        "Inland Min",  # Chinese
        "Leizhou Min",  # Chinese
        "Min",  # Chinese
        "Puxian Min",  # Chinese
        "Shanghainese Wu",  # Chinese
        "Wenzhou Wu",  # Chinese
        "Wenzhou",  # Chinese
        "Hsinchu Hokkien",  # Chinese
        "Jinjiang Hokkien",  # Chinese
        "Kaohsiung Hokkien",  # Chinese
        "Pinghua",  # Chinese
        "Eastern Punjabi",
        "Western Punjabi",
        # Various countries/regions
        "Alsace",
        "Bavaria",
        "Belgium",
        "Canada",
        "Central",
        "Cologne",
        "Fogo",
        "Föhr",
        "Föhr-Amrum",
        "Hallig",
        "Helgoland",
        "Heligoland",
        "Santiago",
        "Sylt",
        "Mooring",
        "Vancouver Island",
        "Wiedingharde",
        "Anpezan",  # Variant of Ladin
        "Badiot",  # Ladin
        "Fascian",  # Ladin
        "Fodom",  # Ladin
        "Gherdëina",  # Ladin
        "Anbarani",  # Variant of Talysh
        "Asalemi",  # Variant of Talysh
        "Alemannic German",  # Variant of German
        "Rhine Franconian",  # Variant of German
        "German Low German",  # Variant of Low German
        "Campidanese",  # Variant of Sardinian
        "Logudorese",  # Variant of Sardinian
        "Digor",  # Variant of Ossetian
        "Iron",  # Variant of Ossetian
        "Northern Puebla",  # Variant of Nahuatl
        "Mecayapan",  # Variant of Nahuatl
        "Egyptian Arabic",  # Variant of Arabic
        "Gulf Arabic",  # Variant of Arabic
        "Hijazi Arabic",  # Variant of Arabic
        "Moroccan Arabic",  # Variant of Arabic
        "North Levantine Arabic",  # Variant of Arabic
        "South Levantine Arabic",  # Variant of Arabic
        "Alviri",  # Variant of Alviri-Vidari
        "Vidari",  # Variant of Alviri-Vidari
        "Tashelhit",  # Variant of Berber
        "Bokmål",  # Variant of Norwegian
        "Nynorsk",  # Variant of Norwegian
        "Mycenaean",  # Variant of Greek
        # Language varieties
        "Ancient",
        "Classical",
        "Draweno-Polabian",
        "Literary",
        "Lower",
        "Manitoba Saulteux",
        "Modern",
        "Modern Polabian",
        "Modified traditional",
        "Northern",
        "Northern and Southern",
        "Old Polabian",
        "Simplified",
        "Southern",
        "Traditional",
        "Western",
        "1708",
        "1918",
    ]
)
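
# E.g., under a top-level "Kashmiri" item, a nested "Arabic:" sub-item is
# kept as Kashmiri with the tag "Arabic" (the script), rather than being
# parsed as the Arabic language (cf. the "Script for Kashmiri" entry above).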

# These names should be interpreted as tags (as listed in the value,
# space-separated) in second-level translations.
tr_second_tagmap = {
    "Föhr-Amrum, Bökingharde": "Föhr-Amrum Bökingharde",
    "Halligen, Goesharde, Karrhard": "Halligen Goesharde Karrhard",
    "Föhr-Amrum and Sylt dialect": "Föhr-Amrum Sylt",
    "Hallig and Mooring": "Hallig Mooring",
    "Föhr-Amrum & Mooring": "Föhr-Amrum Mooring",
}
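
# E.g., a second-level "Föhr-Amrum and Sylt dialect:" item yields the tags
# ["Föhr-Amrum", "Sylt"] on the translations under it (the value is split on
# spaces where this map is applied below).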

# Ignore translations that start with one of these
tr_ignore_prefixes = [
    "+",
    "Different structure used",
    "Literally",
    "No equivalent",
    "Not used",
    "Please add this translation if you can",
    "See: ",
    "Use ",
    "[Book Pahlavi needed]",
    "[book pahlavi needed]",
    "[script needed]",
    "different structure used",
    "e.g.",
    "lit.",
    "literally",
    "no equivalent",
    "normally ",
    "not used",
    "noun compound ",
    "please add this translation if you can",
    "prefix ",
    "see: ",
    "suffix ",
    "use ",
    "usually ",
]

# Ignore translations that contain one of these anywhere (case-sensitive).
# More precisely, such translations are put in the "note" field rather than
# in "word".
tr_ignore_contains = [
    "usually expressed with ",
    " can be used ",
    " construction used",
    " used with ",
    " + ",
    "genitive case",
    "dative case",
    "nominative case",
    "accusative case",
    "absolute state",
    "infinitive of ",
    "participle of ",
    "for this sense",
    "depending on the circumstances",
    "expressed with ",
    " expression ",
    " means ",
    " is used",
    " — ",  # Used to give example sentences
    " translation",
    "not attested",
    "grammatical structure",
    "construction is used",
    "tense used",
    " lit.",
    " literally",
    "dative",
    "accusative",
    "genitive",
    "essive",
    "partitive",
    "translative",
    "elative",
    "inessive",
    "illative",
    "adessive",
    "ablative",
    "allative",
    "abessive",
    "comitative",
    "instructive",
    "particle",
    "predicative",
    "attributive",
    "preposition",
    "postposition",
    "prepositional",
    "postpositional",
    "prefix",
    "suffix",
    "translated",
]

# Ignore translations that match one of these regular expressions
tr_ignore_regexps = [
    r"^\[[\d,]+\]$",
    r"\?\?$",
    r"^\s*$",
]
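
# E.g., a bare sense-number reference like "[1,2]", anything ending in "??",
# and whitespace-only items are dropped by these patterns.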

# If a translation matches this regexp (with re.search), we print a debug
# message
tr_suspicious_re = re.compile(
    r" [mf][12345]$|"
    + r" [mfnc]$|"
    + r" (pf|impf|vir|nvir|anml|anim|inan|sg|pl)$|"
    + "|".join(
        re.escape(x)
        for x in [
            "; ",
            "* ",
            ": ",
            "[",
            "]",
            "{",
            "}",
            "/",
            "^",
            "literally",
            "lit.",
            # XXX check occurrences of ⫽, seems to be used as verb-object
            # separator but shouldn't really be part of the canonical form.
            # See e.g. 打工/Chinese
            "⫽",
            "also expressed with",
            "e.g.",
            "cf.",
            "used ",
            "script needed",
            "please add this translation",
            "usage ",
        ]
    )
)
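
# E.g., a translation left as "palabra f" (a stray trailing gender marker) or
# one still containing "[" or "{" brackets would be flagged as suspicious.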

# Regular expression to be searched from translation (with re.search) to check
# if it should be ignored.
tr_ignore_re = re.compile(
    "^("
    + "|".join(re.escape(x) for x in tr_ignore_prefixes)
    + ")|"
    + "|".join(re.escape(x) for x in tr_ignore_contains)
    + "|"
    + "|".join(tr_ignore_regexps)
)  # These are not to be escaped
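
# Note that the prefixes are anchored at the start ("^(...)"), while the
# "contains" strings and the raw regexps may match anywhere in the text.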

# These English texts get converted to tags in translations
english_to_tags = {
    "I have": "first-person singular",
    "you have": "second-person singular",
    "she has": "third-person singular feminine",
    "he has": "third-person singular masculine",
}
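
# E.g., an English gloss "I have" captured into the "english" field is
# replaced by the tags ["first-person", "singular"] (the value is split on
# spaces where this mapping is applied below).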


def parse_translation_item_text(
    wxr: WiktextractContext,
    word: str,
    data: WordData,
    item: str,
    sense: Optional[str],
    lang: Optional[str],
    langcode: Optional[str],
    translations_from_template: list[str],
    is_reconstruction: bool,
) -> Optional[str]:
    assert isinstance(wxr, WiktextractContext)
    assert isinstance(word, str)
    assert isinstance(data, dict)
    assert isinstance(item, str)
    assert sense is None or isinstance(sense, str)
    assert lang is None or isinstance(lang, str)  # Parent item language
    assert langcode is None or isinstance(langcode, str)  # Template langcode
    assert isinstance(translations_from_template, list)
    for x in translations_from_template:
        assert isinstance(x, str)
    assert is_reconstruction in (True, False)

    # print("parse_translation_item_text: {!r} lang={}"
    #       " langcode={}".format(item, lang, langcode))

    if not item:
        return None

    # Find and remove nested translations from the item
    nested = list(m.group(1) for m in re.finditer(nested_translations_re, item))
    if nested:
        item = re.sub(nested_translations_re, "", item)

    if re.search(r"\(\d+\)|\[\d+\]", item) and "numeral:" not in item:
        wxr.wtp.debug(
            "possible sense number in translation item: {}".format(item),
            sortid="translations/324",
        )

    # Translation items should start with a language name (except that some
    # nested translation items don't, relying on the language name from the
    # higher level, and some append a language variant name to a broader
    # language name)
    extra_langcodes = set()
    if lang and name_to_code(lang, "en") != "":
        lang_code = name_to_code(lang, "en")
        extra_langcodes.add(lang_code)
        # Canonicalize language name (we could have gotten it via
        # alias or other_names)
        lang = code_to_name(lang_code, "en")
    m = re.match(r"\*?\s*([-' \w][-'&, \w()]*)[:：]\s*", item)
    tags = []
    if m:
        sublang = m.group(1).strip()
        language_name_variations: list[str] = list()
        if lang and sublang:
            lang_sublang = lang + " " + sublang
            sublang_lang = sublang + " " + lang
            language_name_variations.extend(
                (
                    lang_sublang,
                    sublang_lang,
                    lang_sublang.replace(" ", "-"),
                    sublang_lang.replace(" ", "-"),
                )
            )
            if " " in sublang:
                language_name_variations.append(sublang.replace(" ", "-"))
            if "-" in sublang:
                language_name_variations.append(sublang.replace("-", " "))

        if lang is None:
            if sublang == "Note":
                return None
            lang = sublang
        elif lang_sublang and any(
            name_to_code(captured_lang := lang_comb, "en") != ""
            # Python 3.8: catch the value of lang_comb with :=
            for lang_comb in language_name_variations
        ):
            lang = captured_lang
        elif sublang in script_and_dialect_names:
            # If the second-level name is a script name, add it as
            # a tag and keep the top-level language.
            # This helps with languages that have script names on
            # the same level; those scripts may also be valid
            # language names. See leaf/English/Translations/Pali.
            tags.append(sublang.replace(" ", "-"))
        elif sublang in tr_second_tagmap:
            # Certain second-level names are interpreted as tags
            # (mapped to tags). Note that these may still have
            # separate language codes, so additional langcode
            # removal tricks may need to be played below.
            tags.extend(tr_second_tagmap[sublang].split())
        elif name_to_code(sublang, "en") != "":
            lang = sublang
        elif sublang[0].isupper() and classify_desc(sublang) == "tags":
            # Interpret it as a tag
            tags.append(sublang)
        else:
            # We don't recognize this prefix
            wxr.wtp.error(
                "unrecognized prefix (language name?) in "
                "translation item: {}".format(item),
                sortid="translations/369",
            )
            return None
        # Strip the language name/tag from the item
        item = item[m.end() :]
    elif lang is None:
        # No matching language prefix. Check whether the colon is
        # simply missing.
        parts = item.split()
        if len(parts) > 1 and name_to_code(parts[0], "en") != "":
            lang = parts[0]
            item = " ".join(parts[1:])
        else:
            if "__IGNORE__" not in item:
                wxr.wtp.error(
                    "no language name in translation item: {}".format(item),
                    sortid="translations/382",
                )
            return None

    # Map non-standard language names (e.g., "Apache" -> "Apachean")
    lang = tr_langname_map.get(lang, lang)

    # If we didn't get the language code from the template, look it up
    # based on the language name
    if langcode is None and name_to_code(lang, "en") != "":
        langcode = name_to_code(lang, "en")

    # Remove (<langcode>) parts from the item. They seem to be
    # generated by {{t+|...}}.
    if langcode:
        extra_langcodes.add(langcode)
        if "-" in langcode:
            extra_langcodes.add(langcode.split("-")[0])
        if langcode in (
            "zh",
            "yue",
            "cdo",
            "cmn",
            "dng",
            "hak",
            "mnp",
            "nan",
            "wuu",
            "zh-min-nan",
        ):
            extra_langcodes.update(
                [
                    "zh",
                    "yue",
                    "cdo",
                    "cmn",
                    "dng",
                    "hak",
                    "mnp",
                    "nan",
                    "wuu",
                    "zh-min-nan",
                ]
            )
        elif langcode in ("nn", "nb", "no"):
            extra_langcodes.update(["no", "nn", "nb"])
    for x in extra_langcodes:
        item = re.sub(r"\s*\^?\({}\)".format(re.escape(x)), "", item)
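
    # E.g., a {{t+|...}} rendering such as "koira (fi)" becomes "koira"
    # ("fi" is illustrative); for Chinese lects, any of the related codes
    # ("(cmn)", "(yue)", ...) are removed regardless of which one the
    # template actually emitted.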

    # Map translations obtained from templates into magic characters
    # before splitting the translations list. This way, if a comma
    # (or semicolon etc.) was used inside the template, it won't get
    # split. We substitute the original translations back in place of
    # the magic characters after splitting. This kludge improves the
    # robustness of collecting translations for phrases whose
    # translations may contain commas.
    translations_from_template = list(
        sorted(translations_from_template, key=lambda x: len(x), reverse=True)
    )
    tr_mappings = {}
    for i, trt in enumerate(translations_from_template):
        if not trt:
            continue
        ch = chr(MAGIC_FIRST + i)
        rex = re.escape(trt)
        if trt[0].isalnum():
            rex = r"\b" + rex
        if trt[-1].isalnum():
            rex = rex + r"\b"
        item = re.sub(rex, ch, item)
        tr_mappings[ch] = trt
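
    # E.g., if a template produced a phrasal translation containing a comma,
    # the whole phrase is replaced by a single reserved character here so
    # that the comma/semicolon split below cannot break it apart; the
    # substitution is undone right after splitting.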

    # There may be multiple translations, separated by comma
    nested.append(item)
    for item in nested:
        tagsets: list[tuple[str, ...]] = []
        # This never does anything; it's never updated, so it's always empty
        # topics: list[str] = []

        for part in split_at_comma_semi(
            item, extra=[" / ", " ／ ", "/", r"\| furthermore: "]
        ):
            # Substitute the magic characters back to the original
            # translations (this is part of dealing with
            # phrasal translations containing commas).
            part = re.sub(
                r"[{:c}-{:c}]".format(MAGIC_FIRST, MAGIC_LAST),
                lambda m: tr_mappings.get(m.group(0), m.group(0)),
                part,
            )

            if part.endswith(":"):  # E.g. "salt of the earth"/Korean
                part = part[:-1].strip()
            if not part:
                continue

            # Strip language links
            tr: TranslationData = {"lang": lang}
            if langcode:
                tr["code"] = langcode  # DEPRECATED in favor of "lang_code"
                tr["lang_code"] = langcode
            if tags:
                tr["tags"] = list(tags)
                for ttup in tagsets:
                    tr["tags"].extend(ttup)
            # topics is never populated, so it's always empty
            # if topics:
            #     tr["topics"] = list(topics)
            if sense:
                if sense.startswith(
                    (
                        "Translations to be checked",
                        ":The translations below need to be checked",
                    )
                ):
                    continue  # Skip such translations
                else:
                    tr["sense"] = sense

            # Check if this part starts with (tags)
            m = re.match(r"\(([^)]+)\) ", part)
            if m:
                par = m.group(1)
                rest = part[m.end() :]
                cls = classify_desc(par, no_unknown_starts=True)
                if cls == "tags":
                    tagsets2, topics2 = decode_tags(par)
                    for ttup in tagsets2:
                        data_extend(tr, "tags", ttup)
                    data_extend(tr, "topics", topics2)
                    part = rest

            # Check if this part ends with (tags). Note that
            # tr_note_re will mess things up if we rely on this being
            # checked later.
            m = re.search(r" +\(([^)]+)\)$", part)
            if m:
                par = m.group(1)
                rest = part[: m.start()]
                cls = classify_desc(par, no_unknown_starts=True)
                if cls == "tags":
                    tagsets2, topics2 = decode_tags(par)
                    for ttup in tagsets2:
                        data_extend(tr, "tags", ttup)
                    data_extend(tr, "topics", topics2)
                    part = rest

            # Check if this part starts with "<tags/english>: <rest>"
            m = re.match(r"([-\w() ]+): ", part)
            if m:
                par = m.group(1).strip()
                rest = part[m.end() :]
                if par in ("", "see"):
                    part = rest
                else:
                    cls = classify_desc(par)
                    # print("par={!r} cls={!r}".format(par, cls))
                    if cls == "tags":
                        tagsets2, topics2 = decode_tags(par)
                        for ttup in tagsets2:
                            data_extend(tr, "tags", ttup)
                        data_extend(tr, "topics", topics2)
                        part = rest
                    elif cls == "english":
                        if re.search(tr_note_re, par):
                            if "note" in tr:
                                tr["note"] += "; " + par
                            else:
                                tr["note"] = par
                        else:
                            if "translation" in tr and "english" in tr:
                                # DEPRECATED for "translation"
                                tr["english"] += "; " + par
                                tr["translation"] += "; " + par
                            else:
                                # DEPRECATED for "translation"
                                tr["english"] = par
                                tr["translation"] = par
                        part = rest

            # Skip translations that our template_fn says to ignore
            # and those that contain Lua execution errors.
            if "__IGNORE__" in part:
                continue  # Contains something we want to ignore
            if part.startswith("Lua execution error"):
                continue

            # Handle certain suffixes in translations that
            # we might put in "note" but that we can actually
            # parse into tags.
            for suffix, t in (
                (" with dative", "with-dative"),
                (" with genitive", "with-genitive"),
                (" with accusative", "with-accusative"),
                (" in subjunctive", "with-subjunctive"),
                (" and conditional mood", "with-conditional"),
                (" - I have - you have", "first-person second-person singular"),
                (" - I have", "first-person singular"),
                (" - you have", "second-person singular"),
            ):
                if part.endswith(suffix):
                    part = part[: -len(suffix)]
                    data_append(tr, "tags", t)
                    break
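
            # E.g., a part ending in " with dative" loses the suffix and the
            # translation gains the tag "with-dative"; suffixes are tried in
            # order and only the first match is applied.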

            # Handle certain prefixes in translations
            for prefix, t in (("subjunctive of ", "with-subjunctive"),):
                if part.startswith(prefix):
                    part = part[len(prefix) :]
                    data_append(tr, "tags", t)
                    break

            # Skip certain one-character translations entirely
            # (these could result from templates being ignored)
            if part in ",;.":
                continue

            if "english" in tr and tr["english"] in english_to_tags:
                data_extend(tr, "tags", english_to_tags[tr["english"]].split())
                del tr["english"]  # DEPRECATED for "translation"
                if "translation" in tr:
                    del tr["translation"]

            # Certain values indicate it is not actually a translation.
            # See the definition of tr_ignore_re to adjust.
            m = re.search(tr_ignore_re, part)
            w: Optional[str] = None

            if m and (
                m.start() != 0 or m.end() != len(part) or len(part.split()) > 1
            ):
                # This translation will be skipped because it
                # seems to be some kind of explanatory text.
                # However, let's put it in the "note" field
                # instead, unless it is one of the listed fully
                # ignored ones.
                if part in ("please add this translation if you can",):
                    continue
                # Save in note field
                tr["note"] = part
            else:
                # Interpret it as an actual translation
                parse_translation_desc(wxr, lang, part, tr)
                w = tr.get("word")
                if not w:
                    continue  # Not set or empty
                if w.startswith(("*", ":")):
                    w = w[1:].strip()
                if w in ("[Term?]", ":", "/", "?"):
                    continue  # These are not valid linkage targets
                if len(w) > 3 * len(word) + 20:
                    # Accept the translation if the word looks like an
                    # acronym: both "ISBN".isupper() and "I.S.B.N".isupper()
                    # return True, and false positives are unlikely.
                    if not word.isupper():
                        # Likely descriptive text or an example, because
                        # it is much too long.
                        wxr.wtp.debug(
                            "Translation too long compared to word, so"
                            " it is skipped",
                            sortid="translations/609-20230504",
                        )
                        del tr["word"]
                        tr["note"] = w

            # Sanity check: try to detect certain suspicious
            # patterns in translations
            if "word" in tr:
                m = re.search(tr_suspicious_re, tr["word"])
                if m and lang not in (
                    "Bats",  # ^ in tree/English/Tr/Bats
                ):
                    wxr.wtp.debug(
                        "suspicious translation with {!r}: {}".format(
                            m.group(0), tr
                        ),
                        sortid="translations/611",
                    )

            if "tags" in tr:
                tr["tags"] = list(sorted(set(tr["tags"])))

            # If we have only notes, add as-is
            if "word" not in tr:
                data_append(data, "translations", tr)
                continue

            # Split if it contains no spaces
            if w:
                alts = [w]
                if " " not in w:
                    # If no spaces, split by separator
                    alts = re.split(r"/|／", w)
                # Note: there could be remaining slashes, but they are
                # sometimes used in ways we cannot resolve programmatically.
                # Create translations for each alternative.
                for alt in alts:
                    alt = alt.strip()
                    tr1 = copy.deepcopy(tr)
                    if alt.startswith("*") or alt.startswith(":"):
                        alt = alt[1:].strip()
                    if not alt:
                        continue
                    tr1["word"] = alt
                    data_append(data, "translations", tr1)

    # Return the language name, in case we have subitems
    return lang
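

# A hypothetical call (a sketch; the argument values and the resulting entry
# are illustrative, not taken from a real extraction run):
#
#     parse_translation_item_text(
#         wxr, "dog", data, "Finnish: koira", sense="canine",
#         lang=None, langcode=None,
#         translations_from_template=["koira"], is_reconstruction=False,
#     )
#
# would append roughly {"lang": "Finnish", "code": "fi", "lang_code": "fi",
# "sense": "canine", "word": "koira"} to data["translations"] and return
# "Finnish", so that nested sub-items can inherit the language.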