Coverage for src/wiktextract/extractor/en/translations.py: 63% (231 statements)


# Code related to parsing translations
#
# Copyright (c) 2019-2022 Tatu Ylonen. See file LICENSE and https://ylonen.org

import copy
import re
from typing import Optional

from mediawiki_langcodes import code_to_name, name_to_code
from wikitextprocessor import MAGIC_FIRST, MAGIC_LAST

from ...datautils import data_append, data_extend, split_at_comma_semi
from ...wxr_context import WiktextractContext
from .form_descriptions import (
    classify_desc,
    decode_tags,
    nested_translations_re,
    parse_translation_desc,
    tr_note_re,
)
from .type_utils import TranslationData, WordData

# Maps language names in translations to actual language names.
# E.g., "Apache" is not a language name, but "Apachean" is.
tr_langname_map = {
    "Apache": "Apachean",
    "Lutshootseed": "Lushootseed",
    "Old Assamese": "Early Assamese",
}

# These names will be interpreted as script names or dialect names
# when used as a second-level name in translations. Some script names
# are also valid language names, but it looks like the ones that are
# also script names aren't used on the second level as language names.
# These will not be interpreted as a separate language, but will instead
# be included under the parent language with the script/dialect as a tag
# (with spaces replaced by hyphens).
script_and_dialect_names = set(
    [
        # Scripts
        "ALUPEC",
        "Adlam",
        "Arabic",  # Script for Kashmiri
        "Bengali",
        "Burmese",
        "Carakan",
        "CJKV Characters",
        "Cyrillic",
        "Devanagari",
        "Glagolitic",
        "Gurmukhi",
        "Hebrew",  # For Aramaic
        "Jawi",
        "Khmer",
        "Latin",
        "Mongolian",
        "Roman",
        "Shahmukhi",
        "Sinhalese",
        "Syriac",  # For Aramaic
        "Classical Syriac",  # For Aramaic
        "Taraškievica",
        "Thai",
        "Uyghurjin",
        # Chinese dialects/languages
        "Cantonese",  # Variant of Chinese
        "Dungan",  # Chinese
        "Gan",  # Chinese
        "Hakka",  # Chinese
        "Hokkien",  # Chinese
        "Jin",  # Chinese
        "Mandarin",  # Chinese
        "Min Bei",  # Chinese
        "Min Dong",  # Chinese
        "Min Nan",  # Chinese
        "Wu",  # Chinese
        "Xiang",  # Chinese
        "Jianghuai Mandarin",  # Chinese
        "Jilu Mandarin",  # Chinese
        "Jin Mandarin",  # Chinese
        "Northern Mandarin",  # Chinese
        "Southwestern Mandarin",  # Chinese
        "Taiwanese Mandarin",  # Chinese
        "Coastal Min",  # Chinese
        "Inland Min",  # Chinese
        "Leizhou Min",  # Chinese
        "Min",  # Chinese
        "Puxian Min",  # Chinese
        "Shanghainese Wu",  # Chinese
        "Wenzhou Wu",  # Chinese
        "Wenzhou",  # Chinese
        "Hsinchu Hokkien",  # Chinese
        "Jinjiang Hokkien",  # Chinese
        "Kaohsiung Hokkien",  # Chinese
        "Pinghua",  # Chinese
        "Eastern Punjabi",
        "Western Punjabi",
        # Various countries/regions
        "Alsace",
        "Bavaria",
        "Belgium",
        "Canada",
        "Central",
        "Cologne",
        "Fogo",
        "Föhr",
        "Föhr-Amrum",
        "Hallig",
        "Helgoland",
        "Heligoland",
        "Santiago",
        "Sylt",
        "Mooring",
        "Vancouver Island",
        "Wiedingharde",
        "Anpezan",  # Variant of Ladin
        "Badiot",  # Ladin
        "Fascian",  # Ladin
        "Fodom",  # Ladin
        "Gherdëina",  # Ladin
        "Anbarani",  # Variant of Talysh
        "Asalemi",  # Variant of Talysh
        "Alemannic German",  # Variant of German
        "Rhine Franconian",  # Variant of German
        "German Low German",  # Variant of Low German
        "Campidanese",  # Variant of Sardinian
        "Logudorese",  # Variant of Sardinian
        "Digor",  # Variant of Ossetian
        "Iron",  # Variant of Ossetian
        "Northern Puebla",  # Variant of Nahuatl
        "Mecayapan",  # Variant of Nahuatl
        "Egyptian Arabic",  # Variant of Arabic
        "Gulf Arabic",  # Variant of Arabic
        "Hijazi Arabic",  # Variant of Arabic
        "Moroccan Arabic",  # Variant of Arabic
        "North Levantine Arabic",  # Variant of Arabic
        "South Levantine Arabic",  # Variant of Arabic
        "Alviri",  # Variant of Alviri-Vidari
        "Vidari",  # Variant of Alviri-Vidari
        "Tashelhit",  # Variant of Berber
        "Bokmål",  # Variant of Norwegian
        "Nynorsk",  # Variant of Norwegian
        "Mycenaean",  # Variant of Greek
        # Language varieties
        "Ancient",
        "Classical",
        "Draweno-Polabian",
        "Literary",
        "Lower",
        "Manitoba Saulteux",
        "Modern",
        "Modern Polabian",
        "Modified traditional",
        "Northern",
        "Northern and Southern",
        "Old Polabian",
        "Simplified",
        "Southern",
        "Traditional",
        "Western",
        "1708",
        "1918",
    ]
)
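# For example, a second-level "Cyrillic: ..." line is kept under its parent
# language with the tag "Cyrillic", and "Min Nan: ..." under Chinese gets
# the tag "Min-Nan" (spaces replaced by hyphens).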

# These names should be interpreted as the space-separated tags given in
# the value when they appear as a second-level name in translations.
tr_second_tagmap = {
    "Föhr-Amrum, Bökingharde": "Föhr-Amrum Bökingharde",
    "Halligen, Goesharde, Karrhard": "Halligen Goesharde Karrhard",
    "Föhr-Amrum and Sylt dialect": "Föhr-Amrum Sylt",
    "Hallig and Mooring": "Hallig Mooring",
    "Föhr-Amrum & Mooring": "Föhr-Amrum Mooring",
}
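# E.g., a "Hallig and Mooring:" sub-item is kept under the parent language
# with the two tags "Hallig" and "Mooring".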

# Ignore translations that start with one of these
tr_ignore_prefixes = [
    "+",
    "Different structure used",
    "Literally",
    "No equivalent",
    "Not used",
    "Please add this translation if you can",
    "See: ",
    "Use ",
    "[Book Pahlavi needed]",
    "[book pahlavi needed]",
    "[script needed]",
    "different structure used",
    "e.g.",
    "lit.",
    "literally",
    "no equivalent",
    "normally ",
    "not used",
    "noun compound ",
    "please add this translation if you can",
    "prefix ",
    "see: ",
    "suffix ",
    "use ",
    "usually ",
]

# Translations that contain one of these anywhere (case-sensitive) are not
# treated as translations; they are put in the "note" field rather than
# in "word".
tr_ignore_contains = [
    "usually expressed with ",
    " can be used ",
    " construction used",
    " used with ",
    " + ",
    "genitive case",
    "dative case",
    "nominative case",
    "accusative case",
    "absolute state",
    "infinitive of ",
    "participle of ",
    "for this sense",
    "depending on the circumstances",
    "expressed with ",
    " expression ",
    " means ",
    " is used",
    " — ",  # Used to give example sentences
    " translation",
    "not attested",
    "grammatical structure",
    "construction is used",
    "tense used",
    " lit.",
    " literally",
    "dative",
    "accusative",
    "genitive",
    "essive",
    "partitive",
    "translative",
    "elative",
    "inessive",
    "illative",
    "adessive",
    "ablative",
    "allative",
    "abessive",
    "comitative",
    "instructive",
    "particle",
    "predicative",
    "attributive",
    "preposition",
    "postposition",
    "prepositional",
    "postpositional",
    "prefix",
    "suffix",
    "translated",
]

# Ignore translations that match one of these regular expressions
tr_ignore_regexps = [
    r"^\[[\d,]+\]$",
    r"\?\?$",
    r"^\s*$",
]

# If a translation matches this regexp (with re.search), we print a debug
# message
tr_suspicious_re = re.compile(
    r" [mf][12345]$|"
    + r" [mfnc]$|"
    + r" (pf|impf|vir|nvir|anml|anim|inan|sg|pl)$|"
    + "|".join(
        re.escape(x)
        for x in [
            "; ",
            "* ",
            ": ",
            "[",
            "]",
            "{",
            "}",
            "/",
            "^",
            "literally",
            "lit.",
            # XXX check occurrences of ⫽; it seems to be used as a verb-object
            # separator but shouldn't really be part of the canonical form.
            # See e.g. 打工/Chinese
            "⫽",
            "also expressed with",
            "e.g.",
            "cf.",
            "used ",
            "script needed",
            "please add this translation",
            "usage ",
        ]
    )
)
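# E.g., a translation ending in " m" (a stray gender marker) or containing
# "[" or "{" would be flagged here.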

# Regular expression searched for in a translation (with re.search) to check
# whether it should be ignored.
tr_ignore_re = re.compile(
    "^("
    + "|".join(re.escape(x) for x in tr_ignore_prefixes)
    + ")|"
    + "|".join(re.escape(x) for x in tr_ignore_contains)
    + "|"
    + "|".join(tr_ignore_regexps)
)  # These are not to be escaped
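# For example, "not used in this sense" is caught by the prefix list and
# "takes the genitive case" by the contains list; both end up in "note"
# rather than "word".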

# These English texts get converted to tags in translations
english_to_tags = {
    "I have": "first-person singular",
    "you have": "second-person singular",
    "she has": "third-person singular feminine",
    "he has": "third-person singular masculine",
}
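# E.g., an English gloss "I have" is dropped and replaced by the tags
# "first-person" and "singular".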


def parse_translation_item_text(
    wxr: WiktextractContext,
    word: str,
    data: WordData,
    item: str,
    sense: Optional[str],
    lang: Optional[str],
    langcode: Optional[str],
    translations_from_template: list[str],
    is_reconstruction: bool,
) -> Optional[str]:
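    """Parse a single translation list item and append the results to
    data["translations"].  Returns the language name so that possible
    sub-items can inherit it, or None if the item could not be parsed.
    """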

    assert isinstance(wxr, WiktextractContext)
    assert isinstance(word, str)
    assert isinstance(data, dict)
    assert isinstance(item, str)
    assert sense is None or isinstance(sense, str)
    assert lang is None or isinstance(lang, str)  # Parent item language
    assert langcode is None or isinstance(langcode, str)  # Template langcode
    assert isinstance(translations_from_template, list)
    for x in translations_from_template:
        assert isinstance(x, str)
    assert is_reconstruction in (True, False)

    # print("parse_translation_item_text: {!r} lang={}"
    #       " langcode={}".format(item, lang, langcode))

    if not item:
        return None

    # Find and remove nested translations from the item
    nested = list(m.group(1) for m in re.finditer(nested_translations_re, item))
    if nested:
        item = re.sub(nested_translations_re, "", item)

    if re.search(r"\(\d+\)|\[\d+\]", item) and "numeral:" not in item:
        wxr.wtp.debug(
            "possible sense number in translation item: {}".format(item),
            sortid="translations/324",
        )

    # Translation items should start with a language name (though some
    # nested translation items rely on the language name from the higher
    # level instead, and some append a language variant name to a broader
    # language name)
    extra_langcodes = set()
    if lang and name_to_code(lang, "en") != "":
        lang_code = name_to_code(lang, "en")
        extra_langcodes.add(lang_code)
        # Canonicalize language name (we could have gotten it via
        # alias or other_names)
        lang = code_to_name(lang_code, "en")
    m = re.match(r"\*?\s*([-' \w][-'&, \w()]*)[:：]\s*", item)
    tags = []
    if m:
        sublang = m.group(1).strip()
        language_name_variations: list[str] = list()
        if lang and sublang:
            lang_sublang = lang + " " + sublang
            sublang_lang = sublang + " " + lang
            language_name_variations.extend(
                (
                    lang_sublang,
                    sublang_lang,
                    lang_sublang.replace(" ", "-"),
                    sublang_lang.replace(" ", "-"),
                )
            )
            if " " in sublang:
                language_name_variations.append(sublang.replace(" ", "-"))
            if "-" in sublang:
                language_name_variations.append(sublang.replace("-", " "))

        if lang is None:
            if sublang == "Note":
                return None
            lang = sublang
        elif lang_sublang and any(
            name_to_code(captured_lang := lang_comb, "en") != ""
            # Python 3.8: catch the value of lang_comb with :=
            for lang_comb in language_name_variations
        ):
            lang = captured_lang
        elif sublang in script_and_dialect_names:
            # If the second-level name is a script name, add it as a
            # tag and keep the top-level language.
            # This helps with languages that have script names on the
            # same level; those scripts may also be valid language
            # names. See leaf/English/Translations/Pali.
            tags.append(sublang.replace(" ", "-"))
        elif sublang in tr_second_tagmap:
            # Certain second-level names are interpreted as tags
            # (mapped to tags). Note that these may still have
            # separate language codes, so additional langcode
            # removal tricks may need to be played below.
            tags.extend(tr_second_tagmap[sublang].split())
        elif name_to_code(sublang, "en") != "":
            lang = sublang
        elif sublang[0].isupper() and classify_desc(sublang) == "tags":
            # Interpret it as a tag
            tags.append(sublang)
        else:
            # We don't recognize this prefix
            wxr.wtp.error(
                "unrecognized prefix (language name?) in "
                "translation item: {}".format(item),
                sortid="translations/369",
            )
            return None
        # Strip the language name/tag from the item
        item = item[m.end() :]
    elif lang is None:
        # No matching language prefix. Check whether the colon is just
        # missing.
        parts = item.split()
        if len(parts) > 1 and name_to_code(parts[0], "en") != "":
            lang = parts[0]
            item = " ".join(parts[1:])
        else:
            if "__IGNORE__" not in item:
                wxr.wtp.error(
                    "no language name in translation item: {}".format(item),
                    sortid="translations/382",
                )
            return None

    # Map non-standard language names (e.g., "Apache" -> "Apachean")
    lang = tr_langname_map.get(lang, lang)

    # If we didn't get a language code from the template, look it up
    # based on the language name
    if langcode is None and name_to_code(lang, "en") != "":
        langcode = name_to_code(lang, "en")

    # Remove (<langcode>) parts from the item. They seem to be
    # generated by {{t+|...}}.
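    # E.g., a trailing "(fi)" after a Finnish translation gets removed below.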
    if langcode:
        extra_langcodes.add(langcode)
        if "-" in langcode:
            extra_langcodes.add(langcode.split("-")[0])
        if langcode in (
            "zh",
            "yue",
            "cdo",
            "cmn",
            "dng",
            "hak",
            "mnp",
            "nan",
            "wuu",
            "zh-min-nan",
        ):
            extra_langcodes.update(
                [
                    "zh",
                    "yue",
                    "cdo",
                    "cmn",
                    "dng",
                    "hak",
                    "mnp",
                    "nan",
                    "wuu",
                    "zh-min-nan",
                ]
            )
        elif langcode in ("nn", "nb", "no"):
            extra_langcodes.update(["no", "nn", "nb"])
        for x in extra_langcodes:
            item = re.sub(r"\s*\^?\({}\)".format(re.escape(x)), "", item)

    # Map translations obtained from templates into magic characters
    # before splitting the translations list. This way, if a comma
    # (or semicolon etc.) was used inside the template, it won't get
    # split. We restore the magic characters into the original
    # translations after splitting. This kludge improves the robustness
    # of collecting translations for phrases whose translations
    # may contain commas.
    translations_from_template = list(
        sorted(translations_from_template, key=lambda x: len(x), reverse=True)
    )
    tr_mappings = {}
    for i, trt in enumerate(translations_from_template):
        if not trt:
            continue
        ch = chr(MAGIC_FIRST + i)
        rex = re.escape(trt)
        if trt[0].isalnum():
            rex = r"\b" + rex
        if trt[-1].isalnum():
            rex = rex + r"\b"
        item = re.sub(rex, ch, item)
        tr_mappings[ch] = trt
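    # At this point each template-produced translation (e.g. a phrase with
    # an internal comma) is represented in item by a single magic character,
    # so the comma split below cannot break it apart.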

    # There may be multiple translations, separated by comma
    nested.append(item)
    for item in nested:
        tagsets: list[tuple[str, ...]] = []
        # This never does anything; it's never updated, so it's always empty
        # topics: list[str] = []

        for part in split_at_comma_semi(
            item, extra=[" / ", " ／ ", "/", r"\| furthermore: "]
        ):
            # Substitute the magic characters back to original
            # translations (this is part of dealing with
            # phrasal translations containing commas).
            part = re.sub(
                r"[{:c}-{:c}]".format(MAGIC_FIRST, MAGIC_LAST),
                lambda m: tr_mappings.get(m.group(0), m.group(0)),
                part,
            )

            if part.endswith(":"):  # E.g. "salt of the earth"/Korean
                part = part[:-1].strip()
            if not part:
                continue

            # Strip language links
            tr: TranslationData = {"lang": lang}
            if langcode:
                tr["code"] = langcode
            if tags:
                tr["tags"] = list(tags)
                for ttup in tagsets:
                    tr["tags"].extend(ttup)
            # topics is never populated, so it's always empty
            # if topics:
            #     tr["topics"] = list(topics)
            if sense:
                if sense.startswith(
                    (
                        "Translations to be checked",
                        ":The translations below need to be checked",
                    )
                ):
                    continue  # Skip such translations
                else:
                    tr["sense"] = sense

            # Check if this part starts with (tags)
            m = re.match(r"\(([^)]+)\) ", part)
            if m:
                par = m.group(1)
                rest = part[m.end() :]
                cls = classify_desc(par, no_unknown_starts=True)
                if cls == "tags":
                    tagsets2, topics2 = decode_tags(par)
                    for ttup in tagsets2:
                        data_extend(tr, "tags", ttup)
                    data_extend(tr, "topics", topics2)
                    part = rest

            # Check if this part ends with (tags). Note that
            # tr_note_re will mess things up if we rely on this being
            # checked later.
            m = re.search(r" +\(([^)]+)\)$", part)
            if m:
                par = m.group(1)
                rest = part[: m.start()]
                cls = classify_desc(par, no_unknown_starts=True)
                if cls == "tags":
                    tagsets2, topics2 = decode_tags(par)
                    for ttup in tagsets2:
                        data_extend(tr, "tags", ttup)
                    data_extend(tr, "topics", topics2)
                    part = rest

            # Check if this part starts with "<tags/english>: <rest>"
            m = re.match(r"([-\w() ]+): ", part)
            if m:
                par = m.group(1).strip()
                rest = part[m.end() :]
                if par in ("", "see"):
                    part = rest
                else:
                    cls = classify_desc(par)
                    # print("par={!r} cls={!r}".format(par, cls))
                    if cls == "tags":
                        tagsets2, topics2 = decode_tags(par)
                        for ttup in tagsets2:
                            data_extend(tr, "tags", ttup)
                        data_extend(tr, "topics", topics2)
                        part = rest
                    elif cls == "english":
                        if re.search(tr_note_re, par):
                            if "note" in tr:
                                tr["note"] += "; " + par
                            else:
                                tr["note"] = par
                        else:
                            if "english" in tr:
                                tr["english"] += "; " + par
                            else:
                                tr["english"] = par
                        part = rest

            # Skip translations that our template_fn says to ignore
            # and those that contain Lua execution errors.
            if "__IGNORE__" in part:
                continue  # Contains something we want to ignore
            if part.startswith("Lua execution error"):
                continue

            # Handle certain suffixes in translations that
            # we might put in "note" but that we can actually
            # parse into tags.
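            # E.g., a translation ending in " with dative" loses the suffix
            # and gains the tag "with-dative".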
            for suffix, t in (
                (" with dative", "with-dative"),
                (" with genitive", "with-genitive"),
                (" with accusative", "with-accusative"),
                (" in subjunctive", "with-subjunctive"),
                (" and conditional mood", "with-conditional"),
                (" - I have - you have", "first-person second-person singular"),
                (" - I have", "first-person singular"),
                (" - you have", "second-person singular"),
            ):
                if part.endswith(suffix):
                    part = part[: -len(suffix)]
                    data_append(tr, "tags", t)
                    break

            # Handle certain prefixes in translations
            for prefix, t in (("subjunctive of ", "with-subjunctive"),):
                if part.startswith(prefix):
                    part = part[len(prefix) :]
                    data_append(tr, "tags", t)
                    break

            # Skip certain one-character translations entirely
            # (these could result from templates being ignored)
            if part in ",;.":
                continue

            if "english" in tr and tr["english"] in english_to_tags:
                data_extend(tr, "tags", english_to_tags[tr["english"]].split())
                del tr["english"]

            # Certain values indicate it is not actually a translation.
            # See the definition of tr_ignore_re to adjust.
            m = re.search(tr_ignore_re, part)
            w: Optional[str] = None

            if m and (
                m.start() != 0 or m.end() != len(part) or len(part.split()) > 1
            ):
                # This translation will be skipped because it
                # seems to be some kind of explanatory text.
                # However, let's put it in the "note" field
                # instead, unless it is one of the listed fully
                # ignored ones.
                if part in ("please add this translation if you can",):
                    continue
                # Save in note field
                tr["note"] = part
            else:
                # Interpret it as an actual translation
                parse_translation_desc(wxr, lang, part, tr)
                w = tr.get("word")
                if not w:
                    continue  # Not set or empty
                if w.startswith(("*", ":")):
                    w = w[1:].strip()
                if w in ("[Term?]", ":", "/", "?"):
                    continue  # These are not valid linkage targets
                if len(w) > 3 * len(word) + 20:
                    # Accept the translation if the word looks like an
                    # acronym: both 'ISBN'.isupper() and 'I.S.B.N'.isupper()
                    # return True, and false positives are unlikely.
                    if not word.isupper():
                        # Likely descriptive text or example because
                        # it is much too long.
                        wxr.wtp.debug(
                            "Translation too long compared to word, so"
                            " it is skipped",
                            sortid="translations/609-20230504",
                        )
                        del tr["word"]
                        tr["note"] = w

            # Sanity check: try to detect certain suspicious
            # patterns in translations
            if "word" in tr:
                m = re.search(tr_suspicious_re, tr["word"])
                if m and lang not in (
                    "Bats",  # ^ in tree/English/Tr/Bats
                ):
                    wxr.wtp.debug(
                        "suspicious translation with {!r}: {}".format(
                            m.group(0), tr
                        ),
                        sortid="translations/611",
                    )

            if "tags" in tr:
                tr["tags"] = list(sorted(set(tr["tags"])))

            # If we have only notes, add as-is
            if "word" not in tr:
                data_append(data, "translations", tr)
                continue

            # If the word contains no spaces, split it at slashes
            if w:
                alts = [w]
                if " " not in w:
                    # If no spaces, split by slash-like separators
                    alts = re.split(r"/|／", w)
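                    # E.g., a hypothetical "word1/word2" yields two entries.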
                # Note: there could be remaining slashes, but they are
                # sometimes used in ways we cannot resolve programmatically.
                # Create translations for each alternative.
                for alt in alts:
                    alt = alt.strip()
                    tr1 = copy.deepcopy(tr)
                    if alt.startswith("*") or alt.startswith(":"):
                        alt = alt[1:].strip()
                    if not alt:
                        continue
                    tr1["word"] = alt
                    data_append(data, "translations", tr1)

    # Return the language name, in case we have subitems
    return lang
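

# A minimal usage sketch (hypothetical item text; "wxr" and "data" come from
# the caller's parsing context, and the language code lookup is assumed to
# resolve "Finnish" to "fi"):
#
#     lang = parse_translation_item_text(
#         wxr, "dog", data, "Finnish: koira", None, None, None, [], False
#     )
#     # data["translations"] now contains an entry like
#     # {"lang": "Finnish", "code": "fi", "word": "koira"}, and "Finnish"
#     # is returned for possible sub-items.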