Coverage for src/wiktextract/extractor/el/translations.py: 24%
29 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-11 10:26 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-11 10:26 +0000
1from typing import TypeAlias
3from mediawiki_langcodes import code_to_name
4from wikitextprocessor import (
5 WikiNode,
6)
7from wikitextprocessor.core import TemplateArgs
9from wiktextract import WiktextractContext
10from wiktextract.page import clean_node
12from .models import Translation, WordEntry
14# Greek Wiktionary translation sections seem to be a hidden div, inside
15# which is a table with one row and one cell with a list of translations.
16# I don't know why there's a table, the div is what creates the columns.
17# Each entry seems to generally use a {{τ|lang_code|translation|...}} template
18# with perfect information.
# Alias for plain ``str``; presumably meant to label values that hold a
# language code such as the first argument of a {{τ|...}} template.
# NOTE(review): not referenced in the visible part of this module -- confirm
# whether it is still needed.
20LangCode: TypeAlias = str
def process_translations(
    wxr: WiktextractContext, data: WordEntry, translation_node: WikiNode
) -> None:
    """Collect the translations of a translation section into ``data``.

    The section node is rendered with ``wxr.wtp.node_to_html`` only so that
    every template inside it is handed to a callback; the rendered output
    itself is discarded.  Each ``{{τ|lang_code|translation|...}}`` (or
    ``{{t|...}}``) template produces one ``Translation``, tagged with the
    sense text taken from the most recent ``{{μτφ-αρχή|...}}`` template
    (empty string if none was seen yet).

    Args:
        wxr: Context giving access to the wikitext processor (``wxr.wtp``),
            which is used both for rendering and for logging warnings.
        data: Word entry whose ``translations`` attribute is overwritten
            with the collected list (which may be empty).
        translation_node: Root node of the translation section.
    """
    current_sense: str = ""
    translations: list[Translation] = []

    def translation_template_fn(name: str, ht: TemplateArgs) -> str | None:
        """Record data from translation-related templates; always return None."""
        # Only ``current_sense`` is rebound here; ``translations`` is merely
        # mutated in place, so it needs no ``nonlocal`` declaration.
        nonlocal current_sense

        if name == "μτφ-αρχή":
            # Start of a translation group: remember its sense description.
            current_sense = clean_node(wxr, None, ht.get(1, ""))
            return None

        if name not in ("τ", "t"):
            return None

        lang_code = ht.get(1, "")
        lang_name = code_to_name(lang_code)
        # Test the lookup *result*: this catches both a missing code and a
        # code that is present but unknown to the language-code table.
        # (Previously only an empty ``lang_code`` was detected, so unknown
        # codes silently produced a translation with an empty language name.)
        if not lang_name:
            wxr.wtp.warning(
                f"Language-code '{lang_code}' in "
                "translation does not parse.",
                sortid="translations/57",
            )
            lang_name = "LANG_NAME_ERROR"

        text = ht.get(2, "")
        if not text:
            # Without a translated word there is nothing worth storing.
            wxr.wtp.warning(
                f"Translation template has no translation,{ht=}",
                sortid="translations/64",
            )
            return None

        translations.append(
            Translation(
                sense=current_sense,
                lang_code=lang_code,
                lang=lang_name,
                word=text,
                roman=ht.get("tr", ""),
            )
        )
        return None

    # The return value is deliberately ignored; node_to_html is used only
    # to drive ``translation_template_fn`` over the templates in the section.
    wxr.wtp.node_to_html(
        translation_node,
        template_fn=translation_template_fn,
    )
    data.translations = translations