Coverage for src/wiktextract/extractor/el/translations.py: 24%

1from typing import TypeAlias

3from mediawiki_langcodes import code_to_name

4from wikitextprocessor import (

5 WikiNode,

7from wikitextprocessor.core import TemplateArgs

9from wiktextract import WiktextractContext

10from wiktextract.page import clean_node

12from .models import Translation, WordEntry

14# Greek Wiktionary translation sections seem to be a hidden div, inside

15# which is a table with one row and one cell with a list of translations.

16# I don't know why there's a table, the div is what creates the columns.

17# Each entry seems to generally use a {{τ|lang_code|translation|...}} template

18# with perfect information.

20LangCode: TypeAlias = str

def process_translations(
    wxr: WiktextractContext, data: WordEntry, translation_node: WikiNode
) -> None:
    """Extract translation template data from a translation section node.

    The node is rendered with ``wxr.wtp.node_to_html`` purely so that a
    template callback gets to see every template inside it; the rendered
    output itself is thrown away.

    Two kinds of templates are handled:

    * ``μτφ-αρχή``: its first argument becomes the sense attached to every
      translation that follows, until the next ``μτφ-αρχή``.
    * ``τ`` / ``t``: argument 1 is the language code, argument 2 the
      translated word and ``tr`` an optional romanization. Each one with a
      non-empty word yields a ``Translation``.

    Args:
        wxr: Context giving access to the wikitext processor and its
            warning log.
        data: Entry whose ``translations`` attribute is overwritten with
            the collected list (possibly empty).
        translation_node: Root node of the translation section.
    """
    current_sense: str = ""
    translations: list[Translation] = []

    def translation_template_fn(name: str, ht: TemplateArgs) -> str | None:
        """Record data from translation templates; always returns None."""
        # Only `current_sense` is rebound here; `translations` is mutated in
        # place, so it needs no `nonlocal` declaration.
        nonlocal current_sense

        if name == "μτφ-αρχή":
            current_sense = clean_node(wxr, None, ht.get(1, ""))
            return None

        if name not in ("τ", "t"):
            return None

        lang_code = ht.get(1, "")
        lang_name = code_to_name(lang_code)
        # Warn both when the code is missing and when the lookup produced no
        # name (assumes `code_to_name` returns a falsy value for unknown
        # codes -- TODO confirm). Checking only `lang_code` let unknown codes
        # through with an empty language name and no warning.
        if not lang_code or not lang_name:
            wxr.wtp.warning(
                f"Language-code '{lang_code}' in "
                "translation does not parse.",
                sortid="translations/57",
            )
            lang_name = "LANG_NAME_ERROR"

        text = ht.get(2, "")
        if not text:
            # Nothing to record without the translated word itself.
            wxr.wtp.warning(
                f"Translation template has no translation, {ht=}",
                sortid="translations/64",
            )
            return None

        translations.append(
            Translation(
                sense=current_sense,
                lang_code=lang_code,
                lang=lang_name,
                word=text,
                roman=ht.get("tr", ""),
            )
        )
        return None

    # The rendered HTML is discarded; node_to_html is only used to drive
    # the template callback over every template in the section.
    _ = wxr.wtp.node_to_html(
        translation_node,
        template_fn=translation_template_fn,
    )
    data.translations = translations