Coverage for src/wiktextract/extractor/el/translations.py: 24%

29 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-11 10:26 +0000

1from typing import TypeAlias 

2 

3from mediawiki_langcodes import code_to_name 

4from wikitextprocessor import ( 

5 WikiNode, 

6) 

7from wikitextprocessor.core import TemplateArgs 

8 

9from wiktextract import WiktextractContext 

10from wiktextract.page import clean_node 

11 

12from .models import Translation, WordEntry 

13 

14# Greek Wiktionary translation sections seem to be a hidden div, inside 

15# which is a table with one row and one cell with a list of translations. 

16# I don't know why there's a table, the div is what creates the columns. 

17# Each entry seems to generally use a {{τ|lang_code|translation|...}} template 

18# with perfect information. 

19 

# Alias naming strings that hold a language code.
# NOTE(review): not referenced anywhere in the visible part of this module;
# presumably kept for annotations elsewhere — confirm before removing.
LangCode: TypeAlias = str

21 

22 

def process_translations(
    wxr: WiktextractContext, data: WordEntry, translation_node: WikiNode
) -> None:
    """Collect translation template data from a translation section node.

    The node is passed through ``wxr.wtp.node_to_html`` only so that
    ``translation_template_fn`` is invoked for the templates inside it; the
    rendered HTML is thrown away.

    Args:
        wxr: Extraction context, used for rendering the node, cleaning
            template arguments and emitting warnings.
        data: Word entry whose ``translations`` attribute is assigned the
            list of translations gathered from ``translation_node``.
        translation_node: The translation section node to scan.
    """
    current_sense: str = ""
    translations: list[Translation] = []

    def translation_template_fn(name: str, ht: TemplateArgs) -> str | None:
        """Record data from sense-header and translation templates.

        Always returns None; the callback is used purely for its side
        effects on ``current_sense`` and ``translations``.
        """
        nonlocal current_sense

        # Sense header: its first argument labels every translation
        # template seen after it, until the next header.
        if name == "μτφ-αρχή":
            current_sense = clean_node(wxr, None, ht.get(1, ""))
            return None

        if name not in ("τ", "t"):
            return None

        lang_code = ht.get(1, "")
        lang_name = code_to_name(lang_code)
        # Warn when the code is missing OR when it could not be resolved to
        # a language name; checking only the code would let unknown codes
        # through silently with an empty language name.
        if not lang_code or not lang_name:
            wxr.wtp.warning(
                f"Language-code '{lang_code}' in "
                "translation does not parse.",
                sortid="translations/57",
            )
            lang_name = "LANG_NAME_ERROR"

        text = ht.get(2, "")
        if not text:
            wxr.wtp.warning(
                f"Translation template has no translation," f"{ht=}",
                sortid="translations/64",
            )
            return None

        romanization = ht.get("tr", "")
        translations.append(
            Translation(
                sense=current_sense,
                lang_code=lang_code,
                lang=lang_name,
                word=text,
                roman=romanization,
            )
        )
        return None

    # The return value is discarded; node_to_html is called only so that
    # the template callback above gets to see the template data.
    _ = wxr.wtp.node_to_html(
        translation_node,
        template_fn=translation_template_fn,
    )
    # NOTE(review): this replaces any translations already stored on `data`;
    # confirm callers never invoke this more than once per entry.
    data.translations = translations