Coverage for src/wiktextract/extractor/es/translation.py: 84%

69 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1import itertools 

2import re 

3 

4from mediawiki_langcodes import code_to_name 

5from wikitextprocessor.parser import LevelNode, NodeKind, TemplateNode 

6 

7from ...page import clean_node 

8from ...wxr_context import WiktextractContext 

9from .models import Translation, WordEntry 

10from .tags import translate_raw_tags 

11 

12 

def extract_translation_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
    is_translation: bool = True,
) -> None:
    """Collect the translations (or descendants) listed under a section.

    Walks the direct template children of ``level_node``:

    * ``t`` and ``d`` templates are expanded by ``process_t_template`` and
      their items and categories are accumulated.
    * ``trad-arriba`` (only when ``is_translation`` is true) opens a new
      table; its first argument supplies the sense gloss and, when it
      starts with ``[index]``, the sense index used for the templates that
      follow.

    The accumulated items and categories are appended to every entry of
    ``page_data`` that has the same ``lang_code`` and ``etymology_text`` as
    the last entry: to ``translations`` when ``is_translation`` is true,
    otherwise to ``descendants``.
    """
    tr_data = []
    cats = []
    sense = ""
    sense_index = ""
    for t_node in level_node.find_child(NodeKind.TEMPLATE):
        if t_node.template_name in ("t", "d"):
            new_tr_list, new_cats = process_t_template(
                wxr, t_node, sense, sense_index
            )
            tr_data.extend(new_tr_list)
            cats.extend(new_cats)
        elif t_node.template_name == "trad-arriba" and is_translation:
            sense = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
            # Each table header starts a fresh sense. Without this reset a
            # header that carries no "[index]" prefix would keep the index
            # of the previous table while its gloss is replaced.
            sense_index = ""
            m = re.match(r"\[([\d.a-z]+)\]", sense)
            if m is not None:
                sense_index = m.group(1)
                sense = sense[m.end() :].strip()

    for data in page_data:
        if (
            data.lang_code == page_data[-1].lang_code
            and data.etymology_text == page_data[-1].etymology_text
        ):
            if is_translation:
                data.translations.extend(tr_data)
            else:
                data.descendants.extend(tr_data)
            data.categories.extend(cats)

47 

48 

# https://es.wiktionary.org/wiki/Módulo:t
# Gender codes (looked up for the "g<N>" template parameters) mapped to the
# tag, or list of tags, they stand for.
T_GENDERS = dict(
    m="masculine",
    f="feminine",
    mf=["masculine", "feminine"],
    n="neuter",
    c="common",
)
# Number codes (looked up for the "n<N>" template parameters); each tag is
# reachable through a short and a long alias.
T_NUMBERS = {
    alias: tag
    for tag, aliases in (
        ("singular", ("s", "sg")),
        ("plural", ("p", "pl")),
        ("dual", ("d", "du")),
    )
    for alias in aliases
}

65 

66 

def process_t_template(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    sense: str,
    sense_index: str,
) -> tuple[list[Translation], list[str]]:
    """Expand one ``t``/``d`` template into ``Translation`` items.

    The language code is the first positional argument ("unknown" when it
    is missing or empty). The language name is the text before the first
    colon of the expanded template; when that is unavailable it falls back
    to ``code_to_name(lang_code, "es")`` and finally to "unknown".

    Words are numbered: for N = 1, 2, ... the parameters ``tN``/``dN``
    (word), ``aN`` (sense index), ``tlN`` (romanization), ``notaN`` (raw
    tags), ``gN`` (gender) and ``nN`` (number) describe the N-th item.
    Numbering stops at the first N that has neither ``tN`` nor ``dN``.

    Args:
        wxr: Context passed through to ``clean_node``.
        template_node: The ``t`` or ``d`` template to expand.
        sense: Sense gloss stored on every item.
        sense_index: Initial sense index; an ``aN`` parameter replaces it
            for that item and for the items after it.

    Returns:
        The items that have a non-empty word, and the categories collected
        while expanding the template.
    """
    # https://es.wiktionary.org/wiki/Plantilla:t
    tr_list = []
    cats = {}
    params = template_node.template_parameters
    lang_code = params.get(1, "") or "unknown"
    template_text = clean_node(wxr, cats, template_node)
    name_part, colon, _ = template_text.partition(":")
    # With no colon there is no "language:" prefix to read. The previous
    # slice used str.find(), whose -1 result silently chopped the last
    # character off the text and used the remainder as the language name.
    lang_name = name_part.strip("* ") if colon else ""
    if lang_name == "":  # in case Lua error
        lang_name = code_to_name(lang_code, "es") or "unknown"

    for tr_index in itertools.count(1):
        suffix = str(tr_index)
        if "t" + suffix not in params and "d" + suffix not in params:
            break
        tr_data = Translation(
            lang_code=lang_code,
            lang=lang_name,
            word="",
            sense=sense,
            sense_index=sense_index,
        )
        for param_prefix, field in (
            ("t", "word"),
            ("d", "word"),
            ("a", "sense_index"),
            ("tl", "roman"),
            ("nota", "raw_tags"),
            ("g", "tags"),
            ("n", "tags"),
        ):
            param = param_prefix + suffix
            if param not in params:
                continue
            value = clean_node(wxr, None, params[param])
            if param_prefix == "g":
                # May yield a list of tags (e.g. "mf") or None when unknown.
                value = T_GENDERS.get(value)
            elif param_prefix == "n":
                value = T_NUMBERS.get(value)
            elif param_prefix == "a" and value != "":
                # Remember the index for the following words as well.
                sense_index = value
            if value is None or value == "":
                continue

            pre_value = getattr(tr_data, field)
            if isinstance(pre_value, list):
                if isinstance(value, list):
                    pre_value.extend(value)
                else:
                    pre_value.append(value)
            else:
                setattr(tr_data, field, value)

        if tr_data.sense_index == "" and sense_index != "":
            # usually only first word has index param
            tr_data.sense_index = sense_index

        if tr_data.word:
            translate_raw_tags(tr_data)
            tr_list.append(tr_data)
    return tr_list, cats.get("categories", [])