Coverage for src/wiktextract/extractor/de/translation.py: 86%

93 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-13 10:14 +0000

1import re 

2 

3from mediawiki_langcodes import code_to_name, name_to_code 

4from wikitextprocessor import NodeKind, WikiNode 

5from wikitextprocessor.parser import TemplateNode 

6 

7from ...page import clean_node 

8from ...wxr_context import WiktextractContext 

9from .models import Translation, WordEntry 

10from .tags import translate_raw_tags 

11 

12 

def extract_translation(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: WikiNode
) -> None:
    """Collect translations from the "Ü-Tabelle" templates of a section.

    Every template yielded by ``level_node.find_child(NodeKind.TEMPLATE)``
    whose name is exactly "Ü-Tabelle" is handed to
    ``process_u_tabelle_template``; all other templates are ignored.
    Results are stored on ``word_entry`` by the callee; nothing is returned.
    """
    table_templates = (
        candidate
        for candidate in level_node.find_child(NodeKind.TEMPLATE)
        if candidate.template_name == "Ü-Tabelle"
    )
    for table_template in table_templates:
        process_u_tabelle_template(wxr, word_entry, table_template)

19 

20 

def process_u_tabelle_template(
    wxr: WiktextractContext, word_entry: WordEntry, template_node: TemplateNode
) -> None:
    """Walk the translation lists carried by one "Ü-Tabelle" template.

    Template documentation: https://de.wiktionary.org/wiki/Vorlage:Ü-Tabelle

    The first positional argument is cleaned into the sense index and the
    "G" argument into the sense text.  The "Ü-Liste" and "Dialekttabelle"
    arguments, when present, are converted back to wikitext, re-parsed, and
    every list item found in the result is passed to
    ``process_u_tabelle_list_item`` together with the sense data.
    """
    params = template_node.template_parameters
    sense_idx = clean_node(wxr, None, params.get(1, ""))
    sense = clean_node(wxr, None, params.get("G", ""))

    for arg_name in ("Ü-Liste", "Dialekttabelle"):
        arg_value = params.get(arg_name)
        if arg_value is not None:
            # Re-parse the argument so that its list structure becomes
            # available as LIST_ITEM nodes.
            parsed_root = wxr.wtp.parse(wxr.wtp.node_to_wikitext(arg_value))
            for item in parsed_root.find_child_recursively(
                NodeKind.LIST_ITEM
            ):
                process_u_tabelle_list_item(
                    wxr, word_entry, item, sense, sense_idx
                )

40 

41 

def process_u_tabelle_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item_node: WikiNode,
    sense: str,
    sense_idx: str,
) -> None:
    """Extract the translations contained in one translation list item.

    The children of ``list_item_node`` are scanned in order:

    * until a string containing ":" is seen, the first non-empty child
      supplies the language name (and, for a template, the language code);
    * templates whose name starts with "Ü" supply a translation word via
      ``process_u_template``;
    * other templates and italic nodes supply raw tags;
    * a bare "," or ";" string finishes the translation collected so far.

    Finished translations are appended to ``word_entry.translations`` by
    ``append_tr_data``.

    Args:
        wxr: Extraction context passed through to ``clean_node``.
        word_entry: Entry that receives the extracted translations.
        list_item_node: The list item node to scan.
        sense: Sense text copied into every translation of this item.
        sense_idx: Sense index copied into every translation of this item.
    """
    before_colon = True
    tr_data = Translation(sense=sense, sense_index=sense_idx)
    for node in list_item_node.children:
        if isinstance(node, str):
            node = node.strip()
            if len(node) == 0:
                continue
            elif ":" in node:
                # Text in front of the colon is the language name.
                lang_str = node[: node.index(":")].strip()
                if len(lang_str) > 0 and len(tr_data.lang) == 0:
                    tr_data.lang = lang_str
                    if len(tr_data.lang_code) == 0:
                        tr_data.lang_code = name_to_code(lang_str, "de")
                before_colon = False
            elif node in [",", ";"] and len(tr_data.word) > 0:
                # A separator ends the current translation; the returned
                # object keeps the sense and language for the next one.
                tr_data = append_tr_data(word_entry, tr_data)

        if before_colon and len(tr_data.lang) == 0:
            tr_data.lang = clean_node(wxr, None, node)
            if isinstance(node, TemplateNode):
                # The lower-cased template name is used as the code.
                tr_data.lang_code = node.template_name.lower()
            else:
                # Look the code up from the language *name* that was just
                # extracted; the code field itself is still unset here.
                tr_data.lang_code = name_to_code(tr_data.lang, "de")
        elif isinstance(node, TemplateNode):
            if node.template_name.startswith("Ü"):
                if len(tr_data.word) > 0:
                    # Two "Ü" templates without a separator in between:
                    # store the first before filling in the second.
                    tr_data = append_tr_data(word_entry, tr_data)
                process_u_template(wxr, tr_data, node)
            else:
                raw_tag = clean_node(wxr, None, node).strip(": \n")
                if raw_tag != "":
                    tr_data.raw_tags.append(raw_tag)
        elif isinstance(node, WikiNode) and node.kind == NodeKind.ITALIC:
            raw_tag_str = clean_node(wxr, None, node).removesuffix(":")
            # filter(None, ...) drops empty pieces of the split.
            tr_data.raw_tags.extend(filter(None, raw_tag_str.split(", ")))

    # Store the last translation, which has no trailing separator.
    if len(tr_data.word) > 0:
        append_tr_data(word_entry, tr_data)

88 

89 

def append_tr_data(word_entry: WordEntry, tr_data: Translation) -> Translation:
    """Store a finished translation and start the next one.

    ``translate_raw_tags`` is applied to ``tr_data`` first, then a deep copy
    of it is appended to ``word_entry.translations``.

    Returns:
        A new ``Translation`` that carries over only the sense, sense index,
        language name and language code of ``tr_data``.
    """
    translate_raw_tags(tr_data)
    finished = tr_data.model_copy(deep=True)
    word_entry.translations.append(finished)

    inherited = {
        field_name: getattr(tr_data, field_name)
        for field_name in ("sense", "sense_index", "lang", "lang_code")
    }
    return Translation(**inherited)

99 

100 

def process_u_template(
    wxr: WiktextractContext, tr_data: Translation, u_template: TemplateNode
) -> None:
    """Fill ``tr_data`` from one "Ü"-family translation template.

    Template documentation: https://de.wiktionary.org/wiki/Vorlage:Ü
    The same handling covers "Ü?", "Üt", "Üt?", "Üxx4" and "Üxx4?".

    Args:
        wxr: Extraction context passed through to ``clean_node``.
        tr_data: Translation object that is updated in place (language,
            word, ``uncertain`` flag, romanization and ``other`` form).
        u_template: The template node to read.
    """
    params = u_template.template_parameters

    # Fall back to the template's own language code, and derive the language
    # name from that code, when the list item did not provide them.
    if len(tr_data.lang_code) == 0:
        tr_data.lang_code = clean_node(wxr, None, params.get(1, ""))
    if len(tr_data.lang) == 0:
        tr_data.lang = code_to_name(tr_data.lang_code, "de")

    tr_data.word = clean_node(wxr, None, params.get(2, ""))

    # A trailing "?" on the template name marks the translation as uncertain.
    template_name = u_template.template_name
    tr_data.uncertain = template_name.endswith("?")
    template_name = template_name.removesuffix("?")

    # Key of the argument whose text, if non-empty, replaces the word.
    # -1 is a sentinel that presumably never matches a real parameter.
    display_arg = -1
    if template_name == "Ü":
        display_arg = 3
    elif template_name == "Üt":
        display_arg = 4
        if 3 in params:
            arg_value = clean_node(wxr, None, params.get(3, ""))
            if tr_data.lang_code in ["ja", "ko"] and "," in arg_value:
                # For these two codes the text before the first comma goes
                # to `other` and the rest to `roman`.
                tr_data.other, tr_data.roman = tuple(
                    map(str.strip, arg_value.split(",", maxsplit=1))
                )
            else:
                tr_data.roman = arg_value
        else:
            # this template could create roman without the third arg
            expanded_text = clean_node(wxr, None, u_template)
            # Take the parenthesised text that ends with "^☆".
            m = re.search(r"\(([^)]+?)\^\☆\)", expanded_text)
            if m is not None:
                tr_data.roman = m.group(1)
    elif template_name == "Üxx4":
        display_arg = "v"
        if 3 in params:
            display_arg = 3
        tr_data.roman = clean_node(wxr, None, params.get("d", ""))

    tr_word = clean_node(wxr, None, params.get(display_arg, ""))
    if len(tr_word) > 0:
        tr_data.word = tr_word