Coverage for src/wiktextract/extractor/de/translation.py: 86%

1import re

3from mediawiki_langcodes import code_to_name, name_to_code

4from wikitextprocessor import NodeKind, WikiNode

5from wikitextprocessor.parser import TemplateNode

7from ...page import clean_node

8from ...wxr_context import WiktextractContext

9from .models import Translation, WordEntry

10from .tags import translate_raw_tags

def extract_translation(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: WikiNode
) -> None:
    """Process every "Ü-Tabelle" template found under ``level_node``.

    Template nodes yielded by ``level_node.find_child`` whose name is
    "Ü-Tabelle" are passed to ``process_u_tabelle_template``; templates
    with any other name are skipped.
    """
    for candidate in level_node.find_child(NodeKind.TEMPLATE):
        if candidate.template_name != "Ü-Tabelle":
            continue
        process_u_tabelle_template(wxr, word_entry, candidate)

def process_u_tabelle_template(
    wxr: WiktextractContext, word_entry: WordEntry, template_node: TemplateNode
) -> None:
    """Extract translations from a single "Ü-Tabelle" template.

    The first positional argument is used as the sense index and the "G"
    argument as the sense text. The "Ü-Liste" and "Dialekttabelle"
    arguments, when present, are converted back to wikitext, re-parsed, and
    every list item found in the result is passed to
    ``process_u_tabelle_list_item``.
    """
    # https://de.wiktionary.org/wiki/Vorlage:Ü-Tabelle
    params = template_node.template_parameters
    sense_idx = clean_node(wxr, None, params.get(1, ""))
    sense = clean_node(wxr, None, params.get("G", ""))
    for list_arg_name in ("Ü-Liste", "Dialekttabelle"):
        list_arg_value = params.get(list_arg_name)
        if list_arg_value is not None:
            # Re-parse the argument so its list structure becomes nodes.
            list_wikitext = wxr.wtp.node_to_wikitext(list_arg_value)
            parsed_list = wxr.wtp.parse(list_wikitext)
            for item in parsed_list.find_child_recursively(
                NodeKind.LIST_ITEM
            ):
                process_u_tabelle_list_item(
                    wxr, word_entry, item, sense, sense_idx
                )

def process_u_tabelle_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item_node: WikiNode,
    sense: str,
    sense_idx: str,
) -> None:
    """Extract the translations contained in one "Ü-Tabelle" list item.

    The children of ``list_item_node`` are scanned in order:

    * text up to the first ":" (or the first node seen before any colon)
      supplies the language name, from which the language code is derived;
    * templates whose name starts with "Ü" supply a translation and are
      handled by ``process_u_template``;
    * other templates and italic nodes contribute raw tags;
    * a bare "," text node finishes the current translation.

    Finished translations are appended to ``word_entry.translations`` via
    ``append_tr_data``. Each one carries ``sense`` and ``sense_idx``.
    """
    before_colon = True
    tr_data = Translation(sense=sense, sense_index=sense_idx)
    for node in list_item_node.children:
        if isinstance(node, str):
            node = node.strip()
            if len(node) == 0:
                continue
            elif ":" in node:
                lang_str = node[: node.index(":")].strip()
                if len(lang_str) > 0 and len(tr_data.lang) == 0:
                    tr_data.lang = lang_str
                    if len(tr_data.lang_code) == 0:
                        tr_data.lang_code = name_to_code(lang_str, "de")
                before_colon = False
            elif node == "," and len(tr_data.word) > 0:
                # A comma separates translations of the same language.
                tr_data = append_tr_data(word_entry, tr_data)

        if before_colon and len(tr_data.lang) == 0:
            tr_data.lang = clean_node(wxr, None, node)
            if isinstance(node, TemplateNode):
                # The template name is used as the language code here.
                tr_data.lang_code = node.template_name.lower()
            else:
                # Look the code up from the language *name* just stored.
                # Passing the (still empty) code could never resolve.
                tr_data.lang_code = name_to_code(tr_data.lang, "de")
        elif isinstance(node, TemplateNode):
            if node.template_name.startswith("Ü"):
                if len(tr_data.word) > 0:
                    # Flush the previous translation before starting anew.
                    tr_data = append_tr_data(word_entry, tr_data)
                process_u_template(wxr, tr_data, node)
            else:
                tr_data.raw_tags.append(clean_node(wxr, None, node))
        elif isinstance(node, WikiNode) and node.kind == NodeKind.ITALIC:
            raw_tag_str = clean_node(wxr, None, node).removesuffix(":")
            for raw_tag in filter(None, raw_tag_str.split(", ")):
                tr_data.raw_tags.append(raw_tag)

    # Store the last translation, which has no trailing comma.
    if len(tr_data.word) > 0:
        append_tr_data(word_entry, tr_data)

def append_tr_data(word_entry: WordEntry, tr_data: Translation) -> Translation:
    """Store ``tr_data`` on ``word_entry`` and return a fresh follow-up.

    ``translate_raw_tags`` is applied to ``tr_data`` first, then a deep copy
    is appended to ``word_entry.translations``. The returned ``Translation``
    is built from only the sense, sense index, language name and language
    code of ``tr_data``, so the caller can keep filling in further
    translations for the same language.
    """
    translate_raw_tags(tr_data)
    stored_copy = tr_data.model_copy(deep=True)
    word_entry.translations.append(stored_copy)
    carried_over = {
        "sense": tr_data.sense,
        "sense_index": tr_data.sense_index,
        "lang": tr_data.lang,
        "lang_code": tr_data.lang_code,
    }
    return Translation(**carried_over)

def process_u_template(
    wxr: WiktextractContext, tr_data: Translation, u_template: TemplateNode
) -> None:
    """Fill ``tr_data`` from one "Ü"-family translation template.

    Missing language code and name are filled in from the first template
    argument. The translated word is read from the second argument, and a
    non-empty display argument replaces it. ``tr_data.uncertain`` is set
    when the template name ends with "?". A romanization is stored for the
    "Üt" and "Üxx4" variants.

    ``tr_data`` is modified in place; nothing is returned.
    """
    # https://de.wiktionary.org/wiki/Vorlage:Ü
    # also "Ü?", "Üt", "Üt?", "Üxx4", "Üxx4?"
    params = u_template.template_parameters
    if len(tr_data.lang_code) == 0:
        tr_data.lang_code = clean_node(wxr, None, params.get(1, ""))
    if len(tr_data.lang) == 0:
        # Look the name up from the language code, not the whole object.
        tr_data.lang = code_to_name(tr_data.lang_code, "de")

    tr_data.word = clean_node(wxr, None, params.get(2, ""))
    template_name = u_template.template_name
    tr_data.uncertain = template_name.endswith("?")
    template_name = template_name.removesuffix("?")
    # -1 is never a parameter key, so unknown variants get no display text.
    display_arg = -1
    if template_name == "Ü":
        display_arg = 3
    elif template_name == "Üt":
        display_arg = 4
        if 3 in params:
            tr_data.roman = clean_node(wxr, None, params.get(3, ""))
        else:
            # this template could create roman without the third arg
            expanded_text = clean_node(wxr, None, u_template)
            m = re.search(r"\(([^)]+?)\^\☆\)", expanded_text)
            if m is not None:
                tr_data.roman = m.group(1)
    elif template_name == "Üxx4":
        display_arg = "v"
        if 3 in params:
            display_arg = 3
        tr_data.roman = clean_node(wxr, None, params.get("d", ""))

    tr_word = clean_node(wxr, None, params.get(display_arg, ""))
    if len(tr_word) > 0:
        tr_data.word = tr_word