Coverage for src/wiktextract/extractor/zh/translation.py: 89%

1from typing import Optional, Union

3from mediawiki_langcodes import code_to_name, name_to_code

4from wikitextprocessor.parser import (

5 LEVEL_KIND_FLAGS,

6 NodeKind,

7 TemplateNode,

8 WikiNode,

11from ...page import clean_node

12from ...wxr_context import WiktextractContext

13from .models import Translation, WordEntry

14from .section_titles import TRANSLATIONS_TITLES

15from .tags import TEMPLATE_TAG_ARGS, translate_raw_tags

def extract_translation(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: WikiNode,
    sense: str = "",
    is_subpage: bool = False,
) -> None:
    """Collect the translations found under ``level_node``.

    Only the direct template and list children of ``level_node`` are
    inspected:

    * ``trans-top``-style templates that have a first argument replace the
      sense applied to the list items that follow;
    * ``see translation subpage`` / ``trans-see`` templates are followed with
      ``translation_subpage`` unless ``is_subpage`` is true;
    * ``multitrans`` templates have their ``data`` argument converted back to
      wikitext, re-parsed and processed recursively;
    * every list item found inside a list child is passed to
      ``process_translation_list_item``.

    Other templates are ignored.

    Args:
        wxr: Extraction context, used for parsing and node cleaning.
        page_data: Word entries of the page; the callees append to the
            translations of the last entry.
        level_node: Node whose children hold the translation data.
        sense: Sense text used until a ``trans-top`` template overrides it.
        is_subpage: True when ``level_node`` comes from a translation
            subpage; subpage link templates are then not followed.
    """
    for child in level_node.find_child(NodeKind.TEMPLATE | NodeKind.LIST):
        if not isinstance(child, TemplateNode):
            # List node: each item holds the translations of one language.
            for list_item in child.find_child_recursively(NodeKind.LIST_ITEM):
                process_translation_list_item(
                    wxr,
                    page_data,
                    list_item,
                    sense,
                )
            continue

        template_name = child.template_name.lower()
        if (
            template_name in {"trans-top", "翻譯-頂", "trans-top-also"}
            and 1 in child.template_parameters
        ):
            sense = clean_node(wxr, None, child.template_parameters.get(1))
        elif (
            template_name in {"see translation subpage", "trans-see"}
            and not is_subpage
        ):
            translation_subpage(wxr, page_data, child)
        elif template_name == "multitrans":
            wikitext = "".join(
                wxr.wtp.node_to_wikitext(c)
                for c in child.template_parameters.get("data", [])
            )
            multitrans = wxr.wtp.parse(wikitext)
            # Forward is_subpage so that subpage link templates nested in the
            # multitrans data are still skipped while processing a subpage.
            extract_translation(
                wxr, page_data, multitrans, sense, is_subpage
            )

def process_translation_list_item(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    list_item: WikiNode,
    sense: str,
) -> None:
    """Extract every translation contained in one list item.

    The first non-empty child supplies the language name: the text before a
    full-width or ASCII colon when the child is a string, otherwise the
    cleaned text of the node.  Each following translation template
    (``t``, ``t+``, ``tt``, ``tt+``, ``t-check``, ``t+check``, ``l``) or link
    node starts a new translation; qualifier templates add raw tags to the
    translation currently being built.  Finished translations are appended to
    ``page_data[-1].translations``.

    Args:
        wxr: Extraction context used for cleaning nodes.
        page_data: Word entries of the page; the last one receives the
            translations.
        list_item: The list item node to process.
        sense: Sense text stored on every translation of this item.
    """
    tr_data = Translation(word="", sense=sense)

    for child_index, child in enumerate(list_item.filter_empty_str_child()):
        if child_index == 0:
            lang_text = ""
            if isinstance(child, str):
                # Prefer the full-width colon, then fall back to ASCII.
                for separator in ("：", ":"):
                    if separator in child:
                        lang_text = child[: child.index(separator)]
                        break
            else:
                lang_text = clean_node(wxr, None, child)
            if len(lang_text) > 0:
                tr_data.lang = lang_text.strip()
                tr_data.lang_code = name_to_code(tr_data.lang, "zh")
        elif isinstance(child, TemplateNode):
            template_name = child.template_name.lower()
            if template_name in {
                "t",
                "t+",
                "tt",
                "tt+",
                "t-check",
                "t+check",
                "l",
            }:
                if len(tr_data.word) > 0:
                    # A new translation starts: store the previous one and
                    # carry over only the language information.
                    _store_translation(page_data, tr_data)
                    tr_data = Translation(
                        word="",
                        lang=tr_data.lang,
                        lang_code=tr_data.lang_code,
                        sense=sense,
                    )
                if tr_data.lang_code == "":
                    tr_data.lang_code = child.template_parameters.get(1, "")
                if tr_data.lang == "":
                    tr_data.lang = code_to_name(tr_data.lang_code, "zh")
                tr_data.word = clean_node(
                    wxr, None, child.template_parameters.get(2, "")
                )
                tr_data.roman = clean_node(
                    wxr, None, child.template_parameters.get("tr", "")
                )
                tr_data.alt = clean_node(
                    wxr, None, child.template_parameters.get("alt", "")
                )
                tr_data.lit = clean_node(
                    wxr, None, child.template_parameters.get("lit", "")
                )
                for arg_key, arg_value in child.template_parameters.items():
                    if (
                        isinstance(arg_key, int) and arg_key >= 3
                    ) or arg_key == "g":  # template "l" uses the "g" arg
                        # An argument is not guaranteed to be a plain string;
                        # reduce anything else to text before splitting so
                        # that a node-valued argument cannot raise here.
                        if not isinstance(arg_value, str):
                            arg_value = clean_node(wxr, None, arg_value)
                        for tag_arg in arg_value.split("-"):
                            if tag_arg in TEMPLATE_TAG_ARGS:
                                tr_data.tags.append(TEMPLATE_TAG_ARGS[tag_arg])

            elif template_name == "t-needed":
                # ignore empty translation
                continue
            elif template_name in ("qualifier", "q"):
                raw_tag = clean_node(wxr, None, child)
                tr_data.raw_tags.append(raw_tag.strip("()"))
            else:
                # zh qualifier templates that use template "注释"
                # https://zh.wiktionary.org/wiki/Template:注释
                raw_tag = clean_node(wxr, None, child)
                if raw_tag.startswith("〈") and raw_tag.endswith("〉"):
                    tr_data.raw_tags.append(raw_tag.strip("〈〉"))
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LINK:
            if len(tr_data.word) > 0:
                _store_translation(page_data, tr_data)
                tr_data = Translation(
                    word="",
                    lang=tr_data.lang,
                    lang_code=tr_data.lang_code,
                    sense=sense,
                )
            tr_data.word = clean_node(wxr, None, child)

    if len(tr_data.word) > 0:
        _store_translation(page_data, tr_data)


def _store_translation(
    page_data: list[WordEntry], tr_data: Translation
) -> None:
    """Translate the raw tags of ``tr_data`` and append a copy of it.

    Running ``translate_raw_tags`` here means every stored translation gets
    its raw tags processed, not only the last one of a list item.
    """
    translate_raw_tags(tr_data)
    page_data[-1].translations.append(tr_data.model_copy(deep=True))

147

148

def translation_subpage(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    template_node: TemplateNode,
) -> None:
    """Extract translations from the page a subpage template points to.

    For ``see translation subpage`` the first template argument names the
    section to look in and the second one the page to load (defaulting to the
    current page title).  For any other template the current page title is
    used and no section is targeted.  When the resulting title equals the
    current page title, the ``/翻譯`` subpage of it is loaded instead.

    Nothing is extracted when the page cannot be loaded or when no
    translation section is found in it.

    Args:
        wxr: Extraction context used to load and parse the subpage.
        page_data: Word entries of the page; receives the translations.
        template_node: The template that refers to the subpage.
    """
    # https://zh.wiktionary.org/wiki/Template:翻譯-見
    # https://zh.wiktionary.org/wiki/Template:See_translation_subpage

    page_title = wxr.wtp.title
    target_section = None
    # extract_translation dispatches on the lower-cased template name, so the
    # comparison here has to be case-insensitive too; otherwise a capitalised
    # template name would lose its section and page arguments.
    if template_node.template_name.lower() == "see translation subpage":
        target_section = template_node.template_parameters.get(1)
        page_title = template_node.template_parameters.get(2, wxr.wtp.title)

    translation_subpage_title = page_title
    if page_title == wxr.wtp.title:
        translation_subpage_title = f"{page_title}/翻譯"
    subpage = wxr.wtp.get_page(translation_subpage_title)
    # Also stop when the page has no text to parse.
    if subpage is None or subpage.body is None:
        return

    root = wxr.wtp.parse(subpage.body, pre_expand=True)
    target_section_node = (
        root
        if target_section is None
        else find_subpage_section(wxr, root, target_section)
    )
    # find_subpage_section returns None for a missing target section.
    translation_node = find_subpage_section(wxr, target_section_node)
    if translation_node is not None:
        extract_translation(wxr, page_data, translation_node, is_subpage=True)

179

180

def find_subpage_section(
    wxr: WiktextractContext,
    node: Union[WikiNode, str, None],
    target_section: Union[str, None] = None,
) -> Optional[WikiNode]:
    """Return the first matching section node below ``node``.

    When ``target_section`` is a string, only a section whose cleaned title
    equals it matches.  Otherwise the first section whose title is listed in
    ``TRANSLATIONS_TITLES`` matches.  Keeping the two modes apart prevents a
    translations section that comes earlier in the traversal from being
    returned in place of the requested section.

    Args:
        wxr: Extraction context used to clean section titles.
        node: Node to search in; anything that is not a ``WikiNode``
            yields ``None``.
        target_section: Exact section title to look for, if any.

    Returns:
        The matching level node, or ``None`` when there is none.
    """
    if not isinstance(node, WikiNode):
        return None

    search_by_title = isinstance(target_section, str)
    for level_node in node.find_child_recursively(LEVEL_KIND_FLAGS):
        section_title = clean_node(wxr, None, level_node.largs)
        if search_by_title:
            if section_title == target_section:
                return level_node
        elif section_title in TRANSLATIONS_TITLES:
            return level_node
    return None