Coverage for src/wiktextract/extractor/ja/translation.py: 92%

94 statements  

« prev     ^ index     » next       coverage.py v7.9.0, created at 2025-06-13 07:43 +0000

1from typing import Optional 

2 

3from mediawiki_langcodes import name_to_code 

4from wikitextprocessor.parser import LevelNode, NodeKind, TemplateNode, WikiNode 

5 

6from ...page import clean_node 

7from ...wxr_context import WiktextractContext 

8from .models import Translation, WordEntry 

9from .tags import translate_raw_tags 

10 

11 

def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Collect the translations found in a translation section.

    Walks the template and list children of ``level_node`` in order. A
    ``trans-top`` template sets the sense text (taken from its first
    argument) that applies to the lists following it; every list item is
    passed to ``process_translation_list_item``, which appends the
    extracted translations to ``word_entry``.
    """
    current_sense = ""
    for child in level_node.find_child(NodeKind.TEMPLATE | NodeKind.LIST):
        is_trans_top = (
            isinstance(child, TemplateNode)
            and child.template_name == "trans-top"
        )
        if is_trans_top:
            sense_arg = child.template_parameters.get(1, "")
            current_sense = clean_node(wxr, None, sense_arg)
            continue
        if child.kind != NodeKind.LIST:
            # Any other template is ignored.
            continue
        for item in child.find_child(NodeKind.LIST_ITEM):
            # Top-level items start without an inherited language.
            process_translation_list_item(
                wxr, word_entry, item, current_sense, "", ""
            )

28 

29 

def process_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> None:
    """Extract the translations contained in one list item.

    The item is expected to look like ``<language>: <translations>``.
    Everything before the first colon names the language; templates and
    links after it yield translations, which are appended to
    ``word_entry.translations``. ``lang_name`` and ``lang_code`` are the
    language inherited from the parent item and are reused for nested
    lists.
    """
    word_templates = ("t+", "t", "t-", "l", "lang")
    gender_templates = (
        "m",
        "f",
        "p",
        "n",
        "c",
        "s",
        "mf",
        "mpl",
        "fpl",
        "npl",
        "inv",
    )
    seen_colon = False
    previous: Optional[Translation] = None

    for index, child in enumerate(list_item.children):
        if isinstance(child, str):
            if ":" in child and not seen_colon:
                seen_colon = True
                # The language name is everything up to the first colon.
                name_nodes = list_item.children[:index]
                name_nodes.append(child.partition(":")[0])
                candidate_name = clean_node(wxr, None, name_nodes)
                candidate_code = name_to_code(candidate_name, "ja")
                # Keep the inherited language unless the new name resolves
                # to a code or nothing was inherited.
                if candidate_code != "" or lang_name == "":
                    lang_code = candidate_code
                    lang_name = candidate_name
            continue

        if isinstance(child, TemplateNode):
            if not seen_colon:
                # A template before the colon names the language.
                lang_name = clean_node(wxr, None, child)
                lang_code = (
                    child.template_parameters.get(1, "")
                    if child.template_name == "T"
                    else child.template_name
                )
                continue

            template = child.template_name.lower()
            if template in word_templates:
                produced = process_t_template(
                    wxr, word_entry, child, sense_text, lang_name, lang_code
                )
                if produced:
                    previous = produced[-1]
            elif template == "archar":
                previous = Translation(
                    word=clean_node(wxr, None, child),
                    sense=sense_text,
                    lang_code=lang_code,
                    lang=lang_name,
                )
                word_entry.translations.append(previous)
            elif template in gender_templates and previous is not None:
                # Gender/number templates qualify the preceding translation.
                previous.raw_tags.append(clean_node(wxr, None, child))
                translate_raw_tags(previous)
            elif template == "zh-ts":
                previous = process_zh_ts_template(
                    wxr, word_entry, child, sense_text, lang_name, lang_code
                )
            continue

        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LINK and seen_colon:
            linked_word = clean_node(wxr, None, child)
            if len(linked_word) > 0:
                previous = Translation(
                    word=linked_word,
                    sense=sense_text,
                    lang_code=lang_code,
                    lang=lang_name,
                )
                word_entry.translations.append(previous)
        elif child.kind == NodeKind.LIST:
            # NOTE(review): find_child_recursively may also yield items
            # nested more deeply, which the recursive call below would then
            # visit again - confirm against wikitextprocessor.
            for nested_item in child.find_child_recursively(
                NodeKind.LIST_ITEM
            ):
                process_translation_list_item(
                    wxr,
                    word_entry,
                    nested_item,
                    sense_text,
                    lang_name,
                    lang_code,
                )

121 

122 

# Short grammatical codes looked up by process_t_template for the third and
# fourth positional template arguments. A code maps either to a single tag
# or to a list of tags.
T_TAGS = {
    # gender
    "m": "masculine",
    "f": "feminine",
    "mf": ["masculine", "feminine"],
    "n": "neuter",
    "c": "common",
    # aspect
    "impf": "imperfective",
    "pf": "perfective",
    # number
    "s": "singular",
    "p": "plural",
}

134 

135 

def process_t_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> list[Translation]:
    """Extract translations from a ``t``-style translation template.

    The created translations are appended to ``word_entry.translations``
    and also returned, so the caller can attach later gender templates to
    the last one.

    If the second argument contains a ``zh-l`` template, one translation
    is created per entry returned by ``extract_zh_l_template``. Otherwise
    a single translation is built from the second argument (or ``alt``
    when present), the ``tr`` romanization and the tag codes in the third
    and fourth arguments. An empty list is returned when no word is found.
    """
    # https://ja.wiktionary.org/wiki/テンプレート:t
    word_arg = node.template_parameters.get(2, "")
    second_arg = wxr.wtp.parse(wxr.wtp.node_to_wikitext(word_arg))
    for t_node in second_arg.find_child(NodeKind.TEMPLATE):
        if t_node.template_name == "zh-l":
            # Imported here rather than at module level, as in the original.
            from .linkage import extract_zh_l_template

            tr_list = []
            for l_data in extract_zh_l_template(wxr, t_node):
                tr_data = Translation(
                    word=l_data.word,
                    tags=l_data.tags,
                    roman=l_data.roman,
                    # Keep the sense gloss, as every other translation
                    # built in this module does.
                    sense=sense_text,
                    lang=lang_name,
                    lang_code=lang_code,
                )
                tr_list.append(tr_data)
                word_entry.translations.append(tr_data)
            return tr_list

    tr_word = clean_node(wxr, None, word_arg)
    if "alt" in node.template_parameters:
        # The alternative display form replaces the plain word.
        tr_word = clean_node(wxr, None, node.template_parameters["alt"])
    roman = clean_node(wxr, None, node.template_parameters.get("tr", ""))

    tags = []
    for arg_index in (3, 4):
        if arg_index not in node.template_parameters:
            continue
        tag_arg = clean_node(wxr, None, node.template_parameters[arg_index])
        # Unknown codes map to an empty list and add no tags.
        tag_value = T_TAGS.get(tag_arg, [])
        if isinstance(tag_value, str):
            tags.append(tag_value)
        elif isinstance(tag_value, list):
            tags.extend(tag_value)

    if len(tr_word) > 0:
        tr_data = Translation(
            word=tr_word,
            roman=roman,
            sense=sense_text,
            lang_code=lang_code,
            lang=lang_name,
            tags=tags,
        )
        word_entry.translations.append(tr_data)
        return [tr_data]
    return []

192 

193 

def process_zh_ts_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> Optional[Translation]:
    """Extract translations from a ``zh-ts`` template.

    The first positional argument is tagged "Traditional Chinese" and the
    second "Simplified Chinese". Each non-empty argument becomes one
    translation appended to ``word_entry.translations``. Returns the last
    translation created, or ``None`` when both arguments are empty.
    """
    # https://ja.wiktionary.org/wiki/テンプレート:zh-ts
    script_by_position = (
        (1, "Traditional Chinese"),
        (2, "Simplified Chinese"),
    )
    latest: Optional[Translation] = None
    for position, script_tag in script_by_position:
        form = clean_node(
            wxr, None, node.template_parameters.get(position, "")
        )
        if form == "":
            continue
        latest = Translation(
            word=form,
            sense=sense_text,
            lang_code=lang_code,
            lang=lang_name,
        )
        latest.tags = [script_tag]
        word_entry.translations.append(latest)
    return latest