Coverage for src/wiktextract/extractor/ja/translation.py: 92%

1from typing import Optional

3from mediawiki_langcodes import name_to_code

4from wikitextprocessor.parser import LevelNode, NodeKind, TemplateNode, WikiNode

6from ...page import clean_node

7from ...wxr_context import WiktextractContext

8from .models import Translation, WordEntry

9from .tags import translate_raw_tags

def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Extract the translations found directly under a section node.

    Walks the template and list children of ``level_node`` in document
    order. A ``trans-top`` template sets the sense text (its first
    positional argument, cleaned) used for the lists that follow it; every
    item of each list is passed to ``process_translation_list_item`` with
    an empty language name and code.

    Args:
        wxr: Extraction context forwarded to ``clean_node``.
        word_entry: Entry handed on to the list item processor.
        level_node: The section node whose children are scanned.
    """
    current_sense = ""
    for child in level_node.find_child(NodeKind.TEMPLATE | NodeKind.LIST):
        is_trans_top = (
            isinstance(child, TemplateNode)
            and child.template_name == "trans-top"
        )
        if is_trans_top:
            gloss_arg = child.template_parameters.get(1, "")
            current_sense = clean_node(wxr, None, gloss_arg)
            continue
        if child.kind != NodeKind.LIST:
            # Any other template carries nothing this function uses.
            continue
        for item in child.find_child(NodeKind.LIST_ITEM):
            process_translation_list_item(
                wxr, word_entry, item, current_sense, "", ""
            )

def process_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> None:
    """Extract translations from one list item and its nested lists.

    Content before the first ``:`` found in a string child is treated as
    the language; what follows is scanned for translations, which are
    appended to ``word_entry.translations``.

    Args:
        wxr: Extraction context forwarded to ``clean_node``.
        word_entry: Entry whose ``translations`` list is extended.
        list_item: The list item node to process.
        sense_text: Sense text stored on every translation created here.
        lang_name: Language name inherited from the caller; used until this
            item supplies its own.
        lang_code: Language code inherited from the caller.
    """
    after_colon = False
    # Most recent translation, so gender/number templates can attach to it.
    last_tr: Optional[Translation] = None
    for node_index, node in enumerate(list_item.children):
        if isinstance(node, str) and ":" in node and not after_colon:
            after_colon = True
            # The language name is everything before the first colon,
            # including any nodes that precede this string.
            lang_nodes = list_item.children[:node_index]
            lang_nodes.append(node[: node.index(":")])
            new_lang_name = clean_node(wxr, None, lang_nodes)
            new_lang_code = name_to_code(new_lang_name, "ja")
            # Keep the inherited language when this text has no known
            # code, unless there is nothing inherited to fall back on.
            if new_lang_code != "" or lang_name == "":
                lang_code = new_lang_code
                lang_name = new_lang_name
        elif isinstance(node, TemplateNode):
            template_name = node.template_name.lower()
            if not after_colon:
                # A template before the colon names the language.
                lang_name = clean_node(wxr, None, node)
                if node.template_name == "T":
                    lang_code = node.template_parameters.get(1, "")
                else:
                    lang_code = node.template_name
            elif template_name in ("t+", "t", "t-", "l", "lang"):
                last_tr = process_t_template(
                    wxr, word_entry, node, sense_text, lang_name, lang_code
                )
            elif template_name == "archar":
                tr_data = Translation(
                    word=clean_node(wxr, None, node),
                    sense=sense_text,
                    lang_code=lang_code,
                    lang=lang_name,
                )
                word_entry.translations.append(tr_data)
                last_tr = tr_data
            elif (
                template_name
                in (
                    "m",
                    "f",
                    "p",
                    "n",
                    "c",
                    "s",
                    "mf",
                    "mpl",
                    "fpl",
                    "npl",
                    "inv",
                )
                and last_tr is not None
            ):
                # These templates qualify the translation preceding them.
                last_tr.raw_tags.append(clean_node(wxr, None, node))
                translate_raw_tags(last_tr)
            elif template_name == "zh-ts":
                last_tr = process_zh_ts_template(
                    wxr, word_entry, node, sense_text, lang_name, lang_code
                )
        elif (
            isinstance(node, WikiNode)
            and node.kind == NodeKind.LINK
            and after_colon
        ):
            tr_word = clean_node(wxr, None, node)
            if len(tr_word) > 0:
                tr_data = Translation(
                    word=tr_word,
                    sense=sense_text,
                    lang_code=lang_code,
                    lang=lang_name,
                )
                word_entry.translations.append(tr_data)
                last_tr = tr_data
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            # Visit direct items only: each recursive call descends into
            # its own nested lists, so a recursive search here would
            # process deeper items more than once.
            for nested_list_item in node.find_child(NodeKind.LIST_ITEM):
                process_translation_list_item(
                    wxr,
                    word_entry,
                    nested_list_item,
                    sense_text,
                    lang_name,
                    lang_code,
                )

120

121

# Lookup table from a tag argument of a translation template to the tag
# value(s) stored on the translation. A value is either one tag (str) or
# several tags (list of str).
T_TAGS = {
    # gender
    "m": "masculine",
    "f": "feminine",
    "mf": ["masculine", "feminine"],
    "n": "neuter",
    "c": "common",
    # aspect
    "impf": "imperfective",
    "pf": "perfective",
    # number
    "s": "singular",
    "p": "plural",
}

133

134

def process_t_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> Optional[Translation]:
    """Build a translation from a ``t``-style template node.

    Template documentation: https://ja.wiktionary.org/wiki/テンプレート:t

    The word is read from positional argument 2 and replaced by ``alt``
    when that argument is present; the romanization is read from ``tr``.
    Positional arguments 3 and 4 are looked up in ``T_TAGS`` and any match
    is added to the translation's tags.

    Args:
        wxr: Extraction context forwarded to ``clean_node``.
        word_entry: Entry whose ``translations`` list is extended.
        node: The template node to read.
        sense_text: Sense text stored on the translation.
        lang_name: Language name stored on the translation.
        lang_code: Language code stored on the translation.

    Returns:
        The translation appended to ``word_entry``, or ``None`` when the
        cleaned word is empty.
    """
    params = node.template_parameters
    word = clean_node(wxr, None, params.get(2, ""))
    if "alt" in params:
        word = clean_node(wxr, None, params["alt"])
    romanization = clean_node(wxr, None, params.get("tr", ""))

    collected_tags = []
    for position in (3, 4):
        if position not in params:
            continue
        tag_key = clean_node(wxr, None, params.get(position, ""))
        mapped = T_TAGS.get(tag_key, [])
        if isinstance(mapped, str):
            collected_tags.append(mapped)
        elif isinstance(mapped, list):
            collected_tags.extend(mapped)

    if len(word) == 0:
        return None

    translation = Translation(
        word=word,
        roman=romanization,
        sense=sense_text,
        lang_code=lang_code,
        lang=lang_name,
        tags=collected_tags,
    )
    word_entry.translations.append(translation)
    return translation

171

172

def process_zh_ts_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> Optional[Translation]:
    """Build translations from a ``zh-ts`` template node.

    Template documentation: https://ja.wiktionary.org/wiki/テンプレート:zh-ts

    Positional argument 1 is tagged "Traditional Chinese" and positional
    argument 2 "Simplified Chinese". Each non-empty argument becomes its
    own translation appended to ``word_entry.translations``.

    Args:
        wxr: Extraction context forwarded to ``clean_node``.
        word_entry: Entry whose ``translations`` list is extended.
        node: The template node to read.
        sense_text: Sense text stored on each translation.
        lang_name: Language name stored on each translation.
        lang_code: Language code stored on each translation.

    Returns:
        The last translation appended, or ``None`` when both arguments are
        empty.
    """
    script_by_position = (
        (1, "Traditional Chinese"),
        (2, "Simplified Chinese"),
    )
    latest = None
    for position, script_tag in script_by_position:
        raw_arg = node.template_parameters.get(position, "")
        word = clean_node(wxr, None, raw_arg)
        if word == "":
            continue
        latest = Translation(
            word=word,
            sense=sense_text,
            lang_code=lang_code,
            lang=lang_name,
        )
        latest.tags = [script_tag]
        word_entry.translations.append(latest)
    return latest