Coverage for src/wiktextract/extractor/ja/translation.py: 92%
94 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
1from typing import Optional
3from mediawiki_langcodes import name_to_code
4from wikitextprocessor.parser import LevelNode, NodeKind, TemplateNode, WikiNode
6from ...page import clean_node
7from ...wxr_context import WiktextractContext
8from .models import Translation, WordEntry
9from .tags import translate_raw_tags
def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Collect the translations of one translation section.

    The template and list children of ``level_node`` are visited in
    document order.  A ``trans-top`` template updates the current sense
    text (taken from its first argument); every item of each list that
    follows is handed to ``process_translation_list_item`` together with
    that sense text and an empty inherited language name/code.
    """
    current_sense = ""
    wanted_kinds = NodeKind.TEMPLATE | NodeKind.LIST
    for child in level_node.find_child(wanted_kinds):
        is_trans_top = (
            isinstance(child, TemplateNode)
            and child.template_name == "trans-top"
        )
        if is_trans_top:
            gloss_arg = child.template_parameters.get(1, "")
            current_sense = clean_node(wxr, None, gloss_arg)
            continue
        if child.kind != NodeKind.LIST:
            # Any other template: nothing to extract.
            continue
        for item in child.find_child(NodeKind.LIST_ITEM):
            process_translation_list_item(
                wxr, word_entry, item, current_sense, "", ""
            )
def process_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> None:
    """Extract the translations found in one list item.

    Everything before the first ``:`` in the item names the language;
    everything after it is translation data.  Translations are appended
    to ``word_entry.translations``.

    Args:
        wxr: Extraction context passed through to the helpers.
        word_entry: Entry that receives the extracted translations.
        list_item: The list item node to scan.
        sense_text: Gloss attached to every translation created here.
        lang_name: Language name inherited from the parent item ("" at
            the top level).
        lang_code: Language code inherited from the parent item ("" at
            the top level).
    """
    t_templates = ("t+", "t", "t-", "l", "lang")
    gender_templates = (
        "m",
        "f",
        "p",
        "n",
        "c",
        "s",
        "mf",
        "mpl",
        "fpl",
        "npl",
        "inv",
    )
    after_colon = False
    last_tr: Optional[Translation] = None
    for node_index, node in enumerate(list_item.children):
        if isinstance(node, str) and ":" in node and not after_colon:
            after_colon = True
            # Language name = all nodes before this string plus the part
            # of this string that precedes the colon.
            lang_nodes = list_item.children[:node_index]
            lang_nodes.append(node[: node.index(":")])
            new_lang_name = clean_node(wxr, None, lang_nodes)
            new_lang_code = name_to_code(new_lang_name, "ja")
            # Keep the inherited language when the new name has no known
            # code, unless there is nothing inherited to fall back on.
            if new_lang_code != "" or lang_name == "":
                lang_code = new_lang_code
                lang_name = new_lang_name
        elif isinstance(node, TemplateNode):
            template_name = node.template_name.lower()
            if not after_colon:
                # A template before the colon is a language-name template:
                # its expansion is the name, and the code is its own name
                # (or the first argument of the "T" template).
                lang_name = clean_node(wxr, None, node)
                if node.template_name == "T":
                    lang_code = node.template_parameters.get(1, "")
                else:
                    lang_code = node.template_name
            elif template_name in t_templates:
                for tr_data in process_t_template(
                    wxr, word_entry, node, sense_text, lang_name, lang_code
                ):
                    last_tr = tr_data
            elif template_name == "archar":
                tr_data = Translation(
                    word=clean_node(wxr, None, node),
                    sense=sense_text,
                    lang_code=lang_code,
                    lang=lang_name,
                )
                word_entry.translations.append(tr_data)
                last_tr = tr_data
            elif template_name in gender_templates and last_tr is not None:
                # Gender/number marker: belongs to the preceding translation.
                last_tr.raw_tags.append(clean_node(wxr, None, node))
                translate_raw_tags(last_tr)
            elif template_name == "zh-ts":
                last_tr = process_zh_ts_template(
                    wxr, word_entry, node, sense_text, lang_name, lang_code
                )
        elif (
            isinstance(node, WikiNode)
            and node.kind == NodeKind.LINK
            and after_colon
        ):
            tr_word = clean_node(wxr, None, node)
            if len(tr_word) > 0:
                tr_data = Translation(
                    word=tr_word,
                    sense=sense_text,
                    lang_code=lang_code,
                    lang=lang_name,
                )
                word_entry.translations.append(tr_data)
                last_tr = tr_data
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            # Only the direct items are iterated here; deeper levels are
            # reached by the recursive call itself.  Iterating every
            # descendant item at this point as well would hand items
            # nested two or more levels down to this function more than
            # once (and with the wrong inherited language).
            for nested_list_item in node.find_child(NodeKind.LIST_ITEM):
                process_translation_list_item(
                    wxr,
                    word_entry,
                    nested_list_item,
                    sense_text,
                    lang_name,
                    lang_code,
                )
# Abbreviations accepted as the 3rd/4th positional argument of the
# translation templates handled by process_t_template, mapped to the
# tag (str) or tags (list of str) they stand for.
T_TAGS = {
    # gender
    "m": "masculine",
    "f": "feminine",
    "mf": ["masculine", "feminine"],
    "n": "neuter",
    "c": "common",
    # aspect
    "impf": "imperfective",
    "pf": "perfective",
    # number
    "s": "singular",
    "p": "plural",
}
def process_t_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> list[Translation]:
    """Extract translations from a ``t``-style translation template.

    If the template's second argument contains a ``zh-l`` template, one
    translation is created per item returned by ``extract_zh_l_template``.
    Otherwise a single translation is built from the second argument (or
    the ``alt`` argument when present), with ``tr`` as romanization and
    arguments 3 and 4 looked up in ``T_TAGS``.

    Every translation created is appended to ``word_entry.translations``.

    Returns:
        The translations created by this call; empty when the word text
        is empty.
    """
    # https://ja.wiktionary.org/wiki/テンプレート:t
    params = node.template_parameters
    second_arg = wxr.wtp.parse(wxr.wtp.node_to_wikitext(params.get(2, "")))
    for t_node in second_arg.find_child(NodeKind.TEMPLATE):
        if t_node.template_name == "zh-l":
            # Function-level import, kept as in the original
            # (presumably to avoid a circular import -- TODO confirm).
            from .linkage import extract_zh_l_template

            tr_list = []
            for l_data in extract_zh_l_template(wxr, t_node):
                tr_data = Translation(
                    word=l_data.word,
                    tags=l_data.tags,
                    roman=l_data.roman,
                    # Carry the gloss like every other translation built
                    # in this module does.
                    sense=sense_text,
                    lang=lang_name,
                    lang_code=lang_code,
                )
                tr_list.append(tr_data)
                word_entry.translations.append(tr_data)
            return tr_list

    # "alt" overrides the displayed word taken from the second argument.
    tr_word = clean_node(wxr, None, params.get(2, ""))
    if "alt" in params:
        tr_word = clean_node(wxr, None, params["alt"])
    roman = clean_node(wxr, None, params.get("tr", ""))

    tags = []
    for arg_index in (3, 4):
        if arg_index not in params:
            continue
        tag_arg = clean_node(wxr, None, params[arg_index])
        # T_TAGS values are either one tag or a list of tags; unknown
        # abbreviations contribute nothing.
        tag_value = T_TAGS.get(tag_arg, [])
        if isinstance(tag_value, str):
            tags.append(tag_value)
        elif isinstance(tag_value, list):
            tags.extend(tag_value)

    if len(tr_word) == 0:
        return []
    tr_data = Translation(
        word=tr_word,
        roman=roman,
        sense=sense_text,
        lang_code=lang_code,
        lang=lang_name,
        tags=tags,
    )
    word_entry.translations.append(tr_data)
    return [tr_data]
def process_zh_ts_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> Optional[Translation]:
    """Extract the translations carried by a ``zh-ts`` template.

    The first positional argument is tagged "Traditional Chinese" and the
    second "Simplified Chinese".  Each non-empty argument becomes a
    translation appended to ``word_entry.translations``.

    Returns:
        The last translation created, or ``None`` when both arguments
        are empty.
    """
    # https://ja.wiktionary.org/wiki/テンプレート:zh-ts
    script_tag_by_position = {
        1: "Traditional Chinese",
        2: "Simplified Chinese",
    }
    latest: Optional[Translation] = None
    for position, script_tag in script_tag_by_position.items():
        raw_arg = node.template_parameters.get(position, "")
        text = clean_node(wxr, None, raw_arg)
        if text == "":
            continue
        latest = Translation(
            word=text,
            sense=sense_text,
            lang_code=lang_code,
            lang=lang_name,
        )
        latest.tags = [script_tag]
        word_entry.translations.append(latest)
    return latest