Coverage for src/wiktextract/extractor/ja/translation.py: 92%
83 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from typing import Optional
3from mediawiki_langcodes import name_to_code
4from wikitextprocessor.parser import LevelNode, NodeKind, TemplateNode, WikiNode
6from ...page import clean_node
7from ...wxr_context import WiktextractContext
8from .models import Translation, WordEntry
9from .tags import translate_raw_tags
def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Walk a translation section and process every translation list item.

    Only the template and list children of ``level_node`` are visited, in
    document order. A ``trans-top`` template sets the sense text (taken from
    its first positional argument) that is passed along with the items of
    every list that follows it, until the next ``trans-top``. Each list item
    is handed to ``process_translation_list_item`` with an empty language
    name and code.
    """
    current_sense = ""
    wanted_kinds = NodeKind.TEMPLATE | NodeKind.LIST
    for child in level_node.find_child(wanted_kinds):
        is_trans_top = (
            isinstance(child, TemplateNode)
            and child.template_name == "trans-top"
        )
        if is_trans_top:
            sense_arg = child.template_parameters.get(1, "")
            current_sense = clean_node(wxr, None, sense_arg)
            continue
        if child.kind != NodeKind.LIST:
            # Any other template carries nothing handled here.
            continue
        for item in child.find_child(NodeKind.LIST_ITEM):
            process_translation_list_item(
                wxr, word_entry, item, current_sense, "", ""
            )
# Templates that wrap one translated word; handled by process_t_template.
_TRANSLATION_TEMPLATES = frozenset({"t+", "t", "t-", "l", "lang"})
# Gender/number marker templates that annotate the preceding translation.
_GENDER_NUMBER_TEMPLATES = frozenset(
    {"m", "f", "p", "n", "c", "s", "mf", "mpl", "fpl", "npl", "inv"}
)


def process_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> None:
    """Extract the translations found in one list item.

    The item is read as ``<language> : <translations...>``. Everything
    before the first ":" found in a plain-text child names the language;
    templates and links after it are turned into ``Translation`` objects
    appended to ``word_entry.translations``.

    Args:
        wxr: Context passed through to ``clean_node``.
        word_entry: Entry whose ``translations`` list is extended in place.
        list_item: The list item node to scan.
        sense_text: Sense (gloss) text recorded on every translation.
        lang_name: Language name inherited from the parent item ("" at the
            top level).
        lang_code: Language code inherited from the parent item ("" at the
            top level).
    """
    after_colon = False
    last_tr: Optional[Translation] = None
    for node_index, node in enumerate(list_item.children):
        if isinstance(node, str) and ":" in node and not after_colon:
            after_colon = True
            # The language name is everything before the first colon.
            lang_nodes = list_item.children[:node_index]
            lang_nodes.append(node[: node.index(":")])
            new_lang_name = clean_node(wxr, None, lang_nodes)
            new_lang_code = name_to_code(new_lang_name, "ja")
            # Keep the inherited language when the lookup returns "" for
            # this name (e.g. a dialect/script label in a nested item),
            # unless there is no inherited language to fall back to.
            if new_lang_code != "" or lang_name == "":
                lang_code = new_lang_code
                lang_name = new_lang_name
        elif isinstance(node, TemplateNode):
            template_name = node.template_name.lower()
            if not after_colon:
                # A template before the colon names the language itself.
                lang_name = clean_node(wxr, None, node)
                if node.template_name == "T":
                    lang_code = node.template_parameters.get(1, "")
                else:
                    lang_code = node.template_name
            elif template_name in _TRANSLATION_TEMPLATES:
                last_tr = process_t_template(
                    wxr, word_entry, node, sense_text, lang_name, lang_code
                )
            elif template_name == "archar":
                tr_data = Translation(
                    word=clean_node(wxr, None, node),
                    sense=sense_text,
                    lang_code=lang_code,
                    lang=lang_name,
                )
                word_entry.translations.append(tr_data)
                last_tr = tr_data
            elif (
                template_name in _GENDER_NUMBER_TEMPLATES
                and last_tr is not None
            ):
                # Marker templates qualify the translation just before them.
                last_tr.raw_tags.append(clean_node(wxr, None, node))
                translate_raw_tags(last_tr)
            elif template_name == "zh-ts":
                last_tr = process_zh_ts_template(
                    wxr, word_entry, node, sense_text, lang_name, lang_code
                )
        elif (
            isinstance(node, WikiNode)
            and node.kind == NodeKind.LINK
            and after_colon
        ):
            tr_word = clean_node(wxr, None, node)
            if tr_word:
                tr_data = Translation(
                    word=tr_word,
                    sense=sense_text,
                    lang_code=lang_code,
                    lang=lang_name,
                )
                word_entry.translations.append(tr_data)
                last_tr = tr_data
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            # Visit only the direct items of the nested list: each item
            # recurses into its own sub-lists, so a recursive search here
            # would process deeper items more than once (and the first time
            # with this item's language instead of their parent's).
            for nested_list_item in node.find_child(NodeKind.LIST_ITEM):
                process_translation_list_item(
                    wxr,
                    word_entry,
                    nested_list_item,
                    sense_text,
                    lang_name,
                    lang_code,
                )
# Maps the short marker arguments looked up by process_t_template to tag
# names. A value is either a single tag or a list of tags.
T_TAGS = {
    # grammatical gender
    "m": "masculine",
    "f": "feminine",
    "mf": ["masculine", "feminine"],
    "n": "neuter",
    "c": "common",
    # verbal aspect
    "impf": "imperfective",
    "pf": "perfective",
    # grammatical number
    "s": "singular",
    "p": "plural",
}
def process_t_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> Optional[Translation]:
    """Build a translation from a ``t``-style template and record it.

    The word comes from the second positional argument, replaced by ``alt``
    whenever that argument is present. The romanization comes from ``tr``.
    Positional arguments 3 and 4 are looked up in ``T_TAGS``; values not
    found there contribute no tag.

    Returns the ``Translation`` appended to ``word_entry.translations``, or
    ``None`` when the resulting word is empty.
    """
    # https://ja.wiktionary.org/wiki/テンプレート:t
    params = node.template_parameters
    word = clean_node(wxr, None, params.get(2, ""))
    if "alt" in params:
        word = clean_node(wxr, None, params["alt"])
    romanization = clean_node(wxr, None, params.get("tr", ""))

    collected_tags = []
    for position in (3, 4):
        if position not in params:
            continue
        tag_key = clean_node(wxr, None, params.get(position, ""))
        mapped = T_TAGS.get(tag_key, [])
        if isinstance(mapped, str):
            collected_tags.append(mapped)
        elif isinstance(mapped, list):
            collected_tags.extend(mapped)

    if not word:
        return None
    translation = Translation(
        word=word,
        roman=romanization,
        sense=sense_text,
        lang_code=lang_code,
        lang=lang_name,
        tags=collected_tags,
    )
    word_entry.translations.append(translation)
    return translation
def process_zh_ts_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
    sense_text: str,
    lang_name: str,
    lang_code: str,
) -> Optional[Translation]:
    """Record the translations carried by a ``zh-ts`` template.

    The first positional argument is tagged "Traditional Chinese" and the
    second "Simplified Chinese". Each non-empty one is appended to
    ``word_entry.translations``.

    Returns the last translation appended, or ``None`` if both arguments
    were empty.
    """
    # https://ja.wiktionary.org/wiki/テンプレート:zh-ts
    script_by_position = (
        (1, "Traditional Chinese"),
        (2, "Simplified Chinese"),
    )
    latest = None
    for position, script_tag in script_by_position:
        raw_value = node.template_parameters.get(position, "")
        word = clean_node(wxr, None, raw_value)
        if word == "":
            continue
        latest = Translation(
            word=word,
            sense=sense_text,
            lang_code=lang_code,
            lang=lang_name,
        )
        latest.tags = [script_tag]
        word_entry.translations.append(latest)
    return latest