Coverage for src/wiktextract/extractor/zh/translation.py: 87%
119 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1from mediawiki_langcodes import code_to_name, name_to_code
2from wikitextprocessor.parser import (
3 LEVEL_KIND_FLAGS,
4 LevelNode,
5 NodeKind,
6 TemplateNode,
7 WikiNode,
8)
10from ...page import clean_node
11from ...wxr_context import WiktextractContext
12from .models import Translation, WordEntry
13from .section_titles import TRANSLATIONS_TITLES
14from .tags import TEMPLATE_TAG_ARGS, translate_raw_tags
def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    sense: str = "",
    is_subpage: bool = False,
    source: str = "",
) -> None:
    """Collect the translations found under a translation section.

    Walks the template and list children of ``level_node`` and appends the
    translations found in list items to ``word_entry.translations``.

    Args:
        wxr: Extraction context used for parsing and text cleaning.
        word_entry: Entry that receives the extracted translations.
        level_node: Section node whose children are scanned.
        sense: Gloss attached to the translations. It is replaced by the
            first argument of each "trans-top"-like template, unless a
            non-empty value was passed together with ``is_subpage=True``.
        is_subpage: True when the section belongs to a page reached through
            a "see translation subpage" / "trans-see" template; such
            templates are then not followed again.
        source: Title of the page the translations come from.
    """
    # Decide once, from the arguments as passed in, whether the caller's
    # sense is authoritative. Testing the loop-updated sense instead would
    # freeze it after the first "trans-top" on a subpage entered with an
    # empty sense, so later tables would all carry the first gloss.
    keep_caller_sense = is_subpage and sense != ""
    _extract_translation_nodes(
        wxr, word_entry, level_node, sense, keep_caller_sense, is_subpage, source
    )


def _extract_translation_nodes(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    parent: WikiNode,
    sense: str,
    keep_caller_sense: bool,
    is_subpage: bool,
    source: str,
) -> None:
    """Scan the children of ``parent``, recursing into "multitrans" data."""
    for child in parent.find_child(NodeKind.TEMPLATE | NodeKind.LIST):
        if not isinstance(child, TemplateNode):
            # A list: every (possibly nested) item holds translations.
            for list_item in child.find_child_recursively(NodeKind.LIST_ITEM):
                process_translation_list_item(
                    wxr, word_entry, list_item, sense, source
                )
            continue

        template_name = child.template_name.lower()
        if template_name in ("trans-top", "翻譯-頂", "trans-top-also"):
            if 1 in child.template_parameters and not keep_caller_sense:
                sense = clean_node(wxr, None, child.template_parameters.get(1))
        elif template_name == "see translation subpage":
            if not is_subpage:
                extract_see_trans_subpage_template(wxr, word_entry, child)
        elif template_name in ("trans-see", "翻译-见", "翻譯-見"):
            if not is_subpage:
                extract_trans_see_template(wxr, word_entry, child)
        elif template_name == "multitrans":
            data = child.template_parameters.get("data", [])
            # NOTE(review): a parameter value is presumably a plain string or
            # a single node when it has only one part; wrap it so it is
            # serialized as a whole instead of being iterated directly.
            if not isinstance(data, list):
                data = [data]
            wikitext = "".join(wxr.wtp.node_to_wikitext(part) for part in data)
            # The wrapped tables belong to the same page, so the subpage
            # flag and the sense lock must survive the recursion.
            _extract_translation_nodes(
                wxr,
                word_entry,
                wxr.wtp.parse(wikitext),
                sense,
                keep_caller_sense,
                is_subpage,
                source,
            )
def process_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
    source: str,
) -> None:
    """Extract every translation contained in one list item.

    The first non-empty child gives the language name: for a string, the
    text before the first ASCII or full-width colon; otherwise the cleaned
    text of the node. Later children are translation templates, qualifier
    templates or plain links. Each completed translation is appended to
    ``word_entry.translations`` after its raw tags have been translated.

    Args:
        wxr: Extraction context used for text cleaning.
        word_entry: Entry that receives the extracted translations.
        list_item: List item node to process.
        sense: Gloss stored on every translation of this item.
        source: Page title stored on every translation of this item.
    """
    tr_data = Translation(
        word="", sense=sense, lang="unknown", lang_code="unknown", source=source
    )

    for child_index, child in enumerate(list_item.filter_empty_str_child()):
        if child_index == 0:
            lang_text = ""
            if isinstance(child, str):
                # The ASCII colon takes precedence; the full-width colon
                # "：" is the fallback separator.
                for colon in (":", "："):
                    if colon in child:
                        lang_text = child[: child.index(colon)]
                        break
            else:
                lang_text = clean_node(wxr, None, child)
            if len(lang_text) > 0:
                tr_data.lang = lang_text.strip()
                tr_data.lang_code = name_to_code(tr_data.lang, "zh")
        elif isinstance(child, TemplateNode):
            template_name = child.template_name.lower()
            if template_name in {
                "t",
                "t+",
                "tt",
                "tt+",
                "t-check",
                "t+check",
                "l",
            }:
                tr_data = _start_next_translation(
                    word_entry, tr_data, sense, source
                )
                params = child.template_parameters
                if tr_data.lang_code == "":
                    # Cleaned like every other argument so a non-string
                    # value never ends up in the string field.
                    tr_data.lang_code = clean_node(
                        wxr, None, params.get(1, "")
                    )
                if tr_data.lang == "":
                    tr_data.lang = code_to_name(tr_data.lang_code, "zh")
                tr_data.word = clean_node(wxr, None, params.get(2, ""))
                tr_data.roman = clean_node(wxr, None, params.get("tr", ""))
                tr_data.alt = clean_node(wxr, None, params.get("alt", ""))
                tr_data.lit = clean_node(wxr, None, params.get("lit", ""))
                for arg_key, arg_value in params.items():
                    if (
                        isinstance(arg_key, int) and arg_key >= 3
                    ) or arg_key == "g":  # template "l" uses the "g" arg
                        # clean_node yields a string even when the argument
                        # was parsed into nodes, so split() cannot fail.
                        tag_text = clean_node(wxr, None, arg_value)
                        for tag_arg in tag_text.split("-"):
                            if tag_arg in TEMPLATE_TAG_ARGS:
                                tag = TEMPLATE_TAG_ARGS[tag_arg]
                                if isinstance(tag, str):
                                    tr_data.tags.append(tag)
                                elif isinstance(tag, list):
                                    tr_data.tags.extend(tag)
            elif template_name == "t-needed":
                # ignore empty translation
                continue
            elif template_name in ("qualifier", "q"):
                raw_tag = clean_node(wxr, None, child)
                tr_data.raw_tags.append(raw_tag.strip("()"))
            else:
                # zh qualifier templates that use template "注释"
                # https://zh.wiktionary.org/wiki/Template:注释
                raw_tag = clean_node(wxr, None, child)
                if raw_tag.startswith("〈") and raw_tag.endswith("〉"):
                    tr_data.raw_tags.append(raw_tag.strip("〈〉"))
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LINK:
            tr_data = _start_next_translation(
                word_entry, tr_data, sense, source
            )
            tr_data.word = clean_node(wxr, None, child)

    if len(tr_data.word) > 0:
        translate_raw_tags(tr_data)
        word_entry.translations.append(tr_data.model_copy(deep=True))


def _start_next_translation(
    word_entry: WordEntry, tr_data: Translation, sense: str, source: str
) -> Translation:
    """Store ``tr_data`` if it already has a word and return the next record.

    When no word has been read yet the same object is returned, so any
    qualifiers collected so far stay attached to the upcoming translation.
    """
    if len(tr_data.word) == 0:
        return tr_data
    # Translate raw tags here as well, not only for the last translation of
    # the list item, so that every stored translation is handled alike.
    translate_raw_tags(tr_data)
    word_entry.translations.append(tr_data.model_copy(deep=True))
    return Translation(
        word="",
        lang=tr_data.lang,
        lang_code=tr_data.lang_code,
        sense=sense,
        source=source,
    )
def extract_trans_see_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Follow a "trans-see" template and import the linked translations.

    The first template argument is the sense. Arguments 2 up to 10, read
    until the first missing index, are the titles of the pages holding the
    translations; when argument 2 is absent the first argument doubles as
    the page title. For every page that exists, its translation section is
    extracted into ``word_entry`` with the sense taken from the template.

    Args:
        wxr: Extraction context used to load and parse the linked pages.
        word_entry: Entry that receives the extracted translations.
        t_node: The "trans-see" template node.
    """
    # https://zh.wiktionary.org/wiki/Template:翻譯-見
    params = t_node.template_parameters
    sense = clean_node(wxr, None, params.get(1, ""))
    if 2 in params:
        page_titles = []
        for index in range(2, 11):
            if index not in params:
                break
            page_titles.append(clean_node(wxr, None, params[index]))
    else:
        page_titles = [sense]

    for page_title in page_titles:
        # Drop a "#section" anchor; only the page itself can be loaded.
        page_title = page_title.partition("#")[0]
        page = wxr.wtp.get_page(page_title)
        # A missing page must not abort the remaining titles.
        # NOTE(review): the body check assumes a page may have no text
        # (presumably e.g. a redirect); it is harmless otherwise.
        if page is None or page.body is None:
            continue
        root = wxr.wtp.parse(page.body)
        target_node = find_subpage_section(wxr, root, TRANSLATIONS_TITLES)
        if target_node is not None:
            extract_translation_section(
                wxr,
                word_entry,
                target_node,
                sense=sense,
                is_subpage=True,
                source=page_title,
            )
def extract_see_trans_subpage_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Follow a "see translation subpage" template.

    The first template argument names the section to look for on the
    subpage. The second argument, when present, is the subpage title (any
    "#anchor" is removed); otherwise "<current page title>/翻譯" is used.
    Translations are taken from the translation section nested in the
    matching section, or from the matching section itself when it has none.

    Args:
        wxr: Extraction context used to load and parse the subpage.
        word_entry: Entry that receives the extracted translations.
        t_node: The "see translation subpage" template node.
    """
    # https://zh.wiktionary.org/wiki/Template:See_translation_subpage
    params = t_node.template_parameters
    target_pos = clean_node(wxr, None, params.get(1, ""))
    if 2 in params:
        subpage_title = clean_node(wxr, None, params.get(2, ""))
        subpage_title = subpage_title.partition("#")[0]
    else:
        subpage_title = f"{wxr.wtp.title}/翻譯"

    page = wxr.wtp.get_page(subpage_title)
    # NOTE(review): the body check assumes a page may have no text
    # (presumably e.g. a redirect); it is harmless otherwise.
    if page is None or page.body is None:
        return
    root = wxr.wtp.parse(page.body)
    # Wrap the title in a set: handing over the bare string would turn the
    # membership test into a substring match, so a heading that is merely
    # part of the requested title would be accepted.
    pos_section = find_subpage_section(wxr, root, {target_pos})
    if pos_section is None:
        return
    translation_section = find_subpage_section(
        wxr, pos_section, TRANSLATIONS_TITLES
    )
    extract_translation_section(
        wxr,
        word_entry,
        pos_section if translation_section is None else translation_section,
        is_subpage=True,
        source=subpage_title,
    )
def find_subpage_section(
    wxr: WiktextractContext, root: WikiNode, target_sections: set[str]
) -> WikiNode | None:
    """Return the first section under ``root`` with a wanted title.

    Section nodes are visited in the order produced by
    ``root.find_child_recursively(LEVEL_KIND_FLAGS)``. A node is selected
    when the cleaned text of its heading arguments satisfies
    ``title in target_sections``. ``None`` is returned when no node matches.
    """
    candidates = root.find_child_recursively(LEVEL_KIND_FLAGS)
    return next(
        (
            section
            for section in candidates
            if clean_node(wxr, None, section.largs) in target_sections
        ),
        None,
    )