Coverage for src/wiktextract/extractor/zh/translation.py: 89%
102 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1from mediawiki_langcodes import code_to_name, name_to_code
2from wikitextprocessor.parser import (
3 LEVEL_KIND_FLAGS,
4 NodeKind,
5 TemplateNode,
6 WikiNode,
7)
9from ...page import clean_node
10from ...wxr_context import WiktextractContext
11from .models import Translation, WordEntry
12from .section_titles import TRANSLATIONS_TITLES
13from .tags import TEMPLATE_TAG_ARGS, translate_raw_tags
def extract_translation(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: WikiNode,
    sense: str = "",
    is_subpage: bool = False,
) -> None:
    """Extract translations from the direct children of ``level_node``.

    Template children are dispatched on their lower-cased name:

    * "trans-top", "翻譯-頂" and "trans-top-also" replace the current sense
      with their first argument, when that argument is present;
    * "see translation subpage" and "trans-see" are followed through
      ``translation_subpage()`` unless ``is_subpage`` is set;
    * "multitrans" has its ``data`` argument converted back to wikitext,
      re-parsed and processed recursively;
    * any other template is ignored.

    Non-template children have every list item found below them passed to
    ``process_translation_list_item()``.

    Args:
        wxr: Extraction context; its ``wtp`` processor does the parsing.
        page_data: Word entries that the called helpers add translations to.
        level_node: Node whose template and list children are scanned.
        sense: Sense text given to translations until a "trans-top" style
            template with a first argument replaces it.
        is_subpage: True when ``level_node`` was parsed from a translation
            subpage, so subpage-reference templates are not followed again.
    """
    for child in level_node.find_child(NodeKind.TEMPLATE | NodeKind.LIST):
        if isinstance(child, TemplateNode):
            template_name = child.template_name.lower()
            if (
                template_name in {"trans-top", "翻譯-頂", "trans-top-also"}
                and 1 in child.template_parameters
            ):
                sense = clean_node(wxr, None, child.template_parameters.get(1))
            elif (
                template_name in {"see translation subpage", "trans-see"}
                and not is_subpage
            ):
                translation_subpage(wxr, page_data, child)
            elif template_name == "multitrans":
                wikitext = "".join(
                    wxr.wtp.node_to_wikitext(c)
                    for c in child.template_parameters.get("data", [])
                )
                multitrans = wxr.wtp.parse(wikitext)
                # Keep the subpage flag: without it a subpage-reference
                # template inside the "data" argument of a subpage would be
                # followed again and could recurse without end.
                extract_translation(
                    wxr, page_data, multitrans, sense, is_subpage
                )
        else:
            for list_item in child.find_child_recursively(NodeKind.LIST_ITEM):
                process_translation_list_item(
                    wxr,
                    page_data,
                    list_item,
                    sense,
                )
def process_translation_list_item(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    list_item: WikiNode,
    sense: str,
) -> None:
    """Parse one translation list item into ``Translation`` records.

    The first non-empty child names the language: for plain text it is the
    part before the first colon (full-width or ASCII), otherwise the cleaned
    text of the node.  Later children are handled by kind:

    * translation templates ("t", "t+", "tt", "tt+", "t-check", "t+check",
      "l") and link nodes each start a new translation, storing the
      previous one first if it already has a word;
    * "t-needed" is skipped;
    * "qualifier"/"q" and templates whose cleaned text is wrapped in
      "〈…〉" add a raw tag to the translation being built.

    Every stored translation goes through ``translate_raw_tags()`` and is
    appended to ``page_data[-1].translations``.

    Args:
        wxr: Extraction context passed on to ``clean_node()``.
        page_data: Word entries; the last one receives the translations.
        list_item: The list item node to parse.
        sense: Sense text recorded on every translation of this item.
    """
    tr_data = Translation(
        word="", sense=sense, lang="unknown", lang_code="unknown"
    )

    for child_index, child in enumerate(list_item.filter_empty_str_child()):
        if child_index == 0:
            if isinstance(child, str):
                lang_text = _language_prefix(child)
            else:
                lang_text = clean_node(wxr, None, child)
            if lang_text:
                tr_data.lang = lang_text.strip()
                tr_data.lang_code = name_to_code(tr_data.lang, "zh")
        elif isinstance(child, TemplateNode):
            template_name = child.template_name.lower()
            if template_name in {
                "t",
                "t+",
                "tt",
                "tt+",
                "t-check",
                "t+check",
                "l",
            }:
                tr_data = _begin_next_translation(page_data, tr_data, sense)
                if tr_data.lang_code == "":
                    tr_data.lang_code = child.template_parameters.get(1, "")
                if tr_data.lang == "":
                    tr_data.lang = code_to_name(tr_data.lang_code, "zh")
                tr_data.word = clean_node(
                    wxr, None, child.template_parameters.get(2, "")
                )
                tr_data.roman = clean_node(
                    wxr, None, child.template_parameters.get("tr", "")
                )
                tr_data.alt = clean_node(
                    wxr, None, child.template_parameters.get("alt", "")
                )
                tr_data.lit = clean_node(
                    wxr, None, child.template_parameters.get("lit", "")
                )
                for arg_key, arg_value in child.template_parameters.items():
                    is_tag_arg = (
                        isinstance(arg_key, int) and arg_key >= 3
                    ) or arg_key == "g"  # template "l" uses the "g" arg
                    # Only plain-string arguments can be split into tag
                    # codes; anything else is skipped instead of raising.
                    if not is_tag_arg or not isinstance(arg_value, str):
                        continue
                    for tag_arg in arg_value.split("-"):
                        if tag_arg in TEMPLATE_TAG_ARGS:
                            tag = TEMPLATE_TAG_ARGS[tag_arg]
                            if isinstance(tag, str):
                                tr_data.tags.append(tag)
                            elif isinstance(tag, list):
                                tr_data.tags.extend(tag)
            elif template_name == "t-needed":
                # ignore empty translation
                continue
            elif template_name in ("qualifier", "q"):
                raw_tag = clean_node(wxr, None, child)
                tr_data.raw_tags.append(raw_tag.strip("()"))
            else:
                # zh qualifier templates that use template "注释"
                # https://zh.wiktionary.org/wiki/Template:注释
                raw_tag = clean_node(wxr, None, child)
                if raw_tag.startswith("〈") and raw_tag.endswith("〉"):
                    tr_data.raw_tags.append(raw_tag.strip("〈〉"))
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LINK:
            tr_data = _begin_next_translation(page_data, tr_data, sense)
            tr_data.word = clean_node(wxr, None, child)

    if tr_data.word:
        _store_translation(page_data, tr_data)


def _language_prefix(text: str) -> str:
    """Return the part of ``text`` before its first colon, or "" if none.

    The full-width colon is checked before the ASCII one.
    """
    for colon in ("：", ":"):
        if colon in text:
            return text[: text.index(colon)]
    return ""


def _store_translation(
    page_data: list[WordEntry], tr_data: Translation
) -> None:
    """Translate the raw tags of ``tr_data`` and append a deep copy of it."""
    translate_raw_tags(tr_data)
    page_data[-1].translations.append(tr_data.model_copy(deep=True))


def _begin_next_translation(
    page_data: list[WordEntry], tr_data: Translation, sense: str
) -> Translation:
    """Store ``tr_data`` if it has a word and return the record to fill next.

    A record without a word is returned unchanged, so raw tags collected
    before the first word stay attached to it.  Otherwise a fresh record
    for the same language and sense is returned.
    """
    if not tr_data.word:
        return tr_data
    _store_translation(page_data, tr_data)
    return Translation(
        word="",
        lang=tr_data.lang,
        lang_code=tr_data.lang_code,
        sense=sense,
    )
def translation_subpage(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    template_node: TemplateNode,
) -> None:
    """Extract translations from the subpage a template refers to.

    For "see translation subpage" the first argument is the target section
    and the second, if given, the page title (any "#fragment" is dropped).
    For every other template the current page title is used with no target
    section.  When the resulting title equals the current page title, the
    subpage "<title>/翻譯" is loaded instead.  Nothing is extracted if the
    page cannot be found.

    Args:
        wxr: Extraction context; its ``wtp`` processor loads and parses the
            subpage.
        page_data: Word entries passed on to ``extract_translation()``.
        template_node: The subpage-reference template found on the page.
    """
    # https://zh.wiktionary.org/wiki/Template:翻譯-見
    # https://zh.wiktionary.org/wiki/Template:See_translation_subpage

    page_title = wxr.wtp.title
    target_section = None
    # extract_translation() matches this template on its lower-cased name,
    # so compare the same way; otherwise "{{See translation subpage|...}}"
    # would silently lose its section and page arguments.
    if template_node.template_name.lower() == "see translation subpage":
        target_section = template_node.template_parameters.get(1)
        page_title = clean_node(
            wxr, None, template_node.template_parameters.get(2, wxr.wtp.title)
        )
        if "#" in page_title:
            page_title = page_title[: page_title.index("#")]

    translation_subpage_title = page_title
    if page_title == wxr.wtp.title:
        translation_subpage_title = f"{page_title}/翻譯"
    subpage = wxr.wtp.get_page(translation_subpage_title)
    if subpage is None:
        return

    root = wxr.wtp.parse(subpage.body)
    # Fall back to the whole parsed subpage when no section matches.
    target_section_node = (
        find_subpage_section(wxr, root, target_section) or root
    )
    if target_section_node is not None:
        extract_translation(
            wxr, page_data, target_section_node, is_subpage=True
        )
def find_subpage_section(
    wxr: WiktextractContext,
    node: WikiNode | str,
    target_section: str | None = None,
) -> WikiNode | None:
    """Return the first section below ``node`` that holds the translations.

    Level nodes are visited in the order ``find_child_recursively()`` yields
    them.  The first one whose cleaned title equals ``target_section`` (only
    when that is a string) or is listed in ``TRANSLATIONS_TITLES`` is
    returned.  ``None`` is returned when ``node`` is not a ``WikiNode`` or
    no section matches.
    """
    if not isinstance(node, WikiNode):
        return None

    # A non-string target can never match a cleaned title.
    wanted_title = target_section if isinstance(target_section, str) else None
    for candidate in node.find_child_recursively(LEVEL_KIND_FLAGS):
        heading = clean_node(wxr, None, candidate.largs)
        is_requested = wanted_title is not None and heading == wanted_title
        if is_requested or heading in TRANSLATIONS_TITLES:
            return candidate
    return None