Coverage for src/wiktextract/extractor/tr/translation.py: 64%
76 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1from mediawiki_langcodes import name_to_code
2from wikitextprocessor.parser import (
3 LEVEL_KIND_FLAGS,
4 LevelNode,
5 NodeKind,
6 TemplateNode,
7 WikiNode,
8)
10from ...page import clean_node
11from ...wxr_context import WiktextractContext
12from .models import Translation, WordEntry
13from .tags import translate_raw_tags
def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    sense: str = "",
    from_trans_see: bool = False,
    source: str = "",
) -> None:
    """Collect translations from the direct children of a section node.

    Three kinds of children are handled:

    * ``üst`` / ``trans-top`` templates (name compared case-insensitively):
      their first parameter becomes the sense used for the list items that
      follow.
    * Lists: every list item is passed to
      ``extract_translation_list_item``.
    * Translation redirect templates: passed to
      ``extract_trans_see_template``.

    Args:
        wxr: Extraction context.
        word_entry: Entry whose ``translations`` list is extended.
        level_node: Section node whose children are scanned.
        sense: Initial sense attached to the extracted translations.
        from_trans_see: True when called from
            ``extract_trans_see_template``. Redirect templates are then
            ignored, and a non-empty ``sense`` supplied by the caller is kept
            for the whole section instead of being replaced by header
            templates.
        source: Value stored in the ``source`` field of each translation.
    """
    header_templates = ("üst", "trans-top")
    redirect_templates = (
        "çeviri yönlendirme",
        "Türk dilleri-yönlendirme",
        "tercüme-yönlendirme",
        "çeviri-yönlendirme",
        "tercüme yönlendirme",
    )
    # Decide once, from the caller-supplied value.  Testing the live ``sense``
    # variable inside the loop would lock in the first header's gloss for all
    # later tables whenever a redirect arrives with an empty gloss.
    keep_caller_sense = from_trans_see and sense != ""

    for node in level_node.children:
        if (
            isinstance(node, TemplateNode)
            and not keep_caller_sense
            and node.template_name.lower() in header_templates
        ):
            sense = clean_node(wxr, None, node.template_parameters.get(1, ""))
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, list_item, sense, source
                )
        elif (
            isinstance(node, TemplateNode)
            and not from_trans_see
            and node.template_name in redirect_templates
        ):
            # Only followed at the top level: a section reached through a
            # redirect does not follow further redirects.
            extract_trans_see_template(wxr, word_entry, node)
def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
    source: str,
) -> None:
    """Extract the translations contained in a single list item.

    The language name is taken from everything up to the last ``:`` of the
    first text child that contains a colon.  After that, ``ç`` / ``çeviri``
    templates are delegated to ``extract_çeviri_template``, nested lists are
    processed recursively, and plain links that appear after the colon are
    stored directly as translations.

    Args:
        wxr: Extraction context.
        word_entry: Entry whose ``translations`` list is extended.
        list_item: The list item node to scan.
        sense: Sense attached to every translation found here.
        source: Value stored in the ``source`` field of each translation.
    """
    translation_templates = ("ç", "çeviri")
    lang_name = "unknown"
    seen_colon = False
    children = list_item.children

    for position, child in enumerate(children):
        if isinstance(child, str):
            # Only the first colon-bearing text sets the language name.
            if ":" in child and lang_name == "unknown":
                before_colon = child.rpartition(":")[0]
                lang_name = clean_node(
                    wxr, None, children[:position] + [before_colon]
                ).strip(": ")
                seen_colon = True
            continue

        if (
            isinstance(child, TemplateNode)
            and child.template_name in translation_templates
        ):
            extract_çeviri_template(
                wxr, word_entry, child, sense, lang_name, source
            )
            continue

        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, nested_item, sense, source
                )
        elif seen_colon and child.kind == NodeKind.LINK:
            word = clean_node(wxr, None, child)
            if word == "":
                continue
            word_entry.translations.append(
                Translation(
                    word=word,
                    lang=lang_name or "unknown",
                    lang_code=name_to_code(lang_name, "tr") or "unknown",
                    sense=sense,
                    source=source,
                )
            )
def extract_çeviri_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense: str,
    lang_name: str,
    source: str,
) -> None:
    """Build a ``Translation`` from an expanded ``ç`` / ``çeviri`` template.

    The template is expanded and the resulting HTML is inspected:

    * the first ``span`` whose ``lang`` attribute equals the language code
      (template parameter 1) provides the translated word;
    * every non-empty ``abbr`` element becomes a raw tag;
    * the first ``span`` with class ``tr`` or ``tr Latn`` provides the
      romanization.

    The translation is appended to ``word_entry.translations`` only when a
    word was found.

    Args:
        wxr: Extraction context.
        word_entry: Entry receiving the translation.
        t_node: The translation template node.
        sense: Sense stored on the translation.
        lang_name: Language name; an empty value is stored as ``"unknown"``.
        source: Value stored in the ``source`` field of the translation.
    """
    lang_code = clean_node(
        wxr, None, t_node.template_parameters.get(1, "unknown")
    )
    template_text = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_text, expand_all=True)
    translation = Translation(
        word="",
        lang_code=lang_code,
        lang=lang_name or "unknown",
        sense=sense,
        source=source,
    )

    word_span = next(
        iter(
            expanded.find_html("span", attr_name="lang", attr_value=lang_code)
        ),
        None,
    )
    if word_span is not None:
        translation.word = clean_node(wxr, None, word_span)

    for abbr in expanded.find_html_recursively("abbr"):
        label = clean_node(wxr, None, abbr)
        if label:
            translation.raw_tags.append(label)

    roman_span = next(
        (
            span
            for span in expanded.find_html("span")
            if span.attrs.get("class", "") in ("tr", "tr Latn")
        ),
        None,
    )
    if roman_span is not None:
        translation.roman = clean_node(wxr, None, roman_span)

    if translation.word != "":
        translate_raw_tags(translation)
        word_entry.translations.append(translation)

    # Runs even when no word was found; passing ``word_entry`` presumably
    # lets clean_node record page-level data such as categories -- confirm
    # against clean_node.
    clean_node(wxr, word_entry, expanded)
def extract_trans_see_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Follow a translation redirect template to the referenced page(s).

    Parameter 1 is used as the sense.  When parameter 2 exists, the
    consecutive parameters 2..10 are the page titles to read; otherwise
    parameter 1 is also used as the single page title.  For each page, the
    "Çeviriler" section is located and processed by
    ``extract_translation_section`` with ``from_trans_see=True`` and the page
    title as ``source``.

    Args:
        wxr: Extraction context.
        word_entry: Entry whose ``translations`` list is extended.
        t_node: The redirect template node.
    """
    # https://tr.wiktionary.org/wiki/Şablon:çeviri_yönlendirme
    params = t_node.template_parameters
    sense = clean_node(wxr, None, params.get(1, ""))

    page_titles = []
    if 2 in params:
        for index in range(2, 11):
            if index not in params:
                break
            page_titles.append(clean_node(wxr, None, params[index]))
    else:
        page_titles.append(clean_node(wxr, None, params.get(1, "")))

    for page_title in page_titles:
        # Drop a trailing "#anchor" so only the page name is looked up.
        page_title = page_title.partition("#")[0]
        page_body = wxr.wtp.get_page_body(page_title, 0)
        if page_body is None:
            # A missing page must not prevent the remaining referenced pages
            # from being processed.
            continue
        root = wxr.wtp.parse(page_body)
        target_node = find_subpage_section(wxr, root, "Çeviriler")
        if target_node is not None:
            extract_translation_section(
                wxr,
                word_entry,
                target_node,
                sense=sense,
                from_trans_see=True,
                source=page_title,
            )
def find_subpage_section(
    wxr: WiktextractContext, root: WikiNode, target_section: str
) -> WikiNode | None:
    """Return the first section below ``root`` titled ``target_section``.

    Section nodes are visited in the order produced by
    ``root.find_child_recursively(LEVEL_KIND_FLAGS)``; each title is obtained
    by cleaning the node's ``largs``.  Returns ``None`` when no title matches
    exactly.
    """
    sections = root.find_child_recursively(LEVEL_KIND_FLAGS)
    return next(
        (
            section
            for section in sections
            if clean_node(wxr, None, section.largs) == target_section
        ),
        None,
    )