Coverage for src/wiktextract/extractor/tr/translation.py: 95%
50 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from mediawiki_langcodes import name_to_code
2from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
4from ...page import clean_node
5from ...wxr_context import WiktextractContext
6from .models import Translation, WordEntry
7from .tags import translate_raw_tags
def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract translations from every list directly under ``level_node``.

    The children of the section are walked once, in document order.  An
    ``üst`` / ``trans-top`` template (name matched case-insensitively) sets
    the current gloss from its first positional argument; each list that
    follows is handed, item by item, to ``extract_translation_list_item``
    together with that gloss.

    Walking in a single pass matters when a section holds several
    translation tables: each list is paired with the gloss of the table
    header preceding it, instead of every list receiving the gloss of the
    last header in the section.

    Args:
        wxr: Extraction context passed through to ``clean_node``.
        word_entry: Entry whose ``translations`` list is extended.
        level_node: The translation section node.
    """
    sense = ""
    for node in level_node.children:
        if isinstance(node, TemplateNode):
            if node.template_name.lower() in ("üst", "trans-top"):
                # A header without a first argument resets the gloss to "".
                sense = clean_node(
                    wxr, None, node.template_parameters.get(1, "")
                )
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, list_item, sense
                )
def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
) -> None:
    """Extract the translations found in one list item.

    The children of ``list_item`` are scanned in order:

    * The first plain-text child containing ``":"`` ends the language
      name; everything before the last colon in that text (plus all
      earlier children) is cleaned and used as the language name.
    * ``ç`` / ``çeviri`` templates are delegated to
      ``extract_çeviri_template``.
    * Nested lists are processed recursively, item by item.
    * Link nodes that appear after the colon are recorded as translations
      whose language code is looked up from the language name.

    Args:
        wxr: Extraction context passed through to ``clean_node``.
        word_entry: Entry whose ``translations`` list is extended.
        list_item: The list item node to scan.
        sense: Gloss of the enclosing translation table; attached to every
            translation produced here, including plain-link ones.
    """
    lang_name = "unknown"
    after_colon = False
    for index, node in enumerate(list_item.children):
        if isinstance(node, str) and ":" in node and lang_name == "unknown":
            # Language name = all nodes before this one plus the text up to
            # the last colon in this string.
            lang_name = clean_node(
                wxr,
                None,
                list_item.children[:index] + [node[: node.rindex(":")]],
            ).strip(": ")
            after_colon = True
        elif isinstance(node, TemplateNode) and node.template_name in [
            "ç",
            "çeviri",
        ]:
            extract_çeviri_template(wxr, word_entry, node, sense, lang_name)
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for child_list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, child_list_item, sense
                )
        elif (
            after_colon
            and isinstance(node, WikiNode)
            and node.kind == NodeKind.LINK
        ):
            word = clean_node(wxr, None, node)
            if word != "":
                word_entry.translations.append(
                    Translation(
                        word=word,
                        lang=lang_name or "unknown",
                        lang_code=name_to_code(lang_name, "tr") or "unknown",
                        # Keep the table gloss, as the template branch does.
                        sense=sense,
                    )
                )
def extract_çeviri_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense: str,
    lang_name: str,
) -> None:
    """Build one translation from a ``ç`` / ``çeviri`` template.

    The template is expanded and the resulting HTML is inspected:

    * the first ``<span>`` whose ``lang`` attribute equals the template's
      first argument supplies the translated word;
    * every non-empty ``<abbr>`` (searched recursively) becomes a raw tag;
    * the first ``<span>`` whose class is exactly ``tr`` or ``tr Latn``
      supplies the romanization.

    The translation is appended to ``word_entry.translations`` only when a
    word was found.

    Args:
        wxr: Extraction context used for expansion and cleaning.
        word_entry: Entry whose ``translations`` list is extended.
        t_node: The translation template node.
        sense: Gloss of the enclosing translation table.
        lang_name: Language name taken from the list item; ``"unknown"``
            is stored when it is empty.
    """
    code_arg = t_node.template_parameters.get(1, "unknown")
    lang_code = clean_node(wxr, None, code_arg)
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_node = wxr.wtp.parse(template_wikitext, expand_all=True)
    tr_data = Translation(
        word="",
        lang_code=lang_code,
        lang=lang_name or "unknown",
        sense=sense,
    )

    # Only the first matching span is used for the word.
    word_span = next(
        iter(
            expanded_node.find_html(
                "span", attr_name="lang", attr_value=lang_code
            )
        ),
        None,
    )
    if word_span is not None:
        tr_data.word = clean_node(wxr, None, word_span)

    abbr_texts = (
        clean_node(wxr, None, abbr_tag)
        for abbr_tag in expanded_node.find_html_recursively("abbr")
    )
    tr_data.raw_tags.extend(text for text in abbr_texts if text != "")

    # Only the first span carrying a transliteration class is used.
    roman_span = next(
        (
            candidate
            for candidate in expanded_node.find_html("span")
            if candidate.attrs.get("class", "") in ("tr", "tr Latn")
        ),
        None,
    )
    if roman_span is not None:
        tr_data.roman = clean_node(wxr, None, roman_span)

    if tr_data.word != "":
        translate_raw_tags(tr_data)
        word_entry.translations.append(tr_data)

    # Runs whether or not a word was found; called with word_entry rather
    # than None — presumably so data emitted by the expanded template is
    # recorded on the entry (confirm against clean_node).
    clean_node(wxr, word_entry, expanded_node)