Coverage for src/wiktextract/extractor/id/translation.py: 78%
65 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1from mediawiki_langcodes import name_to_code
2from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
4from ...page import clean_node
5from ...wxr_context import WiktextractContext
6from .models import Translation, WordEntry
7from .tags import translate_raw_tags
def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Walk a translation section and harvest translations from its lists.

    The direct children of ``level_node`` are visited in order.  A
    ``trans-top`` / ``kotak mulai`` / ``kotak awal`` template updates the
    current sense text (its first positional argument, cleaned), and every
    list that follows is processed item by item with that sense until the
    next such template replaces it.

    Results are appended to ``word_entry.translations`` by the list-item
    extractor; nothing is returned.
    """
    box_header_templates = ("trans-top", "kotak mulai", "kotak awal")
    current_sense = ""
    for child in level_node.children:
        starts_box = (
            isinstance(child, TemplateNode)
            and child.template_name in box_header_templates
        )
        if starts_box:
            # The first positional argument carries the sense text that
            # applies to the lists coming after this template.
            first_arg = child.template_parameters.get(1, "")
            current_sense = clean_node(wxr, None, first_arg)
            continue
        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for item in child.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, item, current_sense
                )
def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
) -> None:
    """Extract the translations held by a single list item.

    The language name is taken from everything in the item that precedes
    the first ``:`` found in a plain-text child (with a leading
    ``"bahasa "`` removed).  After that, children are dispatched by type:

    * translation templates (``t``, ``t+``, ``trad-``, ``trad+``,
      ``t-simple``) are handed to ``extract_t_template``;
    * qualifier-like templates (``qualifier``, ``q``, ``qual``, ``f``,
      ``n``, ``p``) are handed to ``extract_qualifier_template``;
    * plain links become translations directly, but only once a language
      name is known;
    * nested lists are processed recursively with the same ``sense``.

    Both the language name and code fall back to ``"unknown"``.
    """
    translation_templates = ("t", "t+", "trad-", "trad+", "t-simple")
    qualifier_templates = ("qualifier", "q", "qual", "f", "n", "p")

    lang_name = "unknown"
    lang_code = "unknown"
    for position, child in enumerate(list_item.children):
        if isinstance(child, str) and ":" in child and lang_name == "unknown":
            # Anything before this text node (e.g. a link wrapping the
            # language name) belongs to the name as well.
            leading_text = clean_node(
                wxr, None, list_item.children[:position]
            )
            before_colon = child.partition(":")[0].strip()
            lang_name = (
                (leading_text + before_colon).removeprefix("bahasa ").strip()
                or "unknown"
            )
            if lang_name != "unknown":
                # presumably resolves a language name written in
                # Indonesian ("id") to its code; "" means no match
                lang_code = name_to_code(lang_name, "id")
                if lang_code == "":
                    lang_code = "unknown"
            continue

        if isinstance(child, TemplateNode):
            template_name = child.template_name
            if template_name in translation_templates:
                extract_t_template(wxr, word_entry, child, lang_name, sense)
                continue
            if template_name in qualifier_templates:
                extract_qualifier_template(wxr, word_entry, child)
                continue

        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LINK and lang_name != "unknown":
            linked_word = clean_node(wxr, None, child)
            if linked_word != "":
                translation = Translation(
                    word=linked_word,
                    lang=lang_name,
                    lang_code=lang_code,
                    sense=sense,
                )
                word_entry.translations.append(translation)
        elif child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, nested_item, sense
                )
def extract_t_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    lang_name: str,
    sense: str,
) -> None:
    """Build one translation from a translation template.

    The language code is the template's first positional argument
    (``"unknown"`` when empty).  The template is expanded and its HTML is
    scanned:

    * the first ``<span>`` whose ``lang`` attribute equals the language
      code supplies the translated word;
    * a ``<span>`` whose ``class`` is exactly ``"tr Latn"`` supplies the
      romanization;
    * every ``<abbr>`` contributes a raw tag.

    The ``lit`` template argument is stored as the literal meaning.  The
    translation is appended to ``word_entry.translations`` only when a
    word was found; in that case top-level links of the expansion are
    also cleaned against ``word_entry``.
    """
    code_arg = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    lang_code = code_arg if code_arg != "" else "unknown"
    translation = Translation(
        word="", lang=lang_name, lang_code=lang_code, sense=sense
    )

    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)

    for span in expanded.find_html_recursively("span"):
        if translation.word == "" and span.attrs.get("lang") == lang_code:
            translation.word = clean_node(wxr, None, span)
        elif span.attrs.get("class", "") == "tr Latn":
            translation.roman = clean_node(wxr, None, span)

    lit_arg = t_node.template_parameters.get("lit", "")
    translation.lit = clean_node(wxr, None, lit_arg)
    translation.raw_tags.extend(
        clean_node(wxr, None, abbr)
        for abbr in expanded.find_html_recursively("abbr")
    )

    if translation.word == "":
        # Nothing usable came out of the expansion; drop it.
        return

    translate_raw_tags(translation)
    word_entry.translations.append(translation)
    # Passing word_entry (instead of None) lets clean_node record data on
    # the entry -- presumably categories carried by the links.
    for link in expanded.find_child(NodeKind.LINK):
        clean_node(wxr, word_entry, link)
def extract_qualifier_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Attach a qualifier template's labels to the latest translation.

    The template is rendered to text, surrounding parentheses and spaces
    are stripped, and the remainder is split on commas.  Each non-empty
    piece is appended to the ``raw_tags`` of the last item in
    ``word_entry.translations``, which is then passed to
    ``translate_raw_tags``.  When no translation has been collected yet
    the labels are discarded.
    """
    qualifier_text = clean_node(wxr, None, t_node).strip("() ")
    if len(word_entry.translations) == 0:
        return

    latest = word_entry.translations[-1]
    latest.raw_tags.extend(
        label
        for label in map(str.strip, qualifier_text.split(","))
        if label != ""
    )
    translate_raw_tags(latest)