Coverage for src/wiktextract/extractor/ko/translation.py: 94%
47 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
1import re
3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from .models import Translation, WordEntry
def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Extract translations from the "외국어" templates of a section.

    Iterates the template nodes yielded by
    ``level_node.find_child(NodeKind.TEMPLATE)`` and forwards each one named
    "외국어" to `extract_translation_template`; other templates are ignored.
    Results are stored on `word_entry` by the callee.
    """
    for template in level_node.find_child(NodeKind.TEMPLATE):
        if template.template_name != "외국어":
            continue
        extract_translation_template(wxr, word_entry, template)
def extract_translation_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense: str = "",
) -> None:
    """Extract translations from one "외국어" template node.

    The cleaned value of the "덧" template parameter, when non-empty,
    replaces the `sense` passed in by the caller. Positional arguments 1 and
    2 are each converted back to wikitext and re-parsed, and every list item
    found in the parsed result is handed to `extract_translation_list_item`.
    """
    # https://ko.wiktionary.org/wiki/틀:외국어
    params = t_node.template_parameters
    sense_override = clean_node(wxr, None, params.get("덧", ""))
    if sense_override != "":
        sense = sense_override
    for position in (1, 2):
        # A missing argument falls back to an empty list.
        wikitext = wxr.wtp.node_to_wikitext(params.get(position, []))
        parsed = wxr.wtp.parse(wikitext)
        for list_item in parsed.find_child_recursively(NodeKind.LIST_ITEM):
            extract_translation_list_item(wxr, word_entry, list_item, sense)
def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
) -> None:
    """Convert one translation list item into `Translation` entries.

    The children of `list_item` are scanned in order:

    * While the language name is still "unknown", string children are
      searched for a ``(code):`` marker. The captured group becomes the
      language code and the stripped text before the marker becomes the
      language name.
    * Each link node whose cleaned text is non-empty is appended to
      `word_entry.translations` with the current language and `sense`.
    * Once the language is known, string children containing both "(" and
      ")" annotate the most recently appended translation: bracketed text
      containing a lowercase ASCII letter is stored as `roman`, any other
      bracketed text is appended to `raw_tags`.
    """
    lang_code = "unknown"
    lang_name = "unknown"
    for child in list_item.children:
        if isinstance(child, str):
            if lang_name == "unknown":
                marker = re.search(r"\((\w+)\):", child)
                if marker is not None:
                    lang_code = marker.group(1)
                    lang_name = child[: marker.start()].strip()
                continue
            if "(" not in child or ")" not in child:
                continue
            depth = 0
            buffer = ""
            for char in child:
                if char == "(":
                    depth += 1
                    continue
                if char != ")":
                    # Only characters inside brackets are collected.
                    if depth > 0:
                        buffer += char
                    continue
                depth -= 1
                note = buffer.strip()
                # Flush only when the outermost bracket closes, there is
                # text to store and a translation to attach it to.
                if depth != 0 or note == "" or not word_entry.translations:
                    continue
                last_translation = word_entry.translations[-1]
                if re.search(r"[a-z]", note):
                    last_translation.roman = note
                else:
                    last_translation.raw_tags.append(note)
                buffer = ""
            continue
        if isinstance(child, WikiNode) and child.kind == NodeKind.LINK:
            word = clean_node(wxr, None, child)
            if word != "":
                translation = Translation(
                    lang=lang_name,
                    lang_code=lang_code,
                    word=word,
                    sense=sense,
                )
                word_entry.translations.append(translation)