Coverage for src/wiktextract/extractor/ko/translation.py: 93%
52 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1import re
3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from .models import Translation, WordEntry
8from .tags import translate_raw_tags
def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Collect translations from the `외국어` templates of a section.

    Walks the template nodes yielded by
    ``level_node.find_child(NodeKind.TEMPLATE)`` and forwards every template
    named ``외국어`` to ``extract_translation_template``; templates with any
    other name are ignored.

    Args:
        wxr: Extraction context, passed through unchanged.
        word_entry: Entry that receives the extracted translations.
        level_node: Section node whose template children are inspected.
    """
    for template in level_node.find_child(NodeKind.TEMPLATE):
        if template.template_name != "외국어":
            continue
        extract_translation_template(wxr, word_entry, template)
def extract_translation_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense: str = "",
) -> None:
    """Extract translations from one `외국어` template.

    Template page: https://ko.wiktionary.org/wiki/틀:외국어

    Args:
        wxr: Extraction context; supplies the wikitext parser via ``wxr.wtp``.
        word_entry: Entry that receives the extracted translations.
        t_node: The template node to read arguments from.
        sense: Sense text attached to each translation. It is replaced by
            the template's ``덧`` argument when that argument cleans to a
            non-empty string.
    """
    sense_override = clean_node(
        wxr, None, t_node.template_parameters.get("덧", "")
    )
    if sense_override != "":
        sense = sense_override

    # Positional arguments 1 and 2 are turned back into wikitext and parsed
    # again so that list items can be searched for in the result.
    # NOTE(review): presumably the raw argument value does not already
    # contain LIST_ITEM nodes - confirm against wikitextprocessor.
    for position in (1, 2):
        raw_arg = t_node.template_parameters.get(position, [])
        wikitext = wxr.wtp.node_to_wikitext(raw_arg)
        reparsed = wxr.wtp.parse(wikitext)
        for list_item in reparsed.find_child_recursively(NodeKind.LIST_ITEM):
            extract_translation_list_item(wxr, word_entry, list_item, sense)
def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
) -> None:
    """Extract the translations found in one list item.

    The children of ``list_item`` are scanned in order:

    * While no language has been recognised, string children are searched
      for ``(code):``. On a match the parenthesised part becomes the
      language code and the text before it the language name.
    * Each link child whose cleaned text is non-empty is appended to
      ``word_entry.translations`` as a ``Translation`` carrying the current
      language name, language code and ``sense``.
    * In later string children, every top-level parenthesised group is
      applied to the most recent translation created by *this* list item:
      a group containing an ASCII lowercase letter is stored as ``roman``;
      any other group is split on ``/`` into raw tags (empty pieces and
      ``-`` are skipped) and ``translate_raw_tags`` is then called.

    Args:
        wxr: Extraction context, used for ``clean_node``.
        word_entry: Entry whose ``translations`` list is extended in place.
        list_item: List item node whose children are scanned.
        sense: Sense text stored on every translation created here.
    """
    lang_code = "unknown"
    lang_name = "unknown"
    # Length of the shared translation list before this item adds anything.
    # `word_entry.translations` also holds results from earlier list items,
    # so "is the list non-empty" is not enough to know that the last entry
    # belongs to this item.
    first_own = len(word_entry.translations)
    for node in list_item.children:
        if isinstance(node, str) and lang_name == "unknown":
            m = re.search(r"\(([\w-]+)\):", node)
            if m is not None:
                lang_code = m.group(1)
                lang_name = node[: m.start()].strip()
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LINK:
            word = clean_node(wxr, None, node)
            if word != "":
                word_entry.translations.append(
                    Translation(
                        lang=lang_name,
                        lang_code=lang_code,
                        word=word,
                        sense=sense,
                    )
                )
        elif isinstance(node, str) and "(" in node and ")" in node:
            if len(word_entry.translations) <= first_own:
                # This item has not produced a translation yet; annotating
                # `translations[-1]` would modify an entry that came from a
                # previous list item.
                continue
            target = word_entry.translations[-1]
            text = ""
            brackets = 0  # current parenthesis nesting depth
            for c in node:
                if c == "(":
                    brackets += 1
                elif c == ")":
                    brackets -= 1
                    # Act only when the outermost parenthesis closes and
                    # something other than whitespace was collected.
                    if brackets == 0 and text.strip() != "":
                        text = text.strip()
                        if re.search(r"[a-z]", text):
                            target.roman = text
                        else:
                            for raw_tag in text.split("/"):
                                raw_tag = raw_tag.strip()
                                if raw_tag not in ["", "-"]:
                                    target.raw_tags.append(raw_tag)
                            # NOTE(review): the nesting of this call and of
                            # the reset below was reconstructed from an
                            # indentation-stripped listing - confirm.
                            translate_raw_tags(target)
                        text = ""
                elif brackets > 0:
                    # Characters are collected only inside parentheses.
                    text += c