Coverage for src/wiktextract/extractor/ru/translation.py: 90%
67 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-24 07:36 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-24 07:36 +0000
1from mediawiki_langcodes import name_to_code
2from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode
4from ...page import clean_node
5from ...wxr_context import WiktextractContext
6from .models import Translation, WordEntry
7from .tags import translate_raw_tags
def extract_translations(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level3_node: WikiNode,
):
    """Extract translations from the translation templates of a section.

    Walks the direct template children of ``level3_node`` and hands every
    "перев-блок" template to ``process_translate_block_template``, which
    appends the translations it finds to ``word_entry.translations``.
    Templates with any other name are ignored.
    """
    for child_template in level3_node.find_child(NodeKind.TEMPLATE):
        if child_template.template_name != "перев-блок":
            continue
        process_translate_block_template(wxr, word_entry, child_template)
def process_translate_block_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    template_node: TemplateNode,
) -> None:
    """Expand a "перев-блок" template and collect its translations.

    The template is converted back to wikitext, fully expanded and parsed;
    every list item found in the expansion is treated as one translation
    line. The first template parameter (cleaned) is used as the sense of
    all translations from this template. Completed translations are
    appended to ``word_entry.translations``.
    """
    # https://ru.wiktionary.org/wiki/Шаблон:перев-блок
    template_wikitext = wxr.wtp.node_to_wikitext(template_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)
    sense_arg = template_node.template_parameters.get(1, "")
    sense = clean_node(wxr, None, sense_arg)

    for item in expanded.find_child_recursively(NodeKind.LIST_ITEM):
        tr = Translation(word="", lang="", sense=sense)
        for child in item.children:
            if isinstance(child, str):
                # Plain text between nodes: whatever is left after dropping
                # spaces, parentheses, colons and newlines is kept as a
                # raw tag.
                leftover = child.strip(" ():\n")
                if len(leftover) > 0:
                    tr.raw_tags.append(leftover)
                continue
            if not isinstance(child, WikiNode):
                continue

            if child.kind == NodeKind.HTML:
                if child.tag == "sub":
                    # <sub> content is stored as the language code
                    tr.lang_code = clean_node(wxr, None, child.children)
                elif child.tag == "sup":
                    # language index
                    title = child.attrs.get("title", "")
                    if len(title) > 0:
                        tr.raw_tags.append(title)
                elif child.tag == "span":
                    # The span holds the translated word(s); the helper may
                    # already append finished translations on its own.
                    process_translate_list_span_tag(
                        wxr, word_entry, tr, child
                    )
            elif child.kind == NodeKind.LINK:
                # A link directly in the list item is the language name
                tr.lang = clean_node(wxr, None, child)

        # Only keep entries that ended up with both a word and a language.
        if tr.word == "" or tr.lang == "":
            continue
        if tr.lang_code == "":
            # No <sub> code was present: look the code up from the
            # language name instead.
            tr.lang_code = name_to_code(tr.lang, "ru")
        translate_raw_tags(tr)
        word_entry.translations.append(tr)
def process_translate_list_span_tag(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    translation: Translation,
    span_node: HTMLNode,
) -> None:
    """Fill ``translation`` from the children of a translation ``<span>``.

    Links provide the word (first link) or the ``other`` field (later
    links); nested ``<span>``/``<i>`` tags and italic nodes become raw
    tags; parenthesised text becomes the romanization. When a text chunk
    ends with "," or ";" the list item holds several words: a deep copy of
    the current translation is appended to ``word_entry.translations`` (if
    it has a word and a language) and the per-word fields are cleared so
    the same object can be reused for the next word. The last word of the
    span is left in ``translation`` for the caller to handle.
    """
    for child in span_node.children:
        if isinstance(child, WikiNode):
            if child.kind == NodeKind.LINK:
                link_text = clean_node(wxr, None, child)
                if translation.word == "":
                    translation.word = link_text
                else:
                    translation.other = link_text
            elif isinstance(child, HTMLNode) and child.tag in ["span", "i"]:
                # gender tag
                gender = clean_node(wxr, None, child)
                if len(gender) > 0:
                    translation.raw_tags.append(gender)
            elif child.kind == NodeKind.ITALIC:
                translation.raw_tags.append(clean_node(wxr, None, child))
            continue
        if not isinstance(child, str):
            continue

        # The raw string is passed through clean_node as well; the original
        # comment says this converts escaped characters (presumably HTML
        # entities such as a non-breaking space - confirm).
        text = clean_node(wxr, None, child)
        if text.endswith((",", ";")):
            # this list item has multiple translation words
            inner = text.strip(",; ")
            if inner.startswith("(") and inner.endswith(")"):
                translation.roman = inner.strip("()")
            if translation.word != "" and translation.lang != "":
                if translation.lang_code == "":
                    translation.lang_code = name_to_code(
                        translation.lang, "ru"
                    )
                translate_raw_tags(translation)
                # Store a copy: the original object is reset and reused
                # for the next word below.
                word_entry.translations.append(
                    translation.model_copy(deep=True)
                )
            # NOTE(review): `other` is not among the fields cleared here,
            # so a value set for one word stays for the next - confirm
            # whether that is intended.
            translation.word = ""
            translation.roman = ""
            translation.tags = []
            translation.raw_tags = []
        elif text.startswith("(") and text.endswith(")"):
            translation.roman = text.strip("()")
        elif (
            translation.lang_code == "ja"
            and text.startswith(",")
            and text.endswith(")")
        ):
            # Special case for language code "ja": text starting with a
            # comma and ending with ")" is also taken as romanization.
            translation.roman = text.strip(",() ")