Coverage for src/wiktextract/extractor/cs/translation.py: 93%
36 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-17 08:19 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-17 08:19 +0000
1from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from .models import Translation, WordEntry
6from .tags import translate_raw_tags
def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: WikiNode
):
    """Collect translations from a translation section node.

    Every LIST_ITEM found under the LIST children of ``level_node`` is
    numbered with one running counter (starting at 1 and shared across all
    lists).  Each "Překlady" template inside an item that carries at least
    one parameter is passed to ``extract_překlady_template`` together with
    that item's number, which is used as the sense index.
    """
    item_number = 0
    for translation_list in level_node.find_child(NodeKind.LIST):
        for item in translation_list.find_child(NodeKind.LIST_ITEM):
            # The counter advances for every list item, whether or not the
            # item contains a usable template.
            item_number += 1
            for template in item.find_child(NodeKind.TEMPLATE):
                if template.template_name != "Překlady":
                    continue
                if len(template.template_parameters) == 0:
                    # A parameterless template is skipped entirely.
                    continue
                extract_překlady_template(
                    wxr, word_entry, template, item_number
                )
def extract_překlady_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense_index: int,
):
    """Extract translations from one expanded "Překlady" template.

    The template is converted back to wikitext, re-parsed with full
    expansion, and the resulting HTML is scanned:

    * the text of the last ``<dfn>`` tag becomes the sense gloss;
    * inside each ``<li>``, the first string child ending in ":" names the
      language, ``<span class="translation-item">`` children become
      ``Translation`` objects, and ``<abbr class="genus">`` children add
      their ``title`` as a raw tag to the most recent translation.

    The collected translations are appended to ``word_entry.translations``.
    """
    # https://cs.wiktionary.org/wiki/Šablona:Překlady
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    root = wxr.wtp.parse(template_wikitext, expand_all=True)

    # When several <dfn> tags exist, the last one wins.
    gloss = ""
    for dfn in root.find_html_recursively("dfn"):
        gloss = clean_node(wxr, None, dfn)

    collected = []
    for li in root.find_html_recursively("li"):
        language = "unknown"
        for child in li.children:
            if isinstance(child, str):
                # Only the first "name:" string sets the language; later
                # strings in the same <li> are ignored.
                if language == "unknown" and child.strip().endswith(":"):
                    language = child.strip().removesuffix(":") or "unknown"
                continue
            if not isinstance(child, HTMLNode):
                continue

            if (
                child.tag == "span"
                and "translation-item" in child.attrs.get("class", "").split()
            ):
                text = clean_node(wxr, None, child)
                if text == "":
                    continue
                entry = Translation(
                    word=text,
                    lang=language,
                    lang_code=child.attrs.get("lang", "unknown"),
                    sense=gloss,
                    sense_index=sense_index,
                )
                collected.append(entry)
            elif (
                child.tag == "abbr"
                and "genus" in child.attrs.get("class", "").split()
            ):
                title = child.attrs.get("title", "")
                if title == "" or len(collected) == 0:
                    continue
                # The tag belongs to the translation that precedes it; this
                # may be one collected from an earlier <li>.
                latest = collected[-1]
                latest.raw_tags.append(title)
                translate_raw_tags(latest)

    word_entry.translations.extend(collected)
    # Second pass over the expanded tree, this time with word_entry as the
    # target -- presumably so clean_node can record page-level data such as
    # categories on the entry; confirm against clean_node.
    clean_node(wxr, word_entry, root)