Coverage for src/wiktextract/extractor/nl/translation.py: 92%
54 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1import re
3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from .models import Translation, WordEntry
8from .tags import LIST_ITEM_TAG_TEMPLATES
def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract all translations found under a translation section.

    The direct template and list children of ``level_node`` are visited in
    order. Each ``trans-top`` template sets the sense that applies to the
    lists that follow it; every list item of those lists is handed to
    ``extract_translation_list_item``, which appends to
    ``word_entry.translations``.

    Args:
        wxr: Context passed through to ``clean_node``.
        word_entry: Entry whose ``translations`` list is extended.
        level_node: Section node whose children are scanned.
    """
    sense = ""
    sense_index = 0
    for node in level_node.find_child(NodeKind.TEMPLATE | NodeKind.LIST):
        if isinstance(node, TemplateNode) and node.template_name == "trans-top":
            first_arg = clean_node(
                wxr, None, node.template_parameters.get(1, "")
            )
            # A first argument of the form "<number>. <text>" carries both
            # the sense index and the sense text.
            m = re.match(r"(\d+)\.", first_arg)
            if m is not None:
                sense_index = int(m.group(1))
                sense = first_arg[m.end() :].strip()
            else:
                sense = first_arg
                # Without a numeric prefix the index is unknown; reset it so
                # the index of an earlier "trans-top" does not leak into the
                # translations listed under this one.
                sense_index = 0
        elif node.kind == NodeKind.LIST:
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, list_item, sense, sense_index
                )
def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
    sense_index: int,
) -> None:
    """Extract the translations contained in a single list item.

    Everything before the first ``:`` found in a string child is used as the
    language name. After that colon:

    * a ``trad`` template outside parentheses with a non-empty second
      argument appends a ``Translation`` to ``word_entry.translations``;
    * a template outside parentheses that is a key of
      ``LIST_ITEM_TAG_TEMPLATES`` adds the mapped tag to the latest
      translation;
    * text enclosed in parentheses is stored in the ``roman`` field of the
      latest translation once the closing parenthesis is reached;
    * nested lists are processed recursively with the same sense data.

    Tags and parenthesised text are only attached to a translation that was
    appended while processing this list item (including its nested lists),
    never to one left over from an earlier list item.

    Args:
        wxr: Context passed through to ``clean_node``.
        word_entry: Entry whose ``translations`` list is extended.
        list_item: The list item node to scan.
        sense: Sense text copied onto every new translation.
        sense_index: Sense index copied onto every new translation.
    """
    before_colon = True
    lang_name = ""
    brackets = 0  # current nesting depth of "(" ... ")"
    roman_str = ""  # text collected while inside parentheses
    # Translations at or after this position were added by this list item.
    first_own = len(word_entry.translations)
    for index, node in enumerate(list_item.children):
        if before_colon and isinstance(node, str) and ":" in node:
            before_colon = False
            # The language name may be split over earlier child nodes plus
            # the part of this string that precedes the colon.
            lang_name = (
                clean_node(wxr, None, list_item.children[:index])
                + node[: node.index(":")].strip()
            )
        elif not before_colon:
            has_own_translation = len(word_entry.translations) > first_own
            if brackets == 0 and isinstance(node, TemplateNode):
                if node.template_name == "trad":
                    tr_word = clean_node(
                        wxr, None, node.template_parameters.get(2, "")
                    )
                    if tr_word != "":
                        word_entry.translations.append(
                            Translation(
                                lang=lang_name,
                                lang_code=node.template_parameters.get(1, ""),
                                word=tr_word,
                                sense=sense,
                                sense_index=sense_index,
                            )
                        )
                elif (
                    node.template_name in LIST_ITEM_TAG_TEMPLATES
                    and has_own_translation
                ):
                    word_entry.translations[-1].tags.append(
                        LIST_ITEM_TAG_TEMPLATES[node.template_name]
                    )
            elif isinstance(node, str):
                for c in node:
                    if c == "(":
                        brackets += 1
                    elif c == ")":
                        if brackets == 0:
                            # Stray ")" without a matching "(": ignore it so
                            # the depth never goes negative, which would
                            # cause every later template to be skipped.
                            continue
                        brackets -= 1
                        if brackets == 0:
                            if has_own_translation:
                                word_entry.translations[-1].roman = roman_str
                            roman_str = ""
                    elif brackets > 0:
                        roman_str += c
            elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
                for next_list_item in node.find_child(NodeKind.LIST_ITEM):
                    extract_translation_list_item(
                        wxr, word_entry, next_list_item, sense, sense_index
                    )
            elif brackets > 0:
                # Any other node inside parentheses contributes its cleaned
                # text to the parenthesised string.
                roman_str += clean_node(wxr, None, node)