Coverage for src/wiktextract/extractor/nl/translation.py: 92%
49 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
1import re
3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from .models import Translation, WordEntry
def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Collect the translations found under a translation section node.

    Iterates over the template and list nodes returned by
    ``level_node.find_child``.  A ``trans-top`` template sets the sense
    that applies to the lists following it; every list item of each list
    is handed to ``extract_translation_list_item``, which appends the
    translations it finds to ``word_entry.translations``.

    Args:
        wxr: Extraction context, passed through to ``clean_node``.
        word_entry: Entry that receives the extracted translations.
        level_node: Section node whose children are scanned.
    """
    sense = ""
    sense_index = 0
    for node in level_node.find_child(NodeKind.TEMPLATE | NodeKind.LIST):
        if isinstance(node, TemplateNode) and node.template_name == "trans-top":
            first_arg = clean_node(
                wxr, None, node.template_parameters.get(1, "")
            )
            # A header such as "1. some gloss" carries the sense number
            # as a leading "<digits>." prefix.
            m = re.match(r"(\d+)\.", first_arg)
            if m is not None:
                sense_index = int(m.group(1))
                sense = first_arg[m.end() :].strip()
            else:
                # Unnumbered header: reset the index so the number of a
                # previous table is not paired with this table's gloss.
                sense_index = 0
                sense = first_arg
        elif node.kind == NodeKind.LIST:
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, list_item, sense, sense_index
                )
# Maps the name of a one-letter marker template to the tag appended to the
# translation that precedes it.
TR_TEMPLATES: dict[str, str] = {
    # grammatical gender
    "m": "masculine",
    "f": "feminine",
    "n": "neuter",
    "c": "common",
    # grammatical number
    "s": "singular",
    "p": "plural",
    # animacy
    "a": "animate",
    "i": "inanimate",
}
def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
    sense_index: int,
) -> None:
    """Extract the translations of one list item into ``word_entry``.

    Everything up to the first ``:`` found in a string child is taken as
    the language name.  After the colon, each ``trad`` template outside
    parentheses becomes a ``Translation``; templates named in
    ``TR_TEMPLATES`` add a tag to the most recent translation of this
    item; text enclosed in parentheses is stored as that translation's
    ``roman`` value.

    Args:
        wxr: Extraction context, passed through to ``clean_node``.
        word_entry: Entry whose ``translations`` list is extended.
        list_item: List item node to scan.
        sense: Sense text copied onto every translation created here.
        sense_index: Sense number copied onto every translation created
            here.
    """
    before_colon = True
    lang_name = ""
    brackets = 0
    roman_str = ""
    # Translations at or after this position were appended by this list
    # item; earlier ones come from other items and must not be modified.
    first_own = len(word_entry.translations)
    for index, node in enumerate(list_item.children):
        if before_colon:
            if isinstance(node, str) and ":" in node:
                before_colon = False
                lang_name = (
                    clean_node(wxr, None, list_item.children[:index])
                    + node[: node.index(":")].strip()
                )
            continue

        if brackets == 0 and isinstance(node, TemplateNode):
            if node.template_name == "trad":
                word_entry.translations.append(
                    Translation(
                        lang=lang_name,
                        lang_code=node.template_parameters.get(1, ""),
                        word=clean_node(
                            wxr, None, node.template_parameters.get(2, "")
                        ),
                        sense=sense,
                        sense_index=sense_index,
                    )
                )
            elif (
                node.template_name in TR_TEMPLATES
                and len(word_entry.translations) > first_own
            ):
                word_entry.translations[-1].tags.append(
                    TR_TEMPLATES[node.template_name]
                )
        elif isinstance(node, str):
            for c in node:
                if c == "(":
                    brackets += 1
                elif c == ")":
                    if brackets == 0:
                        # Unmatched ")": ignore it.  Letting the depth go
                        # negative would hide every later template from
                        # the ``brackets == 0`` check above.
                        continue
                    brackets -= 1
                    if brackets == 0:
                        if len(word_entry.translations) > first_own:
                            word_entry.translations[-1].roman = roman_str
                        roman_str = ""
                elif brackets > 0:
                    roman_str += c
        elif brackets > 0:
            # Non-string node inside parentheses: keep its cleaned text as
            # part of the parenthesised value.
            roman_str += clean_node(wxr, None, node)