Coverage for src/wiktextract/extractor/de/form.py: 77%
51 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from .models import Form, WordEntry
6from .tags import translate_raw_tags
7from .utils import extract_sense_index
def _append_form(
    word_entry: WordEntry,
    text: str,
    tags: list[str],
    sense_index,
    raw_tags: list[str],
) -> None:
    """Build a ``Form``, translate its raw tags, and store it on the entry.

    The new form is appended to ``word_entry.forms``.
    """
    new_form = Form(
        form=text,
        tags=tags,
        sense_index=sense_index,
        raw_tags=raw_tags,
    )
    translate_raw_tags(new_form)
    word_entry.forms.append(new_form)


def extracrt_form_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    tags: list[str],
) -> None:
    """Collect forms from every list item found under ``level_node``.

    Each child of a list item is handled according to its type:

    * a string starting with ``[`` is passed to ``extract_sense_index`` and
      the first element of the result becomes the current sense index;
    * an italic node whose cleaned text ends with ``:`` contributes a raw
      tag (the text without the trailing colon);
    * a link node, or an ``Arab`` / ``Arabische Schrift`` template, whose
      cleaned text is non-empty becomes a form; the raw tags gathered so far
      are attached to it and then reset;
    * any other template whose cleaned text ends with ``:`` contributes a
      raw tag as well.

    When a list item yields no form this way, the cleaned text of the whole
    item is used as a single form, provided it is non-empty.

    Args:
        wxr: Context handed to ``clean_node``.
        word_entry: Entry whose ``forms`` list receives the new forms.
        level_node: Section node that is searched for list items.
        tags: Tags given to every form created here.
    """
    script_templates = ("Arab", "Arabische Schrift")

    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        sense_index = ""
        pending_tags = []
        has_form = False

        for node in item.children:
            if isinstance(node, str):
                # Only strings opening with "[" are inspected; other plain
                # strings are ignored at this stage.
                if node.startswith("["):
                    sense_index, _ = extract_sense_index(node)
                continue

            is_wiki_node = isinstance(node, WikiNode)
            if is_wiki_node and node.kind == NodeKind.ITALIC:
                label = clean_node(wxr, None, node)
                if label.endswith(":"):
                    pending_tags.append(label.removesuffix(":").strip())
                continue

            is_template = isinstance(node, TemplateNode)
            if (is_wiki_node and node.kind == NodeKind.LINK) or (
                is_template and node.template_name in script_templates
            ):
                form_text = clean_node(wxr, None, node)
                if form_text != "":
                    _append_form(
                        word_entry, form_text, tags, sense_index, pending_tags
                    )
                    # Raw tags apply only to the form that follows them.
                    pending_tags.clear()
                    has_form = True
                continue

            if is_template:
                label = clean_node(wxr, word_entry, node)
                if label.endswith(":"):
                    pending_tags.append(label.removesuffix(":").strip())

        if has_form:
            continue

        # Plain text: no link or script template produced a form, so fall
        # back to the cleaned text of the whole list item.
        whole_text = clean_node(wxr, None, item.children)
        if whole_text != "":
            _append_form(
                word_entry, whole_text, tags, sense_index, pending_tags
            )
def extract_transcription_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Add a ``transcription`` form for each comma-separated piece of text.

    For every list item under ``level_node`` the item's content, excluding
    nested lists, is cleaned to text and split on commas. A piece containing
    ``:`` is divided at the first colon: the part before it becomes the
    current raw tag and the part after it is the transcription. The raw tag
    stays in effect for the following pieces of the same list item until
    another piece supplies a new one.

    Args:
        wxr: Context handed to ``clean_node``.
        word_entry: Entry whose ``forms`` list receives the new forms.
        level_node: Section node that is searched for list items.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        own_nodes = list(item.invert_find_child(NodeKind.LIST))
        item_text = clean_node(wxr, None, own_nodes)

        label = ""
        for piece in item_text.split(","):
            before, colon, after = piece.partition(":")
            if colon:
                label = before.strip()
                piece = after
            piece = piece.strip()
            if piece == "":
                continue

            entry = Form(form=piece, tags=["transcription"])
            if label != "":
                entry.raw_tags.append(label)
                # NOTE(review): assumed to run only when a raw tag was
                # attached - confirm against the original indentation.
                translate_raw_tags(entry)
            word_entry.forms.append(entry)