Coverage for src/wiktextract/extractor/pl/example.py: 88%
59 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1import re
2from collections import defaultdict
4from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode
6from ...page import clean_node
7from ...wxr_context import WiktextractContext
8from .models import Example, Sense, WordEntry
def extract_example_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Collect the examples below ``level_node`` and attach them to senses.

    Every list item found recursively under ``level_node`` is handed to
    ``process_example_list_item``, which files the parsed examples in a
    mapping keyed by sense index (the empty string keys examples that carry
    no index).

    Each entry of ``page_data`` whose language code equals that of
    ``base_data`` then receives, per sense, the examples stored under that
    sense's own index followed by the un-indexed ones.  Indexed examples
    that no sense claimed are put on new senses tagged ``"no-gloss"`` and
    appended to the last entry of ``page_data``; a deep copy of
    ``base_data`` is appended first when ``page_data`` is empty or ends with
    an entry of another language.

    ``page_data`` and the senses it holds are modified in place; nothing is
    returned.
    """
    collected = defaultdict(list)
    for item_node in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        process_example_list_item(wxr, item_node, collected)

    target_lang = base_data.lang_code
    for entry in page_data:
        if entry.lang_code == target_lang:
            for sense in entry.senses:
                if sense.sense_index in collected:
                    # pop() hands the list over and marks it as claimed.
                    sense.examples.extend(collected.pop(sense.sense_index))
                # Looked up on every pass on purpose: the "" bucket may have
                # just been consumed above when a sense's own index is "".
                sense.examples.extend(collected[""])

    # Un-indexed examples never become "no-gloss" senses.
    collected.pop("", None)
    if not page_data or page_data[-1].lang_code != target_lang:
        page_data.append(base_data.model_copy(deep=True))
    page_data[-1].senses.extend(
        Sense(tags=["no-gloss"], examples=leftover, sense_index=index)
        for index, leftover in collected.items()
    )
def process_example_list_item(
    wxr: WiktextractContext,
    list_item: WikiNode,
    examples: dict[str, list[Example]],
) -> None:
    """Parse one example list item and file it under its sense index.

    The children of ``list_item`` are scanned in order up to the first
    string child that contains an arrow ("→") and no sense index:

    * a string child matching ``(<digits>.<digits>)`` supplies the sense
      index, and the example text is taken to start after that child;
    * an italic node supplies the example text;
    * a ``ref`` HTML node supplies the reference.

    When an arrow was found, the children after it form the translation, up
    to a ``dosł`` template if there is one; whatever follows that template
    is the literal meaning.  If no italic node supplied the text, the
    children between the sense index and the arrow are used instead.

    A trailing parenthesised part of the text is moved to the ``roman``
    field.  The example is appended to ``examples[sense_index]`` (``""``
    when no index was seen) only if its text is non-empty.
    """
    children = list_item.children
    example = Example()
    sense_index = ""
    text_begin = 0
    arrow_end = 0  # index just past the child holding the arrow; 0 = none
    for pos, child in enumerate(children):
        if isinstance(child, str):
            index_match = re.search(r"\(\d+\.\d+\)", child)
            if index_match is not None:
                sense_index = index_match.group(0).strip("()")
                text_begin = pos + 1
            elif "→" in child:
                arrow_end = pos + 1
                break
        elif isinstance(child, WikiNode) and child.kind == NodeKind.ITALIC:
            example.text = clean_node(wxr, None, child)
        elif isinstance(child, HTMLNode) and child.tag == "ref":
            example.ref = clean_node(wxr, None, child.children)

    if arrow_end != 0:
        child_count = len(children)
        # Position of the first "dosł" template after the arrow, or the end
        # of the children when there is none.
        literal_at = next(
            (
                pos
                for pos in range(arrow_end, child_count)
                if isinstance(children[pos], TemplateNode)
                and children[pos].template_name == "dosł"
            ),
            child_count,
        )
        if literal_at < child_count:
            literal_nodes = children[literal_at + 1 :]
            example.literal_meaning = clean_node(
                wxr, None, literal_nodes
            ).strip("() ")
        translation_nodes = children[arrow_end:literal_at]
        example.translation = clean_node(
            wxr, None, translation_nodes
        ).strip("() ")
        if not example.text:
            # No italic node: fall back to everything between the sense
            # index and the arrow.
            text_nodes = children[text_begin:arrow_end]
            example.text = clean_node(wxr, None, text_nodes).strip("→ ")

    if "(" in example.text and example.text.endswith(")"):
        # Split at the last "(": the tail is the romanization.
        head, _, tail = example.text.rpartition("(")
        example.roman = tail.strip("() ")
        example.text = head.strip()

    if example.text:
        examples[sense_index].append(example)