Coverage for src/wiktextract/extractor/pl/example.py: 88%
63 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
1import re
2from collections import defaultdict
4from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode
6from ...page import clean_node
7from ...wxr_context import WiktextractContext
8from ..share import calculate_bold_offsets
9from .models import Example, Sense, WordEntry
def extract_example_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Distribute the examples found under ``level_node`` onto senses.

    Every ``LIST_ITEM`` node below ``level_node`` is handed to
    ``process_example_list_item``, which groups the parsed examples by
    sense index (``""`` when the item carries no index).

    For each entry in ``page_data`` whose ``lang_code`` equals
    ``base_data.lang_code``, each sense receives the group whose key
    equals its ``sense_index`` (that group is then consumed) followed by
    the un-indexed group.  Indexed groups that no sense consumed are
    turned into new ``Sense`` objects tagged ``"no-gloss"`` and appended
    to the last entry of ``page_data``; a deep copy of ``base_data`` is
    appended first when ``page_data`` is empty or its last entry has a
    different language code.
    """
    grouped = defaultdict(list)
    for item_node in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        process_example_list_item(wxr, item_node, grouped)

    lang_code = base_data.lang_code
    for entry in page_data:
        if entry.lang_code == lang_code:
            for sense in entry.senses:
                # Consume the group for this sense so it is not turned into
                # a "no-gloss" sense further down.
                indexed = grouped.pop(sense.sense_index, None)
                if indexed is not None:
                    sense.examples.extend(indexed)
                # Un-indexed examples are added to every matching sense.
                sense.examples.extend(grouped[""])

    # The un-indexed group never becomes a sense of its own.
    grouped.pop("", None)

    if not page_data or page_data[-1].lang_code != lang_code:
        page_data.append(base_data.model_copy(deep=True))

    # Whatever is left refers to sense indexes that matched no sense.
    page_data[-1].senses.extend(
        Sense(tags=["no-gloss"], examples=leftover, sense_index=key)
        for key, leftover in grouped.items()
    )
def _store_bold_offsets_from_nodes(
    wxr: WiktextractContext,
    nodes: list,
    text: str,
    example_data: Example,
    field_name: str,
) -> None:
    """Re-parse ``nodes`` as wikitext and compute bold offsets for ``text``."""
    reparsed = wxr.wtp.parse(wxr.wtp.node_to_wikitext(nodes))
    calculate_bold_offsets(wxr, reparsed, text, example_data, field_name)


def process_example_list_item(
    wxr: WiktextractContext,
    list_item: WikiNode,
    examples: dict[str, list[Example]],
) -> None:
    """Parse one example list item and file it under its sense index.

    The children of ``list_item`` are scanned in order:

    * a string matching ``(N.N)`` supplies the sense index;
    * an italic node supplies the example text;
    * a ``<ref>`` HTML node supplies the reference;
    * a string containing ``→`` ends the scan; the children after it form
      the translation, up to a ``dosł`` template, after which the rest is
      the literal meaning.

    If no italic node supplied the text, the children between the sense
    index and the arrow are used instead.  A trailing ``(...)`` group in
    the text is moved to ``roman``.  The example is appended to
    ``examples[sense_index]`` only when its text is non-empty.
    """
    children = list_item.children
    sense_index = ""
    example_data = Example()
    example_start = 0
    translation_start = 0

    for position, child in enumerate(children):
        if isinstance(child, str):
            index_match = re.search(r"\(\d+\.\d+\)", child)
            if index_match is not None:
                sense_index = index_match.group(0).strip("()")
                example_start = position + 1
            elif "→" in child:
                translation_start = position + 1
                break
        elif isinstance(child, WikiNode) and child.kind == NodeKind.ITALIC:
            example_data.text = clean_node(wxr, None, child)
            calculate_bold_offsets(
                wxr, child, example_data.text, example_data, "bold_text_offsets"
            )
        elif isinstance(child, HTMLNode) and child.tag == "ref":
            example_data.ref = clean_node(wxr, None, child.children)

    if translation_start != 0:
        # Position of the first "dosł" template after the arrow, if any.
        literal_at = next(
            (
                position
                for position in range(translation_start, len(children))
                if isinstance(children[position], TemplateNode)
                and children[position].template_name == "dosł"
            ),
            None,
        )
        if literal_at is None:
            translation_end = len(children)
        else:
            example_data.literal_meaning = clean_node(
                wxr, None, children[literal_at + 1 :]
            ).strip("() ")
            translation_end = literal_at

        translation_nodes = children[translation_start:translation_end]
        example_data.translation = clean_node(
            wxr, None, translation_nodes
        ).strip("() ")
        _store_bold_offsets_from_nodes(
            wxr,
            translation_nodes,
            example_data.translation,
            example_data,
            "bold_translation_offsets",
        )

        if not example_data.text:
            # No italic node: fall back to everything between the sense
            # index and the arrow (the slice includes the arrow string,
            # hence the strip).
            text_nodes = children[example_start:translation_start]
            example_data.text = clean_node(wxr, None, text_nodes).strip("→ ")
            _store_bold_offsets_from_nodes(
                wxr,
                text_nodes,
                example_data.text,
                example_data,
                "bold_text_offsets",
            )

    full_text = example_data.text
    if "(" in full_text and full_text.endswith(")"):
        # Split at the last "(": the tail is the romanization.
        head, _, tail = full_text.rpartition("(")
        example_data.roman = tail.strip("() ")
        example_data.text = head.strip()

    if example_data.text:
        examples[sense_index].append(example_data)