Coverage for src/wiktextract/extractor/cs/pos.py: 93%
64 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from .example import extract_example_list_item
6from .models import Sense, WordEntry
7from .section_titles import POS_DATA
8from .tags import translate_raw_tags
def extract_pos_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
    pos_title: str,
):
    """Start a new word entry for a part-of-speech section.

    A deep copy of ``base_data`` is appended to ``page_data`` and filled in
    with the POS information registered for ``pos_title`` in ``POS_DATA``.
    Italic text found in the section's ``*`` lists is split on whitespace
    and stored as raw tags, which are then passed to ``translate_raw_tags``.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        page_data: Entries collected so far; the new entry is appended.
        base_data: Template entry to copy. Its ``pos`` is also overwritten
            with the POS of this section.
        level_node: The section node whose child lists are scanned.
        pos_title: Section title, used as the key into ``POS_DATA``.

    Raises:
        KeyError: If ``pos_title`` is not a key of ``POS_DATA``. The copied
            entry has already been appended to ``page_data`` at that point.
    """
    entry = base_data.model_copy(deep=True)
    page_data.append(entry)
    entry.pos_title = pos_title

    pos_info = POS_DATA[pos_title]
    pos = pos_info["pos"]
    entry.pos = pos
    # The shared base entry also records the POS of the current section.
    base_data.pos = pos
    entry.tags.extend(pos_info.get("tags", []))

    for bullet_list in level_node.find_child(NodeKind.LIST):
        # Only plain bullet lists are inspected here.
        if bullet_list.sarg == "*":
            for item in bullet_list.find_child(NodeKind.LIST_ITEM):
                for italic in item.find_child(NodeKind.ITALIC):
                    words = clean_node(wxr, None, italic).split()
                    # "rod" is skipped rather than stored as a tag
                    # (presumably the Czech label "gender" - confirm).
                    entry.raw_tags.extend(
                        word for word in words if word not in ("", "rod")
                    )

    translate_raw_tags(entry)
def extract_sense_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
):
    """Extract senses from every ``#`` list directly under ``level_node``.

    Each list item of such a list is handed to ``extract_gloss_list_item``;
    child lists with any other marker are ignored.

    Args:
        wxr: Extraction context, passed through unchanged.
        word_entry: Entry passed on to ``extract_gloss_list_item``.
        level_node: The section node whose child lists are scanned.
    """
    for numbered_list in level_node.find_child(NodeKind.LIST):
        if numbered_list.sarg == "#":
            for item in numbered_list.find_child(NodeKind.LIST_ITEM):
                extract_gloss_list_item(wxr, word_entry, item)
def extract_gloss_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    parent_sense: Sense | None = None,
):
    """Build a sense from one gloss list item and recurse into its sublists.

    The sense starts as a deep copy of ``parent_sense`` when one is given,
    otherwise as an empty ``Sense``. Children of ``list_item`` are handled
    as follows:

    * ``Příznaky`` templates go to ``extract_příznaky_template``;
    * italic nodes whose cleaned text is wrapped in parentheses are split
      on commas into raw tags, other italic nodes stay in the gloss;
    * nested lists are left out of the gloss text;
    * everything else becomes part of the gloss text.

    The sense is added to ``word_entry.senses`` only when the gloss text is
    non-empty. Nested lists are processed afterwards either way: markers
    that start and end with ``#`` hold sub-senses, markers that start with
    ``#`` and end with ``:`` or ``*`` hold examples.

    Args:
        wxr: Extraction context, forwarded to the helpers.
        word_entry: Entry whose ``senses`` list receives the new sense.
        list_item: The list item node to process.
        parent_sense: Sense of the enclosing list item, if any.
    """
    if parent_sense is None:
        sense = Sense()
    else:
        sense = parent_sense.model_copy(deep=True)

    gloss_parts = []
    for part in list_item.children:
        if isinstance(part, TemplateNode) and part.template_name == "Příznaky":
            extract_příznaky_template(wxr, sense, part)
            continue
        if isinstance(part, WikiNode):
            if part.kind == NodeKind.ITALIC:
                italic_text = clean_node(wxr, None, part)
                if italic_text.startswith("(") and italic_text.endswith(")"):
                    # Parenthesised italic text is a comma-separated
                    # list of labels, not gloss text.
                    for piece in italic_text.strip("() ").split(","):
                        label = piece.strip()
                        if label != "":
                            sense.raw_tags.append(label)
                else:
                    gloss_parts.append(part)
                continue
            if part.kind == NodeKind.LIST:
                # Nested lists are processed after the gloss is built.
                continue
        gloss_parts.append(part)

    gloss = clean_node(wxr, sense, gloss_parts)
    if gloss != "":
        sense.glosses.append(gloss)
        translate_raw_tags(sense)
        word_entry.senses.append(sense)

    for sub_list in list_item.find_child(NodeKind.LIST):
        marker = sub_list.sarg
        if not marker.startswith("#"):
            continue
        if marker.endswith("#"):
            # Deeper numbered list: sub-senses that inherit from this sense.
            for sub_item in sub_list.find_child(NodeKind.LIST_ITEM):
                extract_gloss_list_item(wxr, word_entry, sub_item, sense)
        elif marker.endswith((":", "*")):
            for sub_item in sub_list.find_child(NodeKind.LIST_ITEM):
                extract_example_list_item(wxr, sense, sub_item)
def extract_příznaky_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
):
    """Add the labels produced by a ``Příznaky`` template to ``sense``.

    The template is cleaned to text with ``clean_node``, surrounding
    parentheses and spaces are stripped, and the rest is split on commas.
    Every non-empty, whitespace-trimmed piece is appended to
    ``sense.raw_tags``.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        sense: Sense that receives the raw tags.
        t_node: The ``Příznaky`` template node.
    """
    # https://cs.wiktionary.org/wiki/Šablona:Příznaky
    expanded = clean_node(wxr, sense, t_node)
    for piece in expanded.strip("() ").split(","):
        label = piece.strip()
        if label:
            sense.raw_tags.append(label)