Coverage for src/wiktextract/extractor/it/pos.py: 98%
54 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from .example import extract_example_list_item
6from .models import Sense, WordEntry
7from .section_titles import POS_DATA
8from .tag_form_line import extract_tag_form_line_nodes
9from .tags import translate_raw_tags
# Template names that extract_pos_section() treats as sub-section markers
# inside a part-of-speech section.  Membership is case-sensitive, which is
# why some names are listed in both capitalised and lower-case form.
POS_SUBSECTION_TEMPLATES = frozenset(
    [
        # https://it.wiktionary.org/wiki/Categoria:Template_per_i_verbi
        "-participio passato-",
        "-participio presente-",
        "Ausiliare",
        "Deponente",
        "Intransitivo",
        "Medio",
        "Passivo",
        "Reciproco",
        "Riflessivo",
        "riflessivo",
        "Transitivo",
        # https://it.wiktionary.org/wiki/Categoria:Template_vocabolo
        "Attivo",
        "attivo",
        "Inpr",
        "inpr",
        "Riflpr",
    ]
)
def add_new_pos_data(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
    pos_title: str,
) -> None:
    """Append a new entry for ``pos_title`` to ``page_data``.

    The new entry is a deep copy of ``base_data`` with ``pos_title``,
    ``pos`` and any extra ``tags`` filled in from ``POS_DATA``.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        page_data: Entries collected so far; the new entry is appended.
        base_data: Template entry that is deep-copied, never mutated.
        level_node: The part-of-speech section node; its direct LINK
            children are run through ``clean_node``.
        pos_title: Section title used as the key into ``POS_DATA``.

    Raises:
        KeyError: If ``pos_title`` is not a key of ``POS_DATA``.  The
            copied entry has already been appended at that point.
    """
    entry = base_data.model_copy(deep=True)
    # Append before the POS_DATA lookup so that a failing lookup leaves
    # page_data in the same state the original statement order produced.
    page_data.append(entry)
    entry.pos_title = pos_title
    pos_data = POS_DATA[pos_title]
    entry.pos = pos_data["pos"]
    entry.tags.extend(pos_data.get("tags", []))
    for link_node in level_node.find_child(NodeKind.LINK):
        # The returned text is discarded; the call is made only for what
        # clean_node records on the entry (presumably category links --
        # confirm against clean_node).
        clean_node(wxr, entry, link_node)
def extract_pos_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
    pos_title: str,
) -> None:
    """Extract senses, sub-section tags and form-line data of a POS section.

    A new entry is first added with ``add_new_pos_data``.  The direct
    children of ``level_node`` are then scanned in order:

    * A LIST node whose ``sarg`` both starts and ends with ``#`` is a
      gloss list; each of its items is handed to
      ``extract_gloss_list_item`` for the most recent entry.
    * A template whose name is in ``POS_SUBSECTION_TEMPLATES`` contributes
      a raw tag.  If the most recent entry already has senses, a further
      entry is started first, so the tag goes onto the new entry.

    Finally the children that precede the first gloss list are passed to
    ``extract_tag_form_line_nodes`` together with the most recent entry.

    Args:
        wxr: Extraction context.
        page_data: Entries collected so far; extended in place.
        base_data: Template entry copied for every new entry.
        level_node: The part-of-speech section node.
        pos_title: Section title, a key of ``POS_DATA``.
    """
    add_new_pos_data(wxr, page_data, base_data, level_node, pos_title)
    # Defaults to "all children" when the section has no gloss list.
    first_gloss_list_index = len(level_node.children)
    for index, node in enumerate(level_node.children):
        if (
            isinstance(node, WikiNode)
            and node.kind == NodeKind.LIST
            and node.sarg.startswith("#")
            and node.sarg.endswith("#")
        ):
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_gloss_list_item(wxr, page_data[-1], list_item)
            # Remember where the earliest gloss list sits.
            first_gloss_list_index = min(first_gloss_list_index, index)
        elif (
            isinstance(node, TemplateNode)
            and node.template_name in POS_SUBSECTION_TEMPLATES
        ):
            # A sub-section marker after collected senses opens a new
            # entry; before any sense it only tags the current one.
            if page_data[-1].senses:
                add_new_pos_data(
                    wxr, page_data, base_data, level_node, pos_title
                )
            raw_tag = clean_node(wxr, page_data[-1], node).strip("= \n")
            page_data[-1].raw_tags.append(raw_tag)

    extract_tag_form_line_nodes(
        wxr, page_data[-1], level_node.children[:first_gloss_list_index]
    )
def extract_gloss_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
) -> None:
    """Build a ``Sense`` from one gloss list item and attach it to the entry.

    The children of ``list_item`` are handled as follows:

    * Template: expanded with ``clean_node``.  Text wrapped in parentheses
      becomes a raw tag (with leading/trailing ``(`` and ``)`` characters
      stripped); any other text is kept as part of the gloss.
    * Nested LIST ending in ``:`` while the latest example has an empty
      translation: each item's cleaned text is assigned to that
      translation, so the last item wins.
    * Other nested LIST ending in ``*`` or ``:``: every item is passed to
      ``extract_example_list_item``.
    * Nested LIST with any other marker: ignored.
    * Anything else: kept as part of the gloss.

    The sense is appended to ``word_entry.senses`` only when the cleaned
    gloss text is non-empty; ``translate_raw_tags`` runs just before that.

    Args:
        wxr: Extraction context.
        word_entry: Entry receiving the sense; its ``lang_code`` is
            forwarded to the example extractor.
        list_item: The LIST_ITEM node of a gloss list.
    """
    gloss_nodes: list[WikiNode | str] = []
    sense = Sense()
    for node in list_item.children:
        if isinstance(node, TemplateNode):
            t_str = clean_node(wxr, sense, node)
            if t_str.startswith("(") and t_str.endswith(")"):
                sense.raw_tags.append(t_str.strip("()"))
            else:
                gloss_nodes.append(t_str)
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            if (
                node.sarg.endswith(":")
                and sense.examples
                and sense.examples[-1].translation == ""
            ):
                # A ":" list right after an untranslated example holds
                # that example's translation.
                for tr_list_item in node.find_child(NodeKind.LIST_ITEM):
                    sense.examples[-1].translation = clean_node(
                        wxr, sense, tr_list_item.children
                    )
            elif node.sarg.endswith(("*", ":")):
                for example_list_item in node.find_child(NodeKind.LIST_ITEM):
                    extract_example_list_item(
                        wxr, sense, example_list_item, word_entry.lang_code
                    )
        else:
            gloss_nodes.append(node)

    gloss_str = clean_node(wxr, sense, gloss_nodes)
    if gloss_str != "":
        sense.glosses.append(gloss_str)
        translate_raw_tags(sense)
        word_entry.senses.append(sense)