Coverage for src/wiktextract/extractor/es/conjugation.py: 89%
62 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from dataclasses import dataclass
3from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from .models import Form, WordEntry
8from .tags import translate_raw_tags
def extract_conjugation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: WikiNode,
) -> None:
    """Process the templates found in a conjugation section.

    Every template child node yielded by ``level_node.find_child`` is passed,
    together with ``wxr`` and ``word_entry``, to
    ``process_conjugation_template``.
    """
    conjugation_templates = level_node.find_child(NodeKind.TEMPLATE)
    for tpl in conjugation_templates:
        process_conjugation_template(wxr, word_entry, tpl)
def process_conjugation_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    template_node: TemplateNode,
) -> None:
    """Dispatch a conjugation template to its handler.

    Only templates whose name contains ``es.v.conj.`` are handled; any other
    template is silently ignored.
    """
    is_es_verb_table = "es.v.conj." in template_node.template_name
    if not is_es_verb_table:
        return
    process_es_v_conj_template(wxr, word_entry, template_node)
@dataclass
class SpanHeader:
    """A column header of the conjugation table and the columns it covers.

    The header applies to the cell columns in the half-open range
    ``[index, index + span)``.
    """

    # Cleaned text of the header cell; used as a raw tag on matching forms.
    text: str
    # Column position at which the header starts.
    index: int
    # Number of columns the header covers (taken from the cell's colspan).
    span: int
# Header-only rows whose first header text starts with one of these prefixes
# are skipped. Kept as a tuple because it is passed to ``str.startswith``.
IGNORE_ES_V_ROW_PREFIXES = ("Modo ", "Tiempos ")

# Cell texts that are skipped instead of being recorded as headers or forms.
IGNORE_ES_V_HEADERS = {
    "número:",
    "persona:",
}
def process_es_v_conj_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    template_node: TemplateNode,
) -> None:
    """Extract verb forms from the table produced by an ``es.v.conj`` template.

    The template is expanded and re-parsed, and the first table in the result
    is walked row by row.  Header-only rows supply column headers, rows that
    mix header and data cells supply one ``Form`` per non-empty line of each
    data cell.  Every form is tagged with the row header (split on ``" o "``)
    and with each column header whose span covers the cell's column, then
    passed to ``translate_raw_tags`` and appended to ``word_entry.forms``.

    Nothing is added when the expansion contains no table.
    """
    # https://es.wiktionary.org/wiki/Plantilla:es.v.conj
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(template_node), expand_all=True
    )
    table_node = next(expanded_node.find_child(NodeKind.TABLE), None)
    if table_node is None:
        return

    col_headers: list[SpanHeader] = []
    for row in table_node.find_child(NodeKind.TABLE_ROW):
        row_header = ""
        has_header_cell = row.contain_node(NodeKind.TABLE_HEADER_CELL)
        all_header_row = not row.contain_node(NodeKind.TABLE_CELL)

        if has_header_cell and all_header_row:
            # NOTE(review): contain_node presumably also matches nested
            # descendants while find_child yields only children - confirm.
            # A default keeps a bare next() from raising StopIteration.
            first_header = next(
                row.find_child(NodeKind.TABLE_HEADER_CELL), None
            )
            if first_header is not None:
                first_header_text = clean_node(wxr, None, first_header)
                if first_header_text.startswith(IGNORE_ES_V_ROW_PREFIXES):
                    continue  # title row, carries no headers or forms
                if len(list(row.filter_empty_str_child())) == 1:
                    # A lone header cell starts a new sub-table: the column
                    # headers collected so far no longer apply.
                    col_headers.clear()
                    continue

        if not all_header_row and not has_header_cell:
            continue  # ignore end notes

        # Header cells and data cells are counted separately; skipped cells
        # (empty text or IGNORE_ES_V_HEADERS) advance neither counter.
        col_header_index = 0
        col_cell_index = 0
        for cell in row.find_child(
            NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL
        ):
            cell_text = clean_node(wxr, None, cell)
            if cell_text == "" or cell_text in IGNORE_ES_V_HEADERS:
                continue
            colspan = _parse_colspan(cell)

            if cell.kind == NodeKind.TABLE_HEADER_CELL:
                if all_header_row:
                    col_headers.append(
                        SpanHeader(cell_text, col_header_index, colspan)
                    )
                else:
                    row_header = cell_text
                col_header_index += colspan
                continue

            for line in cell_text.splitlines():
                if line == "":
                    continue
                form = Form(form=line)
                if row_header != "":
                    form.raw_tags.extend(row_header.split(" o "))
                form.raw_tags.extend(
                    col_head.text
                    for col_head in col_headers
                    if col_head.index
                    <= col_cell_index
                    < col_head.index + col_head.span
                )
                translate_raw_tags(form)
                word_entry.forms.append(form)
            col_cell_index += colspan


def _parse_colspan(cell: WikiNode) -> int:
    """Return the cell's ``colspan`` attribute as a positive integer.

    A missing, non-numeric, zero or negative value yields 1, so a malformed
    attribute in the wikitext cannot abort extraction or stall the column
    counters.
    """
    try:
        span = int(cell.attrs.get("colspan", "1"))
    except (TypeError, ValueError):
        return 1
    return span if span > 0 else 1