Coverage for src/wiktextract/extractor/ku/form_table.py: 92%
99 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 10:58 +0000
1import re
2from dataclasses import dataclass
4from wikitextprocessor import NodeKind, TemplateNode
6from ...page import clean_node
7from ...wxr_context import WiktextractContext
8from .models import Form, WordEntry
9from .tags import translate_raw_tags
10from .tewandin import extract_tewandin_page
def extract_ku_tewîn_nav_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Append the forms listed in a ``ku-tewîn-nav`` table to ``word_entry``.

    The template is expanded and re-parsed, then every table in the
    expansion is walked row by row.  Each non-empty line of a data cell that
    differs from the page title becomes a ``Form`` appended to
    ``word_entry.forms``.

    Args:
        wxr: Extraction context; used to expand/parse the template, to clean
            nodes into plain text, and for the current page title.
        word_entry: Entry whose ``forms`` list is extended in place.
        t_node: The ``ku-tewîn-nav`` template node.  Its second positional
            argument selects the gender tag ("mê" or "nêr").
    """
    # https://ku.wiktionary.org/wiki/Şablon:ku-tewîn-nav
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )

    # The gender comes from the template argument, not from the table.
    gender_tags = []
    gender_arg = clean_node(wxr, None, t_node.template_parameters.get(2, ""))
    if gender_arg == "mê":
        gender_tags = ["feminine"]
    elif gender_arg == "nêr":
        gender_tags = ["masculine"]

    for table_node in expanded_node.find_child(NodeKind.TABLE):
        # State carried across the rows of one table.
        row_header = ""
        col_headers = []
        shared_tags = []
        for row in table_node.find_child(NodeKind.TABLE_ROW):
            # Counts data cells only, so it lines up with ``col_headers``
            # (which never contains the row-header column).
            col_index = 0
            for cell in row.find_child(
                NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL
            ):
                if cell.kind == NodeKind.TABLE_HEADER_CELL:
                    header_str = clean_node(wxr, None, cell)
                    if len(row.children) == 1:
                        # A row holding a single header cell is a section
                        # title; it sets the tag shared by the forms below.
                        # " nebinavkirî" is tested first on purpose.
                        if header_str.endswith(" nebinavkirî"):
                            shared_tags = ["indefinite"]
                        elif header_str.endswith(" binavkirî"):
                            shared_tags = ["definite"]
                    elif row.contain_node(NodeKind.TABLE_CELL):
                        # Header cell in a row that also has data cells:
                        # it labels that row.
                        row_header = header_str
                    elif header_str not in ["Rewş", ""]:
                        # Header-only row: column labels.  "Rewş" and empty
                        # cells are skipped rather than stored.
                        col_headers.append(header_str)
                elif len(row.children) == 1:
                    # Lone data cell: nothing to extract.
                    continue
                else:
                    # A data cell may list several forms, one per line.
                    for form_str in clean_node(wxr, None, cell).splitlines():
                        if form_str not in ["", wxr.wtp.title]:
                            form = Form(
                                form=form_str, tags=gender_tags + shared_tags
                            )
                            if row_header != "":
                                form.raw_tags.append(row_header)
                            if col_index < len(col_headers):
                                form.raw_tags.append(col_headers[col_index])
                            # NOTE(review): presumably converts raw_tags
                            # into normalized tags - see .tags.
                            translate_raw_tags(form)
                            word_entry.forms.append(form)
                    col_index += 1
@dataclass
class TableHeader:
    """A table header together with the rows/columns it is recorded for.

    Note that the span fields default to 0, not 1: a header built without an
    explicit ``rowspan`` covers no row under the
    ``row_index <= i < row_index + rowspan`` test used in this module, so
    callers pass ``rowspan`` explicitly.
    """

    # Cleaned header text; used as a raw tag on matching forms.
    text: str
    # Index of the row where the header starts.
    row_index: int = 0
    # Number of rows the header covers, starting at ``row_index``.
    rowspan: int = 0
    # Column position and width; not read by the code visible in this file.
    col_index: int = 0
    colspan: int = 0
def extract_ku_tewîn_lk_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Append the forms listed in a ``ku-tewîn-lk`` table to ``word_entry``.

    The template is expanded and re-parsed, then every table in the
    expansion is walked row by row.  Rows with a single child are treated as
    captions that switch the shared tag, reset the header state, or point to
    a page with further forms.  In all other rows, header cells and the
    first data cell are recorded as row headers, and every later data cell
    is split on "/" into forms.

    Args:
        wxr: Extraction context; used to expand/parse the template, to clean
            nodes into plain text, and for the current page title.
        word_entry: Entry whose ``forms`` list is extended in place.
        t_node: The ``ku-tewîn-lk`` template node.
    """
    # https://ku.wiktionary.org/wiki/Şablon:ku-tewîn-lk
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    # Caption prefix that introduces the title of a page with more forms.
    other_forms_prefix = "Formên din:"

    for table_node in expanded_node.find_child(NodeKind.TABLE):
        row_index = 0
        shared_tags = []
        row_headers = []
        for row in table_node.find_child(NodeKind.TABLE_ROW):
            if len(row.children) == 1:
                row_str = clean_node(wxr, None, row.children)
                clear_values = False
                if row_str.endswith(" gerguhêz)"):
                    shared_tags = ["transitive"]
                    clear_values = True
                elif row_str.endswith(" negerguhêz)"):
                    shared_tags = ["intransitive"]
                    clear_values = True
                elif row_str.startswith("Rehê dema "):
                    clear_values = True
                elif row_str.startswith(other_forms_prefix):
                    # The text after the prefix is handed to the tewandin
                    # page extractor.
                    extract_tewandin_page(
                        wxr,
                        word_entry,
                        row_str[len(other_forms_prefix) :].strip(),
                    )
                if clear_values:
                    # A new section starts: earlier headers no longer apply.
                    row_index = 0
                    row_headers.clear()
                # Caption rows hold no forms and do not advance row_index.
                continue

            for header_cell in row.find_child(NodeKind.TABLE_HEADER_CELL):
                # Fall back to a span of 1 when the attribute is missing or
                # is not a plain number.
                rowspan = 1
                rowspan_str = header_cell.attrs.get("rowspan", "1")
                if re.fullmatch(r"\d+", rowspan_str):
                    rowspan = int(rowspan_str)
                row_headers.append(
                    TableHeader(
                        text=clean_node(wxr, None, header_cell),
                        rowspan=rowspan,
                        row_index=row_index,
                    )
                )

            for col_index, cell in enumerate(
                row.find_child(NodeKind.TABLE_CELL)
            ):
                cell_str = clean_node(wxr, None, cell)
                if cell_str == "":
                    continue
                if col_index == 0:
                    # The first data cell of a row acts as a header for
                    # this row only.
                    row_headers.append(
                        TableHeader(
                            text=cell_str, rowspan=1, row_index=row_index
                        )
                    )
                else:
                    for form_str in cell_str.split("/"):
                        form_str = form_str.strip()
                        if form_str not in ["", wxr.wtp.title]:
                            # Give each form its own tag list so that a
                            # later change to one form's tags cannot leak
                            # into shared_tags or into sibling forms.
                            form = Form(form=form_str, tags=list(shared_tags))
                            # Attach every header whose row span covers the
                            # current row.
                            for header in row_headers:
                                if (
                                    row_index >= header.row_index
                                    and row_index
                                    < header.row_index + header.rowspan
                                ):
                                    form.raw_tags.append(header.text)
                            # NOTE(review): presumably converts raw_tags
                            # into normalized tags - see .tags.
                            translate_raw_tags(form)
                            word_entry.forms.append(form)

            row_index += 1