Coverage for src/wiktextract/extractor/el/table.py: 83%

291 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-03 05:44 +0000

1import re 

2from typing import TypeAlias 

3from unicodedata import name as unicode_name 

4 

5from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode 

6 

7from wiktextract.clean import clean_value 

8from wiktextract.extractor.el.tags import translate_raw_tags 

9from wiktextract.wxr_context import WiktextractContext 

10 

11from .models import Form, FormSource, WordEntry 

12from .parse_utils import GREEK_LANGCODES, remove_duplicate_forms 

13 

14# from .simple_tags import simple_tag_map 

15# from .tags_utils import convert_tags 

16 

# File-local shorthand for "a piece of parsed wikitext": either plain text or
# a parse-tree node. Kept here instead of being imported because it is a
# one-liner.
Node: TypeAlias = str | WikiNode

19 

20 

21# GREEK TABLE HEURISTICS: 

22# If it's a table for a Greek language entry, if it's in a header or is in 

23# italics, it's a header. 

24# If it's NOT a Greek entry and has Greek text, it's a header. 

25 

26 

27# node_fns are different from template_fns. template_fns are functions that 

28# are used to handle how to expand (and otherwise process) templates, while 

29# node functions are used when turning any parsed "abstract" nodes into strings. 

def cell_node_fn(
    node: WikiNode,
) -> list[Node] | None:
    """Node handler used when flattening a table cell to text.

    * Italic and bold nodes are replaced by their children wrapped in the
      `__I__`/`__/I__` and `__B__`/`__/B__` marker strings.
    * Table cells and table header cells (tables inside tables) are replaced
      by their children.
    * For any other node `None` is returned.
    """
    assert isinstance(node, WikiNode)
    kind = node.kind

    # In case someone puts tables inside tables...
    if kind in (NodeKind.TABLE_CELL, NodeKind.TABLE_HEADER_CELL):
        return node.children

    markers = {
        NodeKind.ITALIC: ("__I__", "__/I__"),
        NodeKind.BOLD: ("__B__", "__/B__"),
    }
    pair = markers.get(kind)
    if pair is None:
        return None
    opening, closing = pair
    return [opening, *node.children, closing]

47 

48 

# Matches the formatting markers inserted by `cell_node_fn`. The capturing
# group makes `split()` keep the markers in its result.
BOLD_RE = re.compile(r"(__/?[BI]__)")
# Digits at the very end of a string.
TRAILING_NUMBER_RE = re.compile(r"\d+$")

# Each article is listed once; a set ignores repeats anyway.
ARTICLES: set[str] = {
    # singular
    "ο", "η", "το",
    "του", "της",
    "τον", "τη", "την",
    # plural
    "οι", "τα",
    "των",
    "τους", "τις",
}
EXTENDED_ARTICLES = ARTICLES | {
    "ο/η",
    "του/της",
    "τον/τη",
    "τον/την",
    "τους/τις",
}
"""Articles to trim from inflection tables / headwords."""
UNEXPECTED_ARTICLES = {
    "αι",
    "ένα", "ένας",
    "στα", "στη", "στην", "στης", "στις",
    "στο", "στον", "στου", "στους", "στων",
    "τ'",
    "ταις", "τας", "τες", "τη", "τοις", "τω",
}
"""Includes contractions, Ancient Greek articles etc."""

101 

102 

def process_inflection_section(
    wxr: WiktextractContext,
    data: WordEntry,
    snode: WikiNode,
    *,
    source: FormSource = "",
) -> None:
    """Find every table under `snode` and parse it into forms on `data`.

    The section is walked once with a node handler that expands templates and
    records each table node together with the name of the top-level template
    it came from (`None` when the table is not inside a template). Each
    recorded table is then handed to `parse_table`, and finally `data.forms`
    is passed through `remove_duplicate_forms`.
    """
    # (name of the enclosing top-level template or None, table node)
    found_tables: list[tuple[str | None, WikiNode]] = []
    # How deep inside a top-level template the handler currently is; only
    # templates visible in the original wikitext (depth 0) give their name to
    # the tables found inside them, not templates inside templates.
    depth = 0
    current_template: str | None = None

    def table_node_handler_fn(
        node: WikiNode,
    ) -> list[str] | str | None:
        """Expand templates recursively and collect table nodes."""
        nonlocal depth, current_template
        assert isinstance(node, WikiNode)

        if not isinstance(node, TemplateNode):
            if node.kind == NodeKind.TABLE:
                # Remember the table and replace it with nothing in the
                # generated text.
                found_tables.append((current_template, node))
                return [""]
            return None

        # Expand from wikitext: "node_to_text" here causes bad output.
        wikitext = wxr.wtp.node_to_wikitext(node)
        expanded = wxr.wtp.expand(wikitext)
        if depth == 0:
            # A top-level template in the original wikitext.
            current_template = node.template_name
        reparsed = wxr.wtp.parse(expanded)

        # Recurse with this same handler so nodes produced by the expansion
        # are inspected too.
        depth += 1
        rendered = wxr.wtp.node_to_text(
            reparsed, node_handler_fn=table_node_handler_fn
        )
        depth -= 1
        if depth == 0:
            current_template = None
        return rendered

    # Only the handler's side effects matter; the generated HTML is dropped.
    wxr.wtp.node_to_html(snode, node_handler_fn=table_node_handler_fn)

    for name, table in found_tables:
        parse_table(
            wxr,
            table,
            data,
            data.lang_code in GREEK_LANGCODES,
            template_name=name or "",
            source=source,
        )

    data.forms = remove_duplicate_forms(wxr, data.forms)

173 

174 

def _cell_spans(cell: WikiNode) -> tuple[int, int]:
    """Return `(rowspan, colspan)` read from the attributes of `cell`.

    Both values fall back to 1 when either attribute is not an integer. A
    non-positive value is treated as 1 so that the cell still occupies its
    own grid square instead of vanishing and shifting the following cells.
    """
    try:
        rowspan = int(cell.attrs.get("rowspan", "1"))
        colspan = int(cell.attrs.get("colspan", "1"))
    except ValueError:
        return 1, 1
    return max(rowspan, 1), max(colspan, 1)


def parse_table(
    wxr: WiktextractContext,
    tnode: WikiNode,
    data: WordEntry,
    is_greek_entry: bool = False,  # Whether the entry is for a Greek word
    template_name: str = "",
    *,
    source: FormSource = "",
) -> None:
    """Parse inflection table. Generates 'form' data; 'foos' is a form of 'foo'
    with the tags ['plural'].

    `tnode` must be a wikitext table node or an HTML `<table>` node. The table
    is first laid out on a grid that honours rowspan/colspan, then walked cell
    by cell: header cells contribute their text as raw tags to the rows and
    columns they span, bare articles are remembered as a prefix for the next
    data cell, and every other cell becomes one or more forms carrying the
    tags of the headers that cover it.

    When at least one form is found, a form holding `template_name` with the
    tag "inflection-template" is appended to `data.forms`, followed by the
    forms returned by `postprocess_table_forms`. A table without any cells is
    reported with a debug message and otherwise ignored.
    """
    assert (isinstance(tnode, WikiNode) and tnode.kind == NodeKind.TABLE) or (
        isinstance(tnode, HTMLNode) and tnode.tag == "table"
    )

    is_html_table = isinstance(tnode, HTMLNode)

    # Some debugging code: if wiktwords is passed a --inflection-tables-file
    # argument, we save tables to a file for debugging purposes, or for just
    # getting tables that can be used as test data.
    if wxr.config.expand_tables:
        # Explicit encoding: the dump contains Greek text and must not depend
        # on the platform's default encoding.
        with open(wxr.config.expand_tables, "w", encoding="utf-8") as f:
            f.write(f"{wxr.wtp.title=}\n")
            text = wxr.wtp.node_to_wikitext(tnode)
            f.write(f"{text}\n")

    Row: TypeAlias = int
    Column: TypeAlias = int

    # We complete the table using nested dicts (instead of arrays for
    # convenience) such that when we come across a node, we push that node's
    # reference to each coordinate point in the table grid it occupies. Each
    # grid point can then be checked for if it's been handled already and
    # skipped if needed.
    table_grid: dict[Row, dict[Column, WikiNode]] = {}
    # Spans actually used to lay out each cell (after sanity limits), so the
    # second pass sees exactly the same geometry as the grid.
    cell_spans: dict[WikiNode, tuple[int, int]] = {}

    first_column_is_headers = True

    rows = (
        tnode.find_html_recursively("tr")
        if is_html_table
        else tnode.find_child_recursively(NodeKind.TABLE_ROW)
    )
    for r, row in enumerate(rows):
        c = 0
        if r not in table_grid:
            table_grid[r] = {}

        cells = (
            row.find_html(["th", "td"])
            if is_html_table
            else row.find_child(
                NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL,
            )
        )
        for cell in cells:
            # Skip squares already taken by a cell spanning from above/left.
            while c in table_grid[r]:
                c += 1

            rowspan, colspan = _cell_spans(cell)

            if colspan > 30:
                wxr.wtp.error(
                    f"Colspan {colspan} over 30, set to 1",
                    sortid="table/128/20250207",
                )
                colspan = 1
            if rowspan > 30:
                wxr.wtp.error(
                    f"Rowspan {rowspan} over 30, set to 1",
                    sortid="table/134/20250207b",
                )
                rowspan = 1

            cell_spans[cell] = (rowspan, colspan)

            for rr in range(r, r + rowspan):
                if rr not in table_grid:
                    table_grid[rr] = {}
                for cc in range(c, c + colspan):
                    table_grid[rr][cc] = cell

    # Row keys are contiguous from 0, so the last row is len - 1. Drop empty
    # rows at the end of the table.
    while table_grid and not table_grid[len(table_grid) - 1]:
        del table_grid[len(table_grid) - 1]

    if not table_grid:
        # No rows, or only rows without cells: nothing to extract.
        wxr.wtp.debug(
            "Inflection table without any cells",
            sortid="table/empty-grid",
        )
        return

    if len(table_grid[0]) == 1:
        # Table is one column in width, no headers on rows
        first_column_is_headers = False

    # NOTE(review): the original comment here said "only one or two rows",
    # but the check is for exactly two rows — confirm which is intended.
    if len(table_grid) == 2:
        first_column_is_headers = False

    # Headers are saved in two dict that has their keys made out of tuples
    # made of their "bookends": so {(1,1), "foo"} for a header that is made
    # up of the first cell only of a row in the column_hdrs dict.
    # If we come across a header that has those exact same bookends, only
    # then do we replace the previous tags with it; if you have overlapping
    # 'widths', leave them so that we inherit different 'levels' of headers.
    Spread = tuple[int, int]
    SpreadDict = dict[Spread, str]
    # The column and row headers are saved into big dicts: column_hdrs is a
    # dict whose key is what row or column we are in. The values of that table
    # grid square is a dict with the bookends (`Spread`) and the tags
    # associated with those bookends
    column_hdrs_all: dict[Column, SpreadDict] = {}
    row_hdrs_all: dict[Row, dict[Column, SpreadDict]] = {}

    forms: list[Form] = []
    processed: set[WikiNode] = set()
    # Some tables have cells with stuff like `του` we want to add to the
    # next cell
    prefix: str | None = None

    first_cells_are_bold = False
    found_unformatted_text = False

    for r, row_d in table_grid.items():
        for c, cell in row_d.items():
            # A spanning cell sits on several grid squares; handle it once.
            if cell in processed:
                continue
            processed.add(cell)

            rowspan, colspan = cell_spans[cell]

            spans = process_cell_text(wxr, cell)
            if not spans:
                continue

            if r == 0 and spans[0][0]:  # starts_bold
                first_cells_are_bold = True

            text = clean_value(wxr, " ".join(span[3] for span in spans))

            this_is_header, unformatted_text = is_header(
                wxr,
                cell,
                spans,
                is_greek_entry,
                found_unformatted_text,
                first_cells_are_bold,
            )

            if unformatted_text is True:
                found_unformatted_text = True

            if this_is_header or (c == 0 and first_column_is_headers is True):
                # Because Greek wiktionary has its own written script to rely
                # in heuristics, we can use that. It also seems that for
                # tables in Greek-language entries even if the table doesn't
                # use proper header cells, you can trust bolding and italics.

                # Currently we don't care which "direction" the header points:
                # we add the tag to both column headers and row headers, and
                # rely on that all headers are on only rows or columns that
                # don't have data cells; ie. headers and data aren't mixed.

                # Each row and each column gets its own header data.
                # The Spread key is used to keep track which headers should
                # "overlap": if the spread is different, that should always
                # mean that one is contained within another and thus they're
                # not complementary headers, but one "bigger" category and
                # one "specific" category. If the Spread is identical, then
                # that's obviously two complementary headers, and the later
                # one overwrites the other.
                row_spread = (r, r + rowspan)
                for rr in range(r, r + rowspan):
                    # Also overwrites headers with the same "span"; simple
                    # way to have overlapping sections.
                    row_hdrs_all.setdefault(rr, {}).setdefault(c, {})[
                        row_spread
                    ] = text

                col_spread = (c, c + colspan)
                for cc in range(c, c + colspan):
                    column_hdrs_all.setdefault(cc, {})[col_spread] = text

                prefix = None

            elif text in ARTICLES:
                prefix = text
            else:
                # cell is data
                if text in UNEXPECTED_ARTICLES:
                    wxr.wtp.debug(
                        f"Found '{text}' in table '{wxr.wtp.title}'",
                        sortid="table/335",
                    )
                tags: set[str] = set()
                for cc, vd in row_hdrs_all.get(r, {}).items():
                    # Only row headers strictly to the left of this cell.
                    if c <= cc:
                        continue
                    for (start, end), tag in vd.items():
                        if start > r or end < r + rowspan:
                            continue
                        tags.add(tag)
                for (start, end), tag in column_hdrs_all.get(c, {}).items():
                    if start > c or end < c + colspan:
                        continue
                    tags.add(tag)

                texts = [text]
                if "&" in text:
                    texts = [t.strip() for t in text.split("&")]
                # One form per line of each piece.
                texts = [line for piece in texts for line in piece.splitlines()]
                if prefix is not None:
                    texts = [f"{prefix} {t}" for t in texts]
                    prefix = None
                if len(tags) > 0:
                    forms.extend(
                        Form(
                            form=form_text,
                            raw_tags=sorted(tags),
                            source=source,
                        )
                        for form_text in texts
                    )
                else:
                    # If a cell has no tags in a table, it's probably a note
                    # or something.
                    wxr.wtp.warning(
                        f"Cell without any tags in table: {text}",
                        sortid="table/300/20250217",
                    )

    if len(forms) > 0:
        data.forms.append(
            Form(
                form=template_name,
                tags=["inflection-template"],
                source=source,
            )
        )

        new_forms = postprocess_table_forms(forms, data.word)
        data.forms.extend(new_forms)

461 

462 

def remove_article_forms(forms: list[Form], word: str) -> list[Form]:
    """Strip articles out of `forms`.

    Articles can appear in two ways:
    * As a separate form:
      Ex. https://el.wiktionary.org/wiki/λίθος
    * As part of a form, inside form.form
      Ex. most tables

    A form that is nothing but an article is left out of the result; a form
    of several words whose first word is an article has that word removed
    (the `Form` object is modified in place). When `word` is itself an
    article, `forms` is returned untouched.

    Used in both headword and table forms. Note that for headword forms, where
    there is usually no grammatic information, we could also use these articles
    to populate tags - but since most of the time we remove articles in tables,
    it was deemed not worth.
    """
    # Do not remove article forms for the article pages themselves...
    if word in ARTICLES:
        return forms

    kept: list[Form] = []
    for entry in forms:
        if entry.form in EXTENDED_ARTICLES:
            continue
        words = entry.form.split()
        # words[1:] is non-empty only when there is more than one word.
        if words[1:] and words[0] in EXTENDED_ARTICLES:
            entry.form = " ".join(words[1:])
        if entry.form:
            kept.append(entry)
    return kept

492 

493 

def postprocess_table_forms(forms: list[Form], word: str) -> list[Form]:
    """Postprocess table forms.

    * Translate tags
    * Remove articles (requires original word)
    * Convert some parens to rare tag
    * Remove trailing numbers and stars (usu. notes)
    * Form expansion

    About form expansion, there are two types:
    * Separators: "/", "-"
    * Strings inside parens

    The purpose being to go:
      FROM "θα ζητάν(ε) - ζητούν(ε)"
      TO   ["θα ζητάν", "θα ζητάνε", "θα ζητούν", "θα ζητούνε"]

    Pieces and variants that come out empty during expansion (for example
    the "without parens" variant of "(β)" in "α / (β)") are dropped instead
    of being emitted as forms with an empty string.

    The forms in `forms` are modified in place by the first steps; the
    returned list holds those objects or deep copies of them.

    References:
    * https://el.wiktionary.org/wiki/τρώω
    * https://el.wiktionary.org/wiki/ζητάω < this page is cursed anyway
      https://el.wiktionary.org/wiki/αγαπάω < use this instead
    """
    for form in forms:
        translate_raw_tags(form)

    clean_forms = remove_article_forms(forms, word)

    for form in clean_forms:
        # Parens > rare inflection (cf. μπόι)
        # startswith/endswith rather than indexing, so an empty form cannot
        # raise IndexError.
        if form.form.startswith("(") and form.form.endswith(")"):
            form.form = form.form[1:-1]
            form.tags.append("rare")

        # Remove trailing numbers (usu. notes)
        # https://el.wiktionary.org/wiki/Καπιτόπουλος
        form.form = TRAILING_NUMBER_RE.sub("", form.form)
        # https://el.wiktionary.org/wiki/επιζών
        form.form = form.form.rstrip("*")

    # Separators
    separators = ("/", "-")
    verb_particles = ("θα", "να")
    separated_forms: list[Form] = []
    for form in clean_forms:
        # Assume only one type of separator present atm
        sep = next((s for s in separators if s in form.form), None)
        if sep is None:
            separated_forms.append(form)
            continue

        # Ignore suffix/prefixes (-ισμός)
        if form.form.startswith(sep) or form.form.endswith(sep):
            separated_forms.append(form)
            continue

        # Extract the leading particle, if any, so that it can be put back
        # in front of every separated piece.
        particle = ""
        parts = form.form.split()
        if len(parts) > 1 and parts[0] in verb_particles:
            particle = parts[0]
            form.form = " ".join(parts[1:])

        for piece in form.form.split(sep):
            piece = piece.strip()
            if not piece:
                # "a//b", or a separator left at the edge once the particle
                # is gone: nothing to emit for this piece.
                continue
            separated_form = form.model_copy(deep=True)
            separated_form.form = f"{particle} {piece}" if particle else piece
            separated_forms.append(separated_form)

    # Strings inside parens
    new_forms: list[Form] = []
    for form in separated_forms:
        m = re.match(r"^(.*?)\((.*?)\)(.*)$", form.form)
        if not m:
            new_forms.append(form)
            continue

        before, inside, after = m.groups()
        # Only the first parenthesised group is expanded: once without it
        # and once with its content spliced in.
        for variant in (before + after, before + inside + after):
            if not variant.strip():
                # E.g. "(β)" would otherwise yield an empty form.
                continue
            new_form = form.model_copy(deep=True)
            new_form.form = variant
            new_forms.append(new_form)

    return new_forms

583 

584 

585def process_cell_text( 

586 wxr: WiktextractContext, cell: WikiNode 

587) -> list[tuple[bool, bool, bool, str]]: 

588 cell_text = wxr.wtp.node_to_text(cell, node_handler_fn=cell_node_fn) 

589 cell_text = clean_value(wxr, cell_text) 

590 split_text = BOLD_RE.split(cell_text) 

591 

592 # bold, italics, is greek, text 

593 spans: list[tuple[bool, bool, bool, str]] = [] 

594 

595 inside_bold = False 

596 inside_italics = False 

597 for i, text in enumerate(split_text): 

598 text = text.strip() 

599 if not text: 

600 continue 

601 if i % 2 == 0: 

602 for ch in text: 

603 if not ch.isalpha(): 

604 continue 

605 greek = unicode_name(ch).startswith("GREEK") 

606 break 

607 else: 

608 # no alphanumerics detected 

609 continue 

610 

611 spans.append((inside_bold, inside_italics, greek, text)) 

612 continue 

613 match text: 

614 case "__B__": 

615 inside_bold = True 

616 case "__/B__": 

617 inside_bold = False 

618 case "__I__": 

619 inside_italics = True 

620 case "__/I__": 620 ↛ 597line 620 didn't jump to line 597 because the pattern on line 620 always matched

621 inside_italics = False 

622 

623 return spans 

624 

625 

UnformattedFound: TypeAlias = bool


def is_header(
    wxr: WiktextractContext,
    cell: WikiNode,
    spans: list[tuple[bool, bool, bool, str]],
    is_greek_entry: bool,
    unformatted_text_found: bool,
    first_cells_are_bold: bool,
) -> tuple[bool, UnformattedFound]:
    """Decide whether a table cell is a header.

    Container for the more complex logic, because trying to figure out if
    something is a header can get messy. Only the first span of `spans` is
    looked at, together with the cell's `style` attribute.

    Returns `(is_header, unformatted_found)`:
    * A proper table header cell is always a header.
    * In a non-Greek entry, a cell is a header exactly when its first span is
      Greek text.
    * In a Greek entry, italics make a header and text that is neither bold
      nor italic is data. Bold text is a header when unformatted text has
      been seen before or when the first row had bold cells; otherwise a
      notice is logged and the cell is treated as data.
    """
    if cell.kind == NodeKind.TABLE_HEADER_CELL:
        return True, False

    bold, italic, greek, text = spans[0]

    # Formatting can also come from the cell's style attribute.
    css = cell.attrs.get("style", "")
    bold = bold or "bold" in css
    italic = italic or "italic" in css

    if not is_greek_entry:
        # If the table is for another language other than Greek, a cell
        # starting with Greek text is a table header.
        # NOTE(review): the second value is true for *formatted* text here,
        # the opposite of the Greek-entry branch below — confirm intent.
        return bool(greek), (bold or italic)

    # Is a Greek entry
    if italic:
        return True, False

    if not bold:
        return False, True

    # Bolded from here on: a header if we've seen unformatted text before,
    # or if the cells of the first row were bold.
    if unformatted_text_found or first_cells_are_bold:
        return True, False

    wxr.wtp.wiki_notice(
        f"Can't be sure if bolded text entry '{text}' is a header or not",
        sortid="table/20250210a",
    )
    return False, False

677 return False, False