Coverage for src / wiktextract / extractor / el / table.py: 83%

290 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-12 08:09 +0000

1import re 

2from typing import TypeAlias 

3from unicodedata import name as unicode_name 

4 

5from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode 

6 

7from wiktextract.clean import clean_value 

8from wiktextract.extractor.el.tags import translate_raw_tags 

9from wiktextract.wxr_context import WiktextractContext 

10 

11from .models import Form, FormSource, WordEntry 

12from .parse_utils import GREEK_LANGCODES, remove_duplicate_forms 

13 

# Shorthand for this file. Could be an import, but it's so simple...
Node = str | WikiNode


# GREEK TABLE HEURISTICS:
# If it's a table for a Greek language entry, if it's in a header or is in
# italics, it's a header.
# If it's NOT a Greek entry and has Greek text, it's a header.


# node_fns are different from template_fns. template_fns are functions that
# are used to handle how to expand (and otherwise process) templates, while
# node functions are used when turning any parsed "abstract" nodes into strings.
def cell_node_fn(
    node: WikiNode,
) -> list[Node] | None:
    """Node handler used while flattening a table cell into text.

    Returns a replacement list of nodes for the kinds handled here, or
    ``None`` for every other kind:

    * italic nodes: their children wrapped in ``__I__`` / ``__/I__``
    * bold nodes: their children wrapped in ``__B__`` / ``__/B__``
    * (header) cell nodes: their children, unwrapped
    """
    assert isinstance(node, WikiNode)
    kind = node.kind

    if kind == NodeKind.ITALIC:
        opening, closing = "__I__", "__/I__"
    elif kind == NodeKind.BOLD:
        opening, closing = "__B__", "__/B__"
    elif kind in (NodeKind.TABLE_CELL, NodeKind.TABLE_HEADER_CELL):
        # In case someone puts tables inside tables...
        return node.children
    else:
        return None

    return [opening, *node.children, closing]

44 

45 

# Matches the formatting markers emitted by cell_node_fn. The capturing group
# makes `BOLD_RE.split()` keep the markers, so plain text lands on even
# indices of the result and markers on odd ones.
BOLD_RE = re.compile(r"(__/?[BI]__)")
# Digits at the very end of a string (usually footnote references).
TRAILING_NUMBER_RE = re.compile(r"\d+$")

# Each article is listed once; several spellings are shared between
# cases/genders, and repeating them in a set literal changes nothing.
ARTICLES: set[str] = {
    "ο",
    "η",
    "το",
    "την",
    "της",
    "τον",
    "τη",
    "οι",
    "τα",
    "των",
    "τους",
    "του",
    "τις",
}
EXTENDED_ARTICLES: set[str] = ARTICLES | {
    "τη(ν)",
    "ο/η",
    "του/της",
    "τον/τη",
    "τον/τη(ν)",
    "τον/την",
    "τους/τις",
}
"""Articles to trim from inflection tables / headwords."""
UNEXPECTED_ARTICLES: set[str] = {
    "αι",
    "ένα",
    "ένας",
    "στα",
    "στη",
    "στην",
    "στης",
    "στις",
    "στο",
    "στον",
    "στου",
    "στους",
    "στων",
    "τ'",
    "ταις",
    "τας",
    "τες",
    "τη",
    "τοις",
    "τω",
}
"""Includes contractions, Ancient Greek articles etc."""

100 

101 

def process_inflection_section(
    wxr: WiktextractContext,
    data: WordEntry,
    snode: WikiNode,
    *,
    source: FormSource = "",
    top_template_name: str | None = None,
) -> None:
    """Find every table under `snode` and parse it into `data.forms`.

    The section is walked once with a node handler that expands templates
    and records each table node together with the name of the top-level
    template it was found in (if any). Every recorded table is then handed
    to `parse_table`, and finally `data.forms` is de-duplicated.

    `top_template_name` is the name attached to tables met before the walk
    enters any template; it is reset to ``None`` as soon as the first
    top-level template has been processed.
    """
    collected: list[tuple[str | None, WikiNode]] = []
    # How many templates deep the walk currently is. Only templates visible
    # in the original wikitext (depth 0) lend their name to the tables found
    # inside them, not templates inside templates.
    depth = 0

    def table_node_handler_fn(
        node: WikiNode,
    ) -> list[str] | str | None:
        """Expand templates recursively and collect table nodes.

        Returns the rendered text for a template, ``[""]`` for a table
        (after recording it) and ``None`` for every other node.
        """
        nonlocal depth, top_template_name
        assert isinstance(node, WikiNode)

        if isinstance(node, TemplateNode):
            # Expand and re-parse the template so that the nodes it
            # produces pass through this handler as well.
            # Argh. Don't use "node_to_text", that causes bad output...
            wikitext = wxr.wtp.node_to_wikitext(node)
            expanded = wxr.wtp.expand(wikitext)
            if depth == 0:
                # A top-level template in the original wikitext.
                top_template_name = node.template_name
            reparsed = wxr.wtp.parse(expanded)

            depth += 1
            rendered = wxr.wtp.node_to_text(
                reparsed, node_handler_fn=table_node_handler_fn
            )
            depth -= 1
            if depth == 0:
                top_template_name = None
            return rendered

        if node.kind == NodeKind.TABLE:
            # XXX Handle tables here
            # template depth and top-level template name
            collected.append((top_template_name, node))
            return [""]

        return None

    # Only the handler's side effects matter; the rendered output is unused.
    wxr.wtp.node_to_html(snode, node_handler_fn=table_node_handler_fn)

    for name, table in collected:
        # XXX template_name
        parse_table(
            wxr,
            table,
            data,
            data.lang_code in GREEK_LANGCODES,
            template_name=name or "",
            source=source,
        )

    data.forms = remove_duplicate_forms(wxr, data.forms)

172 

173 

def _cell_span(wxr: WiktextractContext, cell: WikiNode) -> tuple[int, int]:
    """Return the sanitised ``(rowspan, colspan)`` of a table cell.

    Unparsable attributes make both values fall back to 1, spans over 30
    are reported and reset to 1, and non-positive spans are raised to 1 so
    that the cell still occupies its own grid square.
    """
    try:
        rowspan = int(cell.attrs.get("rowspan", "1"))  # 🡙
        colspan = int(cell.attrs.get("colspan", "1"))  # 🡘
    except ValueError:
        rowspan = 1
        colspan = 1

    if colspan > 30:
        wxr.wtp.error(
            f"Colspan {colspan} over 30, set to 1",
            sortid="table/128/20250207",
        )
        colspan = 1
    if rowspan > 30:
        wxr.wtp.error(
            f"Rowspan {rowspan} over 30, set to 1",
            sortid="table/134/20250207b",
        )
        rowspan = 1

    return max(rowspan, 1), max(colspan, 1)


def parse_table(
    wxr: WiktextractContext,
    tnode: WikiNode,
    data: WordEntry,
    is_greek_entry: bool = False,  # Whether the entry is for a Greek word
    template_name: str = "",
    *,
    source: FormSource = "",
) -> None:
    """Parse inflection table. Generates 'form' data; 'foos' is a form of 'foo'
    with the tags ['plural'].

    The table is first laid out on a grid (honouring rowspan/colspan), then
    every cell is classified as a header, an article prefix for the next
    data cell, or a data cell. Data cells become `Form` objects tagged with
    the texts of the headers that apply to them; the forms are
    post-processed and appended to `data.forms`.

    Args:
        wxr: extraction context (used for text conversion and messages).
        tnode: a wikitext table node or an HTML ``<table>`` node.
        data: the word entry whose `forms` list is extended.
        is_greek_entry: passed through to `is_header`.
        template_name: if non-empty and any form was found, an extra
            ``inflection-template`` form with this name is added.
        source: stored on every generated `Form`.

    A table without any rows, or whose rows hold no cells, adds nothing.
    """
    assert (isinstance(tnode, WikiNode) and tnode.kind == NodeKind.TABLE) or (
        isinstance(tnode, HTMLNode) and tnode.tag == "table"
    )

    is_html_table = isinstance(tnode, HTMLNode)

    # Some debugging code: if wiktwords is passed a --inflection-tables-file
    # argument, we save tables to a file for debugging purposes, or for just
    # getting tables that can be used as test data.
    if wxr.config.expand_tables:
        # Explicit encoding: the dump contains Greek text and must not
        # depend on the platform's default encoding.
        with open(wxr.config.expand_tables, "w", encoding="utf-8") as f:
            f.write(f"{wxr.wtp.title=}\n")
            text = wxr.wtp.node_to_wikitext(tnode)
            f.write(f"{text}\n")

    Row: TypeAlias = int
    Column: TypeAlias = int

    # We complete the table using nested dicts (instead of arrays for
    # convenience) such that when we come across a node, we push that node's
    # reference to each coordinate point in the table grid it occupies. Each
    # grid point can then be checked for if it's been handled already and
    # skipped if needed.
    table_grid: dict[Row, dict[Column, WikiNode]] = {}
    # Sanitised (rowspan, colspan) per cell, keyed by object identity, so
    # the second pass below sees exactly the spans the grid was built with.
    cell_spans: dict[int, tuple[int, int]] = {}

    first_column_is_headers = True

    rows = (
        tnode.find_html_recursively("tr")
        if is_html_table
        else tnode.find_child_recursively(NodeKind.TABLE_ROW)
    )
    for r, row in enumerate(rows):
        c = 0
        if r not in table_grid:
            table_grid[r] = {}

        cells = (
            row.find_html(["th", "td"])
            if is_html_table
            else row.find_child(
                NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL,
            )
        )
        for cell in cells:
            # Skip squares already claimed by a cell spanning from above or
            # from the left.
            while c in table_grid[r]:
                c += 1

            rowspan, colspan = _cell_span(wxr, cell)
            cell_spans[id(cell)] = (rowspan, colspan)

            for rr in range(r, r + rowspan):
                if rr not in table_grid:
                    table_grid[rr] = {}
                for cc in range(c, c + colspan):
                    table_grid[rr][cc] = cell

    # Drop empty rows at the end of the table. Row keys are contiguous and
    # start at 0, so the last row is always `len(table_grid) - 1`.
    last_row = len(table_grid) - 1
    while last_row >= 0 and not table_grid[last_row]:
        del table_grid[last_row]
        last_row -= 1

    if not table_grid:
        # No rows at all, or only rows without cells: nothing to extract.
        return

    if len(table_grid[0]) == 1:
        # Table is one column in width, no headers on rows
        first_column_is_headers = False

    # NOTE(review): the original comment here said "There's only one or two
    # rows", but the condition only matches tables of exactly two rows —
    # confirm whether `<= 2` was intended.
    if len(table_grid) == 2:
        first_column_is_headers = False

    # Headers are saved in two dict that has their keys made out of tuples
    # made of their "bookends": so {(1,1), "foo"} for a header that is made
    # up of the first cell only of a row in the column_hdrs dict.
    # If we come across a header that has those exact same bookends, only
    # then do we replace the previous tags with it; if you have overlapping
    # 'widths', leave them so that we inherit different 'levels' of headers.
    Spread = tuple[int, int]
    SpreadDict = dict[Spread, str]
    # The column and row headers are saved into big dicts: column_hdrs is a dict
    # whose key is what row or column we are in. The values of that table grid
    # square is a dict with the bookends (`Spread`) and the tags associated with
    # those bookends
    column_hdrs_all: dict[Column, SpreadDict] = {}
    row_hdrs_all: dict[Row, dict[Column, SpreadDict]] = {}

    forms: list[Form] = []
    processed: set[WikiNode] = set()
    # Some tables have cells with stuff like `του` we want to add to the
    # next cell
    prefix: str | None = None

    first_cells_are_bold = False
    found_unformatted_text = False

    for r, row_d in table_grid.items():
        for c, cell in row_d.items():
            # A spanning cell occupies several squares; handle it once.
            if cell in processed:
                continue
            processed.add(cell)

            rowspan, colspan = cell_spans[id(cell)]

            spans = process_cell_text(wxr, cell)
            if not spans:
                continue

            if r == 0 and spans[0][0]:  # starts_bold
                first_cells_are_bold = True

            text = clean_value(wxr, " ".join(span[3] for span in spans))

            this_is_header, unformatted_text = is_header(
                wxr,
                cell,
                spans,
                is_greek_entry,
                found_unformatted_text,
                first_cells_are_bold,
            )

            if unformatted_text is True:
                found_unformatted_text = True

            if this_is_header or (c == 0 and first_column_is_headers is True):
                # Because Greek wiktionary has its own written script to rely
                # in heuristics, we can use that. It also seems that for
                # tables in Greek-language entries even if the table doesn't
                # use proper header cells, you can trust bolding and italics.

                # Currently we don't care which "direction" the header points:
                # we add the tag to both column headers and row headers, and
                # rely on that all headers are on only rows or columns that
                # don't have data cells; ie. headers and data aren't mixed.

                # Each row and each column gets its own header data.
                # The Spread key is used to keep track which headers should
                # "overlap": if the spread is different, that should always
                # mean that one is contained within another and thus they're
                # not complementary headers, but one "bigger" category and
                # one "specific" category. If the Spread is identical, then
                # that's obviously two complementary headers, and the later one
                # overwrites the other.
                row_spread = (r, r + rowspan)
                for rr in range(r, r + rowspan):
                    # Also overwrites headers with the same "span"; simple
                    # way to have overlapping sections.
                    row_hdrs_all.setdefault(rr, {}).setdefault(c, {})[
                        row_spread
                    ] = text

                column_spread = (c, c + colspan)
                for cc in range(c, c + colspan):
                    column_hdrs_all.setdefault(cc, {})[column_spread] = text

                prefix = None

            elif text in ARTICLES:
                prefix = text
            else:
                # cell is data
                if text in UNEXPECTED_ARTICLES:
                    wxr.wtp.debug(
                        f"Found '{text}' in table '{wxr.wtp.title}'",
                        sortid="table/335",
                    )
                tags: set[str] = set()
                # Row headers: only those in columns to the left of this
                # cell whose row spread fully covers the cell.
                for cc, vd in row_hdrs_all.get(r, {}).items():
                    if c <= cc:
                        continue
                    for (start, end), tag in vd.items():
                        if start > r or end < r + rowspan:
                            continue
                        tags.add(tag)
                # Column headers whose column spread fully covers the cell.
                for (start, end), tag in column_hdrs_all.get(c, {}).items():
                    if start > c or end < c + colspan:
                        continue
                    tags.add(tag)

                # One cell may hold several forms, separated by "&" and/or
                # line breaks.
                texts = [text]
                if "&" in text:
                    texts = [t.strip() for t in text.split("&")]
                texts = [line for t in texts for line in t.splitlines()]
                if prefix is not None:
                    texts = [f"{prefix} {t}" for t in texts]
                    prefix = None

                if tags:
                    forms.extend(
                        Form(
                            form=form_text,
                            raw_tags=sorted(tags),
                            source=source,
                        )
                        for form_text in texts
                    )
                else:
                    # If a cell has no tags in a table, it's probably a note
                    # or something.
                    wxr.wtp.warning(
                        f"Cell without any tags in table: {text}",
                        sortid="table/300/20250217",
                    )

    # If there is no template name (https://el.wiktionary.org/wiki/κρόκος)
    # we are adding junk anyway. This prevents a Form with empty form, which
    # is treated as an (non critical) error by src/wiktextract/wiktionary.py
    #
    # (I think the κρόκος issue is due to not stopping parsing at headings,
    # since the two intermingled templates are in different headings...)
    if forms and template_name:
        data.forms.append(
            Form(
                form=template_name,
                tags=["inflection-template"],
                source=source,
            )
        )

    new_forms = postprocess_table_forms(forms, data.word)
    data.forms.extend(new_forms)

466 

467 

def remove_article_forms(forms: list[Form], word: str) -> list[Form]:
    """Strip articles out of a list of forms.

    An article can show up in two places:
      * as a form of its own
        Ex. https://el.wiktionary.org/wiki/λίθος
      * as the first word of a form, inside form.form
        Ex. most tables

    Forms that are nothing but an article (or that end up empty) are left
    out of the returned list; a leading article is cut off in place on the
    `Form` object itself. When `word` is itself an article the input list
    is returned untouched.

    Used in both headword and table forms. Note that for headword forms, where
    there is usually no grammatic information, we could also use these articles
    to populate tags - but since most of the time we remove articles in tables,
    it was deemed not worth.
    """
    # Do not remove article forms for the article pages themselves...
    if word in ARTICLES:
        return forms

    kept: list[Form] = []
    for entry in forms:
        if entry.form in EXTENDED_ARTICLES:
            continue
        tokens = entry.form.split()
        if tokens[1:] and tokens[0] in EXTENDED_ARTICLES:
            entry.form = " ".join(tokens[1:])
        if entry.form:
            kept.append(entry)
    return kept

497 

498 

def postprocess_table_forms(forms: list[Form], word: str) -> list[Form]:
    """Postprocess table forms.

    * Translate tags
    * Remove articles (requires original word)
    * Convert some parens to rare tag
    * Remove trailing numbers and stars (usu. notes)
    * Form expansion

    About form expansion, there are two types:
    * Separators: "/", "-"
    * Strings inside parens

    The purpose being to go:
      FROM "θα ζητάν(ε) - ζητούν(ε)"
      TO   ["θα ζητάν", "θα ζητάνε", "θα ζητούν", "θα ζητούνε"]

    Forms that are empty (or whitespace only) after any of these steps are
    dropped instead of being returned with an empty `form`.

    The `Form` objects in `forms` are modified in place; expanded variants
    are deep copies.

    References:
    * https://el.wiktionary.org/wiki/τρώω
    * https://el.wiktionary.org/wiki/ζητάω < this page is cursed anyway
      https://el.wiktionary.org/wiki/αγαπάω < use this instead
    """
    for form in forms:
        translate_raw_tags(form)

    clean_forms = remove_article_forms(forms, word)

    for form in clean_forms:
        # Parens > rare inflection (cf. μπόι)
        # startswith/endswith instead of indexing: an empty form must not
        # raise IndexError.
        if form.form.startswith("(") and form.form.endswith(")"):
            form.form = form.form[1:-1]
            form.tags.append("rare")

        # Remove trailing numbers (usu. notes)
        # https://el.wiktionary.org/wiki/Καπιτόπουλος
        form.form = TRAILING_NUMBER_RE.sub("", form.form)
        # https://el.wiktionary.org/wiki/επιζών
        form.form = form.form.rstrip("*")

    # Separators
    separators = ("/", "-")
    verb_particles = ("θα", "να")
    separated_forms: list[Form] = []
    for form in clean_forms:
        if not form.form.strip():
            # Nothing left after the cleanup above (e.g. "()", "1", "*").
            continue

        # Assume only one type of separator present atm
        sep = next((sep for sep in separators if sep in form.form), None)
        # Ignore separator if the original word contained it
        # Ex. "-ισμός", "η-τάξη" etc.
        if sep is None or sep in word:
            separated_forms.append(form)
            continue

        # Extract the leading particle, if any, so that it can be put back
        # in front of every separated piece.
        particle = ""
        parts = form.form.split()
        if len(parts) > 1 and parts[0] in verb_particles:
            particle = parts[0]
            form.form = " ".join(parts[1:])

        for piece in form.form.split(sep):
            piece = piece.strip()
            if not piece:
                # E.g. the empty half of "α/".
                continue
            separated_form = form.model_copy(deep=True)
            separated_form.form = f"{particle} {piece}" if particle else piece
            separated_forms.append(separated_form)

    # Strings inside parens: emit one variant without the bracketed text and
    # one with it. Only the first pair of parens in a form is expanded.
    paren_group_re = re.compile(r"^(.*?)\((.*?)\)(.*)$")
    new_forms: list[Form] = []
    for form in separated_forms:
        m = paren_group_re.match(form.form)
        if not m:
            new_forms.append(form)
            continue

        before, inside, after = m.groups()
        for variant in (before + after, before + inside + after):
            if not variant.strip():
                # E.g. the "without" variant of "(α)".
                continue
            new_form = form.model_copy(deep=True)
            new_form.form = variant
            new_forms.append(new_form)

    return new_forms

589 

590 

def process_cell_text(
    wxr: WiktextractContext, cell: WikiNode
) -> list[tuple[bool, bool, bool, str]]:
    """Split the text of a table cell into formatted spans.

    The cell is converted to text with `cell_node_fn`, which marks bold and
    italic runs with ``__B__``/``__/B__`` and ``__I__``/``__/I__``; the text
    is then cut at those markers.

    Returns a list of ``(bold, italics, greek, text)`` tuples, one per
    non-empty piece of text:

    * ``bold`` / ``italics``: whether the piece sits inside a bold / italic
      run,
    * ``greek``: whether the Unicode name of its first alphabetic character
      starts with "GREEK",
    * ``text``: the stripped text of the piece.

    Pieces without any alphabetic character are left out.
    """
    cell_text = wxr.wtp.node_to_text(cell, node_handler_fn=cell_node_fn)
    cell_text = clean_value(wxr, cell_text)
    # BOLD_RE has a capturing group, so the markers are kept: text is found
    # on even indices, markers on odd ones.
    split_text = BOLD_RE.split(cell_text)

    # bold, italics, is greek, text
    spans: list[tuple[bool, bool, bool, str]] = []

    inside_bold = False
    inside_italics = False
    for i, text in enumerate(split_text):
        text = text.strip()
        if not text:
            continue

        if i % 2 == 1:
            # A formatting marker: update the state for the text after it.
            if text == "__B__":
                inside_bold = True
            elif text == "__/B__":
                inside_bold = False
            elif text == "__I__":
                inside_italics = True
            elif text == "__/I__":
                inside_italics = False
            continue

        first_letter = next((ch for ch in text if ch.isalpha()), None)
        if first_letter is None:
            # no alphabetic characters detected
            continue

        # Pass a default: without one, unicodedata.name() raises ValueError
        # for a character it has no name for, and one such character would
        # abort processing of the whole table.
        greek = unicode_name(first_letter, "").startswith("GREEK")
        spans.append((inside_bold, inside_italics, greek, text))

    return spans

630 

631 

UnformattedFound: TypeAlias = bool


def is_header(
    wxr: WiktextractContext,
    cell: WikiNode,
    spans: list[tuple[bool, bool, bool, str]],
    is_greek_entry: bool,
    unformatted_text_found: bool,
    first_cells_are_bold: bool,
) -> tuple[bool, UnformattedFound]:
    """Decide whether a table cell is a header.

    Container for the heuristics, because figuring out whether something is
    a header can get messy. Only the first span of the cell is looked at;
    `spans` must not be empty.

    Returns ``(is_header, flag)``. For Greek entries the flag is True only
    when the cell starts with text that is neither bold nor italic.

    NOTE(review): for non-Greek entries the flag is True when the cell
    starts with bold or italic text, which looks inverted compared to the
    Greek case. Within this function `unformatted_text_found` is only read
    for Greek entries — confirm the intent.
    """
    # Real header cells need no heuristics.
    if cell.kind == NodeKind.TABLE_HEADER_CELL:
        return True, False

    starts_bold, starts_italicized, starts_greek, text = spans[0]

    # Formatting may also come from the cell's style attribute.
    style = cell.attrs.get("style", "")
    if "bold" in style:
        starts_bold = True
    if "italic" in style:
        starts_italicized = True

    if not is_greek_entry:
        # If the table is for another language other than Greek, a cell
        # starting with Greek text is a table header
        formatted = starts_bold or starts_italicized
        return (True, formatted) if starts_greek else (False, formatted)

    # From here on: Greek entry.
    if starts_italicized:
        return True, False

    if not starts_bold:
        return False, True

    # Bold text counts as a header if unformatted text has been seen before
    # it, or if the cells of the first row are bold.
    if unformatted_text_found or first_cells_are_bold:
        return True, False

    wxr.wtp.wiki_notice(
        f"Can't be sure if bolded text entry '{text}' is a header or not",
        sortid="table/20250210a",
    )
    return False, False