Coverage for src / wiktextract / wxr_context.py: 95%
31 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-26 08:59 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-26 08:59 +0000
1# Wiktextract context object
2import re
3import sqlite3
5from wikitextprocessor import Wtp
7from .config import WiktionaryConfig
class WiktextractContext:
    """Container tying a ``Wtp`` processor to a ``WiktionaryConfig``.

    Besides the two objects passed to the constructor, the context stores
    ``lang``, ``word`` and ``pos`` (all initialised to ``None``; presumably
    the entry currently being processed -- confirm against callers), and the
    path and SQLite connection of the thesaurus database.
    """

    __slots__ = (
        "wtp",
        "config",
        "lang",
        "word",
        "pos",
        "thesaurus_db_path",
        "thesaurus_db_conn",
    )

    def __init__(self, wtp: "Wtp", config: "WiktionaryConfig"):
        """Initialise the context.

        Args:
            wtp: Processor whose ``db_path`` is used to derive the thesaurus
                database path (``<stem>_thesaurus`` next to it).
            config: Extraction configuration.  ``extract_thesaurus_pages``
                decides whether the thesaurus database is initialised, and
                a non-``None`` ``linktrailing_regex_pattern`` is compiled
                and stored on ``wtp.linktrailing_re``.
        """
        # Imported locally rather than at module level (presumably to avoid
        # a circular import -- confirm).
        from .thesaurus import init_thesaurus_db

        self.config = config
        self.wtp = wtp
        self.lang = None
        self.word = None
        self.pos = None
        self.thesaurus_db_path = wtp.db_path.with_stem(  # type: ignore[union-attr]
            f"{wtp.db_path.stem}_thesaurus"  # type: ignore[union-attr]
        )
        self.thesaurus_db_conn = (
            init_thesaurus_db(self.thesaurus_db_path)
            if config.extract_thesaurus_pages
            else None
        )
        if config.linktrailing_regex_pattern is not None:
            self.wtp.linktrailing_re = re.compile(
                config.linktrailing_regex_pattern
            )

    def reconnect_databases(self, check_same_thread: bool = True) -> None:
        """Open fresh SQLite connections for the thesaurus and ``wtp`` DBs.

        The thesaurus connection is only opened when
        ``config.extract_thesaurus_pages`` is true.

        Args:
            check_same_thread: Forwarded to ``sqlite3.connect``.
        """
        # `multiprocessing.pool.Pool.imap()` runs in another thread, if the db
        # connection is used to create iterable data for `imap`,
        # `check_same_thread` must be `False`.
        if self.config.extract_thesaurus_pages:
            self.thesaurus_db_conn = sqlite3.connect(
                self.thesaurus_db_path, check_same_thread=check_same_thread
            )
        self.wtp.db_conn = sqlite3.connect(
            self.wtp.db_path,
            check_same_thread=check_same_thread,  # type: ignore[arg-type]
        )

    def remove_unpicklable_objects(self) -> None:
        """Close database connections and drop the Lua hooks on ``wtp``.

        Call this before passing the ``WiktextractContext`` object to worker
        processes.  The method is idempotent: connections that are already
        ``None`` are skipped instead of raising ``AttributeError``.
        """
        # Guard on the connection itself rather than on the config flag so
        # that a repeated call is a no-op and no open connection is left
        # behind on the object.
        if self.thesaurus_db_conn is not None:
            self.thesaurus_db_conn.close()
        self.thesaurus_db_conn = None
        if self.wtp.db_conn is not None:
            self.wtp.db_conn.close()
        self.wtp.db_conn = None  # type: ignore[assignment]
        self.wtp.lua = None
        self.wtp.lua_invoke = None
        self.wtp.lua_reset_env = None
        self.wtp.lua_clear_loaddata_cache = None