Coverage for src / wiktextract / wxr_context.py: 95%

31 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-26 08:59 +0000

1# Wiktextract context object 

2import re 

3import sqlite3 

4 

5from wikitextprocessor import Wtp 

6 

7from .config import WiktionaryConfig 

8 

9 

10class WiktextractContext: 

11 __slots__ = ( 

12 "wtp", 

13 "config", 

14 "lang", 

15 "word", 

16 "pos", 

17 "thesaurus_db_path", 

18 "thesaurus_db_conn", 

19 ) 

20 

21 def __init__(self, wtp: Wtp, config: WiktionaryConfig): 

22 from .thesaurus import init_thesaurus_db 

23 

24 self.config = config 

25 self.wtp = wtp 

26 self.lang = None 

27 self.word = None 

28 self.pos = None 

29 self.thesaurus_db_path = wtp.db_path.with_stem( # type: ignore[union-attr] 

30 f"{wtp.db_path.stem}_thesaurus" # type: ignore[union-attr] 

31 ) 

32 self.thesaurus_db_conn = ( 

33 init_thesaurus_db(self.thesaurus_db_path) 

34 if config.extract_thesaurus_pages 

35 else None 

36 ) 

37 if config.linktrailing_regex_pattern is not None: 

38 self.wtp.linktrailing_re = re.compile( 

39 config.linktrailing_regex_pattern 

40 ) 

41 

42 def reconnect_databases(self, check_same_thread: bool = True) -> None: 

43 # `multiprocessing.pool.Pool.imap()` runs in another thread, if the db 

44 # connection is used to create iterable data for `imap`, 

45 # `check_same_thread` must be `False`. 

46 if self.config.extract_thesaurus_pages: 46 ↛ 50line 46 didn't jump to line 50 because the condition on line 46 was always true

47 self.thesaurus_db_conn = sqlite3.connect( 

48 self.thesaurus_db_path, check_same_thread=check_same_thread 

49 ) 

50 self.wtp.db_conn = sqlite3.connect( 

51 self.wtp.db_path, 

52 check_same_thread=check_same_thread, # type: ignore[arg-type] 

53 ) 

54 

55 def remove_unpicklable_objects(self) -> None: 

56 # remove these variables before passing the `WiktextractContext` object 

57 # to worker processes 

58 if self.config.extract_thesaurus_pages: 58 ↛ 60line 58 didn't jump to line 60 because the condition on line 58 was always true

59 self.thesaurus_db_conn.close() # type: ignore[union-attr] 

60 self.thesaurus_db_conn = None 

61 self.wtp.db_conn.close() 

62 self.wtp.db_conn = None # type: ignore[assignment] 

63 self.wtp.lua = None 

64 self.wtp.lua_invoke = None 

65 self.wtp.lua_reset_env = None 

66 self.wtp.lua_clear_loaddata_cache = None