Coverage for src/wiktextract/extractor/simple/text_utils.py: 100%

7 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1import re 

2 

3from .section_titles import POS_HEADINGS 

4 

5# Reverse lookup: map each template name listed under a POS_HEADINGS entry's "templates" key back to that entry's key. 

6POS_TEMPLATE_NAMES: dict[str, str] = {} 

7 

8for pos, templates in POS_HEADINGS.items(): 

9 POS_TEMPLATE_NAMES.update({tn: pos for tn in templates["templates"]})  # a name listed under several entries keeps the last one seen

10 

11# Match a number at the end of a string together with any whitespace before it, like the " 2" in "Noun 2"; group 1 captures the digits. 

12# Kept as a named constant because the pattern is used so commonly that it should not be repeated as a magic regex. 

13POS_ENDING_NUMBER_RE = re.compile(r"\s*(\d+)$") 

14 

15# Character set to pass to str.strip(); if the stripped result is empty, the string held nothing but 

16# whitespace/punctuation such as ", " and can be dropped. 

17STRIP_PUNCTUATION = " \t\b,.;:*#-–()[]"  # NOTE(review): "\b" in this non-raw literal is the backspace character, not a regex word boundary - confirm intended