Coverage for src/wiktextract/extractor/simple/text_utils.py: 100%
7 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1import re
3from .section_titles import POS_HEADINGS
# Reverse lookup from template name to the POS heading it is registered
# under, built from the "templates" entry of every POS_HEADINGS value.
POS_TEMPLATE_NAMES: dict[str, str] = {}

for pos, templates in POS_HEADINGS.items():
    # If the same template name is listed under several headings, the heading
    # iterated last wins, because update() overwrites existing keys.
    POS_TEMPLATE_NAMES.update({tn: pos for tn in templates["templates"]})
# Find digits at end of string, like "Noun 2".
# Group 1 captures the digits; any whitespace directly before them is part of
# the match, so .sub("", title) removes the number together with that gap.
# This is so commonly used that it needs to stop being a magic regex.
POS_ENDING_NUMBER_RE: re.Pattern[str] = re.compile(r"\s*(\d+)$")
# Characters to pass to str.strip(): whitespace, list/markup markers and
# common punctuation. Strip with this and then check for an empty string to
# eliminate leftovers like ", ".
# NOTE: this is not a raw string, so "\t" is a tab and "\b" is the backspace
# character (U+0008), not a regex word boundary.
STRIP_PUNCTUATION: str = " \t\b,.;:*#-–()[]"