Coverage for src/wiktextract/extractor/simple/text_utils.py

import re

from .section_titles import POS_HEADINGS

# Reverse lookup from template name to the part-of-speech key it is
# registered under in POS_HEADINGS. Each POS_HEADINGS value is indexed with
# "templates" to get the template names for that heading. If the same
# template name appears under more than one heading, the heading iterated
# last wins.
POS_TEMPLATE_NAMES: dict[str, str] = {}
for pos, templates in POS_HEADINGS.items():
    POS_TEMPLATE_NAMES.update({tn: pos for tn in templates["templates"]})

# Matches a run of digits at the end of a string, together with any
# whitespace directly before it, e.g. the " 2" in "Noun 2". Group 1 is the
# digits alone.
# Shared here because the pattern is used so commonly that it should not be
# repeated as a magic regex at each call site.
POS_ENDING_NUMBER_RE = re.compile(r"\s*(\d+)$")

# Character set to pass to str.strip(): stripping a string with it and then
# checking for an empty result weeds out fragments made only of separators,
# such as ", ".
# NOTE(review): this is not a raw string, so "\b" is the backspace control
# character (U+0008), not a regex word boundary -- confirm that is intended.
STRIP_PUNCTUATION = " \t\b,.;:*#-–()[]"

Coverage for src/wiktextract/extractor/simple/text_utils.py: 100%