Coverage for src/wiktextract/extractor/el/text_utils.py: 56%
9 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-11 10:26 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-11 10:26 +0000
1import re
2from unicodedata import normalize
# Matches a run of digits at the end of a string, plus any whitespace directly
# before it -- e.g. the " 2" in "Noun 2". Group 1 holds just the digits.
# This pattern is used so commonly that it is kept as a named, precompiled
# constant rather than repeated inline as a magic regex.
ENDING_NUMBER_RE = re.compile(r"\s*(\d+)$")
# Character set intended for str.strip(): strip with it, then check for an
# empty result to eliminate strings made up only of separators, such as ", ".
# NOTE(review): "\b" in a non-raw string literal is the backspace character
# (U+0008), not a regex word boundary -- confirm that this is intended.
STRIP_PUNCTUATION = " \t\b,.;:*#-–()[]"
def normalized_int(fancy_digits: str) -> int:
    """Parse *fancy_digits* as an integer, tolerating compatibility digits.

    The string is first handed to ``int()`` unchanged. If that raises
    ``ValueError``, the string is NFKC-normalized and parsed again; NFKC
    folds compatibility forms such as superscript or subscript digits
    (which ``int()`` rejects on its own) into plain digits.

    Args:
        fancy_digits: Text expected to contain only an integer.

    Returns:
        The parsed integer.

    Raises:
        ValueError: If the text is not a valid integer even after NFKC
            normalization.
    """
    try:
        return int(fancy_digits)
    except ValueError:
        # Second attempt: fold compatibility characters to their plain
        # equivalents before parsing. A failure here propagates to the caller.
        return int(normalize("NFKC", fancy_digits))