Coverage for src/wiktextract/extractor/el/text_utils.py: 56%

9 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-11 10:26 +0000

1import re 

2from unicodedata import normalize 

3 

# Find digits at the end of a string, like the "2" in "Noun 2".
# Any whitespace directly before the digits is part of the match; the digits
# themselves are captured in group 1.
# This is so commonly used that it needs to stop being a magic regex.
ENDING_NUMBER_RE = re.compile(r"\s*(\d+)$")

7 

# Character set meant to be passed to str.strip(). Strip a fragment with it
# and check for an empty result to eliminate stuff like ", " that consists
# only of separators.
# NOTE(review): "\b" in this non-raw string is the backspace character
# (U+0008), not a regex word boundary -- confirm that this is intended.
STRIP_PUNCTUATION = " \t\b,.;:*#-–()[]"

11 

12 

def normalized_int(fancy_digits: str) -> int:
    """Convert a digit string to an int, tolerating "fancy" digit characters.

    The string is first handed to ``int()`` unchanged. If that raises
    ``ValueError``, the string is NFKC-normalized, which folds compatibility
    characters such as superscript or circled digits to plain digits, and
    the conversion is attempted once more.

    Args:
        fancy_digits: Text expected to represent a single integer.

    Returns:
        The parsed integer.

    Raises:
        ValueError: If the text is still not a valid integer after
            normalization.
    """
    try:
        return int(fancy_digits)
    except ValueError:
        # Plain parsing failed; retry on the compatibility-normalized form.
        return int(normalize("NFKC", fancy_digits))