Coverage for src/wiktextract/extractor/simple/tags.py: 89%

1from wiktextract.tags import uppercase_tags, valid_tags

# Maps a label string to the list of tag strings it stands for.
# Keys are matched exactly as written here (case and spelling included), so
# variants such as "UK" / "uk" / "Uk" each need their own entry.
# NOTE(review): presumably the keys are labels as they appear in the wiki
# source data for this extractor -- confirm against the callers.
TAGS: dict[str, list[str]] = {
    # Grammatical / word-form labels.
    "no-gloss": ["no-gloss"],
    "comparative": ["comparative"],
    "Comparative": ["comparative"],
    "determiner": ["determiner"],
    "Negative": ["negative"],
    "Past": ["past"],
    "Past participle": ["past", "participle"],
    "Past tense": ["past"],
    "Plain form": ["canonical"],
    "Plain present": ["present"],
    "plural": ["plural"],
    "Plural": ["plural"],
    "Positive": ["positive"],
    "Present": ["present"],
    "Present participle": ["present", "participle"],
    "Proper noun": ["proper-noun"],
    "singular": ["singular"],
    "superlative": ["superlative"],
    "Superlative": ["superlative"],
    "Third person singular": ["third-person", "singular"],
    "Third-person singular": ["third-person", "singular"],
    # Stress and phonetic-context labels.
    "stressed": ["stressed"],
    "unstressed": ["unstressed"],
    "UK": ["UK"],
    "US": ["US"],
    "United Kingdom": ["UK"],
    "United States": ["US"],
    "before a vowel": ["before-vowel"],
    "before a consonant": ["before-consonant"],
    # Regional / accent labels.
    "CA": ["Canada"],
    "AU": ["Australia"],
    "Australian": ["Australia"],
    "California": ["California"],
    "Canadian": ["Canada"],
    # NOTE(review): this maps to a single empty-string tag, not to an empty
    # list. If the intent is "produce no tag", [] may be what is wanted --
    # confirm how consumers treat "" before changing it.
    "CA synth": [""],
    "GB": ["UK"],
    "India": ["India"],
    "Indian English": ["Indian-English"],
    "Kenya": ["Kenya"],
    "Limbu": ["Limbu"],
    "Massachusetts": ["Massachusetts"],
    "Mid-Atlantic": ["Mid-Atlantic"],
    "New York accent": ["New-York"],
    # NOTE(review): the key is spelled "Northen" (sic). Presumably it mirrors
    # a misspelling in the input data; verify before "correcting" it, since
    # lookups are exact-match.
    "Northen England": ["Northern-England"],
    "NZ": ["New-Zealand"],
    "Rhode Island": ["Rhode-Island"],
    "Southern England": ["Southern-England"],
    "uk": ["UK"],
    "Uk": ["UK"],
    "UK male": ["UK"],
    "US female": ["US"],
    "US Inland North": ["Inland-Northern-American"],
    "US-Inland North": ["Inland-Northern-American"],
    "American": ["US"],
    "Audio US": ["US"],
}

# Check validity at import time.
# valid_tags is from the lower level, originally created for the English
# extractor but also applicable to other extractors: these are the tags
# that should be used for tagging. Can be added to when needed, but
# often there's already an equivalent tag with a slightly different name.
#
# Only tags that are entirely lowercase *and* purely alphabetic are checked:
# uppercase tags (e.g. "UK"), hyphenated tags (e.g. "proper-noun") and the
# empty string all fail `islower() and isalpha()` and are therefore skipped.
for tags in TAGS.values():
    for tag in tags:
        if tag.islower() and tag.isalpha() and tag not in valid_tags:
            # Raised explicitly rather than via `assert False, ...` so the
            # check is not compiled away when Python runs with -O; the
            # exception type and message are unchanged.
            raise AssertionError(f"Invalid tag in simple_tag_map: {tag}")

# uppercase_tags are specific tags with uppercase names that are for stuff
# like locations and dialect and language names.
# Each one that is not already a key in TAGS is added as a key mapping to a
# one-element list holding the same name with spaces replaced by hyphens.
# Entries defined explicitly in TAGS are never overwritten.
for k in uppercase_tags:
    if k not in TAGS:
        TAGS[k] = [k.replace(" ", "-")]