Coverage for src/wiktextract/extractor/simple/section_titles.py: 100%

6 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1# https://simple.wiktionary.org/wiki/Category:Part_of_speech_templates 

2from typing import TypedDict 

3 

4POSMap = TypedDict( 

5 "POSMap", 

6 { 

7 "pos": str, 

8 # "debug" is legacy from [en], might be implemented 

9 "debug": str, 

10 "tags": list[str], 

11 # SimplEn seems to very consistently use standardized templates 

12 # for each of its 'official' PoSes, so we can use that as a way 

13 # to further specify tags to be added 

14 "templates": dict[str, list[str] | None], 

15 }, 

16 total=False, 

17) 

18 

# Main entries for different kinds of POS headings; no aliases.
# Each key is a heading name; each value is a POSMap giving the "pos" value,
# optional "tags", and the "templates" associated with that heading (a
# template name maps to a list of tags, or to None).
POS_HEADINGS: dict[str, POSMap] = {
    # Blank entry, kept as a copy-paste starting point for new headings:
    # "": {"pos": "", "tags": [""], "templates": {"": [""]}},
    "noun": {
        "pos": "noun",
        "templates": {
            "noun": None,
            "irrnoun": ["irregular"],
            "noun2": ["plural-only"],
            "noun3": None,  # Two singular forms
            "noun4": ["singular-only"],
            "letter": ["letter"],
        },
    },
    "abbreviation": {
        "pos": "abbrev",
        "tags": ["abbreviation"],
        "templates": {"abbreviation": None},
    },
    "acronym": {
        "pos": "abbrev",
        "tags": ["abbreviation"],
        "templates": {"acronym": None, "initialism": None},
    },
    "adjective": {"pos": "adj", "templates": {"adjective": None, "adj": None}},
    "adverb": {"pos": "adv", "templates": {"adverb": None}},
    "determiner": {
        "pos": "det",
        "templates": {
            "comparative determiner": ["comparative"],
            "determiner-comp": ["comparative"],
            "determiner": None,
        },
    },
    "conjunction": {"pos": "conj", "templates": {"conjunction": None}},
    "contraction": {
        "pos": "contraction",
        "tags": ["abbreviation"],
        "templates": {"contraction": None},
    },
    "coordinator": {
        "pos": "conj",
        "tags": ["coordinating"],
        "templates": {"coordinator": None},
    },
    "expression": {"pos": "phrase", "templates": {"expression": None}},
    "initialism": {
        "pos": "abbrev",
        "tags": ["abbreviation"],
        "templates": {"initialism": None, "acronym": None},
    },
    "interjection": {"pos": "intj", "templates": {"interjection": None}},
    "preposition": {"pos": "prep", "templates": {"preposition": None}},
    "prefix": {
        "pos": "prefix",
        "tags": ["morpheme"],
        "templates": {"prefix": None},
    },
    "pronoun": {"pos": "pron", "templates": {"pron": None}},
    "proper noun": {"pos": "name", "templates": {"proper noun": None}},
    "subordinator": {
        "pos": "conj",
        "tags": ["subordinate-clause"],
        "templates": {"subordinator": None},
    },
    "suffix": {
        "pos": "suffix",
        "tags": ["morpheme"],
        "templates": {"suffix": None},
    },
    "symbol": {"pos": "symbol", "templates": {"symbol": None}},
    "verb": {
        "pos": "verb",
        "templates": {
            "verb": None,
            "verb2": None,  # alternative paradigms, "spelled" vs. "spelt"
            "verb3": None,  # Only used by "be", "is", etc.
            "verb4": ["auxiliary", "modal"],  # modal aux. 'will', 'can'.
            "verb5": ["auxiliary"],  # non-modal auxiliaries like 'do' and 'have'
            "verb6": ["auxiliary"],  # same as verb5?
        },
    },
}

102 

103 

# Alias headings: each key is an alternative heading name, each value is the
# POS_HEADINGS key whose entry the alias reuses.
POS_HEADINGS_MAP: dict[str, str] = {
    "acronym & initialism": "acronym",
}


# Register every alias in POS_HEADINGS at import time. The alias key is bound
# to the same entry object as its target key (no copy is made), and a target
# that is missing from POS_HEADINGS raises KeyError here.
for k, v in POS_HEADINGS_MAP.items():
    POS_HEADINGS[k] = POS_HEADINGS[v]