Coverage for src/wiktextract/extractor/simple/section_titles.py: 100%
6 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
# https://simple.wiktionary.org/wiki/Category:Part_of_speech_templates
from typing import TypedDict
4POSMap = TypedDict(
5 "POSMap",
6 {
7 "pos": str,
8 # "debug" is legacy from [en], might be implemented
9 "debug": str,
10 "tags": list[str],
11 # SimplEn seems to very consistently use standardized templates
12 # for each of its 'official' PoSes, so we can use that as a way
13 # to further specify tags to be added
14 "templates": dict[str, list[str] | None],
15 },
16 total=False,
17)
# Main entries for different kinds of POS headings; no aliases.
#
# Each key is a POS heading name and each value is a POSMap:
#   "pos"       -- the part-of-speech identifier for the heading,
#   "tags"      -- optional tags attached to the heading as a whole,
#   "templates" -- template names seen under the heading, each mapped to a
#                  list of extra tags or to None.
POS_HEADINGS: dict[str, POSMap] = {
    # Empty entry, apparently kept as a skeleton for adding new headings:
    # "": {"pos": "", "tags": [""], "templates": {"": [""]}},
    "noun": {
        "pos": "noun",
        "templates": {
            "noun": None,
            "irrnoun": ["irregular"],
            "noun2": ["plural-only"],
            "noun3": None,  # Two singular forms
            "noun4": ["singular-only"],
            "letter": ["letter"],
        },
    },
    "abbreviation": {
        "pos": "abbrev",
        "tags": ["abbreviation"],
        "templates": {"abbreviation": None},
    },
    "acronym": {
        "pos": "abbrev",
        "tags": ["abbreviation"],
        "templates": {"acronym": None, "initialism": None},
    },
    "adjective": {"pos": "adj", "templates": {"adjective": None, "adj": None}},
    "adverb": {"pos": "adv", "templates": {"adverb": None}},
    "determiner": {
        "pos": "det",
        "templates": {
            "comparative determiner": ["comparative"],
            "determiner-comp": ["comparative"],
            "determiner": None,
        },
    },
    "conjunction": {"pos": "conj", "templates": {"conjunction": None}},
    "contraction": {
        "pos": "contraction",
        "tags": ["abbreviation"],
        "templates": {"contraction": None},
    },
    "coordinator": {
        "pos": "conj",
        "tags": ["coordinating"],
        "templates": {"coordinator": None},
    },
    "expression": {"pos": "phrase", "templates": {"expression": None}},
    "initialism": {
        "pos": "abbrev",
        "tags": ["abbreviation"],
        "templates": {"initialism": None, "acronym": None},
    },
    "interjection": {"pos": "intj", "templates": {"interjection": None}},
    "preposition": {"pos": "prep", "templates": {"preposition": None}},
    "prefix": {
        "pos": "prefix",
        "tags": ["morpheme"],
        "templates": {"prefix": None},
    },
    # NOTE(review): every other single-template entry keys its template by
    # the full heading name; confirm "pron" is the template name actually
    # used on the wiki.
    "pronoun": {"pos": "pron", "templates": {"pron": None}},
    "proper noun": {"pos": "name", "templates": {"proper noun": None}},
    "subordinator": {
        "pos": "conj",
        "tags": ["subordinate-clause"],
        "templates": {"subordinator": None},
    },
    "suffix": {
        "pos": "suffix",
        "tags": ["morpheme"],
        "templates": {"suffix": None},
    },
    "symbol": {"pos": "symbol", "templates": {"symbol": None}},
    "verb": {
        "pos": "verb",
        "templates": {
            "verb": None,
            "verb2": None,  # alternative paradigms, "spelled" vs. "spelt"
            "verb3": None,  # Only used by "be", "is", etc.
            "verb4": ["auxiliary", "modal"],  # modal aux. 'will', 'can'.
            "verb5": ["auxiliary"],  # non-modal auxiliaries like 'do' and 'have'
            "verb6": ["auxiliary"],  # same as verb5?
        },
    },
}
# Alternative heading names, each mapped to the POS_HEADINGS key whose
# entry it should reuse.
POS_HEADINGS_MAP: dict[str, str] = {
    "acronym & initialism": "acronym",
}

# Register every alias in POS_HEADINGS. The alias is bound to the very same
# dict object as its target (no copy is made), so a mutation through either
# key is visible through both. A target missing from POS_HEADINGS raises
# KeyError at import time.
for alias, target in POS_HEADINGS_MAP.items():
    POS_HEADINGS[alias] = POS_HEADINGS[target]