# Coverage for src/wiktextract/extractor/en/section_titles.py: 100% (11 statements)
# Report generated by coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
from wiktextract.config import POSSubtitleData
# Lower-case part-of-speech (POS) section titles.
#
# Each key is a section title in lower case; each value is a
# POSSubtitleData mapping with:
#   "pos"   - always present; the normalized part-of-speech code,
#   "tags"  - optional list of tags attached to that title,
#   "debug" - optional message describing why the title is irregular
#             (proscribed, misspelled, plural, ...).
# Misspelled and plural titles are listed deliberately so that they still
# resolve to a part of speech; do not "correct" the keys.
POS_TITLES: dict[str, POSSubtitleData] = {
    "abbreviation": {
        "pos": "abbrev",
        "debug": "part-of-speech Abbreviation is proscribed",
        "tags": ["abbreviation"],
    },
    "acronym": {
        "pos": "abbrev",
        "debug": "part-of-speech Acronym is proscribed",
        "tags": ["abbreviation"],
    },
    "adjectival": {
        "pos": "adj_noun",
        "debug": "part-of-speech Adjectival is not valid",
    },
    "adjectival noun": {"pos": "adj_noun"},
    "adjectival verb": {"pos": "adj_verb"},
    "adjective": {"pos": "adj"},
    "adjectuve": {"pos": "adj", "debug": "misspelled subtitle"},
    "adjectives": {"pos": "adj", "debug": "usually used in singular"},
    "adnominal": {"pos": "adnominal"},
    "adverb": {"pos": "adv"},
    "adverbs": {"pos": "adv", "debug": "usually used in singular"},
    "adverbial phrase": {
        "pos": "adv_phrase",
        "debug": "part-of-speech Adverbial phrase is proscribed",
    },
    "affix": {"pos": "affix"},
    "adjective suffix": {
        "pos": "suffix",
        "debug": "part-of-speech Adjective suffix is proscribed",
    },
    "ambiposition": {"pos": "ambiposition"},
    "article": {"pos": "article"},
    "character": {"pos": "character"},
    "circumfix": {"pos": "circumfix", "tags": ["morpheme"]},
    "circumposition": {"pos": "circumpos"},
    "classifier": {"pos": "classifier"},
    "clipping": {
        "pos": "abbrev",
        "debug": "part-of-speech Clipping is proscribed",
        "tags": ["abbreviation"],
    },
    "clitic": {
        "pos": "suffix",
        "debug": "part-of-speech Clitic is proscribed",
        "tags": ["clitic"],
    },
    "combining form": {"pos": "combining_form", "tags": ["morpheme"]},
    "comparative": {"pos": "adj", "tags": ["comparative"]},
    "conjunction": {"pos": "conj"},
    "conjuntion": {"pos": "conj", "debug": "misspelled subtitle"},
    "contraction": {"pos": "contraction", "tags": ["contraction"]},
    "converb": {"pos": "converb"},
    "counter": {"pos": "counter"},
    "definitions": {"pos": "character"},
    "dependent noun": {
        "pos": "noun",
        "tags": [
            "dependent",
        ],
    },
    "determiner": {"pos": "det"},
    "diacritical mark": {"pos": "character", "tags": ["diacritic"]},
    "enclitic": {"pos": "suffix", "tags": ["clitic"]},
    "enclitic particle": {"pos": "suffix", "tags": ["clitic"]},
    "gerund": {
        "pos": "verb",
        "debug": "part-of-speech Gerund is proscribed",
        "tags": ["participle", "gerund"],
    },
    "han character": {"pos": "character", "tags": ["han"]},
    "han characters": {
        "pos": "character",
        "tags": ["han"],
        "debug": "usually used in singular",
    },
    "hanja": {"pos": "character", "tags": ["Hanja"]},
    "hanzi": {"pos": "character", "tags": ["hanzi"]},
    "ideophone": {"pos": "noun", "tags": ["ideophone"]},
    "idiom": {"pos": "phrase", "tags": ["idiomatic"]},
    "infix": {"pos": "infix", "tags": ["morpheme"]},
    "infinitive": {
        "pos": "verb",
        "debug": "part-of-speech Infinitive is proscribed",
        "tags": ["infinitive"],
    },
    "initialism": {
        "pos": "abbrev",
        "debug": "part-of-speech Initialism is proscribed",
        "tags": ["abbreviation"],
    },
    "interfix": {"pos": "interfix", "tags": ["morpheme"]},
    "interjection": {"pos": "intj"},
    "interrogative pronoun": {"pos": "pron", "tags": ["interrogative"]},
    "intransitive verb": {
        "pos": "verb",
        "debug": "part-of-speech Intransitive verb is proscribed",
        "tags": ["intransitive"],
    },
    "instransitive verb": {
        "pos": "verb",
        "tags": ["intransitive"],
        "debug": "misspelled subtitle",
    },
    "kanji": {"pos": "character", "tags": ["kanji"]},
    "letter": {"pos": "character", "tags": ["letter"]},
    "ligature": {"pos": "character", "tags": ["ligature"]},
    "nominal nuclear clause": {
        "pos": "clause",
        "debug": "part-of-speech Nominal nuclear clause is proscribed",
    },
    # The next two keys are look-alike spellings of "noun": the first starts
    # with Greek nu (U+03BD), the second ends with U+0272 instead of "n".
    "νoun": {"pos": "noun", "debug": "misspelled subtitle"},
    "nouɲ": {"pos": "noun", "debug": "misspelled subtitle"},
    "noun": {"pos": "noun"},
    "noun form": {
        "pos": "noun",
        "debug": "part-of-speech Noun form is proscribed",
    },
    "nouns": {"pos": "noun", "debug": "usually in singular"},
    "noum": {"pos": "noun", "debug": "misspelled subtitle"},
    "number": {"pos": "num", "tags": ["number"]},
    "numeral": {"pos": "num"},
    "ordinal number": {
        "pos": "adj",
        "debug": "ordinal numbers should be adjectives",
        "tags": ["ordinal"],
    },
    "participle": {"pos": "verb", "tags": ["participle"]},
    "particle": {"pos": "particle"},
    "past participle": {"pos": "verb", "tags": ["participle", "past"]},
    "perfect expression": {"pos": "verb"},
    "perfection expression": {"pos": "verb"},
    "perfect participle": {"pos": "verb", "tags": ["participle", "perfect"]},
    "personal pronoun": {"pos": "pron", "tags": ["person"]},
    "phrase": {"pos": "phrase"},
    "phrases": {"pos": "phrase", "debug": "usually used in singular"},
    "possessive determiner": {"pos": "det", "tags": ["possessive"]},
    "possessive pronoun": {"pos": "det", "tags": ["possessive"]},
    "postposition": {"pos": "postp"},
    "predicative": {"pos": "adj", "tags": ["predicative"]},
    "prefix": {"pos": "prefix", "tags": ["morpheme"]},
    "preposition": {"pos": "prep"},
    "prepositions": {"pos": "prep", "debug": "usually used in singular"},
    "prepositional expressions": {
        "pos": "prep",
        "debug": "part-of-speech Prepositional expressions is proscribed",
    },
    "prepositional phrase": {"pos": "prep_phrase"},
    "prepositional pronoun": {
        "pos": "pron",
        "debug": "part-of-speech Prepositional pronoun is proscribed",
        "tags": ["prepositional"],
    },
    "present participle": {
        "pos": "verb",
        "debug": "part-of-speech Present participle is proscribed",
        "tags": ["participle", "present"],
    },
    "preverb": {"pos": "preverb"},
    "pronoun": {"pos": "pron"},
    "proper noun": {"pos": "name"},
    "proper oun": {"pos": "name", "debug": "misspelled subtitle"},
    "proposition": {"pos": "prep", "debug": "misspelled subtitle"},
    "proverb": {"pos": "proverb"},
    "punctuation mark": {"pos": "punct", "tags": ["punctuation"]},
    "punctuation": {
        "pos": "punct",
        "debug": "part-of-speech Punctuation should be Punctuation mark",
        "tags": ["punctuation"],
    },
    "relative": {"pos": "conj", "tags": ["relative"]},
    "romanization": {"pos": "romanization"},
    "root": {"pos": "root", "tags": ["morpheme"]},
    "suffix": {"pos": "suffix", "tags": ["morpheme"]},
    "suffix form": {
        "pos": "suffix",
        "debug": "part-of-speech Suffix form is proscribed",
        "tags": ["morpheme"],
    },
    "syllable": {"pos": "syllable"},
    "symbol": {"pos": "symbol"},
    "transitive verb": {"pos": "verb", "tags": ["transitive"]},
    "verb": {"pos": "verb"},
    "verb form": {
        "pos": "verb",
        "debug": "part-of-speech Verb form is proscribed",
    },
    "verbal noun": {"pos": "noun", "tags": ["verbal"]},
    "verbs": {"pos": "verb", "debug": "usually in singular"},
}
# Lower-case linkage section titles mapped to the linkage name they are
# stored under.  Several spellings and variants (singular forms, typos such
# as "intances", near-equivalents such as "see also") intentionally share
# one target value.
LINKAGE_TITLES: dict[str, str] = {
    "synonyms": "synonyms",
    "ambiguous synonyms": "synonyms",
    "near synonyms": "synonyms",
    "pseudo-synonyms": "synonyms",
    "idiomatic synonyms": "synonyms",
    "hypernyms": "hypernyms",
    "hypernym": "hypernyms",
    "hyperonyms": "hypernyms",
    "classes": "hypernyms",
    "class": "hypernyms",
    "hyponyms": "hyponyms",
    "holonyms": "holonyms",
    "meronyms": "meronyms",
    "derived": "derived",
    "related": "related",
    "related terms": "related",
    "related words": "related",
    "related characters": "related",
    "idioms": "related",
    "idioms/phrases": "related",
    "similes": "related",
    "variance": "related",
    "coordinate terms": "coordinate_terms",
    "coordinate term": "coordinate_terms",
    "troponyms": "troponyms",
    "antonyms": "antonyms",
    "near antonyms": "antonyms",
    "instances": "instances",
    "intances": "instances",
    "archetypes": "instances",
    "see also": "related",
    "seealso": "related",
    "specific multiples": "related",
    "various": "related",
    "metonyms": "related",
    "demonyms": "related",
    "comeronyms": "related",
    "cohyponyms": "related",
    "proverbs": "proverbs",
    "abbreviations": "abbreviations",
    "derived terms": "derived",
    "alternative forms": "synonyms",
}
# Lower-case titles of the remaining recognized section kinds.  A single
# title is kept as a plain string; a group of equivalent titles is kept as
# an immutable frozenset.

COMPOUNDS_TITLE = "compounds"

ETYMOLOGY_TITLES: frozenset[str] = frozenset(["etymology", "glyph origin"])

# Section titles collected under the name "ignored".
IGNORED_TITLES: frozenset[str] = frozenset(
    ["anagrams", "further reading", "references", "quotations", "statistics"]
)

INFLECTION_TITLES: frozenset[str] = frozenset(
    ["declension", "conjugation", "inflection", "mutation"]
)

DESCENDANTS_TITLE = "descendants"

# NOTE(review): presumably the "derived"-style titles used on proto-language
# root pages - confirm against the code that consumes this set.
PROTO_ROOT_DERIVED_TITLES: frozenset[str] = frozenset(
    ["derived terms", "extensions"]
)

PRONUNCIATION_TITLE = "pronunciation"

TRANSLATIONS_TITLE = "translations"