Coverage for src/wiktextract/extractor/en/section_titles.py: 100%
11 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1from wiktextract.config import POSSubtitleData
# Lower case POS titles
#
# Maps a lower-cased part-of-speech section title to its subtitle data:
#   "pos"   - the normalized part-of-speech code stored for the entry
#             (always present),
#   "tags"  - optional list of tags attached to entries under that title,
#   "debug" - optional note explaining why the title is non-standard
#             (misspelled, plural, or proscribed).
#             NOTE(review): how callers report this note is not visible here.
#
# Misspelled and plural variants are listed on purpose so that pages using
# them still resolve to a part of speech.
POS_TITLES: dict[str, POSSubtitleData] = {
    "abbreviation": {
        "pos": "abbrev",
        "debug": "part-of-speech Abbreviation is proscribed",
        "tags": ["abbreviation"],
    },
    "acronym": {
        "pos": "abbrev",
        "debug": "part-of-speech Acronym is proscribed",
        "tags": ["abbreviation"],
    },
    "adjectival": {
        "pos": "adj_noun",
        "debug": "part-of-speech Adjectival is not valid",
    },
    "adjectival noun": {"pos": "adj_noun"},
    "adjectival verb": {"pos": "adj_verb"},
    "adjective": {"pos": "adj"},
    "adjectuve": {"pos": "adj", "debug": "misspelled subtitle"},
    "adjectives": {"pos": "adj", "debug": "usually used in singular"},
    "adnominal": {"pos": "adnominal"},
    "adverb": {"pos": "adv"},
    "adverbs": {"pos": "adv", "debug": "usually used in singular"},
    "adverbial phrase": {
        "pos": "adv_phrase",
        "debug": "part-of-speech Adverbial phrase is proscribed",
    },
    "affix": {"pos": "affix"},
    "adjective suffix": {
        "pos": "suffix",
        "debug": "part-of-speech Adjective suffix is proscribed",
    },
    "ambiposition": {"pos": "ambiposition"},
    "article": {"pos": "article"},
    "character": {"pos": "character"},
    "circumfix": {"pos": "circumfix", "tags": ["morpheme"]},
    "circumposition": {"pos": "circumpos"},
    "classifier": {"pos": "classifier"},
    "clipping": {
        "pos": "abbrev",
        "debug": "part-of-speech Clipping is proscribed",
        "tags": ["abbreviation"],
    },
    "clitic": {
        "pos": "suffix",
        "debug": "part-of-speech Clitic is proscribed",
        "tags": ["clitic"],
    },
    "combining form": {"pos": "combining_form", "tags": ["morpheme"]},
    "comparative": {"pos": "adj", "tags": ["comparative"]},
    "conjunction": {"pos": "conj"},
    "conjuntion": {"pos": "conj", "debug": "misspelled subtitle"},
    "contraction": {"pos": "contraction", "tags": ["contraction"]},
    "converb": {"pos": "converb"},
    "counter": {"pos": "counter"},
    "definitions": {"pos": "character"},
    "dependent noun": {
        "pos": "noun",
        "tags": [
            "dependent",
        ],
    },
    "determiner": {"pos": "det"},
    "diacritical mark": {"pos": "character", "tags": ["diacritic"]},
    "enclitic": {"pos": "suffix", "tags": ["clitic"]},
    "enclitic particle": {"pos": "suffix", "tags": ["clitic"]},
    "gerund": {
        "pos": "verb",
        "debug": "part-of-speech Gerund is proscribed",
        "tags": ["participle", "gerund"],
    },
    "han character": {"pos": "character", "tags": ["han"]},
    "han characters": {
        "pos": "character",
        "tags": ["han"],
        # Fixed typo: was "psually used in singular".
        "debug": "usually used in singular",
    },
    "hanja": {"pos": "character", "tags": ["Hanja"]},
    "hanzi": {"pos": "character", "tags": ["hanzi"]},
    "ideophone": {"pos": "noun", "tags": ["ideophone"]},
    "idiom": {"pos": "phrase", "tags": ["idiomatic"]},
    "infix": {"pos": "infix", "tags": ["morpheme"]},
    "infinitive": {
        "pos": "verb",
        "debug": "part-of-speech Infinitive is proscribed",
        "tags": ["infinitive"],
    },
    "initialism": {
        "pos": "abbrev",
        "debug": "part-of-speech Initialism is proscribed",
        "tags": ["abbreviation"],
    },
    "interfix": {"pos": "interfix", "tags": ["morpheme"]},
    "interjection": {"pos": "intj"},
    "interrogative pronoun": {"pos": "pron", "tags": ["interrogative"]},
    "intransitive verb": {
        "pos": "verb",
        "debug": "part-of-speech Intransitive verb is proscribed",
        "tags": ["intransitive"],
    },
    "instransitive verb": {
        "pos": "verb",
        "tags": ["intransitive"],
        # Fixed typo: was "pisspelled subtitle".
        "debug": "misspelled subtitle",
    },
    "kanji": {"pos": "character", "tags": ["kanji"]},
    "letter": {"pos": "character", "tags": ["letter"]},
    "ligature": {"pos": "character", "tags": ["ligature"]},
    "logogram": {"pos": "character", "tags": ["logogram"]},
    "nominal nuclear clause": {
        "pos": "clause",
        "debug": "part-of-speech Nominal nuclear clause is proscribed",
    },
    # The next two keys are NOT plain ASCII "noun": the first starts with
    # Greek nu (U+03BD) and the second ends with U+0272. Keep them verbatim.
    "νoun": {"pos": "noun", "debug": "misspelled subtitle"},
    "nouɲ": {"pos": "noun", "debug": "misspelled subtitle"},
    "noun": {"pos": "noun"},
    "noun form": {
        "pos": "noun",
        "debug": "part-of-speech Noun form is proscribed",
    },
    # Disabled entry, kept for reference. NOTE(review): "nouns" is mapped in
    # LINKAGE_TITLES in this file, which is presumably why it is off here.
    # "nouns": {"pos": "noun", "debug": "usually in singular"},
    "noum": {"pos": "noun", "debug": "misspelled subtitle"},
    "number": {"pos": "num", "tags": ["number"]},
    "numeral": {"pos": "num"},
    "ordinal number": {
        "pos": "adj",
        "debug": "ordinal numbers should be adjectives",
        "tags": ["ordinal"],
    },
    "participle": {"pos": "verb", "tags": ["participle"]},
    "particle": {"pos": "particle"},
    "past participle": {"pos": "verb", "tags": ["participle", "past"]},
    "perfect expression": {"pos": "verb"},
    "perfection expression": {"pos": "verb"},
    "perfect participle": {"pos": "verb", "tags": ["participle", "perfect"]},
    "personal pronoun": {"pos": "pron", "tags": ["person"]},
    "phrase": {"pos": "phrase"},
    "phrases": {"pos": "phrase", "debug": "usually used in singular"},
    "possessive determiner": {"pos": "det", "tags": ["possessive"]},
    "possessive pronoun": {"pos": "det", "tags": ["possessive"]},
    "postposition": {"pos": "postp"},
    "predicative": {"pos": "adj", "tags": ["predicative"]},
    "prefix": {"pos": "prefix", "tags": ["morpheme"]},
    "preposition": {"pos": "prep"},
    "prepositions": {"pos": "prep", "debug": "usually used in singular"},
    "prepositional expressions": {
        "pos": "prep",
        "debug": "part-of-speech Prepositional expressions is proscribed",
    },
    "prepositional phrase": {"pos": "prep_phrase"},
    "prepositional pronoun": {
        "pos": "pron",
        "debug": "part-of-speech Prepositional pronoun is proscribed",
        "tags": ["prepositional"],
    },
    "present participle": {
        "pos": "verb",
        "debug": "part-of-speech Present participle is proscribed",
        "tags": ["participle", "present"],
    },
    "preverb": {"pos": "preverb"},
    "pronoun": {"pos": "pron"},
    "proper noun": {"pos": "name"},
    "proper oun": {"pos": "name", "debug": "misspelled subtitle"},
    "proposition": {"pos": "prep", "debug": "misspelled subtitle"},
    "proverb": {"pos": "proverb"},
    "punctuation mark": {"pos": "punct", "tags": ["punctuation"]},
    "punctuation": {
        "pos": "punct",
        "debug": "part-of-speech Punctuation should be Punctuation mark",
        "tags": ["punctuation"],
    },
    "relative": {"pos": "conj", "tags": ["relative"]},
    "romanization": {"pos": "romanization"},
    "root": {"pos": "root", "tags": ["morpheme"]},
    "stem": {"pos": "stem"},
    "suffix": {"pos": "suffix", "tags": ["morpheme"]},
    "suffix form": {
        "pos": "suffix",
        "debug": "part-of-speech Suffix form is proscribed",
        "tags": ["morpheme"],
    },
    "syllable": {"pos": "syllable"},
    "symbol": {"pos": "symbol"},
    "transitive verb": {"pos": "verb", "tags": ["transitive"]},
    "verb": {"pos": "verb"},
    "verb form": {
        "pos": "verb",
        "debug": "part-of-speech Verb form is proscribed",
    },
    "verbal noun": {"pos": "noun", "tags": ["verbal"]},
    "verbs": {"pos": "verb", "debug": "usually in singular"},
}
# Maps a linkage section title to the linkage kind it is filed under.
# Several titles (variants, singular forms, misspellings such as "intances")
# collapse onto the same kind. The groups below are listed as consecutive
# runs so that the resulting dict keeps the same key order as a flat literal;
# this is why "related" and "derived" each appear twice.
LINKAGE_TITLES: dict[str, str] = {
    title: linkage
    for linkage, titles in (
        (
            "synonyms",
            (
                "synonyms",
                "ambiguous synonyms",
                "near synonyms",
                "pseudo-synonyms",
                "idiomatic synonyms",
            ),
        ),
        (
            "hypernyms",
            ("hypernyms", "hypernym", "hyperonyms", "classes", "class"),
        ),
        ("hyponyms", ("hyponyms",)),
        ("holonyms", ("holonyms",)),
        ("meronyms", ("meronyms",)),
        ("derived", ("derived",)),
        (
            "related",
            (
                "related",
                "related terms",
                "related words",
                "related characters",
                "idioms",
                "idioms/phrases",
                "similes",
                "variance",
            ),
        ),
        ("coordinate_terms", ("coordinate terms", "coordinate term")),
        ("troponyms", ("troponyms",)),
        ("antonyms", ("antonyms", "near antonyms")),
        ("instances", ("instances", "intances", "archetypes")),
        (
            "related",
            (
                "see also",
                "seealso",
                "specific multiples",
                "various",
                "metonyms",
                "demonyms",
                "comeronyms",
                "cohyponyms",
            ),
        ),
        ("proverbs", ("proverbs",)),
        ("abbreviations", ("abbreviations",)),
        ("derived", ("derived terms", "nouns", "proper nouns")),
    )
    for title in titles
}
# Single-title constants and small title sets for the non-POS sections.
# NOTE(review): values are all lower case, presumably matched against
# lower-cased headings like the POS titles - confirm against callers.

# Title string for the compounds section.
COMPOUNDS_TITLE = "compounds"

# Titles that denote an etymology section.
ETYMOLOGY_TITLES: frozenset[str] = frozenset({"etymology", "glyph origin"})

# Titles collected under the "ignored" set.
IGNORED_TITLES: frozenset[str] = frozenset(
    {"anagrams", "further reading", "references", "quotations", "statistics"}
)

# Titles that denote an inflection section.
INFLECTION_TITLES: frozenset[str] = frozenset(
    {"declension", "conjugation", "inflection", "mutation"}
)

# Title string for the descendants section.
DESCENDANTS_TITLE = "descendants"

# Titles collected for derived terms of proto roots (per the constant name).
PROTO_ROOT_DERIVED_TITLES: frozenset[str] = frozenset(
    {"derived terms", "extensions"}
)

# Title string for the pronunciation section.
PRONUNCIATION_TITLE = "pronunciation"

# Title string for the translations section.
TRANSLATIONS_TITLE = "translations"