Coverage for src/wiktextract/extractor/en/section_titles.py: 100%

11 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-12 08:27 +0000

1from wiktextract.config import POSSubtitleData 

2 

3# Lower case POS titles 

4POS_TITLES: dict[str, POSSubtitleData] = { 

5 "abbreviation": { 

6 "pos": "abbrev", 

7 "debug": "part-of-speech Abbreviation is proscribed", 

8 "tags": ["abbreviation"], 

9 }, 

10 "acronym": { 

11 "pos": "abbrev", 

12 "debug": "part-of-speech Acronym is proscribed", 

13 "tags": ["abbreviation"], 

14 }, 

15 "adjectival": { 

16 "pos": "adj_noun", 

17 "debug": "part-of-speech Adjectival is not valid", 

18 }, 

19 "adjectival noun": {"pos": "adj_noun"}, 

20 "adjectival verb": {"pos": "adj_verb"}, 

21 "adjective": {"pos": "adj"}, 

22 "adjectuve": {"pos": "adj", "debug": "misspelled subtitle"}, 

23 "adjectives": {"pos": "adj", "debug": "usually used in singular"}, 

24 "adnominal": {"pos": "adnominal"}, 

25 "adverb": {"pos": "adv"}, 

26 "adverbs": {"pos": "adv", "debug": "usually used in singular"}, 

27 "adverbial phrase": { 

28 "pos": "adv_phrase", 

29 "debug": "part-of-speech Adverbial phrase is proscribed", 

30 }, 

31 "affix": {"pos": "affix"}, 

32 "adjective suffix": { 

33 "pos": "suffix", 

34 "debug": "part-of-speech Adjective suffix is proscribed", 

35 }, 

36 "ambiposition": {"pos": "ambiposition"}, 

37 "article": {"pos": "article"}, 

38 "character": {"pos": "character"}, 

39 "circumfix": {"pos": "circumfix", "tags": ["morpheme"]}, 

40 "circumposition": {"pos": "circumpos"}, 

41 "classifier": {"pos": "classifier"}, 

42 "clipping": { 

43 "pos": "abbrev", 

44 "debug": "part-of-speech Clipping is proscribed", 

45 "tags": ["abbreviation"], 

46 }, 

47 "clitic": { 

48 "pos": "suffix", 

49 "debug": "part-of-speech Clitic is proscribed", 

50 "tags": ["clitic"], 

51 }, 

52 "combining form": {"pos": "combining_form", "tags": ["morpheme"]}, 

53 "comparative": {"pos": "adj", "tags": ["comparative"]}, 

54 "conjunction": {"pos": "conj"}, 

55 "conjuntion": {"pos": "conj", "debug": "misspelled subtitle"}, 

56 "contraction": {"pos": "contraction", "tags": ["contraction"]}, 

57 "converb": {"pos": "converb"}, 

58 "counter": {"pos": "counter"}, 

59 "definitions": {"pos": "character"}, 

60 "dependent noun": { 

61 "pos": "noun", 

62 "tags": [ 

63 "dependent", 

64 ], 

65 }, 

66 "determiner": {"pos": "det"}, 

67 "diacritical mark": {"pos": "character", "tags": ["diacritic"]}, 

68 "enclitic": {"pos": "suffix", "tags": ["clitic"]}, 

69 "enclitic particle": {"pos": "suffix", "tags": ["clitic"]}, 

70 "gerund": { 

71 "pos": "verb", 

72 "debug": "part-of-speech Gerund is proscribed", 

73 "tags": ["participle", "gerund"], 

74 }, 

75 "han character": {"pos": "character", "tags": ["han"]}, 

76 "han characters": { 

77 "pos": "character", 

78 "tags": ["han"], 

79 "debug": "usually used in singular", 

80 }, 

81 "hanja": {"pos": "character", "tags": ["Hanja"]}, 

82 "hanzi": {"pos": "character", "tags": ["hanzi"]}, 

83 "ideophone": {"pos": "noun", "tags": ["ideophone"]}, 

84 "idiom": {"pos": "phrase", "tags": ["idiomatic"]}, 

85 "infix": {"pos": "infix", "tags": ["morpheme"]}, 

86 "infinitive": { 

87 "pos": "verb", 

88 "debug": "part-of-speech Infinitive is proscribed", 

89 "tags": ["infinitive"], 

90 }, 

91 "initialism": { 

92 "pos": "abbrev", 

93 "debug": "part-of-speech Initialism is proscribed", 

94 "tags": ["abbreviation"], 

95 }, 

96 "interfix": {"pos": "interfix", "tags": ["morpheme"]}, 

97 "interjection": {"pos": "intj"}, 

98 "interrogative pronoun": {"pos": "pron", "tags": ["interrogative"]}, 

99 "intransitive verb": { 

100 "pos": "verb", 

101 "debug": "part-of-speech Intransitive verb is proscribed", 

102 "tags": ["intransitive"], 

103 }, 

104 "instransitive verb": { 

105 "pos": "verb", 

106 "tags": ["intransitive"], 

107 "debug": "misspelled subtitle", 

108 }, 

109 "kanji": {"pos": "character", "tags": ["kanji"]}, 

110 "letter": {"pos": "character", "tags": ["letter"]}, 

111 "ligature": {"pos": "character", "tags": ["ligature"]}, 

112 "logogram": {"pos": "character", "tags": ["logogram"]}, 

113 "nominal nuclear clause": { 

114 "pos": "clause", 

115 "debug": "part-of-speech Nominal nuclear clause is proscribed", 

116 }, 

117 "νoun": {"pos": "noun", "debug": "misspelled subtitle"}, 

118 "nouɲ": {"pos": "noun", "debug": "misspelled subtitle"}, 

119 "noun": {"pos": "noun"}, 

120 "noun form": { 

121 "pos": "noun", 

122 "debug": "part-of-speech Noun form is proscribed", 

123 }, 

124 # "nouns": {"pos": "noun", "debug": "usually in singular"}, 

125 "noum": {"pos": "noun", "debug": "misspelled subtitle"}, 

126 "number": {"pos": "num", "tags": ["number"]}, 

127 "numeral": {"pos": "num"}, 

128 "ordinal number": { 

129 "pos": "adj", 

130 "debug": "ordinal numbers should be adjectives", 

131 "tags": ["ordinal"], 

132 }, 

133 "participle": {"pos": "verb", "tags": ["participle"]}, 

134 "particle": {"pos": "particle"}, 

135 "past participle": {"pos": "verb", "tags": ["participle", "past"]}, 

136 "perfect expression": {"pos": "verb"}, 

137 "perfection expression": {"pos": "verb"}, 

138 "perfect participle": {"pos": "verb", "tags": ["participle", "perfect"]}, 

139 "personal pronoun": {"pos": "pron", "tags": ["person"]}, 

140 "phrase": {"pos": "phrase"}, 

141 "phrases": {"pos": "phrase", "debug": "usually used in singular"}, 

142 "possessive determiner": {"pos": "det", "tags": ["possessive"]}, 

143 "possessive pronoun": {"pos": "det", "tags": ["possessive"]}, 

144 "postposition": {"pos": "postp"}, 

145 "predicative": {"pos": "adj", "tags": ["predicative"]}, 

146 "prefix": {"pos": "prefix", "tags": ["morpheme"]}, 

147 "preposition": {"pos": "prep"}, 

148 "prepositions": {"pos": "prep", "debug": "usually used in singular"}, 

149 "prepositional expressions": { 

150 "pos": "prep", 

151 "debug": "part-of-speech Prepositional expressions is proscribed", 

152 }, 

153 "prepositional phrase": {"pos": "prep_phrase"}, 

154 "prepositional pronoun": { 

155 "pos": "pron", 

156 "debug": "part-of-speech Prepositional pronoun is proscribed", 

157 "tags": ["prepositional"], 

158 }, 

159 "present participle": { 

160 "pos": "verb", 

161 "debug": "part-of-speech Present participle is proscribed", 

162 "tags": ["participle", "present"], 

163 }, 

164 "preverb": {"pos": "preverb"}, 

165 "pronoun": {"pos": "pron"}, 

166 "proper noun": {"pos": "name"}, 

167 "proper oun": {"pos": "name", "debug": "misspelled subtitle"}, 

168 "proposition": {"pos": "prep", "debug": "misspelled subtitle"}, 

169 "proverb": {"pos": "proverb"}, 

170 "punctuation mark": {"pos": "punct", "tags": ["punctuation"]}, 

171 "punctuation": { 

172 "pos": "punct", 

173 "debug": "part-of-speech Punctuation should be Punctuation mark", 

174 "tags": ["punctuation"], 

175 }, 

176 "relative": {"pos": "conj", "tags": ["relative"]}, 

177 "romanization": {"pos": "romanization"}, 

178 "root": {"pos": "root", "tags": ["morpheme"]}, 

179 "stem": {"pos": "stem"}, 

180 "suffix": {"pos": "suffix", "tags": ["morpheme"]}, 

181 "suffix form": { 

182 "pos": "suffix", 

183 "debug": "part-of-speech Suffix form is proscribed", 

184 "tags": ["morpheme"], 

185 }, 

186 "syllable": {"pos": "syllable"}, 

187 "symbol": {"pos": "symbol"}, 

188 "transitive verb": {"pos": "verb", "tags": ["transitive"]}, 

189 "verb": {"pos": "verb"}, 

190 "verb form": { 

191 "pos": "verb", 

192 "debug": "part-of-speech Verb form is proscribed", 

193 }, 

194 "verbal noun": {"pos": "noun", "tags": ["verbal"]}, 

195 "verbs": {"pos": "verb", "debug": "usually in singular"}, 

196} 

197 

198LINKAGE_TITLES: dict[str, str] = { 

199 "synonyms": "synonyms", 

200 "ambiguous synonyms": "synonyms", 

201 "near synonyms": "synonyms", 

202 "pseudo-synonyms": "synonyms", 

203 "idiomatic synonyms": "synonyms", 

204 "hypernyms": "hypernyms", 

205 "hypernym": "hypernyms", 

206 "hyperonyms": "hypernyms", 

207 "classes": "hypernyms", 

208 "class": "hypernyms", 

209 "hyponyms": "hyponyms", 

210 "holonyms": "holonyms", 

211 "meronyms": "meronyms", 

212 "derived": "derived", 

213 "related": "related", 

214 "related terms": "related", 

215 "related words": "related", 

216 "related characters": "related", 

217 "idioms": "related", 

218 "idioms/phrases": "related", 

219 "similes": "related", 

220 "variance": "related", 

221 "coordinate terms": "coordinate_terms", 

222 "coordinate term": "coordinate_terms", 

223 "troponyms": "troponyms", 

224 "antonyms": "antonyms", 

225 "near antonyms": "antonyms", 

226 "instances": "instances", 

227 "intances": "instances", 

228 "archetypes": "instances", 

229 "see also": "related", 

230 "seealso": "related", 

231 "specific multiples": "related", 

232 "various": "related", 

233 "metonyms": "related", 

234 "demonyms": "related", 

235 "comeronyms": "related", 

236 "cohyponyms": "related", 

237 "proverbs": "proverbs", 

238 "abbreviations": "abbreviations", 

239 "derived terms": "derived", 

240 "nouns": "derived", 

241 "proper nouns": "derived", 

242} 

243 

244COMPOUNDS_TITLE = "compounds" 

245 

246ETYMOLOGY_TITLES: frozenset[str] = frozenset(["etymology", "glyph origin"]) 

247 

248IGNORED_TITLES: frozenset[str] = frozenset( 

249 ["anagrams", "further reading", "references", "quotations", "statistics"] 

250) 

251 

252INFLECTION_TITLES: frozenset[str] = frozenset( 

253 ["declension", "conjugation", "inflection", "mutation"] 

254) 

255 

256DESCENDANTS_TITLE = "descendants" 

257 

258PROTO_ROOT_DERIVED_TITLES: frozenset[str] = frozenset( 

259 ["derived terms", "extensions"] 

260) 

261 

262PRONUNCIATION_TITLE = "pronunciation" 

263 

264TRANSLATIONS_TITLE = "translations"