Coverage for src/wiktextract/extractor/fr/section_types.py: 100%

11 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-13 10:14 +0000

1from ...config import POSSubtitleData 

2 

# Part-of-speech sections.
#
# The keys are the first argument of the `S` template. Each value gives the
# "pos" for that section and, for the more specific section types, a list of
# extra "tags". Abbreviated and spelled-out forms of the same section name
# are separate keys that carry the same data.
# https://fr.wiktionary.org/wiki/Modèle:S
# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_des_sections
# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_des_sections_de_types_de_mots
# https://fr.wiktionary.org/wiki/Module:types_de_mots/data
POS_SECTIONS: dict[str, POSSubtitleData] = {
    "adj": {"pos": "adj"},
    "adj-dém": {"pos": "adj", "tags": ["demonstrative"]},
    "adj-excl": {"pos": "adj", "tags": ["exclamatory"]},
    "adj-indéf": {"pos": "adj", "tags": ["indefinite"]},
    "adj-int": {"pos": "adj", "tags": ["interrogative"]},
    "adj-num": {"pos": "adj", "tags": ["numeral"]},
    "adj-pos": {"pos": "adj", "tags": ["possessive"]},
    "adj-rel": {"pos": "adj", "tags": ["relative"]},
    "adjectif": {"pos": "adj"},
    "adjectif dém": {"pos": "adj", "tags": ["demonstrative"]},
    "adjectif démonstratif": {"pos": "adj", "tags": ["demonstrative"]},
    "adjectif exc": {"pos": "adj", "tags": ["exclamatory"]},
    "adjectif exclamatif": {"pos": "adj", "tags": ["exclamatory"]},
    "adjectif ind": {"pos": "adj", "tags": ["indefinite"]},
    "adjectif indéfini": {"pos": "adj", "tags": ["indefinite"]},
    "adjectif int": {"pos": "adj", "tags": ["interrogative"]},
    "adjectif interrogatif": {"pos": "adj", "tags": ["interrogative"]},
    "adjectif num": {"pos": "adj", "tags": ["numeral"]},
    "adjectif numéral": {"pos": "adj", "tags": ["numeral"]},
    "adjectif pos": {"pos": "adj", "tags": ["possessive"]},
    "adjectif possessif": {"pos": "adj", "tags": ["possessive"]},
    "adjectif qualificatif": {"pos": "adj"},
    "adjectif rel": {"pos": "adj", "tags": ["relative"]},
    "adjectif relatif": {"pos": "adj", "tags": ["relative"]},
    "adv": {"pos": "adv"},
    "adv-ind": {"pos": "adv", "tags": ["indefinite"]},
    "adv-int": {"pos": "adv", "tags": ["interrogative"]},
    "adv-pron": {"pos": "adv"},
    "adv-rel": {"pos": "adv", "tags": ["relative"]},
    "adverbe": {"pos": "adv"},
    "adverbe ind": {"pos": "adv", "tags": ["indefinite"]},
    "adverbe indéfini": {"pos": "adv", "tags": ["indefinite"]},
    "adverbe int": {"pos": "adv", "tags": ["interrogative"]},
    "adverbe interrogatif": {"pos": "adv", "tags": ["interrogative"]},
    "adverbe pro": {"pos": "adv"},
    "adverbe pronominal": {"pos": "adv"},
    "adverbe rel": {"pos": "adv", "tags": ["relative"]},
    "adverbe relatif": {"pos": "adv", "tags": ["relative"]},
    "aff": {"pos": "affix"},
    "affixe": {"pos": "affix"},
    "art": {"pos": "article"},
    "art-déf": {"pos": "article", "tags": ["definite"]},
    "art-indéf": {"pos": "article", "tags": ["indefinite"]},
    # NOTE(review): "partial" is used for French "partitif" here and on
    # "article par" / "article partitif"; the usual linguistic term is
    # "partitive" — confirm which tag downstream consumers expect.
    "art-part": {"pos": "article", "tags": ["partial"]},
    "article": {"pos": "article"},
    "article déf": {"pos": "article", "tags": ["definite"]},
    "article défini": {"pos": "article", "tags": ["definite"]},
    "article ind": {"pos": "article", "tags": ["indefinite"]},
    "article indéfini": {"pos": "article", "tags": ["indefinite"]},
    "article par": {"pos": "article", "tags": ["partial"]},
    "article partitif": {"pos": "article", "tags": ["partial"]},
    "circon": {"pos": "circumfix", "tags": ["morpheme"]},
    "circonf": {"pos": "circumfix", "tags": ["morpheme"]},
    "circonfixe": {"pos": "circumfix", "tags": ["morpheme"]},
    "class": {"pos": "classifier"},
    "classif": {"pos": "classifier"},
    "classificateur": {"pos": "classifier"},
    "conj": {"pos": "conj"},
    "conj-coord": {"pos": "conj", "tags": ["coordinating"]},
    "conjonction": {"pos": "conj"},
    "conjonction coo": {"pos": "conj", "tags": ["coordinating"]},
    "conjonction de coordination": {"pos": "conj", "tags": ["coordinating"]},
    "copule": {"pos": "conj"},
    "dét": {"pos": "det"},
    "déterminant": {"pos": "det"},
    "déterminant possessif": {"pos": "det", "tags": ["possessive"]},
    "déterminant démonstratif": {"pos": "det", "tags": ["demonstrative"]},
    "encl": {"pos": "suffix", "tags": ["clitic"]},
    "enclitique": {"pos": "suffix", "tags": ["clitic"]},
    "gismu": {"pos": "verb", "tags": ["gismu"]},
    "idéophone": {"pos": "noun", "tags": ["ideophone"]},
    "inf": {"pos": "infix", "tags": ["morpheme"]},
    "infixe": {"pos": "infix", "tags": ["morpheme"]},
    "interf": {"pos": "interfix", "tags": ["morpheme"]},
    "interfixe": {"pos": "interfix", "tags": ["morpheme"]},
    "interj": {"pos": "intj"},
    "interjection": {"pos": "intj"},
    "lettre": {"pos": "character", "tags": ["letter"]},
    "loc": {"pos": "phrase"},
    "loc-phr": {"pos": "phrase"},
    "locution": {"pos": "phrase"},
    "locution phrase": {"pos": "phrase"},
    "locution-phrase": {"pos": "phrase"},
    "nom": {"pos": "noun"},
    "nom commun": {"pos": "noun"},
    "nom de famille": {"pos": "name", "tags": ["surname"]},
    "nom propre": {"pos": "name"},
    "nom scientifique": {"pos": "name", "tags": ["scientific"]},
    "nom-sciences": {"pos": "name", "tags": ["scientific"]},
    "nom science": {"pos": "name", "tags": ["scientific"]},
    "nom scient": {"pos": "name", "tags": ["scientific"]},
    "nom-fam": {"pos": "name", "tags": ["surname"]},
    "nom-pr": {"pos": "name"},
    "num": {"pos": "num"},
    "numér": {"pos": "num"},
    "numéral": {"pos": "num"},
    "onom": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    "onoma": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    "onomatopée": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    "part": {"pos": "particle"},
    "part-num": {"pos": "particle", "tags": ["numeral"]},
    "particule": {"pos": "particle"},
    "particule num": {"pos": "particle", "tags": ["numeral"]},
    "particule numérale": {"pos": "particle", "tags": ["numeral"]},
    "patronyme": {"pos": "name", "tags": ["surname"]},
    "phr": {"pos": "phrase"},
    "phrase": {"pos": "phrase"},
    "post": {"pos": "postp"},
    "postpos": {"pos": "postp"},
    "postposition": {"pos": "postp"},
    "procl": {"pos": "prefix", "tags": ["clitic"]},
    "proclitique": {"pos": "prefix", "tags": ["clitic"]},
    "pronom": {"pos": "pron"},
    "pronom dém": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom démonstratif": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom ind": {"pos": "pron", "tags": ["indefinite"]},
    "pronom indéfini": {"pos": "pron", "tags": ["indefinite"]},
    "pronom int": {"pos": "pron", "tags": ["interrogative"]},
    "pronom interrogatif": {"pos": "pron", "tags": ["interrogative"]},
    "pronom personnel": {"pos": "pron", "tags": ["person"]},
    "pronom pos": {"pos": "pron", "tags": ["possessive"]},
    "pronom possessif": {"pos": "pron", "tags": ["possessive"]},
    "pronom rel": {"pos": "pron", "tags": ["relative"]},
    "pronom relatif": {"pos": "pron", "tags": ["relative"]},
    "pronom réf": {"pos": "pron", "tags": ["person"]},
    "pronom réfléchi": {"pos": "pron", "tags": ["person"]},
    "pronom-adj": {"pos": "pron", "tags": ["adjective"]},
    "pronom-adjectif": {"pos": "pron", "tags": ["adjective"]},
    "pronom-dém": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom-indéf": {"pos": "pron", "tags": ["indefinite"]},
    "pronom-int": {"pos": "pron", "tags": ["interrogative"]},
    "pronom-per": {"pos": "pron", "tags": ["person"]},
    "pronom-pers": {"pos": "pron", "tags": ["person"]},
    "pronom-pos": {"pos": "pron", "tags": ["possessive"]},
    "pronom-rel": {"pos": "pron", "tags": ["relative"]},
    "pronom-réfl": {"pos": "pron", "tags": ["person"]},
    "prov": {"pos": "proverb"},
    "proverbe": {"pos": "proverb"},
    "pré-nom": {"pos": "name", "tags": ["first-name"]},
    "pré-verbe": {"pos": "preverb"},
    "préf": {"pos": "prefix", "tags": ["morpheme"]},
    "préfixe": {"pos": "prefix", "tags": ["morpheme"]},
    "prénom": {"pos": "name", "tags": ["first-name"]},
    "prép": {"pos": "prep"},
    "préposition": {"pos": "prep"},
    "quantif": {"pos": "quantifier"},
    "quantificateur": {"pos": "quantifier"},
    "racine": {"pos": "root", "tags": ["morpheme"]},
    "rad": {"pos": "root", "tags": ["radical"]},
    "radical": {"pos": "root", "tags": ["radical"]},
    "rafsi": {"pos": "affix", "tags": ["rafsi"]},
    "sinogramme": {"pos": "character", "tags": ["letter"]},
    "substantif": {"pos": "noun"},
    "suf": {"pos": "suffix", "tags": ["morpheme"]},
    "suff": {"pos": "suffix", "tags": ["morpheme"]},
    "suffixe": {"pos": "suffix", "tags": ["morpheme"]},
    "symb": {"pos": "symbol"},
    "symbole": {"pos": "symbol"},
    "var-typo": {"pos": "typographic variant", "tags": ["alt-of"]},
    "variante par contrainte typographique": {
        "pos": "typographic variant",
        "tags": ["alt-of"],
    },
    "variante typo": {"pos": "typographic variant", "tags": ["alt-of"]},
    "variante typographique": {
        "pos": "typographic variant",
        "tags": ["alt-of"],
    },
    "verb": {"pos": "verb"},
    "verb pr": {"pos": "verb", "tags": ["pronominal"]},
    "verb-pr": {"pos": "verb", "tags": ["pronominal"]},
    "verbe": {"pos": "verb"},
    "verbe pronominal": {"pos": "verb", "tags": ["pronominal"]},
}

183 

# Linkage sections: section argument -> pydantic field name.
#
# Each run below lists the section arguments that map to one field; the
# comprehension flattens the runs into a plain dict, keeping this order.
# A field may appear in more than one run.
LINKAGE_SECTIONS: dict[str, str] = {
    section: field
    for field, sections in (
        ("abbreviation", ("abrév", "abréviations")),
        ("anagrams", ("anagrammes", "anagr", "anagramme")),
        ("antonyms", ("antonymes",)),
        ("related", ("app", "apparentés", "apr")),
        ("derived", ("dérivés autres langues", "dérivés int", "dérivés")),
        ("forms", ("dial", "dialectes")),
        ("derived", ("drv-int", "drv")),
        ("related", ("étymologiques",)),
        ("holonyms", ("holo", "holonymes")),
        ("hypernyms", ("hyper", "hyperonymes")),
        ("hyponyms", ("hypo", "hyponymes")),
        ("meronyms", ("méro", "méronymes")),
        ("paronyms", ("paro", "paronymes")),
        ("proverbs", ("phrases",)),
        (
            "synonyms",
            ("q-syn", "quasi-syn", "quasi-synonymes", "syn", "synonymes"),
        ),
        ("troponyms", ("tropo", "troponymes")),
        (
            "forms",
            (
                "var-dial",
                "var-ortho",
                "var",
                "variantes dial",
                "variantes dialectales",
                "variantes dialectes",
                "variantes ortho",
                "variantes orthographiques",
                "variantes",
            ),
        ),
        (
            "related",
            (
                "voc",
                "vocabulaire apparenté",
                "vocabulaire proche",
                "vocabulaire",
                "gentilés",
                "diminutifs",
                "augmentatifs",
            ),
        ),
        ("derived", ("composés",)),
        ("related", ("noms vernaculaires",)),
        ("forms", ("écriture", "anciennes orthographes")),
    )
    for section in sections
}

242 

# Tags associated with some linkage section arguments. Every key here is
# also a key of LINKAGE_SECTIONS; sections without an entry have no tags
# listed in this table.
LINKAGE_TAGS: dict[str, list[str]] = {
    "dial": ["dialectal"],
    "dialectes": ["dialectal"],
    "var-dial": ["dialectal"],
    "variantes dial": ["dialectal"],
    "variantes dialectales": ["dialectal"],
    "variantes dialectes": ["dialectal"],
    "gentilés": ["demonym", "adjective"],
    "diminutifs": ["diminutive"],
    "augmentatifs": ["augmentative"],
    "composés": ["compound"],
    "noms vernaculaires": ["vernacular"],
    "anciennes orthographes": ["archaic"],
}

257 

# Section names grouped as ignored: references, sources, bibliography,
# citations and "see also", each in its long and abbreviated spellings.
IGNORED_SECTIONS: frozenset[str] = frozenset(
    {
        "références",
        "référence",
        "réf",
        "ref",
        "sources",
        "src",
        "bibliographie",
        "bib",
        "citations",
        "cit",
        "voir aussi",
        "voir",
    }
)

274 

# Section names for compounds.
# NOTE(review): "composés" is also a key of LINKAGE_SECTIONS; which table
# wins depends on the order the caller checks them — confirm at the call site.
COMPOUNDS_SECTIONS: frozenset[str] = frozenset({"composés", "compos"})

276 

# Section names for etymology, including the unaccented "etym" spelling.
ETYMOLOGY_SECTIONS: frozenset[str] = frozenset({"étymologie", "étym", "etym"})

278 

# Section names for inflection: declension and conjugation, long and short.
INFLECTION_SECTIONS: frozenset[str] = frozenset(
    {"déclinaison", "décl", "conjugaison", "conjug"}
)

282 

# Section names for notes, plural and singular.
NOTES_SECTIONS: frozenset[str] = frozenset({"notes", "note"})

284 

# Section names for pronunciation: singular, abbreviated and plural.
PRONUNCIATION_SECTIONS: frozenset[str] = frozenset(
    {"prononciation", "pron", "prononciations"}
)

288 

# Section names for translations, including the "to be sorted"
# ("à trier") variants.
TRANSLATION_SECTIONS: frozenset[str] = frozenset(
    {
        "traductions",
        "trad",
        "traductions à trier",
        "trad-trier",
        "trad trier",
    }
)