Coverage for src/wiktextract/extractor/fr/section_types.py: 100%

10 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from ...config import POSSubtitleData 

2 

# Part-of-speech section table for the French Wiktionary.
#
# The keys are the first argument of the `S` template:
# https://fr.wiktionary.org/wiki/Modèle:S
# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_des_sections
# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_des_sections_de_types_de_mots
# https://fr.wiktionary.org/wiki/Module:types_de_mots/data
#
# Each value carries the "pos" string for that section and, for some
# entries, a "tags" list.  Full names and their abbreviations/aliases map to
# equal values.
POS_SECTIONS: dict[str, POSSubtitleData] = {
    "adj": {"pos": "adj"},
    "adj-dém": {"pos": "adj", "tags": ["demonstrative"]},
    "adj-excl": {"pos": "adj", "tags": ["exclamatory"]},
    "adj-indéf": {"pos": "adj", "tags": ["indefinite"]},
    "adj-int": {"pos": "adj", "tags": ["interrogative"]},
    "adj-num": {"pos": "adj", "tags": ["numeral"]},
    "adj-pos": {"pos": "adj", "tags": ["possessive"]},
    "adj-rel": {"pos": "adj", "tags": ["relative"]},
    "adjectif": {"pos": "adj"},
    "adjectif dém": {"pos": "adj", "tags": ["demonstrative"]},
    "adjectif démonstratif": {"pos": "adj", "tags": ["demonstrative"]},
    "adjectif exc": {"pos": "adj", "tags": ["exclamatory"]},
    "adjectif exclamatif": {"pos": "adj", "tags": ["exclamatory"]},
    "adjectif ind": {"pos": "adj", "tags": ["indefinite"]},
    "adjectif indéfini": {"pos": "adj", "tags": ["indefinite"]},
    "adjectif int": {"pos": "adj", "tags": ["interrogative"]},
    "adjectif interrogatif": {"pos": "adj", "tags": ["interrogative"]},
    "adjectif num": {"pos": "adj", "tags": ["numeral"]},
    "adjectif numéral": {"pos": "adj", "tags": ["numeral"]},
    "adjectif pos": {"pos": "adj", "tags": ["possessive"]},
    "adjectif possessif": {"pos": "adj", "tags": ["possessive"]},
    "adjectif qualificatif": {"pos": "adj"},
    "adjectif rel": {"pos": "adj", "tags": ["relative"]},
    "adjectif relatif": {"pos": "adj", "tags": ["relative"]},
    "adv": {"pos": "adv"},
    "adv-ind": {"pos": "adv", "tags": ["indefinite"]},
    "adv-int": {"pos": "adv", "tags": ["interrogative"]},
    "adv-pron": {"pos": "adv"},
    "adv-rel": {"pos": "adv", "tags": ["relative"]},
    "adverbe": {"pos": "adv"},
    "adverbe ind": {"pos": "adv", "tags": ["indefinite"]},
    "adverbe indéfini": {"pos": "adv", "tags": ["indefinite"]},
    "adverbe int": {"pos": "adv", "tags": ["interrogative"]},
    "adverbe interrogatif": {"pos": "adv", "tags": ["interrogative"]},
    "adverbe pro": {"pos": "adv"},
    "adverbe pronominal": {"pos": "adv"},
    "adverbe rel": {"pos": "adv", "tags": ["relative"]},
    "adverbe relatif": {"pos": "adv", "tags": ["relative"]},
    "aff": {"pos": "affix"},
    "affixe": {"pos": "affix"},
    "art": {"pos": "article"},
    "art-déf": {"pos": "article", "tags": ["definite"]},
    "art-indéf": {"pos": "article", "tags": ["indefinite"]},
    # NOTE(review): "partitif" is normally rendered "partitive" in English;
    # the existing "partial" tag is kept as-is for the partitive-article
    # entries -- confirm which value downstream consumers expect.
    "art-part": {"pos": "article", "tags": ["partial"]},
    "article": {"pos": "article"},
    "article déf": {"pos": "article", "tags": ["definite"]},
    "article défini": {"pos": "article", "tags": ["definite"]},
    "article ind": {"pos": "article", "tags": ["indefinite"]},
    "article indéfini": {"pos": "article", "tags": ["indefinite"]},
    "article par": {"pos": "article", "tags": ["partial"]},
    "article partitif": {"pos": "article", "tags": ["partial"]},
    "circon": {"pos": "circumfix", "tags": ["morpheme"]},
    "circonf": {"pos": "circumfix", "tags": ["morpheme"]},
    "circonfixe": {"pos": "circumfix", "tags": ["morpheme"]},
    "class": {"pos": "classifier"},
    "classif": {"pos": "classifier"},
    "classificateur": {"pos": "classifier"},
    "conj": {"pos": "conj"},
    "conj-coord": {"pos": "conj", "tags": ["coordinating"]},
    "conjonction": {"pos": "conj"},
    "conjonction coo": {"pos": "conj", "tags": ["coordinating"]},
    "conjonction de coordination": {"pos": "conj", "tags": ["coordinating"]},
    "copule": {"pos": "conj"},
    "dét": {"pos": "det"},
    "déterminant": {"pos": "det"},
    "encl": {"pos": "suffix", "tags": ["clitic"]},
    "enclitique": {"pos": "suffix", "tags": ["clitic"]},
    "gismu": {"pos": "verb", "tags": ["gismu"]},
    "idéophone": {"pos": "noun", "tags": ["ideophone"]},
    "inf": {"pos": "infix", "tags": ["morpheme"]},
    "infixe": {"pos": "infix", "tags": ["morpheme"]},
    "interf": {"pos": "interfix", "tags": ["morpheme"]},
    "interfixe": {"pos": "interfix", "tags": ["morpheme"]},
    "interj": {"pos": "intj"},
    "interjection": {"pos": "intj"},
    "lettre": {"pos": "character", "tags": ["letter"]},
    "loc": {"pos": "phrase"},
    "loc-phr": {"pos": "phrase"},
    "locution": {"pos": "phrase"},
    "locution phrase": {"pos": "phrase"},
    "locution-phrase": {"pos": "phrase"},
    "nom": {"pos": "noun"},
    "nom commun": {"pos": "noun"},
    "nom de famille": {"pos": "name", "tags": ["surname"]},
    "nom propre": {"pos": "name"},
    "nom scientifique": {"pos": "name", "tags": ["scientific"]},
    "nom-sciences": {"pos": "name", "tags": ["scientific"]},
    "nom science": {"pos": "name", "tags": ["scientific"]},
    "nom scient": {"pos": "name", "tags": ["scientific"]},
    "nom-fam": {"pos": "name", "tags": ["surname"]},
    "nom-pr": {"pos": "name"},
    "num": {"pos": "num"},
    "numér": {"pos": "num"},
    "numéral": {"pos": "num"},
    "onom": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    "onoma": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    "onomatopée": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    "part": {"pos": "particle"},
    "part-num": {"pos": "particle", "tags": ["numeral"]},
    "particule": {"pos": "particle"},
    "particule num": {"pos": "particle", "tags": ["numeral"]},
    "particule numérale": {"pos": "particle", "tags": ["numeral"]},
    "patronyme": {"pos": "name", "tags": ["surname"]},
    "phr": {"pos": "phrase"},
    "phrase": {"pos": "phrase"},
    "post": {"pos": "postp"},
    "postpos": {"pos": "postp"},
    "postposition": {"pos": "postp"},
    "procl": {"pos": "prefix", "tags": ["clitic"]},
    "proclitique": {"pos": "prefix", "tags": ["clitic"]},
    "pronom": {"pos": "pron"},
    "pronom dém": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom démonstratif": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom ind": {"pos": "pron", "tags": ["indefinite"]},
    "pronom indéfini": {"pos": "pron", "tags": ["indefinite"]},
    "pronom int": {"pos": "pron", "tags": ["interrogative"]},
    "pronom interrogatif": {"pos": "pron", "tags": ["interrogative"]},
    "pronom personnel": {"pos": "pron", "tags": ["person"]},
    "pronom pos": {"pos": "pron", "tags": ["possessive"]},
    "pronom possessif": {"pos": "pron", "tags": ["possessive"]},
    "pronom rel": {"pos": "pron", "tags": ["relative"]},
    "pronom relatif": {"pos": "pron", "tags": ["relative"]},
    "pronom réf": {"pos": "pron", "tags": ["person"]},
    "pronom réfléchi": {"pos": "pron", "tags": ["person"]},
    "pronom-adj": {"pos": "pron", "tags": ["adjective"]},
    "pronom-adjectif": {"pos": "pron", "tags": ["adjective"]},
    "pronom-dém": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom-indéf": {"pos": "pron", "tags": ["indefinite"]},
    "pronom-int": {"pos": "pron", "tags": ["interrogative"]},
    "pronom-per": {"pos": "pron", "tags": ["person"]},
    "pronom-pers": {"pos": "pron", "tags": ["person"]},
    "pronom-pos": {"pos": "pron", "tags": ["possessive"]},
    "pronom-rel": {"pos": "pron", "tags": ["relative"]},
    "pronom-réfl": {"pos": "pron", "tags": ["person"]},
    "prov": {"pos": "proverb"},
    "proverbe": {"pos": "proverb"},
    "pré-nom": {"pos": "name", "tags": ["first name"]},
    "pré-verbe": {"pos": "preverb"},
    "préf": {"pos": "prefix", "tags": ["morpheme"]},
    "préfixe": {"pos": "prefix", "tags": ["morpheme"]},
    "prénom": {"pos": "name", "tags": ["first name"]},
    "prép": {"pos": "prep"},
    "préposition": {"pos": "prep"},
    "quantif": {"pos": "quantifier"},
    "quantificateur": {"pos": "quantifier"},
    "racine": {"pos": "root", "tags": ["morpheme"]},
    "rad": {"pos": "root", "tags": ["radical"]},
    "radical": {"pos": "root", "tags": ["radical"]},
    "rafsi": {"pos": "affix", "tags": ["rafsi"]},
    "sinogramme": {"pos": "character", "tags": ["letter"]},
    "substantif": {"pos": "noun"},
    "suf": {"pos": "suffix", "tags": ["morpheme"]},
    "suff": {"pos": "suffix", "tags": ["morpheme"]},
    "suffixe": {"pos": "suffix", "tags": ["morpheme"]},
    "symb": {"pos": "symbol"},
    "symbole": {"pos": "symbol"},
    "var-typo": {"pos": "typographic variant", "tags": ["alt-of"]},
    "variante par contrainte typographique": {
        "pos": "typographic variant",
        "tags": ["alt-of"],
    },
    "variante typo": {"pos": "typographic variant", "tags": ["alt-of"]},
    "variante typographique": {
        "pos": "typographic variant",
        "tags": ["alt-of"],
    },
    "verb": {"pos": "verb"},
    "verb pr": {"pos": "verb", "tags": ["pronominal"]},
    "verb-pr": {"pos": "verb", "tags": ["pronominal"]},
    "verbe": {"pos": "verb"},
    "verbe pronominal": {"pos": "verb", "tags": ["pronominal"]},
}

181 

# Map linkage section arguments to pydantic field names.
#
# Several spellings (full French name, abbreviation, hyphenated alias) point
# at the same field; for example the dialectal/orthographic variant and
# "vocabulaire" sections all map to "related".
LINKAGE_SECTIONS: dict[str, str] = {
    "abrév": "abbreviation",
    "abréviations": "abbreviation",
    "anagrammes": "anagrams",
    "anagr": "anagrams",
    "anagramme": "anagrams",
    "antonymes": "antonyms",
    "app": "related",
    "apparentés": "related",
    "apr": "related",
    "dérivés autres langues": "derived",
    "dérivés int": "derived",
    "dérivés": "derived",
    "dial": "related",
    "dialectes": "related",
    "drv-int": "derived",
    "drv": "derived",
    "étymologiques": "related",
    "holo": "holonyms",
    "holonymes": "holonyms",
    "hyper": "hypernyms",
    "hyperonymes": "hypernyms",
    "hypo": "hyponyms",
    "hyponymes": "hyponyms",
    "méro": "meronyms",
    "méronymes": "meronyms",
    "paro": "paronyms",
    "paronymes": "paronyms",
    "phrases": "proverbs",
    "q-syn": "synonyms",
    "quasi-syn": "synonyms",
    "quasi-synonymes": "synonyms",
    "syn": "synonyms",
    "synonymes": "synonyms",
    "tropo": "troponyms",
    "troponymes": "troponyms",
    "var-dial": "related",
    "var-ortho": "related",
    "var": "related",
    "variantes dial": "related",
    "variantes dialectales": "related",
    "variantes dialectes": "related",
    "variantes ortho": "related",
    "variantes orthographiques": "related",
    "variantes": "related",
    "voc": "related",
    "vocabulaire apparenté": "related",
    "vocabulaire proche": "related",
    "vocabulaire": "related",
}

233 

# Section arguments for reference, source, bibliography and citation
# sections.  Going by the constant's name these are skipped by the extractor;
# the code that consumes this set is outside this file.
IGNORED_SECTIONS: frozenset[str] = frozenset(
    {
        "références",
        "référence",
        "réf",
        "ref",
        "sources",
        "src",
        "bibliographie",
        "bib",
        "citations",
        "cit",
    }
)

248 

# Section arguments that denote a compounds ("composés") section.
COMPOUNDS_SECTIONS: frozenset[str] = frozenset({"composés", "compos"})

250 

# Section arguments that denote an etymology section; both the accented and
# the unaccented abbreviation are listed.
ETYMOLOGY_SECTIONS: frozenset[str] = frozenset({"étymologie", "étym", "etym"})

252 

# Section arguments for inflection sections: declension ("déclinaison") and
# conjugation ("conjugaison"), each with its abbreviation.
INFLECTION_SECTIONS: frozenset[str] = frozenset(
    {"déclinaison", "décl", "conjugaison", "conjug"}
)

256 

# Section arguments that denote a notes section (plural and singular).
NOTES_SECTIONS: frozenset[str] = frozenset({"notes", "note"})

258 

# Section arguments that denote a pronunciation section.
PRONUNCIATION_SECTIONS: frozenset[str] = frozenset(
    {"prononciation", "pron", "prononciations"}
)

262 

# Section arguments that denote a translation section, including the
# "traductions à trier" (translations to be sorted) variants.
TRANSLATION_SECTIONS: frozenset[str] = frozenset(
    {"traductions", "trad", "traductions à trier", "trad-trier", "trad trier"}
)