Coverage for src/wiktextract/extractor/fr/section_types.py: 100%
11 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-13 10:14 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-13 10:14 +0000
1from ...config import POSSubtitleData
# Part-of-speech section table.
#
# The keys are the first argument of the `S` template:
# https://fr.wiktionary.org/wiki/Modèle:S
# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_des_sections
# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_des_sections_de_types_de_mots
# https://fr.wiktionary.org/wiki/Module:types_de_mots/data
#
# Each value carries a "pos" string and, for some entries, a "tags" list.
# The value type (POSSubtitleData) is declared in the project's config module.
POS_SECTIONS: dict[str, POSSubtitleData] = {
    # adjectives
    "adj": {"pos": "adj"},
    "adj-dém": {"pos": "adj", "tags": ["demonstrative"]},
    "adj-excl": {"pos": "adj", "tags": ["exclamatory"]},
    "adj-indéf": {"pos": "adj", "tags": ["indefinite"]},
    "adj-int": {"pos": "adj", "tags": ["interrogative"]},
    "adj-num": {"pos": "adj", "tags": ["numeral"]},
    "adj-pos": {"pos": "adj", "tags": ["possessive"]},
    "adj-rel": {"pos": "adj", "tags": ["relative"]},
    "adjectif": {"pos": "adj"},
    "adjectif dém": {"pos": "adj", "tags": ["demonstrative"]},
    "adjectif démonstratif": {"pos": "adj", "tags": ["demonstrative"]},
    "adjectif exc": {"pos": "adj", "tags": ["exclamatory"]},
    "adjectif exclamatif": {"pos": "adj", "tags": ["exclamatory"]},
    "adjectif ind": {"pos": "adj", "tags": ["indefinite"]},
    "adjectif indéfini": {"pos": "adj", "tags": ["indefinite"]},
    "adjectif int": {"pos": "adj", "tags": ["interrogative"]},
    "adjectif interrogatif": {"pos": "adj", "tags": ["interrogative"]},
    "adjectif num": {"pos": "adj", "tags": ["numeral"]},
    "adjectif numéral": {"pos": "adj", "tags": ["numeral"]},
    "adjectif pos": {"pos": "adj", "tags": ["possessive"]},
    "adjectif possessif": {"pos": "adj", "tags": ["possessive"]},
    "adjectif qualificatif": {"pos": "adj"},
    "adjectif rel": {"pos": "adj", "tags": ["relative"]},
    "adjectif relatif": {"pos": "adj", "tags": ["relative"]},
    # adverbs
    "adv": {"pos": "adv"},
    "adv-ind": {"pos": "adv", "tags": ["indefinite"]},
    "adv-int": {"pos": "adv", "tags": ["interrogative"]},
    "adv-pron": {"pos": "adv"},
    "adv-rel": {"pos": "adv", "tags": ["relative"]},
    "adverbe": {"pos": "adv"},
    "adverbe ind": {"pos": "adv", "tags": ["indefinite"]},
    "adverbe indéfini": {"pos": "adv", "tags": ["indefinite"]},
    "adverbe int": {"pos": "adv", "tags": ["interrogative"]},
    "adverbe interrogatif": {"pos": "adv", "tags": ["interrogative"]},
    "adverbe pro": {"pos": "adv"},
    "adverbe pronominal": {"pos": "adv"},
    "adverbe rel": {"pos": "adv", "tags": ["relative"]},
    "adverbe relatif": {"pos": "adv", "tags": ["relative"]},
    # affixes and articles
    "aff": {"pos": "affix"},
    "affixe": {"pos": "affix"},
    "art": {"pos": "article"},
    "art-déf": {"pos": "article", "tags": ["definite"]},
    "art-indéf": {"pos": "article", "tags": ["indefinite"]},
    "art-part": {"pos": "article", "tags": ["partial"]},
    "article": {"pos": "article"},
    "article déf": {"pos": "article", "tags": ["definite"]},
    "article défini": {"pos": "article", "tags": ["definite"]},
    "article ind": {"pos": "article", "tags": ["indefinite"]},
    "article indéfini": {"pos": "article", "tags": ["indefinite"]},
    "article par": {"pos": "article", "tags": ["partial"]},
    "article partitif": {"pos": "article", "tags": ["partial"]},
    # circumfixes, classifiers, conjunctions, determiners
    "circon": {"pos": "circumfix", "tags": ["morpheme"]},
    "circonf": {"pos": "circumfix", "tags": ["morpheme"]},
    "circonfixe": {"pos": "circumfix", "tags": ["morpheme"]},
    "class": {"pos": "classifier"},
    "classif": {"pos": "classifier"},
    "classificateur": {"pos": "classifier"},
    "conj": {"pos": "conj"},
    "conj-coord": {"pos": "conj", "tags": ["coordinating"]},
    "conjonction": {"pos": "conj"},
    "conjonction coo": {"pos": "conj", "tags": ["coordinating"]},
    "conjonction de coordination": {"pos": "conj", "tags": ["coordinating"]},
    "copule": {"pos": "conj"},
    "dét": {"pos": "det"},
    "déterminant": {"pos": "det"},
    "déterminant possessif": {"pos": "det", "tags": ["possessive"]},
    "déterminant démonstratif": {"pos": "det", "tags": ["demonstrative"]},
    # clitics, infixes, interfixes, interjections, letters, phrases
    "encl": {"pos": "suffix", "tags": ["clitic"]},
    "enclitique": {"pos": "suffix", "tags": ["clitic"]},
    "gismu": {"pos": "verb", "tags": ["gismu"]},
    "idéophone": {"pos": "noun", "tags": ["ideophone"]},
    "inf": {"pos": "infix", "tags": ["morpheme"]},
    "infixe": {"pos": "infix", "tags": ["morpheme"]},
    "interf": {"pos": "interfix", "tags": ["morpheme"]},
    "interfixe": {"pos": "interfix", "tags": ["morpheme"]},
    "interj": {"pos": "intj"},
    "interjection": {"pos": "intj"},
    "lettre": {"pos": "character", "tags": ["letter"]},
    "loc": {"pos": "phrase"},
    "loc-phr": {"pos": "phrase"},
    "locution": {"pos": "phrase"},
    "locution phrase": {"pos": "phrase"},
    "locution-phrase": {"pos": "phrase"},
    # nouns and names
    "nom": {"pos": "noun"},
    "nom commun": {"pos": "noun"},
    "nom de famille": {"pos": "name", "tags": ["surname"]},
    "nom propre": {"pos": "name"},
    "nom scientifique": {"pos": "name", "tags": ["scientific"]},
    "nom-sciences": {"pos": "name", "tags": ["scientific"]},
    "nom science": {"pos": "name", "tags": ["scientific"]},
    "nom scient": {"pos": "name", "tags": ["scientific"]},
    "nom-fam": {"pos": "name", "tags": ["surname"]},
    "nom-pr": {"pos": "name"},
    # numerals, onomatopoeia, particles, postpositions, proclitics
    "num": {"pos": "num"},
    "numér": {"pos": "num"},
    "numéral": {"pos": "num"},
    "onom": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    "onoma": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    "onomatopée": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    "part": {"pos": "particle"},
    "part-num": {"pos": "particle", "tags": ["numeral"]},
    "particule": {"pos": "particle"},
    "particule num": {"pos": "particle", "tags": ["numeral"]},
    "particule numérale": {"pos": "particle", "tags": ["numeral"]},
    "patronyme": {"pos": "name", "tags": ["surname"]},
    "phr": {"pos": "phrase"},
    "phrase": {"pos": "phrase"},
    "post": {"pos": "postp"},
    "postpos": {"pos": "postp"},
    "postposition": {"pos": "postp"},
    "procl": {"pos": "prefix", "tags": ["clitic"]},
    "proclitique": {"pos": "prefix", "tags": ["clitic"]},
    # pronouns
    "pronom": {"pos": "pron"},
    "pronom dém": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom démonstratif": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom ind": {"pos": "pron", "tags": ["indefinite"]},
    "pronom indéfini": {"pos": "pron", "tags": ["indefinite"]},
    "pronom int": {"pos": "pron", "tags": ["interrogative"]},
    "pronom interrogatif": {"pos": "pron", "tags": ["interrogative"]},
    "pronom personnel": {"pos": "pron", "tags": ["person"]},
    "pronom pos": {"pos": "pron", "tags": ["possessive"]},
    "pronom possessif": {"pos": "pron", "tags": ["possessive"]},
    "pronom rel": {"pos": "pron", "tags": ["relative"]},
    "pronom relatif": {"pos": "pron", "tags": ["relative"]},
    # NOTE(review): the reflexive entries reuse the "person" tag of the
    # personal pronouns - confirm this is intended.
    "pronom réf": {"pos": "pron", "tags": ["person"]},
    "pronom réfléchi": {"pos": "pron", "tags": ["person"]},
    "pronom-adj": {"pos": "pron", "tags": ["adjective"]},
    "pronom-adjectif": {"pos": "pron", "tags": ["adjective"]},
    "pronom-dém": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom-indéf": {"pos": "pron", "tags": ["indefinite"]},
    "pronom-int": {"pos": "pron", "tags": ["interrogative"]},
    "pronom-per": {"pos": "pron", "tags": ["person"]},
    "pronom-pers": {"pos": "pron", "tags": ["person"]},
    "pronom-pos": {"pos": "pron", "tags": ["possessive"]},
    "pronom-rel": {"pos": "pron", "tags": ["relative"]},
    "pronom-réfl": {"pos": "pron", "tags": ["person"]},
    # proverbs, first names, prefixes, prepositions, quantifiers, roots
    "prov": {"pos": "proverb"},
    "proverbe": {"pos": "proverb"},
    "pré-nom": {"pos": "name", "tags": ["first-name"]},
    "pré-verbe": {"pos": "preverb"},
    "préf": {"pos": "prefix", "tags": ["morpheme"]},
    "préfixe": {"pos": "prefix", "tags": ["morpheme"]},
    "prénom": {"pos": "name", "tags": ["first-name"]},
    "prép": {"pos": "prep"},
    "préposition": {"pos": "prep"},
    "quantif": {"pos": "quantifier"},
    "quantificateur": {"pos": "quantifier"},
    "racine": {"pos": "root", "tags": ["morpheme"]},
    "rad": {"pos": "root", "tags": ["radical"]},
    "radical": {"pos": "root", "tags": ["radical"]},
    "rafsi": {"pos": "affix", "tags": ["rafsi"]},
    "sinogramme": {"pos": "character", "tags": ["letter"]},
    "substantif": {"pos": "noun"},
    # suffixes, symbols, typographic variants
    "suf": {"pos": "suffix", "tags": ["morpheme"]},
    "suff": {"pos": "suffix", "tags": ["morpheme"]},
    "suffixe": {"pos": "suffix", "tags": ["morpheme"]},
    "symb": {"pos": "symbol"},
    "symbole": {"pos": "symbol"},
    "var-typo": {"pos": "typographic variant", "tags": ["alt-of"]},
    "variante par contrainte typographique": {
        "pos": "typographic variant",
        "tags": ["alt-of"],
    },
    "variante typo": {"pos": "typographic variant", "tags": ["alt-of"]},
    "variante typographique": {
        "pos": "typographic variant",
        "tags": ["alt-of"],
    },
    # verbs
    "verb": {"pos": "verb"},
    "verb pr": {"pos": "verb", "tags": ["pronominal"]},
    "verb-pr": {"pos": "verb", "tags": ["pronominal"]},
    "verbe": {"pos": "verb"},
    "verbe pronominal": {"pos": "verb", "tags": ["pronominal"]},
}
# Lookup table: section argument -> name of the pydantic field that the
# section's entries are stored under.
LINKAGE_SECTIONS: dict[str, str] = {
    # abbreviations and anagrams
    "abrév": "abbreviation",
    "abréviations": "abbreviation",
    "anagrammes": "anagrams",
    "anagr": "anagrams",
    "anagramme": "anagrams",
    # antonyms and related words
    "antonymes": "antonyms",
    "app": "related",
    "apparentés": "related",
    "apr": "related",
    # derived terms and dialect forms
    "dérivés autres langues": "derived",
    "dérivés int": "derived",
    "dérivés": "derived",
    "dial": "forms",
    "dialectes": "forms",
    "drv-int": "derived",
    "drv": "derived",
    "étymologiques": "related",
    # semantic relations
    "holo": "holonyms",
    "holonymes": "holonyms",
    "hyper": "hypernyms",
    "hyperonymes": "hypernyms",
    "hypo": "hyponyms",
    "hyponymes": "hyponyms",
    "méro": "meronyms",
    "méronymes": "meronyms",
    "paro": "paronyms",
    "paronymes": "paronyms",
    "phrases": "proverbs",
    # synonyms and troponyms
    "q-syn": "synonyms",
    "quasi-syn": "synonyms",
    "quasi-synonymes": "synonyms",
    "syn": "synonyms",
    "synonymes": "synonyms",
    "tropo": "troponyms",
    "troponymes": "troponyms",
    # variants
    "var-dial": "forms",
    "var-ortho": "forms",
    "var": "forms",
    "variantes dial": "forms",
    "variantes dialectales": "forms",
    "variantes dialectes": "forms",
    "variantes ortho": "forms",
    "variantes orthographiques": "forms",
    "variantes": "forms",
    # vocabulary
    "voc": "related",
    "vocabulaire apparenté": "related",
    "vocabulaire proche": "related",
    "vocabulaire": "related",
    # remaining sections
    "gentilés": "related",
    "diminutifs": "related",
    "augmentatifs": "related",
    "composés": "derived",
    "noms vernaculaires": "related",
    "écriture": "forms",
    "anciennes orthographes": "forms",
}
# Tag lists associated with particular linkage section arguments.
# Every key here also appears in LINKAGE_SECTIONS; section arguments that
# are absent from this table have no tag list of their own.
LINKAGE_TAGS: dict[str, list[str]] = {
    "dial": ["dialectal"],
    "dialectes": ["dialectal"],
    "var-dial": ["dialectal"],
    "variantes dial": ["dialectal"],
    "variantes dialectales": ["dialectal"],
    "variantes dialectes": ["dialectal"],
    "gentilés": ["demonym", "adjective"],
    "diminutifs": ["diminutive"],
    "augmentatifs": ["augmentative"],
    "composés": ["compound"],
    "noms vernaculaires": ["vernacular"],
    "anciennes orthographes": ["archaic"],
}
# Reference, source, bibliography, citation and "see also" section names.
# Presumably the extractor skips these sections; the consumer of this set is
# outside this file.
IGNORED_SECTIONS: frozenset[str] = frozenset(
    {
        "références",
        "référence",
        "réf",
        "ref",
        "sources",
        "src",
        "bibliographie",
        "bib",
        "citations",
        "cit",
        "voir aussi",
        "voir",
    }
)
# Section names for compound-word sections.
COMPOUNDS_SECTIONS: frozenset[str] = frozenset({"composés", "compos"})
# Section names for etymology sections (full form and abbreviations).
ETYMOLOGY_SECTIONS: frozenset[str] = frozenset({"étymologie", "étym", "etym"})
# Section names for declension and conjugation sections.
INFLECTION_SECTIONS: frozenset[str] = frozenset(
    {"déclinaison", "décl", "conjugaison", "conjug"}
)
# Section names for notes sections (plural and singular).
NOTES_SECTIONS: frozenset[str] = frozenset({"notes", "note"})
# Section names for pronunciation sections.
PRONUNCIATION_SECTIONS: frozenset[str] = frozenset(
    {"prononciation", "pron", "prononciations"}
)
# Section names for translation sections, including the
# "traductions à trier" variants.
TRANSLATION_SECTIONS: frozenset[str] = frozenset(
    {"traductions", "trad", "traductions à trier", "trad-trier", "trad trier"}
)