Coverage for src/wiktextract/extractor/fr/section_types.py: 100%
10 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from ...config import POSSubtitleData
# Part-of-speech section names of the French Wiktionary.
#
# The keys are the first argument of the `S` template:
# https://fr.wiktionary.org/wiki/Modèle:S
# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_des_sections
# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_des_sections_de_types_de_mots
# https://fr.wiktionary.org/wiki/Module:types_de_mots/data
#
# Each value holds a "pos" string and, for the more specific section
# names, a list of "tags".  Abbreviated and spelled-out variants of the
# same section name map to equal values.
POS_SECTIONS: dict[str, POSSubtitleData] = {
    # Adjectives
    "adj": {"pos": "adj"},
    "adj-dém": {"pos": "adj", "tags": ["demonstrative"]},
    "adj-excl": {"pos": "adj", "tags": ["exclamatory"]},
    "adj-indéf": {"pos": "adj", "tags": ["indefinite"]},
    "adj-int": {"pos": "adj", "tags": ["interrogative"]},
    "adj-num": {"pos": "adj", "tags": ["numeral"]},
    "adj-pos": {"pos": "adj", "tags": ["possessive"]},
    "adj-rel": {"pos": "adj", "tags": ["relative"]},
    "adjectif": {"pos": "adj"},
    "adjectif dém": {"pos": "adj", "tags": ["demonstrative"]},
    "adjectif démonstratif": {"pos": "adj", "tags": ["demonstrative"]},
    "adjectif exc": {"pos": "adj", "tags": ["exclamatory"]},
    "adjectif exclamatif": {"pos": "adj", "tags": ["exclamatory"]},
    "adjectif ind": {"pos": "adj", "tags": ["indefinite"]},
    "adjectif indéfini": {"pos": "adj", "tags": ["indefinite"]},
    "adjectif int": {"pos": "adj", "tags": ["interrogative"]},
    "adjectif interrogatif": {"pos": "adj", "tags": ["interrogative"]},
    "adjectif num": {"pos": "adj", "tags": ["numeral"]},
    "adjectif numéral": {"pos": "adj", "tags": ["numeral"]},
    "adjectif pos": {"pos": "adj", "tags": ["possessive"]},
    "adjectif possessif": {"pos": "adj", "tags": ["possessive"]},
    "adjectif qualificatif": {"pos": "adj"},
    "adjectif rel": {"pos": "adj", "tags": ["relative"]},
    "adjectif relatif": {"pos": "adj", "tags": ["relative"]},
    # Adverbs
    "adv": {"pos": "adv"},
    "adv-ind": {"pos": "adv", "tags": ["indefinite"]},
    "adv-int": {"pos": "adv", "tags": ["interrogative"]},
    "adv-pron": {"pos": "adv"},
    "adv-rel": {"pos": "adv", "tags": ["relative"]},
    "adverbe": {"pos": "adv"},
    "adverbe ind": {"pos": "adv", "tags": ["indefinite"]},
    "adverbe indéfini": {"pos": "adv", "tags": ["indefinite"]},
    "adverbe int": {"pos": "adv", "tags": ["interrogative"]},
    "adverbe interrogatif": {"pos": "adv", "tags": ["interrogative"]},
    "adverbe pro": {"pos": "adv"},
    "adverbe pronominal": {"pos": "adv"},
    "adverbe rel": {"pos": "adv", "tags": ["relative"]},
    "adverbe relatif": {"pos": "adv", "tags": ["relative"]},
    # Affixes
    "aff": {"pos": "affix"},
    "affixe": {"pos": "affix"},
    # Articles
    # NOTE(review): "partial" stands for French "partitif"; "partitive" may
    # be the intended tag -- confirm before changing, consumers may rely on it.
    "art": {"pos": "article"},
    "art-déf": {"pos": "article", "tags": ["definite"]},
    "art-indéf": {"pos": "article", "tags": ["indefinite"]},
    "art-part": {"pos": "article", "tags": ["partial"]},
    "article": {"pos": "article"},
    "article déf": {"pos": "article", "tags": ["definite"]},
    "article défini": {"pos": "article", "tags": ["definite"]},
    "article ind": {"pos": "article", "tags": ["indefinite"]},
    "article indéfini": {"pos": "article", "tags": ["indefinite"]},
    "article par": {"pos": "article", "tags": ["partial"]},
    "article partitif": {"pos": "article", "tags": ["partial"]},
    # Circumfixes and classifiers
    "circon": {"pos": "circumfix", "tags": ["morpheme"]},
    "circonf": {"pos": "circumfix", "tags": ["morpheme"]},
    "circonfixe": {"pos": "circumfix", "tags": ["morpheme"]},
    "class": {"pos": "classifier"},
    "classif": {"pos": "classifier"},
    "classificateur": {"pos": "classifier"},
    # Conjunctions and determiners
    "conj": {"pos": "conj"},
    "conj-coord": {"pos": "conj", "tags": ["coordinating"]},
    "conjonction": {"pos": "conj"},
    "conjonction coo": {"pos": "conj", "tags": ["coordinating"]},
    "conjonction de coordination": {"pos": "conj", "tags": ["coordinating"]},
    "copule": {"pos": "conj"},
    "dét": {"pos": "det"},
    "déterminant": {"pos": "det"},
    # Clitics, infixes, interfixes, interjections and other small classes
    "encl": {"pos": "suffix", "tags": ["clitic"]},
    "enclitique": {"pos": "suffix", "tags": ["clitic"]},
    "gismu": {"pos": "verb", "tags": ["gismu"]},
    "idéophone": {"pos": "noun", "tags": ["ideophone"]},
    "inf": {"pos": "infix", "tags": ["morpheme"]},
    "infixe": {"pos": "infix", "tags": ["morpheme"]},
    "interf": {"pos": "interfix", "tags": ["morpheme"]},
    "interfixe": {"pos": "interfix", "tags": ["morpheme"]},
    "interj": {"pos": "intj"},
    "interjection": {"pos": "intj"},
    "lettre": {"pos": "character", "tags": ["letter"]},
    # Phrases ("locutions")
    "loc": {"pos": "phrase"},
    "loc-phr": {"pos": "phrase"},
    "locution": {"pos": "phrase"},
    "locution phrase": {"pos": "phrase"},
    "locution-phrase": {"pos": "phrase"},
    # Nouns and names
    "nom": {"pos": "noun"},
    "nom commun": {"pos": "noun"},
    "nom de famille": {"pos": "name", "tags": ["surname"]},
    "nom propre": {"pos": "name"},
    "nom scientifique": {"pos": "name", "tags": ["scientific"]},
    "nom-sciences": {"pos": "name", "tags": ["scientific"]},
    "nom science": {"pos": "name", "tags": ["scientific"]},
    "nom scient": {"pos": "name", "tags": ["scientific"]},
    "nom-fam": {"pos": "name", "tags": ["surname"]},
    "nom-pr": {"pos": "name"},
    # Numerals and onomatopoeia
    "num": {"pos": "num"},
    "numér": {"pos": "num"},
    "numéral": {"pos": "num"},
    "onom": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    "onoma": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    "onomatopée": {"pos": "onomatopoeia", "tags": ["onomatopoeic"]},
    # Particles, patronymics, phrases, postpositions and proclitics
    "part": {"pos": "particle"},
    "part-num": {"pos": "particle", "tags": ["numeral"]},
    "particule": {"pos": "particle"},
    "particule num": {"pos": "particle", "tags": ["numeral"]},
    "particule numérale": {"pos": "particle", "tags": ["numeral"]},
    "patronyme": {"pos": "name", "tags": ["surname"]},
    "phr": {"pos": "phrase"},
    "phrase": {"pos": "phrase"},
    "post": {"pos": "postp"},
    "postpos": {"pos": "postp"},
    "postposition": {"pos": "postp"},
    "procl": {"pos": "prefix", "tags": ["clitic"]},
    "proclitique": {"pos": "prefix", "tags": ["clitic"]},
    # Pronouns
    "pronom": {"pos": "pron"},
    "pronom dém": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom démonstratif": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom ind": {"pos": "pron", "tags": ["indefinite"]},
    "pronom indéfini": {"pos": "pron", "tags": ["indefinite"]},
    "pronom int": {"pos": "pron", "tags": ["interrogative"]},
    "pronom interrogatif": {"pos": "pron", "tags": ["interrogative"]},
    "pronom personnel": {"pos": "pron", "tags": ["person"]},
    "pronom pos": {"pos": "pron", "tags": ["possessive"]},
    "pronom possessif": {"pos": "pron", "tags": ["possessive"]},
    "pronom rel": {"pos": "pron", "tags": ["relative"]},
    "pronom relatif": {"pos": "pron", "tags": ["relative"]},
    "pronom réf": {"pos": "pron", "tags": ["person"]},
    "pronom réfléchi": {"pos": "pron", "tags": ["person"]},
    "pronom-adj": {"pos": "pron", "tags": ["adjective"]},
    "pronom-adjectif": {"pos": "pron", "tags": ["adjective"]},
    "pronom-dém": {"pos": "pron", "tags": ["demonstrative"]},
    "pronom-indéf": {"pos": "pron", "tags": ["indefinite"]},
    "pronom-int": {"pos": "pron", "tags": ["interrogative"]},
    "pronom-per": {"pos": "pron", "tags": ["person"]},
    "pronom-pers": {"pos": "pron", "tags": ["person"]},
    "pronom-pos": {"pos": "pron", "tags": ["possessive"]},
    "pronom-rel": {"pos": "pron", "tags": ["relative"]},
    "pronom-réfl": {"pos": "pron", "tags": ["person"]},
    # Proverbs, first names, preverbs, prefixes and prepositions
    "prov": {"pos": "proverb"},
    "proverbe": {"pos": "proverb"},
    "pré-nom": {"pos": "name", "tags": ["first name"]},
    "pré-verbe": {"pos": "preverb"},
    "préf": {"pos": "prefix", "tags": ["morpheme"]},
    "préfixe": {"pos": "prefix", "tags": ["morpheme"]},
    "prénom": {"pos": "name", "tags": ["first name"]},
    "prép": {"pos": "prep"},
    "préposition": {"pos": "prep"},
    # Quantifiers, roots, characters, suffixes and symbols
    "quantif": {"pos": "quantifier"},
    "quantificateur": {"pos": "quantifier"},
    "racine": {"pos": "root", "tags": ["morpheme"]},
    "rad": {"pos": "root", "tags": ["radical"]},
    "radical": {"pos": "root", "tags": ["radical"]},
    "rafsi": {"pos": "affix", "tags": ["rafsi"]},
    "sinogramme": {"pos": "character", "tags": ["letter"]},
    "substantif": {"pos": "noun"},
    "suf": {"pos": "suffix", "tags": ["morpheme"]},
    "suff": {"pos": "suffix", "tags": ["morpheme"]},
    "suffixe": {"pos": "suffix", "tags": ["morpheme"]},
    "symb": {"pos": "symbol"},
    "symbole": {"pos": "symbol"},
    # Typographic variants
    "var-typo": {"pos": "typographic variant", "tags": ["alt-of"]},
    "variante par contrainte typographique": {
        "pos": "typographic variant",
        "tags": ["alt-of"],
    },
    "variante typo": {"pos": "typographic variant", "tags": ["alt-of"]},
    "variante typographique": {
        "pos": "typographic variant",
        "tags": ["alt-of"],
    },
    # Verbs
    "verb": {"pos": "verb"},
    "verb pr": {"pos": "verb", "tags": ["pronominal"]},
    "verb-pr": {"pos": "verb", "tags": ["pronominal"]},
    "verbe": {"pos": "verb"},
    "verbe pronominal": {"pos": "verb", "tags": ["pronominal"]},
}
# Maps linkage section arguments to pydantic field names.
#
# The table is built from (field, aliases) groups: every alias listed in a
# group becomes a key whose value is that group's field.  A field may head
# several groups; the groups are listed in the order in which the keys are
# inserted into the resulting dict.
LINKAGE_SECTIONS: dict[str, str] = {
    section_name: field_name
    for field_name, section_names in (
        ("abbreviation", ("abrév", "abréviations")),
        ("anagrams", ("anagrammes", "anagr", "anagramme")),
        ("antonyms", ("antonymes",)),
        ("related", ("app", "apparentés", "apr")),
        ("derived", ("dérivés autres langues", "dérivés int", "dérivés")),
        ("related", ("dial", "dialectes")),
        ("derived", ("drv-int", "drv")),
        ("related", ("étymologiques",)),
        ("holonyms", ("holo", "holonymes")),
        ("hypernyms", ("hyper", "hyperonymes")),
        ("hyponyms", ("hypo", "hyponymes")),
        ("meronyms", ("méro", "méronymes")),
        ("paronyms", ("paro", "paronymes")),
        ("proverbs", ("phrases",)),
        (
            "synonyms",
            ("q-syn", "quasi-syn", "quasi-synonymes", "syn", "synonymes"),
        ),
        ("troponyms", ("tropo", "troponymes")),
        (
            "related",
            (
                "var-dial",
                "var-ortho",
                "var",
                "variantes dial",
                "variantes dialectales",
                "variantes dialectes",
                "variantes ortho",
                "variantes orthographiques",
                "variantes",
                "voc",
                "vocabulaire apparenté",
                "vocabulaire proche",
                "vocabulaire",
            ),
        ),
    )
    for section_name in section_names
}
# Section names for references, sources, bibliography and citations, in
# their long and abbreviated spellings.  Presumably these sections are
# skipped during extraction (going by the constant's name) -- verify
# against the caller.
IGNORED_SECTIONS: frozenset[str] = frozenset(
    {
        "bib",
        "bibliographie",
        "cit",
        "citations",
        "ref",
        "réf",
        "référence",
        "références",
        "sources",
        "src",
    }
)
# Names of the compounds ("composés") section, long and abbreviated.
COMPOUNDS_SECTIONS: frozenset[str] = frozenset({"compos", "composés"})
# Names of the etymology section; the abbreviation is listed both with and
# without the accent.
ETYMOLOGY_SECTIONS: frozenset[str] = frozenset({"etym", "étym", "étymologie"})
# Names of the inflection sections: declension ("déclinaison") and
# conjugation ("conjugaison"), each with its abbreviation.
INFLECTION_SECTIONS: frozenset[str] = frozenset(
    {"conjug", "conjugaison", "décl", "déclinaison"}
)
# Names of the notes section, singular and plural.
NOTES_SECTIONS: frozenset[str] = frozenset({"note", "notes"})
# Names of the pronunciation section: singular, plural and abbreviated.
PRONUNCIATION_SECTIONS: frozenset[str] = frozenset(
    {"pron", "prononciation", "prononciations"}
)
# Names of the translation sections, including the "to be sorted"
# ("à trier") variants in their spaced and hyphenated spellings.
TRANSLATION_SECTIONS: frozenset[str] = frozenset(
    {
        "trad",
        "trad trier",
        "trad-trier",
        "traductions",
        "traductions à trier",
    }
)