Coverage for src/wiktextract/extractor/fr/tags.py: 85%
47 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
# Grammatical glossary appendix:
# https://fr.wiktionary.org/wiki/Annexe:Glossaire_grammatical
# List of templates:
# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_de_tous_les_modèles
5from .models import WordEntry
7# https://en.wikipedia.org/wiki/Grammatical_gender
8GENDER_TAGS: dict[str, str | list[str]] = {
9 "commun": "common",
10 "féminin": "feminine",
11 "masculin": "masculine",
12 "neutre": "neuter",
13 # https://fr.wiktionary.org/wiki/Modèle:mf
14 "masculin et féminin identiques": ["masculine", "feminine"],
15 # table header: https://fr.wiktionary.org/wiki/Modèle:fr-rég
16 "masculin et féminin": ["masculine", "feminine"],
17 # "Modèle:mf ?", "Modèle:fm ?"
18 "masculin ou féminin (l’usage hésite)": ["masculine", "feminine"],
19 "féminin ou masculin (l’usage hésite)": ["feminine", "masculine"],
20 "invariable": "invariable", # Modèle:invar
21 # Modèle:flex-ku-nommixt
22 "masculin sing.": ["masculine", "singular"],
23 "féminin sing.": ["feminine", "singular"],
24 # Template:ja-flx-adj-な
25 "neutre négatif": ["neuter", "negative"],
26 "neutre passé": ["neuter", "past"],
27 "neutre négatif passé": ["neuter", "negative", "past"],
28 "poli négatif": ["polite", "negative"],
29 "poli passé": ["polite", "past"],
30 "poli négatif passé": ["polite", "negative", "past"],
31 # Template:m
32 "masculin animé": ["masculine", "animate"],
33 "masculin inanimé": ["masculine", "inanimate"],
34 # Template:f
35 "féminin animé": ["feminine", "animate"],
36 "féminin inanimé": ["feminine", "inanimate"],
37 # Template:n
38 "neutre animé": ["neuter", "animate"],
39 "neutre inanimé": ["neuter", "inanimate"],
40}
42# https://en.wikipedia.org/wiki/Grammatical_number
43NUMBER_TAGS: dict[str, str | list[str]] = {
44 "singulier": "singular",
45 "pluriel": "plural",
46 "duel": "dual",
47 "collectif": "collective",
48 "singulatif": "singulative",
49 "indénombrable": "uncountable", # sv-nom-c-ind
50 "au singulier": "singular",
51 "au singulier uniquement": "singular-only",
52 "au pluriel": "plural",
53 "au pluriel uniquement": "plural-only",
54 "singulier et pluriel identiques": ["singular", "plural"],
55 "nom collectif": "collective",
56 # "générique": "", # Modèle:g
57 # "nom d'unité": "", # Modèle:nu
58 "généralement indénombrable": "uncountable",
59 "dénombrable": "countable",
60}
62# https://en.wikipedia.org/wiki/Grammatical_mood
63MOOD_TAGS: dict[str, str] = {
64 "indicatif": "indicative",
65 "subjonctif": "subjunctive",
66 "conditionnel": "conditional",
67 "impératif": "imperative",
68 "volitif": "volitive",
69 "déclaratif": "declarative",
70 "interrogatif": "interrogative",
71 "aperceptif": "apperceptive",
72 "euphémique": "euphemistic",
73 "évidentiel": "evidential",
74 "spéculatif": "speculative",
75 "assertif": "assertive",
76 "hortatif": "hortative",
77 "promissif": "promissive",
78 "conditionnel / subjonctif": ["conditional", "subjunctive"],
79 "conjonctif": "subjunctive",
80 "provisionnel": "temporal",
81}
83VERB_FORM_TAGS: dict[str, str | list[str]] = {
84 "participe": "participle",
85 "imparfait": "imperfect",
86 # Template:ku-conj-trans
87 "parfait": "perfect",
88 "imparfait narratif": ["imperfect", "narrative"],
89 "infinitif": "infinitive",
90 "gérondif": "gerund",
91 # template "pt-verbe-flexion"
92 "infinitif personnel": ["infinitive", "personal"],
93 "supin": "supine",
94 # Template:ko-conj
95 "conjugaison": "conjugation",
96 "radical": "radical",
97 "formes finales": "final",
98 "registre formel": "formal",
99 "registre informel": "informal",
100 "non poli": "impolite",
101 "poli": "polite",
102 "formes nominales": "nominal",
103 "formes conjonctives": "subjunctive",
104 # Template:ja-在る
105 "formes de base": "base-form",
106 "affirmatif": "affirmative",
107 "négatif": "negative",
108 "adverbial": "adverbial",
109 # Template:bg-verbe186
110 "aoriste": "aorist",
111 "participe passé passif": ["participle", "past", "passive"],
112 "participe passé actif": ["participle", "past", "active"],
113 "participe imparfait": ["participle", "imperfect"],
114 "auxiliaire": "auxiliary",
115 "bitransitif": "ditransitive",
116 "déterminé": "determinate",
117 "indéterminé": "indeterminate",
118}
120# https://en.wikipedia.org/wiki/Grammatical_case
121CASE_TAGS: dict[str, str | list[str]] = {
122 "ablatif": "ablative",
123 "accusatif": "accusative",
124 "accusatif génitif": ["accusative", "genitive"],
125 "nominatif": "nominative",
126 "datif": "dative",
127 "génitif": "genitive",
128 "vocatif": "vocative",
129 "instrumental": "instrumental",
130 "locatif": "locative",
131 "comitatif": "comitative",
132 "essif": "essive",
133 "illatif": "illative",
134 # Template:ro-nom-tab
135 "nominatif accusatif": ["nominative", "accusative"],
136 "datif génitif": ["dative", "genitive"],
137 # Template:ko-nom
138 "nominatif / attributif": ["nominative", "attributive"],
139}
141# https://en.wikipedia.org/wiki/Grammatical_tense
142TENSE_TAGS: dict[str, str | list[str]] = {
143 "présent": "present",
144 "passé": "past",
145 "passé simple": "past",
146 "futur": "future",
147 "futur simple": "future",
148 # https://en.wikipedia.org/wiki/Passé_composé
149 "passé composé": ["past", "multiword-construction"],
150 "plus-que-parfait": "pluperfect",
151 "passé antérieur": ["past", "anterior"],
152 "futur antérieur": ["future", "perfect"],
153 "prétérit": "preterite",
154 "présent simple, 3ᵉ pers. sing.": ["present", "third-person", "singular"],
155 "participe passé": ["participle", "past"],
156 "participe présent": ["participle", "present"],
157 # Template:ku-conj-trans
158 "présent progressif": ["present", "progressive"],
159 "prétérit et imparfait": ["preterite", "imperfect"],
160 "non passé": "non-past",
161 "présent / futur": ["present", "future"],
162}
164# https://en.wikipedia.org/wiki/Grammatical_person
165PERSON_TAGS: dict[str, str | list[str]] = {
166 "1ᵉ personne": "first-person",
167 "1ʳᵉ personne": "first-person",
168 "2ᵉ personne": "second-person",
169 "3ᵉ personne": "third-person",
170 # Modèle:avk-conj
171 "1ʳᵉ du sing.": ["first-person", "singular"],
172 "2ᵉ du sing.": ["second-person", "singular"],
173 "3ᵉ du sing.": ["third-person", "singular"],
174 "1ʳᵉ du plur.": ["first-person", "plural"],
175 "2ᵉ du plur.": ["second-person", "plural"],
176 "3ᵉ du plur.": ["third-person", "plural"],
177 "4ᵉ du plur.": ["fourth-person", "plural"],
178}
# Maps lower-case French definiteness labels to English tags.
SEMANTICS_TAGS: dict[str, str] = {
    # https://en.wikipedia.org/wiki/Definiteness
    "défini": "definite",
    "indéfini": "indefinite",
}
186COMPARISON_TAGS: dict[str, str] = {
187 # https://en.wikipedia.org/wiki/Comparison_(grammar)
188 "positif": "positive",
189 "comparatif": "comparative",
190 "superlatif": "superlative",
191 "non comparable": "not-comparable",
192 "superlatif absolu": ["superlative", "absolute"],
193}
# https://en.wikipedia.org/wiki/Occitan_language#Writing_system
# Maps lower-case French names of Occitan spelling norms to English tags;
# the commented-out entries have no English tag assigned.
OCCITAN_NORM_TAGS: dict[str, str] = {
    # https://fr.wiktionary.org/wiki/Modèle:oc-norme_mistralienne
    "graphie mistralienne": "Mistralian",
    # https://fr.wiktionary.org/wiki/Modèle:oc-norme_classique
    # "graphie normalisée": "",
    # Modèle:oc-norme bonnaudienne
    # "graphie bonnaudienne": "",
}
# https://en.wikipedia.org/wiki/Breton_mutations
# https://fr.wiktionary.org/wiki/Modèle:br-nom
# Maps lower-case French Breton-mutation labels to English tags.
BRETON_MUTATION_TAGS: dict[str, str] = {
    "non muté": "unmutated",
    "adoucissante": "mutation-soft",
    "durcissante": "mutation-hard",
    "spirante": "mutation-spirant",
    "nasale": "mutation-nasal",
}
# Maps lower-case Japanese script/transcription labels to English tags.
JA_TAGS: dict[str, str] = {
    # https://fr.wiktionary.org/wiki/Modèle:ja-trans
    "kanji": "kanji",
    "hiragana": "hiragana",
    "katakana": "katakana",
    "transcription": "transcription",
}
223OTHER_GRAMMATICAL_TAGS: dict[str, str] = {
224 # https://fr.wiktionary.org/wiki/Modèle:be-tab-cas
225 "prépositionnel": "prepositional",
226 "anglicisme": "Anglicism",
227 "pronominal": "pronominal",
228 "diminutif": "diminutive",
229 "réfléchi": "reflexive", # Modèle:réfl
230 "réciproque": "reciprocal", # Modèle:réciproque
231 "impersonnel": "impersonal", # Modèle:impers
232 "transitif": "transitive", # Modèle:t
233 "transitif indirect": ["transitive", "indirect"], # Modèle:transitif indir
234 "intransitif": "intransitive", # Modèle:i
235 "injurieux": "offensive", # Modèle:injurieux
236 # Modèle:zh-formes
237 "simplifié": "Simplified Chinese",
238 "traditionnel": "Traditional Chinese",
239 # Modèle:flex-ku-nomf
240 "ézafé principal": ["ezafe", "primary"],
241 "ézafé secondaire": ["ezafe", "secondary"],
242 "cas oblique": "oblique",
243 # Modèle:ku-conj-trans
244 "forme affirmative": "affirmative",
245 "forme négative": "negative",
246 # Modèle:bg-nom
247 "forme de base": "base-form",
248 "pluriel numéral": ["plural", "numeral"],
249 "animé": "animate",
250 "inanimé": "inanimate",
251 # Template:ko-nom
252 "hangeul": "hangeul",
253 "hanja": "hanja",
254 "avec clitique": "clitic",
255 "indéclinable": "indeclinable",
256 "toponyme": "toponymic",
257 "applicatif": "applicative",
258 "causatif": "causative",
259 "sigle": "abbreviation",
260 "attributif": "attributive",
261 "prédicatif": "predicative",
262 # Template:cy-mut
263 "non muté": "unmutated",
264 "lénition": "lenition",
265 "nasalisation": "nasalization",
266 "syllabaire": "Syllabics",
267 "par ellipse": "ellipsis", # Template:ellipse
268 "ironique": "ironic",
269 "suffixe": "suffix",
270}
272# template text before gloss
273SENSE_TAGS: dict[str, str] = {
274 # https://fr.wiktionary.org/wiki/Modèle:figuré
275 # https://fr.wiktionary.org/wiki/Catégorie:Modèles_de_relation_entre_les_définitions
276 # Catégorie:Modèles de genre textuel
277 # Catégorie:Modèles de registre
278 "sens figuré": "figuratively",
279 "sens propre": "literally",
280 "par métonymie": "metonymically", # Modèle:par métonymie
281 "par hyperbole": "hyperbole",
282 "par extension": "broadly",
283 "par analogie": "analogy",
284 "en particulier": "especially",
285 "par litote": "litotes",
286 "par euphémisme": "euphemism",
287 "spécifiquement": "specifically",
288 "génériquement": "generically",
289 "spécialement": "especially",
290 "généralement": "generally",
291 "enclise": "enclitic",
292 "idiotisme": "idiomatic",
293 "péjoratif": "pejorative",
294 "désuet": "obsolete",
295 "archaïsme": "archaic",
296 "vieilli": "dated",
297 "néologisme": "neologism",
298 "argot": "slang",
299 "rare": "rare",
300 # "plus rare": "rare",
301 "littéraire": "literary", # Modèle:littéraire
302 "poétique": "poetic", # Modèle:poétique
303 # "didactique": "", # Modèle:didactique
304 "soutenu": "formal", # Modèle:soutenu
305 "informel": "informal", # Modèle:informel
306 "familier": "familiar", # Modèle:familier
307 "très familier": "very-familiar", # Modèle:très familier
308 "populaire": "colloquial", # Modèle:populaire
309 "vulgaire": "vulgar", # Modèle:vulgaire
310 "langage enfantin": "childish", # Modèle:enfantin
311 # Catégorie:Modèles de thématique
312 "anglicisme informatique": "Anglicism",
313 "proverbe": "proverb",
314 "collectivement": "collectively",
315 "courant": "common", # Modèle:courant
316 "adjectif attribut": ["adjective", "attributive"],
317}
319# https://en.wikipedia.org/wiki/Voice_(grammar)
320VOICE_TAGS: dict[str, str | list[str]] = {
321 # https://fr.wiktionary.org/wiki/Modèle:eo-conj
322 "participe actif": ["participle", "active"],
323 "participe passif": ["participle", "passive"],
324 "adverbe actif": ["adverb", "active"],
325 "adverbe passif": ["adverb", "passive"],
326 "substantif actif": ["subsuntive", "active"],
327 "substantif passif": ["subsuntive", "passive"],
328 "actif": "active",
329 "passif": "passive",
330 "adverbe": "adverb",
331}
# Module:lexique/data
# Maps lower-case French lexicon (subject field) labels to English tags.
LEXIQUE_TAGS: dict[str, str] = {
    "hindouisme": "Hinduism",
    "judaïsme": "Judaism",
    "marxisme": "Marxism",
    "nazisme": "Nazism",
    "physique": "physical",
    "rhétorique": "rhetoric",
    "antiquité": "Ancient",
    "antiquité grecque": "Ancient-Greek",
    "antiquité romaine": "Ancient-Roman",
    "bible": "Biblical",
    "moyen âge": "Middle-Ages",
    "union européenne": "European-Union",
    "analyse": "analytic",
}
350# Template:cmn-pron
351# https://fr.wiktionary.org/wiki/自由
352ZH_PRON_TAGS = {
353 "pinyin": "Pinyin",
354 "efeo": "EFEO", # https://en.wikipedia.org/wiki/EFEO_Chinese_transcription
355 "wade-giles": "Wade-Giles",
356 "yale": "Yale",
357 "zhuyin": "Bopomofo",
358 "mandarin": "Mandarin",
359 "cantonais": "Cantonese",
360 "cantonais (yue)": "Cantonese",
361 "jyutping": "Jyutping",
362 "hakka": "Hakka",
363 "pha̍k-fa-sṳ": "Phak-fa-su",
364 "meixian, guangdong": ["Meixian", "Guangdong"],
365 "jin": "Jin",
366 "mindong": "Eastern-Min",
367 # https://en.wikipedia.org/wiki/Bàng-uâ-cê
368 "bàng-uâ-cê (fuzhou)": ["Bang-ua-ce", "Fuzhou"],
369 "minnan": "Min",
370 "pe̍h-ōe-jī (hokkien : fujian, taïwan)": [
371 "Peh-oe-ji",
372 "Hokkien",
373 "Fujian",
374 "Taiwan",
375 ],
376 "chaozhou, peng'im": ["Chaozhou", "Peng'im"],
377 "wu": "Wu",
378 "shanghai": "Shanghai",
379 "chinois médiéval": "Medieval-Chinese",
380 "chinois archaïque": "Old-Chinese",
381 "baxter-sagart": "Baxter-Sagart",
382 "zhengzhang": "Zhengzhang",
383}
# Maps lower-case French aspect labels to English tags.
ASPECT_TAGS: dict[str, str] = {
    "perfectif": "perfective",  # Modèle:perfectif
    "imperfectif": "imperfective",  # Modèle:imperfectif
}
# Union of every tag table in this module. When the same French label
# occurs in more than one table, the entry from the table listed later
# here wins.
GRAMMATICAL_TAGS: dict[str, str | list[str]] = {
    **GENDER_TAGS,
    **NUMBER_TAGS,
    **MOOD_TAGS,
    **VERB_FORM_TAGS,
    **CASE_TAGS,
    **TENSE_TAGS,
    **PERSON_TAGS,
    **SEMANTICS_TAGS,
    **COMPARISON_TAGS,
    **OCCITAN_NORM_TAGS,
    **BRETON_MUTATION_TAGS,
    **JA_TAGS,
    **OTHER_GRAMMATICAL_TAGS,
    **SENSE_TAGS,
    **VOICE_TAGS,
    **LEXIQUE_TAGS,
    **ZH_PRON_TAGS,
    **ASPECT_TAGS,
}
def translate_raw_tags(
    data: WordEntry,
    table_template_name: str = "",
    tag_dict: dict[str, str | list[str]] = GRAMMATICAL_TAGS,
) -> WordEntry:
    """Translate the French labels in ``data.raw_tags`` to English tags.

    Each raw tag is lower-cased and looked up, in this order, in
    ``tag_dict``, ``TOPIC_TAGS`` and ``SLANG_TOPICS``. A match is appended
    to ``data.tags`` (or ``data.topics`` for the two topic tables) and the
    label is dropped from ``data.raw_tags``. A slang topic match also adds
    the ``"slang"`` tag once. Labels that match nothing are kept in
    ``data.raw_tags`` with their original casing.

    Args:
        data: Entry whose ``raw_tags`` are translated; modified in place.
        table_template_name: When non-empty, the result is additionally
            passed through ``convert_table_headers`` with this name.
        tag_dict: Lookup table from lower-case French label to a tag or
            a list of tags. The shared default is only read, never mutated.

    Returns:
        The same ``data`` object after translation.
    """
    # Imported inside the function rather than at module level; presumably
    # to avoid a circular import - TODO confirm.
    from .topics import SLANG_TOPICS, TOPIC_TAGS

    # Topic tables only apply to entries that expose a ``topics`` attribute.
    has_topics = hasattr(data, "topics")
    untranslated = []
    for raw_tag in data.raw_tags:
        raw_tag_lower = raw_tag.lower()
        if raw_tag_lower in tag_dict:
            tr_tag = tag_dict[raw_tag_lower]
            if isinstance(tr_tag, str):
                data.tags.append(tr_tag)
            elif isinstance(tr_tag, list):
                data.tags.extend(tr_tag)
        elif has_topics and raw_tag_lower in TOPIC_TAGS:
            data.topics.append(TOPIC_TAGS[raw_tag_lower])
        elif has_topics and raw_tag_lower in SLANG_TOPICS:
            data.topics.append(SLANG_TOPICS[raw_tag_lower])
            if "slang" not in data.tags:
                data.tags.append("slang")
        else:
            untranslated.append(raw_tag)
    data.raw_tags = untranslated
    if table_template_name != "":
        return convert_table_headers(data, table_template_name)
    return data
def convert_table_headers(data: WordEntry, template_name: str) -> WordEntry:
    """Translate table headers that only make sense for one template.

    Only ``avk-tab-conjug`` is handled: its raw tags "1" to "4" are
    translated to person tags by calling ``translate_raw_tags`` with a
    dedicated lookup table. For any other template name ``data`` is
    returned unchanged.

    Args:
        data: Entry whose ``raw_tags`` may hold template-specific headers.
        template_name: Name of the table template the entry came from.

    Returns:
        The ``data`` object, translated when the template is handled.
    """
    if template_name != "avk-tab-conjug":
        return data
    # https://fr.wiktionary.org/wiki/Modèle:avk-tab-conjug
    person_tags = {
        "1": "first-person",
        "2": "second-person",
        "3": "third-person",
        "4": "fourth-person",
    }
    return translate_raw_tags(data, tag_dict=person_tags)