Coverage for src/wiktextract/extractor/fr/tags.py: 85%
47 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1# Grammatical glossary appendix:
2# https://fr.wiktionary.org/wiki/Annexe:Glossaire_grammatical
3# List of templates:
4# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_de_tous_les_modèles
5from .models import WordEntry
7# https://en.wikipedia.org/wiki/Grammatical_gender
8GENDER_TAGS: dict[str, str | list[str]] = {
9 "commun": "common",
10 "féminin": "feminine",
11 "masculin": "masculine",
12 "neutre": "neuter",
13 # https://fr.wiktionary.org/wiki/Modèle:mf
14 "masculin et féminin identiques": ["masculine", "feminine"],
15 # table header: https://fr.wiktionary.org/wiki/Modèle:fr-rég
16 "masculin et féminin": ["masculine", "feminine"],
17 # "Modèle:mf ?", "Modèle:fm ?"
18 "masculin ou féminin (l’usage hésite)": ["masculine", "feminine"],
19 "féminin ou masculin (l’usage hésite)": ["feminine", "masculine"],
20 "invariable": "invariable", # Modèle:invar
21 # Modèle:flex-ku-nommixt
22 "masculin sing.": ["masculine", "singular"],
23 "féminin sing.": ["feminine", "singular"],
24 # Template:ja-flx-adj-な
25 "neutre négatif": ["neuter", "negative"],
26 "neutre passé": ["neuter", "past"],
27 "neutre négatif passé": ["neuter", "negative", "past"],
28 "poli négatif": ["polite", "negative"],
29 "poli passé": ["polite", "past"],
30 "poli négatif passé": ["polite", "negative", "past"],
31 # Template:m
32 "masculin animé": ["masculine", "animate"],
33 "masculin inanimé": ["masculine", "inanimate"],
34 # Template:f
35 "féminin animé": ["feminine", "animate"],
36 "féminin inanimé": ["feminine", "inanimate"],
37 # Template:n
38 "neutre animé": ["neuter", "animate"],
39 "neutre inanimé": ["neuter", "inanimate"],
40}
42# https://en.wikipedia.org/wiki/Grammatical_number
43NUMBER_TAGS: dict[str, str | list[str]] = {
44 "singulier": "singular",
45 "pluriel": "plural",
46 "duel": "dual",
47 "collectif": "collective",
48 "singulatif": "singulative",
49 "indénombrable": "uncountable", # sv-nom-c-ind
50 "au singulier": "singular",
51 "au singulier uniquement": "singular-only",
52 "au pluriel": "plural",
53 "au pluriel uniquement": "plural-only",
54 "singulier et pluriel identiques": ["singular", "plural"],
55 "nom collectif": "collective",
56 # "générique": "", # Modèle:g
57 # "nom d'unité": "", # Modèle:nu
58 "généralement indénombrable": "uncountable",
59 "dénombrable": "countable",
60}
62# https://en.wikipedia.org/wiki/Grammatical_mood
63MOOD_TAGS: dict[str, str] = {
64 "indicatif": "indicative",
65 "subjonctif": "subjunctive",
66 "conditionnel": "conditional",
67 "impératif": "imperative",
68 "volitif": "volitive",
69 "déclaratif": "declarative",
70 "interrogatif": "interrogative",
71 "aperceptif": "apperceptive",
72 "euphémique": "euphemistic",
73 "évidentiel": "evidential",
74 "spéculatif": "speculative",
75 "assertif": "assertive",
76 "hortatif": "hortative",
77 "promissif": "promissive",
78 "conditionnel / subjonctif": ["conditional", "subjunctive"],
79 "conjonctif": "subjunctive",
80 "provisionnel": "temporal",
81}
83VERB_FORM_TAGS: dict[str, str | list[str]] = {
84 "participe": "participle",
85 "imparfait": "imperfect",
86 # Template:ku-conj-trans
87 "parfait": "perfect",
88 "imparfait narratif": ["imperfect", "narrative"],
89 "infinitif": "infinitive",
90 "gérondif": "gerund",
91 # template "pt-verbe-flexion"
92 "infinitif personnel": ["infinitive", "personal"],
93 "supin": "supine",
94 # Template:ko-conj
95 "conjugaison": "conjugation",
96 "radical": "radical",
97 "formes finales": "final",
98 "registre formel": "formal",
99 "registre informel": "informal",
100 "non poli": "impolite",
101 "poli": "polite",
102 "formes nominales": "nominal",
103 "formes conjonctives": "subjunctive",
104 # Template:ja-在る
105 "formes de base": "base-form",
106 "affirmatif": "affirmative",
107 "négatif": "negative",
108 "adverbial": "adverbial",
109 # Template:bg-verbe186
110 "aoriste": "aorist",
111 "participe passé passif": ["participle", "past", "passive"],
112 "participe passé actif": ["participle", "past", "active"],
113 "participe imparfait": ["participle", "imperfect"],
114 "auxiliaire": "auxiliary",
115 "bitransitif": "ditransitive",
116 "déterminé": "determinate",
117 "indéterminé": "indeterminate",
118}
120# https://en.wikipedia.org/wiki/Grammatical_case
121CASE_TAGS: dict[str, str | list[str]] = {
122 "ablatif": "ablative",
123 "accusatif": "accusative",
124 "accusatif génitif": ["accusative", "genitive"],
125 "nominatif": "nominative",
126 "datif": "dative",
127 "génitif": "genitive",
128 "vocatif": "vocative",
129 "instrumental": "instrumental",
130 "locatif": "locative",
131 "comitatif": "comitative",
132 "essif": "essive",
133 "illatif": "illative",
134 # Template:ro-nom-tab
135 "nominatif accusatif": ["nominative", "accusative"],
136 "datif génitif": ["dative", "genitive"],
137 # Template:ko-nom
138 "nominatif / attributif": ["nominative", "attributive"],
139 # Modèle:fro-adj
140 "sujet": "subject",
141 "régime": "oblique",
142}
144# https://en.wikipedia.org/wiki/Grammatical_tense
145TENSE_TAGS: dict[str, str | list[str]] = {
146 "présent": "present",
147 "passé": "past",
148 "passé simple": "past",
149 "futur": "future",
150 "futur simple": "future",
151 # https://en.wikipedia.org/wiki/Passé_composé
152 "passé composé": ["past", "multiword-construction"],
153 "plus-que-parfait": "pluperfect",
154 "passé antérieur": ["past", "anterior"],
155 "futur antérieur": ["future", "perfect"],
156 "prétérit": "preterite",
157 "présent simple, 3ᵉ pers. sing.": ["present", "third-person", "singular"],
158 "participe passé": ["participle", "past"],
159 "participe présent": ["participle", "present"],
160 # Template:ku-conj-trans
161 "présent progressif": ["present", "progressive"],
162 "prétérit et imparfait": ["preterite", "imperfect"],
163 "non passé": "non-past",
164 "présent / futur": ["present", "future"],
165}
167# https://en.wikipedia.org/wiki/Grammatical_person
168PERSON_TAGS: dict[str, str | list[str]] = {
169 "1ᵉ personne": "first-person",
170 "1ʳᵉ personne": "first-person",
171 "2ᵉ personne": "second-person",
172 "3ᵉ personne": "third-person",
173 # Modèle:avk-conj
174 "1ʳᵉ du sing.": ["first-person", "singular"],
175 "2ᵉ du sing.": ["second-person", "singular"],
176 "3ᵉ du sing.": ["third-person", "singular"],
177 "1ʳᵉ du plur.": ["first-person", "plural"],
178 "2ᵉ du plur.": ["second-person", "plural"],
179 "3ᵉ du plur.": ["third-person", "plural"],
180 "4ᵉ du plur.": ["fourth-person", "plural"],
181}
# Definiteness labels (French, lower-case) -> English tag.
# Reference: https://en.wikipedia.org/wiki/Definiteness
SEMANTICS_TAGS: dict[str, str] = {
    "défini": "definite",
    "indéfini": "indefinite",
}
189COMPARISON_TAGS: dict[str, str] = {
190 # https://en.wikipedia.org/wiki/Comparison_(grammar)
191 "positif": "positive",
192 "comparatif": "comparative",
193 "superlatif": "superlative",
194 "non comparable": "not-comparable",
195 "superlatif absolu": ["superlative", "absolute"],
196}
# Occitan orthographic-norm labels (French, lower-case) -> English tag.
# Reference: https://en.wikipedia.org/wiki/Occitan_language#Writing_system
OCCITAN_NORM_TAGS: dict[str, str] = {
    # https://fr.wiktionary.org/wiki/Modèle:oc-norme_mistralienne
    "graphie mistralienne": "Mistralian",
    # Deliberately left unmapped (disabled entries kept from the original):
    # https://fr.wiktionary.org/wiki/Modèle:oc-norme_classique
    # "graphie normalisée": "",
    # Modèle:oc-norme bonnaudienne
    # "graphie bonnaudienne": "",
}
# Breton consonant-mutation labels (French, lower-case) -> English tag.
# References:
#   https://en.wikipedia.org/wiki/Breton_mutations
#   https://fr.wiktionary.org/wiki/Modèle:br-nom
BRETON_MUTATION_TAGS: dict[str, str] = {
    "non muté": "unmutated",
    "adoucissante": "mutation-soft",
    "durcissante": "mutation-hard",
    "spirante": "mutation-spirant",
    "nasale": "mutation-nasal",
}
# Japanese script / transcription labels -> tag (identity mapping).
# Reference: https://fr.wiktionary.org/wiki/Modèle:ja-trans
JA_TAGS: dict[str, str] = {
    "kanji": "kanji",
    "hiragana": "hiragana",
    "katakana": "katakana",
    "transcription": "transcription",
}
226OTHER_GRAMMATICAL_TAGS: dict[str, str] = {
227 # https://fr.wiktionary.org/wiki/Modèle:be-tab-cas
228 "prépositionnel": "prepositional",
229 "anglicisme": "Anglicism",
230 "pronominal": "pronominal",
231 "diminutif": "diminutive",
232 "réfléchi": "reflexive", # Modèle:réfl
233 "réciproque": "reciprocal", # Modèle:réciproque
234 "impersonnel": "impersonal", # Modèle:impers
235 "transitif": "transitive", # Modèle:t
236 "transitif indirect": ["transitive", "indirect"], # Modèle:transitif indir
237 "intransitif": "intransitive", # Modèle:i
238 "injurieux": "offensive", # Modèle:injurieux
239 # Modèle:zh-formes
240 "simplifié": "Simplified-Chinese",
241 "traditionnel": "Traditional-Chinese",
242 # Modèle:flex-ku-nomf
243 "ézafé principal": ["ezafe", "primary"],
244 "ézafé secondaire": ["ezafe", "secondary"],
245 "cas oblique": "oblique",
246 # Modèle:ku-conj-trans
247 "forme affirmative": "affirmative",
248 "forme négative": "negative",
249 # Modèle:bg-nom
250 "forme de base": "base-form",
251 "pluriel numéral": ["plural", "numeral"],
252 "animé": "animate",
253 "inanimé": "inanimate",
254 # Template:ko-nom
255 "hangeul": "hangeul",
256 "hanja": "hanja",
257 "avec clitique": "clitic",
258 "indéclinable": "indeclinable",
259 "toponyme": "toponymic",
260 "applicatif": "applicative",
261 "causatif": "causative",
262 "sigle": "abbreviation",
263 "attributif": "attributive",
264 "prédicatif": "predicative",
265 # Template:cy-mut
266 "non muté": "unmutated",
267 "lénition": "lenition",
268 "nasalisation": "nasalization",
269 "syllabaire": "Syllabics",
270 "par ellipse": "ellipsis", # Template:ellipse
271 "ironique": "ironic",
272 "suffixe": "suffix",
273}
275# template text before gloss
276SENSE_TAGS: dict[str, str] = {
277 # https://fr.wiktionary.org/wiki/Modèle:figuré
278 # https://fr.wiktionary.org/wiki/Catégorie:Modèles_de_relation_entre_les_définitions
279 # Catégorie:Modèles de genre textuel
280 # Catégorie:Modèles de registre
281 "sens figuré": "figuratively",
282 "sens propre": "literally",
283 "par métonymie": "metonymically", # Modèle:par métonymie
284 "par hyperbole": "hyperbole",
285 "par extension": "broadly",
286 "par analogie": "analogy",
287 "en particulier": "especially",
288 "par litote": "litotes",
289 "par euphémisme": "euphemism",
290 "spécifiquement": "specifically",
291 "génériquement": "generically",
292 "spécialement": "especially",
293 "généralement": "generally",
294 "enclise": "enclitic",
295 "idiotisme": "idiomatic",
296 "péjoratif": "pejorative",
297 "désuet": "obsolete",
298 "archaïsme": "archaic",
299 "vieilli": "dated",
300 "néologisme": "neologism",
301 "argot": "slang",
302 "rare": "rare",
303 # "plus rare": "rare",
304 "littéraire": "literary", # Modèle:littéraire
305 "poétique": "poetic", # Modèle:poétique
306 # "didactique": "", # Modèle:didactique
307 "soutenu": "formal", # Modèle:soutenu
308 "informel": "informal", # Modèle:informel
309 "familier": "familiar", # Modèle:familier
310 "très familier": "very-familiar", # Modèle:très familier
311 "populaire": "colloquial", # Modèle:populaire
312 "vulgaire": "vulgar", # Modèle:vulgaire
313 "langage enfantin": "childish", # Modèle:enfantin
314 # Catégorie:Modèles de thématique
315 "anglicisme informatique": "Anglicism",
316 "proverbe": "proverb",
317 "collectivement": "collectively",
318 "courant": "common", # Modèle:courant
319 "adjectif attribut": ["adjective", "attributive"],
320}
322# https://en.wikipedia.org/wiki/Voice_(grammar)
323VOICE_TAGS: dict[str, str | list[str]] = {
324 # https://fr.wiktionary.org/wiki/Modèle:eo-conj
325 "participe actif": ["participle", "active"],
326 "participe passif": ["participle", "passive"],
327 "adverbe actif": ["adverb", "active"],
328 "adverbe passif": ["adverb", "passive"],
329 "substantif actif": ["subsuntive", "active"],
330 "substantif passif": ["subsuntive", "passive"],
331 "actif": "active",
332 "passif": "passive",
333 "adverbe": "adverb",
334}
# Lexicon labels from Module:lexique/data (French, lower-case)
# -> English tag.  Annotated like the other tag tables in this module.
LEXIQUE_TAGS: dict[str, str] = {
    "hindouisme": "Hinduism",
    "judaïsme": "Judaism",
    "marxisme": "Marxism",
    "nazisme": "Nazism",
    "physique": "physical",
    "rhétorique": "rhetoric",
    "antiquité": "Ancient",
    "antiquité grecque": "Ancient-Greek",
    "antiquité romaine": "Ancient-Roman",
    "bible": "Biblical",
    "moyen âge": "Middle-Ages",
    "union européenne": "European-Union",
    "analyse": "analytic",
}
353# Template:cmn-pron
354# https://fr.wiktionary.org/wiki/自由
355ZH_PRON_TAGS = {
356 "pinyin": "Pinyin",
357 "efeo": "EFEO", # https://en.wikipedia.org/wiki/EFEO_Chinese_transcription
358 "wade-giles": "Wade-Giles",
359 "yale": "Yale",
360 "zhuyin": "Bopomofo",
361 "mandarin": "Mandarin",
362 "cantonais": "Cantonese",
363 "cantonais (yue)": "Cantonese",
364 "jyutping": "Jyutping",
365 "hakka": "Hakka",
366 "pha̍k-fa-sṳ": "Phak-fa-su",
367 "meixian, guangdong": ["Meixian", "Guangdong"],
368 "jin": "Jin",
369 "mindong": "Eastern-Min",
370 # https://en.wikipedia.org/wiki/Bàng-uâ-cê
371 "bàng-uâ-cê (fuzhou)": ["Bang-ua-ce", "Fuzhou"],
372 "minnan": "Min",
373 "pe̍h-ōe-jī (hokkien : fujian, taïwan)": [
374 "Peh-oe-ji",
375 "Hokkien",
376 "Fujian",
377 "Taiwan",
378 ],
379 "chaozhou, peng'im": ["Chaozhou", "Peng'im"],
380 "wu": "Wu",
381 "shanghai": "Shanghai",
382 "chinois médiéval": "Medieval-Chinese",
383 "chinois archaïque": "Old-Chinese",
384 "baxter-sagart": "Baxter-Sagart",
385 "zhengzhang": "Zhengzhang",
386}
# Grammatical aspect labels (French, lower-case) -> English tag.
# Annotated like the other tag tables in this module.
ASPECT_TAGS: dict[str, str] = {
    "perfectif": "perfective",  # Modèle:perfectif
    "imperfectif": "imperfective",  # Modèle:imperfectif
}
# Union of every tag table above; this is the default lookup table of
# translate_raw_tags().  Tables are merged left to right, so when a key
# occurs in more than one table the right-most table's value is kept.
GRAMMATICAL_TAGS: dict[str, str | list[str]] = (
    GENDER_TAGS
    | NUMBER_TAGS
    | MOOD_TAGS
    | VERB_FORM_TAGS
    | CASE_TAGS
    | TENSE_TAGS
    | PERSON_TAGS
    | SEMANTICS_TAGS
    | COMPARISON_TAGS
    | OCCITAN_NORM_TAGS
    | BRETON_MUTATION_TAGS
    | JA_TAGS
    | OTHER_GRAMMATICAL_TAGS
    | SENSE_TAGS
    | VOICE_TAGS
    | LEXIQUE_TAGS
    | ZH_PRON_TAGS
    | ASPECT_TAGS
)
def translate_raw_tags(
    data: WordEntry,
    table_template_name: str = "",
    tag_dict: dict[str, str | list[str]] = GRAMMATICAL_TAGS,
) -> WordEntry:
    """Translate the recognised entries of ``data.raw_tags`` in place.

    Each raw tag is lower-cased and looked up, in this order:

    1. ``tag_dict``: the mapped tag (``str``) or tags (``list``) are added
       to ``data.tags``.
    2. ``TOPIC_TAGS``: the mapped topic is appended to ``data.topics``.
    3. ``SLANG_TOPICS``: the mapped topic is appended to ``data.topics``
       and the tag ``"slang"`` is added to ``data.tags`` if not yet there.

    Steps 2 and 3 only apply when ``data`` has a ``topics`` attribute.
    Raw tags that match nothing are kept, with their original casing, in
    ``data.raw_tags``; matched ones are removed from it.

    Args:
        data: Object whose ``raw_tags``, ``tags`` and (optionally)
            ``topics`` lists are updated in place.
        table_template_name: When non-empty, the result is passed on to
            ``convert_table_headers`` for template-specific handling.
        tag_dict: Lookup table with lower-case keys; values are a single
            tag or a list of tags. It is only read, never modified.

    Returns:
        The same ``data`` object (possibly after
        ``convert_table_headers``).
    """
    # Imported locally, as in the original code (presumably to avoid an
    # import cycle with ``.topics`` -- TODO confirm).
    from .topics import SLANG_TOPICS, TOPIC_TAGS

    raw_tags = []
    for raw_tag in data.raw_tags:
        raw_tag_lower = raw_tag.lower()
        if raw_tag_lower in tag_dict:
            tr_tag = tag_dict[raw_tag_lower]
            if isinstance(tr_tag, str):
                data.tags.append(tr_tag)
            elif isinstance(tr_tag, list):
                data.tags.extend(tr_tag)
        elif hasattr(data, "topics") and raw_tag_lower in TOPIC_TAGS:
            data.topics.append(TOPIC_TAGS[raw_tag_lower])
        elif hasattr(data, "topics") and raw_tag_lower in SLANG_TOPICS:
            data.topics.append(SLANG_TOPICS[raw_tag_lower])
            # A slang topic implies the generic "slang" tag; add it once.
            if "slang" not in data.tags:
                data.tags.append("slang")
        else:
            # Unknown label: keep it untouched for later inspection.
            raw_tags.append(raw_tag)
    data.raw_tags = raw_tags
    if table_template_name != "":
        return convert_table_headers(data, table_template_name)
    return data
def convert_table_headers(data: WordEntry, template_name: str) -> WordEntry:
    """Translate table-header raw tags that are specific to one template.

    Only the "avk-tab-conjug" template is handled: its numeric headers
    "1" to "4" are translated to person tags by running
    ``translate_raw_tags`` with a dedicated lookup table. For any other
    template name ``data`` is returned unchanged.
    """
    if template_name != "avk-tab-conjug":
        return data

    # https://fr.wiktionary.org/wiki/Modèle:avk-tab-conjug
    person_by_header = {
        "1": "first-person",
        "2": "second-person",
        "3": "third-person",
        "4": "fourth-person",
    }
    return translate_raw_tags(data, tag_dict=person_by_header)