Coverage for src/wiktextract/extractor/fr/tags.py: 85%
48 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
1# Grammatical glossary appendix:
2# https://fr.wiktionary.org/wiki/Annexe:Glossaire_grammatical
3# List of templates:
4# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_de_tous_les_modèles
5from typing import Union
7from .models import WordEntry
# Grammatical gender labels in French mapped to English tag(s).
# A value is a single tag, or a list of tags when one French label covers
# several genders.  Keys are lower-case because translate_raw_tags
# lower-cases the raw tag before looking it up.
# https://en.wikipedia.org/wiki/Grammatical_gender
GENDER_TAGS: dict[str, Union[str, list[str]]] = {
    "commun": "common",
    "féminin": "feminine",
    "masculin": "masculine",
    "neutre": "neuter",
    # https://fr.wiktionary.org/wiki/Modèle:mf
    "masculin et féminin identiques": ["masculine", "feminine"],
    # table header: https://fr.wiktionary.org/wiki/Modèle:fr-rég
    "masculin et féminin": ["masculine", "feminine"],
    # "Modèle:mf ?", "Modèle:fm ?"
    "masculin ou féminin (l’usage hésite)": ["masculine", "feminine"],
    "féminin ou masculin (l’usage hésite)": ["feminine", "masculine"],
    "invariable": "invariable",  # Modèle:invar
}
# Grammatical number and countability labels in French mapped to English
# tag(s).  A list value means the label yields several tags at once.
# https://en.wikipedia.org/wiki/Grammatical_number
NUMBER_TAGS: dict[str, Union[str, list[str]]] = {
    "singulier": "singular",
    "pluriel": "plural",
    "duel": "dual",
    "collectif": "collective",
    "singulatif": "singulative",
    "indénombrable": "uncountable",  # sv-nom-c-ind
    "au singulier": "singular",
    "au singulier uniquement": "singular-only",
    "au pluriel": "plural",
    "au pluriel uniquement": "plural-only",
    "singulier et pluriel identiques": ["singular", "plural"],
    "nom collectif": "collective",
    # Labels below are known but have no English tag assigned yet.
    # "générique": "",  # Modèle:g
    # "nom d'unité": "",  # Modèle:nu
    "généralement indénombrable": "uncountable",
    "dénombrable": "countable",
}
# Grammatical mood labels in French mapped to one English tag each.
# https://en.wikipedia.org/wiki/Grammatical_mood
MOOD_TAGS: dict[str, str] = {
    "indicatif": "indicative",
    "subjonctif": "subjunctive",
    "conditionnel": "conditional",
    "impératif": "imperative",
    "volitif": "volitive",
}
# Verb form labels in French mapped to English tag(s).  A list value means
# the label yields several tags at once.
VERB_FORM_TAGS: dict[str, Union[str, list[str]]] = {
    "participe": "participle",
    "imparfait": "imperfect",
    "infinitif": "infinitive",
    "gérondif": "gerund",
    # template "pt-verbe-flexion"
    "infinitif personnel": ["infinitive", "personal"],
    "supin": "supine",
}
# Grammatical case labels in French mapped to English tag(s).  A list value
# means the label yields several tags at once.
# https://en.wikipedia.org/wiki/Grammatical_case
CASE_TAGS: dict[str, Union[str, list[str]]] = {
    "ablatif": "ablative",
    "accusatif": "accusative",
    "accusatif génitif": ["accusative", "genitive"],
    "nominatif": "nominative",
    "datif": "dative",
    "génitif": "genitive",
    "vocatif": "vocative",
    "instrumental": "instrumental",
    "locatif": "locative",
    "comitatif": "comitative",
    "essif": "essive",
    "illatif": "illative",
}
# Grammatical tense labels in French mapped to English tag(s).
# A value is a single tag, or a list when the label yields several tags.
# Compound labels must be lists: translate_raw_tags appends a plain string
# as ONE tag, so a space-joined string such as "past anterior" would end up
# as a single bogus tag instead of two separate ones.
# https://en.wikipedia.org/wiki/Grammatical_tense
TENSE_TAGS: dict[str, Union[str, list[str]]] = {
    "présent": "present",
    "passé": "past",
    "passé simple": "past",
    "futur": "future",
    "futur simple": "future",
    # https://en.wikipedia.org/wiki/Passé_composé
    "passé composé": ["past", "multiword-construction"],
    "plus-que-parfait": "pluperfect",
    "passé antérieur": ["past", "anterior"],
    "futur antérieur": ["future", "perfect"],
    "prétérit": "preterite",
    "présent simple, 3ᵉ pers. sing.": ["present", "third-person", "singular"],
    "participe passé": ["participle", "past"],
    "participe présent": ["participle", "present"],
}
# Grammatical person labels in French mapped to English tag(s).  The
# abbreviated "du sing." / "du plur." labels combine person and number, so
# they map to a list of two tags.
# https://en.wikipedia.org/wiki/Grammatical_person
PERSON_TAGS: dict[str, Union[str, list[str]]] = {
    "1ᵉ personne": "first-person",
    "1ʳᵉ personne": "first-person",
    "2ᵉ personne": "second-person",
    "3ᵉ personne": "third-person",
    # Modèle:avk-conj
    "1ʳᵉ du sing.": ["first-person", "singular"],
    "2ᵉ du sing.": ["second-person", "singular"],
    "3ᵉ du sing.": ["third-person", "singular"],
    "1ʳᵉ du plur.": ["first-person", "plural"],
    "2ᵉ du plur.": ["second-person", "plural"],
    "3ᵉ du plur.": ["third-person", "plural"],
    "4ᵉ du plur.": ["fourth-person", "plural"],
}
# Definiteness labels in French mapped to one English tag each.
SEMANTICS_TAGS: dict[str, str] = {
    # https://en.wikipedia.org/wiki/Definiteness
    "défini": "definite",
    "indéfini": "indefinite",
}
# Degree-of-comparison labels in French mapped to one English tag each.
COMPARISON_TAGS: dict[str, str] = {
    # https://en.wikipedia.org/wiki/Comparison_(grammar)
    "positif": "positive",
    "comparatif": "comparative",
    "superlatif": "superlative",
}
# Occitan spelling-norm labels mapped to English tags.  Only the Mistralian
# norm has a tag; the other two labels are listed but left commented out
# with no tag assigned.
# https://en.wikipedia.org/wiki/Occitan_language#Writing_system
OCCITAN_NORM_TAGS: dict[str, str] = {
    # https://fr.wiktionary.org/wiki/Modèle:oc-norme_mistralienne
    "graphie mistralienne": "Mistralian",
    # https://fr.wiktionary.org/wiki/Modèle:oc-norme_classique
    # "graphie normalisée": "",
    # Modèle:oc-norme bonnaudienne
    # "graphie bonnaudienne": "",
}
# Breton consonant-mutation labels in French mapped to one English tag each.
# https://en.wikipedia.org/wiki/Breton_mutations
# https://fr.wiktionary.org/wiki/Modèle:br-nom
BRETON_MUTATION_TAGS: dict[str, str] = {
    "non muté": "unmutated",
    "adoucissante": "mutation-soft",
    "durcissante": "mutation-hard",
    "spirante": "mutation-spirant",
    "nasale": "mutation-nasal",
}
# Japanese script/transcription labels.  The keys and values are identical;
# listing them here makes translate_raw_tags move them from raw_tags to tags.
JA_TAGS: dict[str, str] = {
    # https://fr.wiktionary.org/wiki/Modèle:ja-trans
    "kanji": "kanji",
    "hiragana": "hiragana",
    "katakana": "katakana",
    "transcription": "transcription",
}
# Miscellaneous French labels that do not belong to any of the other tables
# in this module, mapped to one English tag each.
OTHER_GRAMMATICAL_TAGS: dict[str, str] = {
    # https://fr.wiktionary.org/wiki/Modèle:be-tab-cas
    "prépositionnel": "prepositional",
    "anglicisme": "Anglicism",
    "pronominal": "pronominal",
    "diminutif": "diminutive",
    "réfléchi": "reflexive",  # Modèle:réfl
    "réciproque": "reciprocal",  # Modèle:réciproque
    "impersonnel": "impersonal",  # Modèle:impers
    "transitif": "transitive",  # Modèle:t
    "intransitif": "intransitive",  # Modèle:i
    "injurieux": "offensive",  # Modèle:injurieux
    # Modèle:zh-formes
    "simplifié": "Simplified Chinese",
    "traditionnel": "Traditional Chinese",
}
# Labels produced by templates placed before a gloss (sense relation, text
# genre, register, topic), mapped to one English tag each.  Commented-out
# entries are known labels that have no English tag assigned.
# template text before gloss
SENSE_TAGS: dict[str, str] = {
    # https://fr.wiktionary.org/wiki/Modèle:figuré
    # https://fr.wiktionary.org/wiki/Catégorie:Modèles_de_relation_entre_les_définitions
    # Catégorie:Modèles de genre textuel (text-genre templates)
    # Catégorie:Modèles de registre (register templates)
    "sens figuré": "figuratively",
    "sens propre": "literally",
    "par métonymie": "metonymically",  # Modèle:par métonymie
    "par hyperbole": "hyperbole",
    "par extension": "broadly",
    "par analogie": "analogy",
    "en particulier": "especially",
    "par litote": "litotes",
    "par euphémisme": "euphemism",
    "spécifiquement": "specifically",
    "génériquement": "generically",
    "spécialement": "especially",
    "généralement": "generally",
    "enclise": "enclitic",
    "idiotisme": "idiomatic",
    "péjoratif": "pejorative",
    "désuet": "obsolete",
    "archaïsme": "archaic",
    "vieilli": "dated",
    "néologisme": "neologism",
    "argot": "slang",
    "rare": "rare",
    # "plus rare": "rare",
    "littéraire": "literary",  # Modèle:littéraire
    "poétique": "poetic",  # Modèle:poétique
    # "didactique": "",  # Modèle:didactique
    "soutenu": "formal",  # Modèle:soutenu
    "informel": "informal",  # Modèle:informel
    "familier": "familiar",  # Modèle:familier
    "très familier": "very-familiar",  # Modèle:très familier
    # "populaire": "",  # Modèle:populaire
    "vulgaire": "vulgar",  # Modèle:vulgaire
    "langage enfantin": "childish",  # Modèle:enfantin
    # Catégorie:Modèles de thématique (topic templates)
    "anglicisme informatique": "Anglicism",
    "proverbe": "proverb",
    "collectivement": "collectively",
    "courant": "common",  # Modèle:courant
}
# Grammatical voice labels in French mapped to English tag(s).  The
# compound labels from the Esperanto conjugation table combine a word class
# with a voice, so they map to a list of two tags.
# https://en.wikipedia.org/wiki/Voice_(grammar)
VOICE_TAGS: dict[str, Union[str, list[str]]] = {
    # https://fr.wiktionary.org/wiki/Modèle:eo-conj
    "participe actif": ["participle", "active"],
    "participe passif": ["participle", "passive"],
    "adverbe actif": ["adverb", "active"],
    "adverbe passif": ["adverb", "passive"],
    # "substantif" is a noun form: the tag is "substantive" (the previous
    # spelling "subsuntive" was a typo).
    "substantif actif": ["substantive", "active"],
    "substantif passif": ["substantive", "passive"],
    "actif": "active",
    "passif": "passive",
}
# French lexicon/domain labels mapped to one English tag each.  The source
# comment below points at the wiki page "Module:lexique/data".
# Module:lexique/data
LEXIQUE_TAGS: dict[str, str] = {
    "hindouisme": "Hinduism",
    "judaïsme": "Judaism",
    "marxisme": "Marxism",
    "nazisme": "Nazism",
    "physique": "physical",
    "rhétorique": "rhetoric",
    "antiquité": "Ancient",
    "antiquité grecque": "Ancient-Greek",
    "antiquité romaine": "Ancient-Roman",
    "bible": "Biblical",
    "moyen âge": "Middle-Ages",
    "union européenne": "European-Union",
    "analyse": "analytic",
}
# Chinese pronunciation labels (romanization systems and varieties) mapped
# to English tag(s).  A list value means the label yields several tags.
# Template:cmn-pron
# https://fr.wiktionary.org/wiki/自由
ZH_PRON_TAGS: dict[str, Union[str, list[str]]] = {
    "pinyin": "Pinyin",
    "efeo": "EFEO",  # https://en.wikipedia.org/wiki/EFEO_Chinese_transcription
    "wade-giles": "Wade-Giles",
    "yale": "Yale",
    "zhuyin": "Bopomofo",
    "mandarin": "Mandarin",
    "cantonais": "Cantonese",
    "cantonais (yue)": "Cantonese",
    "jyutping": "Jyutping",
    "hakka": "Hakka",
    "pha̍k-fa-sṳ": "Phak-fa-su",
    "meixian, guangdong": ["Meixian", "Guangdong"],
    "jin": "Jin",
    "mindong": "Eastern-Min",
    # https://en.wikipedia.org/wiki/Bàng-uâ-cê
    "bàng-uâ-cê (fuzhou)": ["Bang-ua-ce", "Fuzhou"],
    "minnan": "Min",
    "pe̍h-ōe-jī (hokkien : fujian, taïwan)": [
        "Peh-oe-ji",
        "Hokkien",
        "Fujian",
        "Taiwan",
    ],
    "chaozhou, peng'im": ["Chaozhou", "Peng'im"],
    "wu": "Wu",
    "shanghai": "Shanghai",
    "chinois médiéval": "Medieval-Chinese",
    "chinois archaïque": "Old-Chinese",
    "baxter-sagart": "Baxter-Sagart",
    "zhengzhang": "Zhengzhang",
}
# Grammatical aspect labels in French mapped to one English tag each.
ASPECT_TAGS: dict[str, str] = {
    "perfectif": "perfective",  # Modèle:perfectif
    "imperfectif": "imperfective",  # Modèle:imperfectif
}
# Combined lookup table; it is the default `tag_dict` of translate_raw_tags.
# The tables are merged left to right into a new dict, so if the same French
# label ever appears in two tables, the one listed later wins.
GRAMMATICAL_TAGS: dict[str, Union[str, list[str]]] = (
    GENDER_TAGS
    | NUMBER_TAGS
    | MOOD_TAGS
    | VERB_FORM_TAGS
    | CASE_TAGS
    | TENSE_TAGS
    | PERSON_TAGS
    | SEMANTICS_TAGS
    | COMPARISON_TAGS
    | OCCITAN_NORM_TAGS
    | BRETON_MUTATION_TAGS
    | JA_TAGS
    | OTHER_GRAMMATICAL_TAGS
    | SENSE_TAGS
    | VOICE_TAGS
    | LEXIQUE_TAGS
    | ZH_PRON_TAGS
    | ASPECT_TAGS
)
def translate_raw_tags(
    data: WordEntry,
    table_template_name: str = "",
    tag_dict: dict[str, Union[str, list[str]]] = GRAMMATICAL_TAGS,
) -> WordEntry:
    """Translate the French ``raw_tags`` of ``data`` into English tags/topics.

    Each raw tag is lower-cased and looked up, in this order:

    1. in ``tag_dict``: the value (a string or a list of strings) is added
       to ``data.tags``;
    2. in ``TOPIC_TAGS``: the value is added to ``data.topics``;
    3. in ``SLANG_TOPICS``: the value is added to ``data.topics`` and the
       ``"slang"`` tag is added to ``data.tags`` if not already present.

    Steps 2 and 3 only apply when ``data`` has a ``topics`` attribute.
    Raw tags that match nothing are kept, in their original spelling, in
    ``data.raw_tags``; translated ones are removed from it.

    Args:
        data: Object to update in place; must provide ``raw_tags`` and
            ``tags`` lists.
        table_template_name: When non-empty, the result is additionally
            passed through ``convert_table_headers`` with this name.
        tag_dict: Lookup table with lower-case keys; defaults to
            ``GRAMMATICAL_TAGS``.  It is only read, never modified.

    Returns:
        The same ``data`` object, after modification.
    """
    # Function-scope import kept from the original code (presumably to
    # avoid a circular import -- TODO confirm).
    from .topics import SLANG_TOPICS, TOPIC_TAGS

    has_topics = hasattr(data, "topics")
    untranslated = []
    for raw_tag in data.raw_tags:
        key = raw_tag.lower()
        if key in tag_dict:
            translated = tag_dict[key]
            if isinstance(translated, str):
                data.tags.append(translated)
            elif isinstance(translated, list):
                data.tags.extend(translated)
            # A value of any other type is ignored and the raw tag dropped.
        elif has_topics and key in TOPIC_TAGS:
            data.topics.append(TOPIC_TAGS[key])
        elif has_topics and key in SLANG_TOPICS:
            data.topics.append(SLANG_TOPICS[key])
            if "slang" not in data.tags:
                data.tags.append("slang")
        else:
            untranslated.append(raw_tag)
    data.raw_tags = untranslated

    if table_template_name != "":
        return convert_table_headers(data, table_template_name)
    return data
def convert_table_headers(data: WordEntry, template_name: str) -> WordEntry:
    """Translate table-header raw tags that are specific to one template.

    Only the "avk-tab-conjug" template is handled: its numeric headers
    "1" to "4" are translated to person tags through translate_raw_tags.
    For any other template name ``data`` is returned unchanged.
    """
    if template_name != "avk-tab-conjug":
        return data

    # https://fr.wiktionary.org/wiki/Modèle:avk-tab-conjug
    person_by_header = {
        "1": "first-person",
        "2": "second-person",
        "3": "third-person",
        "4": "fourth-person",
    }
    return translate_raw_tags(data, tag_dict=person_by_header)