Coverage for src/wiktextract/extractor/fr/tags.py: 85%
47 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1# Grammatical glossary appendix:
2# https://fr.wiktionary.org/wiki/Annexe:Glossaire_grammatical
3# List of templates:
4# https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_de_tous_les_modèles
5from .models import WordEntry
# Grammatical gender labels: lower-case French label -> English tag(s).
# A list value means one French label yields several tags.
# https://en.wikipedia.org/wiki/Grammatical_gender
GENDER_TAGS: dict[str, str | list[str]] = {
    "commun": "common",
    "féminin": "feminine",
    "masculin": "masculine",
    "neutre": "neuter",
    # https://fr.wiktionary.org/wiki/Modèle:mf
    "masculin et féminin identiques": ["masculine", "feminine"],
    # table header: https://fr.wiktionary.org/wiki/Modèle:fr-rég
    "masculin et féminin": ["masculine", "feminine"],
    # "Modèle:mf ?", "Modèle:fm ?" (usage hesitates between the two genders)
    "masculin ou féminin (l’usage hésite)": ["masculine", "feminine"],
    "féminin ou masculin (l’usage hésite)": ["feminine", "masculine"],
    "invariable": "invariable",  # Modèle:invar
}
# Grammatical number and countability labels:
# lower-case French label -> English tag(s).
# https://en.wikipedia.org/wiki/Grammatical_number
NUMBER_TAGS: dict[str, str | list[str]] = {
    "singulier": "singular",
    "pluriel": "plural",
    "duel": "dual",
    "collectif": "collective",
    "singulatif": "singulative",
    "indénombrable": "uncountable",  # sv-nom-c-ind
    "au singulier": "singular",
    "au singulier uniquement": "singular-only",
    "au pluriel": "plural",
    "au pluriel uniquement": "plural-only",
    "singulier et pluriel identiques": ["singular", "plural"],
    "nom collectif": "collective",
    # The two entries below are left disabled: no English tag is assigned.
    # "générique": "",  # Modèle:g
    # "nom d'unité": "",  # Modèle:nu
    "généralement indénombrable": "uncountable",
    "dénombrable": "countable",
}
# Grammatical mood labels: lower-case French label -> English tag.
# https://en.wikipedia.org/wiki/Grammatical_mood
MOOD_TAGS: dict[str, str] = {
    "indicatif": "indicative",
    "subjonctif": "subjunctive",
    "conditionnel": "conditional",
    "impératif": "imperative",
    "volitif": "volitive",
}
# Verb form labels: lower-case French label -> English tag(s).
VERB_FORM_TAGS: dict[str, str | list[str]] = {
    "participe": "participle",
    "imparfait": "imperfect",
    "infinitif": "infinitive",
    "gérondif": "gerund",
    # template "pt-verbe-flexion"
    "infinitif personnel": ["infinitive", "personal"],
    "supin": "supine",
}
# Grammatical case labels: lower-case French label -> English tag(s).
# https://en.wikipedia.org/wiki/Grammatical_case
CASE_TAGS: dict[str, str | list[str]] = {
    "ablatif": "ablative",
    "accusatif": "accusative",
    # A combined table header yields both case tags.
    "accusatif génitif": ["accusative", "genitive"],
    "nominatif": "nominative",
    "datif": "dative",
    "génitif": "genitive",
    "vocatif": "vocative",
    "instrumental": "instrumental",
    "locatif": "locative",
    "comitatif": "comitative",
    "essif": "essive",
    "illatif": "illative",
}
78# https://en.wikipedia.org/wiki/Grammatical_tense
79TENSE_TAGS: dict[str, str | list[str]] = {
80 "présent": "present",
81 "passé": "past",
82 "passé simple": "past",
83 "futur": "future",
84 "futur simple": "future",
85 # https://en.wikipedia.org/wiki/Passé_composé
86 "passé composé": "past multiword-construction",
87 "plus-que-parfait": "pluperfect",
88 "passé antérieur": "past anterior",
89 "futur antérieur": "future perfect",
90 "prétérit": "preterite",
91 "présent simple, 3ᵉ pers. sing.": ["present", "third-person", "singular"],
92 "participe passé": ["participle", "past"],
93 "participe présent": ["participle", "present"],
94}
# Grammatical person labels: lower-case French label -> English tag(s).
# https://en.wikipedia.org/wiki/Grammatical_person
PERSON_TAGS: dict[str, str | list[str]] = {
    # Both ordinal spellings of "first" are accepted.
    "1ᵉ personne": "first-person",
    "1ʳᵉ personne": "first-person",
    "2ᵉ personne": "second-person",
    "3ᵉ personne": "third-person",
    # Modèle:avk-conj (combined person + number headers)
    "1ʳᵉ du sing.": ["first-person", "singular"],
    "2ᵉ du sing.": ["second-person", "singular"],
    "3ᵉ du sing.": ["third-person", "singular"],
    "1ʳᵉ du plur.": ["first-person", "plural"],
    "2ᵉ du plur.": ["second-person", "plural"],
    "3ᵉ du plur.": ["third-person", "plural"],
    "4ᵉ du plur.": ["fourth-person", "plural"],
}
# Definiteness labels: lower-case French label -> English tag.
SEMANTICS_TAGS: dict[str, str] = {
    # https://en.wikipedia.org/wiki/Definiteness
    "défini": "definite",
    "indéfini": "indefinite",
}
# Degree-of-comparison labels: lower-case French label -> English tag.
COMPARISON_TAGS: dict[str, str] = {
    # https://en.wikipedia.org/wiki/Comparison_(grammar)
    "positif": "positive",
    "comparatif": "comparative",
    "superlatif": "superlative",
}
# Occitan orthography labels: lower-case French label -> English tag.
# https://en.wikipedia.org/wiki/Occitan_language#Writing_system
OCCITAN_NORM_TAGS: dict[str, str] = {
    # https://fr.wiktionary.org/wiki/Modèle:oc-norme_mistralienne
    "graphie mistralienne": "Mistralian",
    # The two entries below are left disabled: no English tag is assigned.
    # https://fr.wiktionary.org/wiki/Modèle:oc-norme_classique
    # "graphie normalisée": "",
    # Modèle:oc-norme bonnaudienne
    # "graphie bonnaudienne": "",
}
# Breton consonant mutation labels: lower-case French label -> English tag.
# https://en.wikipedia.org/wiki/Breton_mutations
# https://fr.wiktionary.org/wiki/Modèle:br-nom
BRETON_MUTATION_TAGS: dict[str, str] = {
    "non muté": "unmutated",
    "adoucissante": "mutation-soft",
    "durcissante": "mutation-hard",
    "spirante": "mutation-spirant",
    "nasale": "mutation-nasal",
}
# Japanese script/transcription labels: lower-case French label -> English tag.
JA_TAGS: dict[str, str] = {
    # https://fr.wiktionary.org/wiki/Modèle:ja-trans
    "kanji": "kanji",
    "hiragana": "hiragana",
    "katakana": "katakana",
    "transcription": "transcription",
}
# Miscellaneous grammatical labels that fit none of the tables above:
# lower-case French label -> English tag.
# Multi-word tags are hyphenated, like the other tag tables in this module.
OTHER_GRAMMATICAL_TAGS: dict[str, str] = {
    # https://fr.wiktionary.org/wiki/Modèle:be-tab-cas
    "prépositionnel": "prepositional",
    "anglicisme": "Anglicism",
    "pronominal": "pronominal",
    "diminutif": "diminutive",
    "réfléchi": "reflexive",  # Modèle:réfl
    "réciproque": "reciprocal",  # Modèle:réciproque
    "impersonnel": "impersonal",  # Modèle:impers
    "transitif": "transitive",  # Modèle:t
    "intransitif": "intransitive",  # Modèle:i
    "injurieux": "offensive",  # Modèle:injurieux
    # Modèle:zh-formes
    "simplifié": "Simplified-Chinese",
    "traditionnel": "Traditional-Chinese",
}
# Labels produced by templates placed before a gloss (register, usage,
# relation between senses): lower-case French label -> English tag.
SENSE_TAGS: dict[str, str] = {
    # https://fr.wiktionary.org/wiki/Modèle:figuré
    # https://fr.wiktionary.org/wiki/Catégorie:Modèles_de_relation_entre_les_définitions
    # Catégorie:Modèles de genre textuel
    # Catégorie:Modèles de registre
    "sens figuré": "figuratively",
    "sens propre": "literally",
    "par métonymie": "metonymically",  # Modèle:par métonymie
    "par hyperbole": "hyperbole",
    "par extension": "broadly",
    "par analogie": "analogy",
    "en particulier": "especially",
    "par litote": "litotes",
    "par euphémisme": "euphemism",
    "spécifiquement": "specifically",
    "génériquement": "generically",
    "spécialement": "especially",
    "généralement": "generally",
    "enclise": "enclitic",
    "idiotisme": "idiomatic",
    "péjoratif": "pejorative",
    "désuet": "obsolete",
    "archaïsme": "archaic",
    "vieilli": "dated",
    "néologisme": "neologism",
    "argot": "slang",
    "rare": "rare",
    # Disabled entry:
    # "plus rare": "rare",
    "littéraire": "literary",  # Modèle:littéraire
    "poétique": "poetic",  # Modèle:poétique
    # Disabled entry: no English tag is assigned.
    # "didactique": "",  # Modèle:didactique
    "soutenu": "formal",  # Modèle:soutenu
    "informel": "informal",  # Modèle:informel
    "familier": "familiar",  # Modèle:familier
    "très familier": "very-familiar",  # Modèle:très familier
    "populaire": "colloquial",  # Modèle:populaire
    "vulgaire": "vulgar",  # Modèle:vulgaire
    "langage enfantin": "childish",  # Modèle:enfantin
    # Catégorie:Modèles de thématique
    "anglicisme informatique": "Anglicism",
    "proverbe": "proverb",
    "collectivement": "collectively",
    "courant": "common",  # Modèle:courant
}
216# https://en.wikipedia.org/wiki/Voice_(grammar)
217VOICE_TAGS: dict[str, str | list[str]] = {
218 # https://fr.wiktionary.org/wiki/Modèle:eo-conj
219 "participe actif": ["participle", "active"],
220 "participe passif": ["participle", "passive"],
221 "adverbe actif": ["adverb", "active"],
222 "adverbe passif": ["adverb", "passive"],
223 "substantif actif": ["subsuntive", "active"],
224 "substantif passif": ["subsuntive", "passive"],
225 "actif": "active",
226 "passif": "passive",
227}
# Lexicon (domain) labels: lower-case French label -> English tag.
# Module:lexique/data
LEXIQUE_TAGS: dict[str, str] = {
    "hindouisme": "Hinduism",
    "judaïsme": "Judaism",
    "marxisme": "Marxism",
    "nazisme": "Nazism",
    "physique": "physical",
    "rhétorique": "rhetoric",
    "antiquité": "Ancient",
    "antiquité grecque": "Ancient-Greek",
    "antiquité romaine": "Ancient-Roman",
    "bible": "Biblical",
    "moyen âge": "Middle-Ages",
    "union européenne": "European-Union",
    "analyse": "analytic",
}
# Chinese pronunciation labels (romanisation systems and varieties):
# lower-case French label -> English tag(s).
# Template:cmn-pron
# https://fr.wiktionary.org/wiki/自由
ZH_PRON_TAGS: dict[str, str | list[str]] = {
    "pinyin": "Pinyin",
    "efeo": "EFEO",  # https://en.wikipedia.org/wiki/EFEO_Chinese_transcription
    "wade-giles": "Wade-Giles",
    "yale": "Yale",
    "zhuyin": "Bopomofo",
    "mandarin": "Mandarin",
    "cantonais": "Cantonese",
    "cantonais (yue)": "Cantonese",
    "jyutping": "Jyutping",
    "hakka": "Hakka",
    "pha̍k-fa-sṳ": "Phak-fa-su",
    "meixian, guangdong": ["Meixian", "Guangdong"],
    "jin": "Jin",
    "mindong": "Eastern-Min",
    # https://en.wikipedia.org/wiki/Bàng-uâ-cê
    "bàng-uâ-cê (fuzhou)": ["Bang-ua-ce", "Fuzhou"],
    # NOTE(review): "mindong" maps to "Eastern-Min" but "minnan" maps to
    # plain "Min" -- confirm whether a more specific tag was intended.
    "minnan": "Min",
    "pe̍h-ōe-jī (hokkien : fujian, taïwan)": [
        "Peh-oe-ji",
        "Hokkien",
        "Fujian",
        "Taiwan",
    ],
    "chaozhou, peng'im": ["Chaozhou", "Peng'im"],
    "wu": "Wu",
    "shanghai": "Shanghai",
    "chinois médiéval": "Medieval-Chinese",
    "chinois archaïque": "Old-Chinese",
    "baxter-sagart": "Baxter-Sagart",
    "zhengzhang": "Zhengzhang",
}
# Grammatical aspect labels: lower-case French label -> English tag.
ASPECT_TAGS: dict[str, str] = {
    "perfectif": "perfective",  # Modèle:perfectif
    "imperfectif": "imperfective",  # Modèle:imperfectif
}
# Union of every tag table above; this is the default lookup table of
# translate_raw_tags(). Tables are merged left to right, so if two tables
# ever share a key, the value from the later table wins.
GRAMMATICAL_TAGS: dict[str, str | list[str]] = (
    GENDER_TAGS
    | NUMBER_TAGS
    | MOOD_TAGS
    | VERB_FORM_TAGS
    | CASE_TAGS
    | TENSE_TAGS
    | PERSON_TAGS
    | SEMANTICS_TAGS
    | COMPARISON_TAGS
    | OCCITAN_NORM_TAGS
    | BRETON_MUTATION_TAGS
    | JA_TAGS
    | OTHER_GRAMMATICAL_TAGS
    | SENSE_TAGS
    | VOICE_TAGS
    | LEXIQUE_TAGS
    | ZH_PRON_TAGS
    | ASPECT_TAGS
)
def translate_raw_tags(
    data: WordEntry,
    table_template_name: str = "",
    tag_dict: dict[str, str | list[str]] = GRAMMATICAL_TAGS,
) -> WordEntry:
    """Translate the French raw tags on ``data`` into English tags and topics.

    Each item of ``data.raw_tags`` is lower-cased and looked up, in this
    order, in:

    1. ``tag_dict`` -- a ``str`` value is appended to ``data.tags``, a
       ``list`` value is appended item by item;
    2. ``TOPIC_TAGS`` -- the value is appended to ``data.topics``;
    3. ``SLANG_TOPICS`` -- the value is appended to ``data.topics`` and the
       ``"slang"`` tag is added to ``data.tags`` if not already present.

    The two topic lookups are skipped when ``data`` has no ``topics``
    attribute. Raw tags that match nothing are kept, with their original
    spelling and order, in ``data.raw_tags``.

    Args:
        data: Object whose ``raw_tags`` are consumed and whose ``tags`` (and
            ``topics``, if present) are extended in place.
        table_template_name: When non-empty, the result is additionally
            passed through ``convert_table_headers`` with this name.
        tag_dict: Lookup table keyed by lower-case raw tag. It is only read,
            never modified, so sharing the module-level default is safe.

    Returns:
        The same ``data`` object, or whatever ``convert_table_headers``
        returns when ``table_template_name`` is non-empty.
    """
    # Function-scope import, as in the original code (presumably to avoid a
    # circular import between the two modules -- confirm before moving it).
    from .topics import SLANG_TOPICS, TOPIC_TAGS

    # The presence of the attribute does not change while looping.
    has_topics = hasattr(data, "topics")
    untranslated = []
    for raw_tag in data.raw_tags:
        key = raw_tag.lower()
        if key in tag_dict:
            tr_tag = tag_dict[key]
            if isinstance(tr_tag, str):
                data.tags.append(tr_tag)
            elif isinstance(tr_tag, list):
                data.tags.extend(tr_tag)
            # A value of any other type is dropped without adding a tag.
        elif has_topics and key in TOPIC_TAGS:
            data.topics.append(TOPIC_TAGS[key])
        elif has_topics and key in SLANG_TOPICS:
            data.topics.append(SLANG_TOPICS[key])
            if "slang" not in data.tags:
                data.tags.append("slang")
        else:
            untranslated.append(raw_tag)
    data.raw_tags = untranslated
    if table_template_name != "":
        return convert_table_headers(data, table_template_name)
    return data
def convert_table_headers(data: WordEntry, template_name: str) -> WordEntry:
    """Translate table-header raw tags that are specific to one template.

    Only the ``avk-tab-conjug`` template is handled: its numeric headers
    "1" to "4" are translated to person tags by calling
    ``translate_raw_tags`` with a dedicated lookup table. For any other
    template name ``data`` is returned unchanged.
    """
    if template_name != "avk-tab-conjug":
        return data

    # https://fr.wiktionary.org/wiki/Modèle:avk-tab-conjug
    person_by_header = {
        "1": "first-person",
        "2": "second-person",
        "3": "third-person",
        "4": "fourth-person",
    }
    return translate_raw_tags(data, tag_dict=person_by_header)