Coverage for src/wiktextract/extractor/el/tags.py: 100%
7 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-13 10:14 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-13 10:14 +0000
from wiktextract.tags import uppercase_tags, valid_tags

# ======
# TAGS
# ======

# The strings in lists on the right-hand side here should be *shared* tags
# between different edition implementations. `valid_tags` is a dictionary
# of these tags (and can be expanded if necessary, although it is unlikely
# to be needed anymore because we have a lot of them), with some metadata
# in the value used in the English mainline extractor.

# Just as an example, this file is essentially the simple implementation from
# the Simple English extractor, which uses basically the same tags and
# mappings as the mainline English extractor (which makes things simple).

# Otherwise, the implementation of tags is a translation effort: when this
# edition of Wiktionary says 'x', what tags does that refer to?


# Tags used for modern Greek verb tables.
# * Reference:
#   https://el.wiktionary.org/wiki/Κατηγορία:Πρότυπα_κλίσης_ρημάτων_(νέα_ελληνικά)
#
# * Standard table:
#   https://el.wiktionary.org/wiki/ψάχνω
# * Non-standard table (relatively frequent):
#   https://el.wiktionary.org/wiki/αναφωνώ which follows
#   https://el.wiktionary.org/wiki/Πρότυπο:el-κλίσ-'λαλώ'
# * Others:
#   https://el.wiktionary.org/wiki/τρώω
# * Users wrongly putting noun inflections in a Κλίση section:
#   https://el.wiktionary.org/wiki/δισεκατομμυριούχος
# Maps the Greek labels found in verb inflection tables (row/column headers
# such as person+number, tense, mood and aspect-group captions) to the list
# of tags each label stands for. An empty list means the label is recognised
# but contributes no tags.
verb_table_tags_base: dict[str, list[str]] = {
    # Persons & numbers
    "α' ενικ.": ["first-person", "singular"],
    "β' ενικ.": ["second-person", "singular"],
    "γ' ενικ.": ["third-person", "singular"],
    "α' πληθ.": ["first-person", "plural"],
    "β' πληθ.": ["second-person", "plural"],
    "γ' πληθ.": ["third-person", "plural"],
    "ενικός": ["singular"],
    "πληθυντικός": ["plural"],
    "εγώ": ["first-person", "singular"],
    "εσύ": ["second-person", "singular"],
    "αυτός": ["third-person", "singular"],
    "εμείς": ["first-person", "plural"],
    "εσείς": ["second-person", "plural"],
    "αυτοί": ["third-person", "plural"],
    "(εσύ)": ["second-person", "singular"],
    "(εσείς)": ["second-person", "plural"],
    # Aspect groups
    # These following three are from:
    # Greek: An Essential Grammar (Routledge Essential Grammars)
    "εξακολουθητικοί χρόνοι": ["imperfective"],
    "συνοπτικοί χρόνοι": ["perfective"],
    "συντελεσμένοι χρόνοι": ["perfect"],
    "συντελεσμένοι χρόνοι (β΄ τύποι)": ["perfect", "type-b"],
    "συντελεσμένοι χρόνοι β΄ (μεταβατικοί)": [
        "perfect",
        "type-b",
        "transitive",
    ],
    "συντελεσμένοι χρόνοι β΄ (αμετάβατοι)": [
        "perfect",
        "type-b",
        "intransitive",
    ],
    # Basic tenses / aspects
    "ενεστώτας": ["present"],
    "παρατατικός": ["imperfect"],
    "αόριστος": ["aorist"],
    # Forms / moods
    "υποτακτική": ["subjunctive"],
    "προστακτική": ["imperative"],
    "μετοχή": ["participle"],
    "απαρέμφατο": ["infinitive"],
    # Future & perfect subtypes
    "εξακολουθητικός μέλλοντας": ["future", "imperfect"],
    "εξ. μέλλ.": ["future", "imperfect"],
    "συνοπτ. μέλλ.": ["future"],
    "στιγμιαίος μέλλοντας": ["future"],  # "στιγμιαίος" = "συνοπτικός"
    "συντελ. μέλλ.": ["future", "perfect"],
    "συντελεσμένος μέλλοντας α'": ["future", "perfect", "type-a"],
    "παρακείμενος": ["present", "perfect"],
    "παρακείμενος α'": ["present", "perfect", "type-a"],
    "υπερσυντέλικος": ["past", "perfect"],
    "υπερσυντέλικος α'": ["past", "perfect", "type-a"],
    # Others
    "προσωπικές εγκλίσεις": ["personal"],  # ["personal-moods"],
    "απρόσωπες εγκλίσεις": ["impersonal"],  # ["impersonal-moods"],
    "μονολεκτικοί χρόνοι": [],  # ["simple-tenses"],  # no να/θα/έχει
    "περιφραστικοί χρόνοι": [],  # ["periphrastic"],  # with να/θα/έχει
    "απαρέμφατο (αόριστος)": ["infinitive", "aorist"],
    "μετοχή (ενεστώτας)": ["participle", "present"],
}

# Label -> tags mapping for this edition: every entry of
# `verb_table_tags_base`, followed by Greek case/gender labels and a set of
# English labels (forms, accents, regions). Because the verb-table entries
# are unpacked first, a later literal key with the same text would override
# them.
base_tag_map: dict[str, list[str]] = {
    **verb_table_tags_base,
    "ονομαστική": ["nominative"],
    "γενική": ["genitive"],
    "αιτιατική": ["accusative"],
    "κλητική": ["vocative"],
    "αρσενικό": ["masculine"],
    "θηλυκό": ["feminine"],
    "ουδέτερο": ["neuter"],
    # ------ English --------------------------------
    "no-gloss": ["no-gloss"],
    "comparative": ["comparative"],
    "Comparative": ["comparative"],
    "determiner": ["determiner"],
    "Negative": ["negative"],
    "Past": ["past"],
    "Past participle": ["past", "participle"],
    "Past tense": ["past"],
    "Plain form": ["canonical"],
    "Plain present": ["present"],
    "plural": ["plural"],
    "Plural": ["plural"],
    "Positive": ["positive"],
    "Present": ["present"],
    "Present participle": ["present", "participle"],
    "Proper noun": ["proper-noun"],
    "singular": ["singular"],
    "superlative": ["superlative"],
    "Superlative": ["superlative"],
    "Third person singular": ["third-person", "singular"],
    "Third-person singular": ["third-person", "singular"],
    "stressed": ["stressed"],
    "unstressed": ["unstressed"],
    "UK": ["UK"],
    "US": ["US"],
    "United Kingdom": ["UK"],
    "United States": ["US"],
    "before a vowel": ["before-vowel"],
    "before a consonant": ["before-consonant"],
    "CA": ["Canada"],
    "AU": ["Australia"],
    "Australian": ["Australia"],
    "California": ["California"],
    "Canadian": ["Canada"],
    "CA synth": [],  # recognised label that yields no tags
    "GB": ["UK"],
    "India": ["India"],
    "Indian English": ["Indian-English"],
    "Kenya": ["Kenya"],
    "Limbu": ["Limbu"],
    "Massachusetts": ["Massachusetts"],
    "Mid-Atlantic": ["Mid-Atlantic"],
    "New York accent": ["New-York"],
    # NOTE(review): the key is spelled "Northen"; presumably it mirrors the
    # spelling found in the wiki text -- confirm before changing it.
    "Northen England": ["Northern-England"],
    "NZ": ["New-Zealand"],
    "Rhode Island": ["Rhode-Island"],
    "Southern England": ["Southern-England"],
    "uk": ["UK"],
    "Uk": ["UK"],
    "UK male": ["UK"],
    "US female": ["US"],
    "US Inland North": ["Inland-Northern-American"],
    "US-Inland North": ["Inland-Northern-American"],
    "American": ["US"],
    "Audio US": ["US"],
}


# Mapping built at import time from `uppercase_tags`; annotated like the
# other tag dictionaries in this module.
tag_map: dict[str, list[str]] = {}

# uppercase_tags are specific tags with uppercase names that are for stuff
# like locations and dialect and language names.
# Each such tag maps to itself with spaces replaced by hyphens, unless
# `base_tag_map` already has an entry for that exact text.
# NOTE(review): this loop does not copy `base_tag_map`'s own entries into
# `tag_map` -- confirm that consumers look them up in `base_tag_map`.
for k in uppercase_tags:
    if k not in base_tag_map:
        tag_map[k] = [k.replace(" ", "-")]