Coverage for src/wiktextract/extractor/nl/models.py: 100%
97 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
1from pydantic import BaseModel, ConfigDict, Field
class DutchBaseModel(BaseModel):
    # Shared base for every model in this module; it only carries the
    # pydantic configuration that the subclasses inherit.
    #
    # NOTE: documented with comments rather than a docstring because
    # pydantic uses a model's docstring as its JSON-schema description.
    model_config = ConfigDict(
        # Reject fields that are not declared on the model.
        extra="forbid",
        # No implicit type coercion during validation.
        strict=True,
        # Re-run validation when an attribute is assigned after creation.
        validate_assignment=True,
        # Validate declared default values as well.
        validate_default=True,
    )
class Example(DutchBaseModel):
    # One usage example attached to a sense. Every field is optional.

    text: str = ""
    # Pairs of ints; presumably (start, end) offsets of bold spans inside
    # `text` -- confirm against the code that fills this in.
    bold_text_offsets: list[tuple[int, int]] = []

    translation: str = ""
    # Same pair layout as above, presumably relative to `translation`.
    bold_translation_offsets: list[tuple[int, int]] = []

    ref: str = ""
class AltForm(DutchBaseModel):
    # Single-field model holding a word; used as the element type of
    # `Sense.form_of`. `word` has no default, so it is required.
    word: str
class Sense(DutchBaseModel):
    # One sense of a word entry. All fields default to an empty list.

    glosses: list[str] = []

    # `tags` and `raw_tags` are both plain string lists; presumably
    # `raw_tags` holds labels that were not mapped to a known tag --
    # confirm against the extractor.
    tags: list[str] = []
    raw_tags: list[str] = []

    categories: list[str] = []
    examples: list[Example] = []
    form_of: list[AltForm] = []
    topics: list[str] = []
class Sound(DutchBaseModel):
    # Pronunciation data for a word entry. Every field is optional.

    ipa: str = Field(
        default="",
        description="International Phonetic Alphabet",
    )
    audio: str = Field(
        default="",
        description="Audio file name",
    )

    # One URL field per audio format; each defaults to an empty string.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []
class Linkage(DutchBaseModel):
    # A word linked from an entry; used as the element type of the linkage
    # lists on `WordEntry` (synonyms, antonyms, derived, ...).

    # The only required field.
    word: str

    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    sense: str = Field(
        default="",
        description="Definition of the word",
    )
    # Must be >= 0. The description says numbering starts from 1, so the
    # default of 0 presumably means "no sense number" -- confirm.
    sense_index: int = Field(
        default=0,
        ge=0,
        description="Number of the definition, start from 1",
    )
class Translation(DutchBaseModel):
    # One translation of a word entry. Every field has a default.

    lang_code: str = Field(
        default="",
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(
        default="",
        description="Translation language name",
    )
    word: str = Field(
        default="",
        description="Translation term",
    )
    sense: str = Field(
        default="",
        description="Translation gloss",
    )
    # Must be >= 0. The description says numbering starts from 1, so the
    # default of 0 presumably means "no sense number" -- confirm.
    sense_index: int = Field(
        default=0,
        ge=0,
        description="Number of the definition, start from 1",
    )

    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
class Etymology(DutchBaseModel):
    # Etymology text together with its categories and an index string.
    # Every field is optional.

    text: str = ""
    categories: list[str] = []
    # Stored as a string, not an int. Presumably pairs with
    # `WordEntry.etymology_index` -- confirm against the extractor.
    index: str = ""
class Form(DutchBaseModel):
    # One entry of `WordEntry.forms`. Every field is optional.

    form: str = ""
    note: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []

    ipa: str = ""
    source: str = ""
    sense: str = ""
class Descendant(DutchBaseModel):
    # Node of a descendant tree: three required strings plus a list of
    # child nodes of the same type.

    lang_code: str
    lang: str
    word: str

    # Quoted forward reference: the class name is not yet bound while the
    # class body is being evaluated.
    descendants: list["Descendant"] = []
class WordEntry(DutchBaseModel):
    # Top-level record for one word entry; it aggregates the other models
    # declared in this module.

    # Only the title is set here; pydantic merges a subclass's model_config
    # with the one inherited from DutchBaseModel.
    model_config = ConfigDict(title="Dutch Wiktionary")

    # --- required, non-empty identification fields ---
    word: str = Field(
        description="Word string",
        min_length=1,
    )
    lang_code: str = Field(
        description="Wiktionary language code",
        min_length=1,
    )
    lang: str = Field(
        description="Localized language name",
        min_length=1,
    )
    pos: str = Field(
        description="Part of speech type",
        min_length=1,
    )

    pos_title: str = ""
    senses: list[Sense] = []
    categories: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []

    # exclude=True: left out of the serialized output.
    etymology_index: str = Field(default="", exclude=True)
    etymology_texts: list[str] = []

    sounds: list[Sound] = []

    # --- linkage lists, all with Linkage elements ---
    abbreviations: list[Linkage] = []
    anagrams: list[Linkage] = []
    antonyms: list[Linkage] = []
    derived: list[Linkage] = []
    proverbs: list[Linkage] = []
    holonyms: list[Linkage] = []
    homophones: list[Linkage] = []
    hypernyms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    metonyms: list[Linkage] = []
    paronyms: list[Linkage] = []
    related: list[Linkage] = []
    rhymes: list[Linkage] = []
    synonyms: list[Linkage] = []

    translations: list[Translation] = []
    hyphenation: str = ""
    forms: list[Form] = []
    notes: list[str] = []
    descendants: list[Descendant] = []

    # exclude=True: left out of the serialized output. Presumably a
    # bookkeeping flag for the extractor -- confirm against its callers.
    extracted_vervoeging_page: bool = Field(default=False, exclude=True)