Coverage for src/wiktextract/extractor/cs/models.py: 100%
48 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1from pydantic import BaseModel, ConfigDict, Field
# Base class shared by the data models defined below; it only carries the
# pydantic configuration that those models have in common.
class CzechBaseModel(BaseModel):
    # validate_default / validate_assignment: run validation on default
    # values and on attribute assignment, not just at construction time.
    # strict: no implicit type coercion of input values.
    # extra="forbid": unknown field names are rejected with an error.
    model_config = ConfigDict(
        validate_default=True,
        validate_assignment=True,
        strict=True,
        extra="forbid",
    )
# One example sentence; only `text` is required.
class Example(CzechBaseModel):
    text: str
    # NOTE(review): presumably (start, end) offset pairs marking bold spans
    # inside `text` -- confirm against the extractor that fills this in.
    bold_text_offsets: list[tuple[int, int]] = Field(default=[])
    translation: str = Field(default="")
    # NOTE(review): presumably the same kind of offsets, for `translation`.
    bold_translation_offsets: list[tuple[int, int]] = Field(default=[])
    ref: str = Field(
        description="Source of the sentence, like book title and page number",
        default="",
    )
# One sense of a word entry. Every field is optional and defaults to an
# empty list.
class Sense(CzechBaseModel):
    glosses: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    topics: list[str] = Field(default=[])
    examples: list[Example] = Field(default=[])
# Pronunciation data: IPA text, tags, an audio file name and one URL field
# per audio format. Every field is optional.
class Sound(CzechBaseModel):
    ipa: str = Field(description="International Phonetic Alphabet", default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    audio: str = Field(description="Audio file name", default="")
    # One URL slot per audio format; an empty string is the default.
    wav_url: str = Field(default="")
    oga_url: str = Field(default="")
    ogg_url: str = Field(default="")
    mp3_url: str = Field(default="")
    opus_url: str = Field(default="")
    flac_url: str = Field(default="")
# A single hyphenation, stored as a list of string parts.
class Hyphenation(CzechBaseModel):
    parts: list[str] = Field(default=[])
# A word form: the required form string plus optional tag lists.
class Form(CzechBaseModel):
    form: str
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
# Top-level record for one word entry. `word`, `lang_code`, `lang` and `pos`
# are required; every other field has an empty default.
class WordEntry(CzechBaseModel):
    # Only the title is set here.
    # NOTE(review): pydantic is documented to merge this with the parent
    # class's config rather than replace it -- confirm if relying on that.
    model_config = ConfigDict(title="Czech Wiktionary")

    # Required identification fields.
    word: str = Field(description="Word string")
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Localized language name")
    pos: str = Field(description="Part of speech type")

    # Optional fields.
    pos_title: str = Field(default="")
    senses: list[Sense] = Field(default=[])
    categories: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    sounds: list[Sound] = Field(default=[])
    hyphenations: list[Hyphenation] = Field(default=[])
    etymology_text: str = Field(default="")
    forms: list[Form] = Field(default=[])