Coverage for src/wiktextract/extractor/ko/models.py: 100%
82 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from pydantic import BaseModel, ConfigDict, Field
# Common base class for every model declared in this module; it only carries
# the shared pydantic configuration.
# NOTE: kept as `#` comments rather than a docstring because pydantic copies a
# model docstring into the generated JSON schema "description".
class KoreanBaseModel(BaseModel):
    model_config = ConfigDict(
        # Unknown keys are a validation error instead of being dropped.
        extra="forbid",
        # Strict mode: values must already have the annotated type.
        strict=True,
        # Run validation again whenever an attribute is assigned.
        validate_assignment=True,
        # Declared default values are validated as well.
        validate_default=True,
    )
# Pronunciation record. Every field is optional and defaults to an empty
# string or an empty list.
class Sound(KoreanBaseModel):
    ipa: str = Field(default="", description="International Phonetic Alphabet")
    audio: str = Field(default="", description="Audio file name")

    # One URL slot per audio container format.
    # NOTE(review): presumably these point at renditions of `audio`; confirm
    # against the code that fills them in.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    # NOTE(review): `raw_tags` presumably holds labels not yet mapped to
    # `tags`; verify against the extractor.
    tags: list[str] = []
    raw_tags: list[str] = []

    # Alternative written renderings of the pronunciation.
    hangul: str = ""
    roman: str = ""
    other: str = ""
# Usage example attached to a sense. All fields are optional.
class Example(KoreanBaseModel):
    text: str = ""
    translation: str = ""
    ref: str = ""
    roman: str = ""
    # Variable-length string tuples, see the field description.
    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    tags: list[str] = []
    literal_meaning: str = ""
    note: str = ""
    # Pronunciation data that belongs to this example.
    sounds: list[Sound] = []
# Reference to another word; used as the element type of `Sense.form_of`.
class AltForm(KoreanBaseModel):
    # Required: there is no default value.
    word: str
# A single sense of a word entry. Every field is optional.
class Sense(KoreanBaseModel):
    glosses: list[str] = []

    # Labels and classification lists.
    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []

    examples: list[Example] = []
    note: str = ""
    # Words this sense is declared to be a form of.
    form_of: list[AltForm] = []
    pattern: str = Field(
        default="",
        description="Sentence structure, 문형",
    )
# A term linked from a word entry; element type of the related-term lists on
# `WordEntry` (synonyms, antonyms, derived, ...).
class Linkage(KoreanBaseModel):
    # The linked term itself is the only required field.
    word: str

    sense: str = ""
    roman: str = ""
    raw_tags: list[str] = []
    tags: list[str] = []
    # Stored as a string, not an int.
    # NOTE(review): presumably identifies which sense of the entry the link
    # belongs to; confirm against the extractor.
    sense_index: str = ""
# A translation of the entry into another language. `lang_code`, `lang` and
# `word` are required; the remaining fields are optional.
class Translation(KoreanBaseModel):
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(description="Translation language name")
    word: str = Field(description="Translation term")

    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    sense: str = ""
# A word form together with its tags. All fields are optional.
class Form(KoreanBaseModel):
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
# Top-level record for one word entry. `word`, `lang_code`, `lang` and `pos`
# are required and must be non-empty; every other field is optional.
# NOTE: kept as `#` comments rather than a docstring because pydantic copies a
# model docstring into the generated JSON schema "description".
class WordEntry(KoreanBaseModel):
    # Only the schema title is set here; the remaining settings come from the
    # base class configuration.
    model_config = ConfigDict(title="Korean Wiktionary")

    word: str = Field(description="Word string", min_length=1)
    lang_code: str = Field(description="Wiktionary language code", min_length=1)
    lang: str = Field(description="Localized language name", min_length=1)
    pos: str = Field(description="Part of speech type", min_length=1)
    pos_title: str = ""

    senses: list[Sense] = []
    categories: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    sounds: list[Sound] = []

    # Related-term lists; all of them share the `Linkage` element type.
    proverbs: list[Linkage] = []
    derived: list[Linkage] = []
    related: list[Linkage] = []
    synonyms: list[Linkage] = []
    antonyms: list[Linkage] = []

    translations: list[Translation] = []
    etymology_texts: list[str] = []
    note: str = ""
    forms: list[Form] = []
    # `exclude=True` keeps this field out of the serialized output.
    # NOTE(review): presumably a scratch value that is copied onto the senses
    # (`Sense.pattern`) during extraction; confirm against the extractor.
    pattern: str = Field(
        default="", description="Sentence structure, 문형", exclude=True
    )

    # Fixed: these two were typed `list[Translation]`, which requires
    # `lang_code` and `lang` on every item and has no `sense_index`. They are
    # related-term lists like `synonyms`/`antonyms`, so they use `Linkage`.
    idioms: list[Linkage] = []
    hyponyms: list[Linkage] = []