Coverage for src/wiktextract/extractor/ko/models.py: 100%
86 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
1from pydantic import BaseModel, ConfigDict, Field
class KoreanBaseModel(BaseModel):
    # Shared base for every model in this module: the validation settings
    # live here so that subclasses only need to declare their fields.
    # (A `#` comment is used instead of a docstring because pydantic copies
    # class docstrings into the generated JSON schema.)
    model_config = ConfigDict(
        strict=True,  # strict mode: no lax type coercion of inputs
        extra="forbid",  # undeclared fields are a validation error
        validate_default=True,  # default values are validated as well
        validate_assignment=True,  # re-validate when an attribute is set
    )
class Sound(KoreanBaseModel):
    # Pronunciation record. Every field has a default, so an empty
    # `Sound()` is valid.
    ipa: str = Field(
        default="",
        description="International Phonetic Alphabet",
    )
    audio: str = Field(
        default="",
        description="Audio file name",
    )

    # One URL slot per audio container format.
    wav_url: str = Field(default="")
    oga_url: str = Field(default="")
    ogg_url: str = Field(default="")
    mp3_url: str = Field(default="")
    opus_url: str = Field(default="")
    flac_url: str = Field(default="")

    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])

    hangul: str = Field(default="")
    roman: str = Field(default="")
    other: str = Field(default="")
class Example(KoreanBaseModel):
    # Usage example attached to a sense. All fields are optional.
    #
    # NOTE(review): the `bold_*_offsets` fields hold (int, int) pairs,
    # presumably (start, end) positions of bold spans in the matching text
    # field -- confirm against the extractor that fills them.
    text: str = Field(default="")
    bold_text_offsets: list[tuple[int, int]] = Field(default=[])
    translation: str = Field(default="")
    bold_translation_offsets: list[tuple[int, int]] = Field(default=[])
    ref: str = Field(default="")
    roman: str = Field(default="")
    bold_roman_offsets: list[tuple[int, int]] = Field(default=[])
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana",
        default=[],
    )
    tags: list[str] = Field(default=[])
    literal_meaning: str = Field(default="")
    bold_literal_offsets: list[tuple[int, int]] = Field(default=[])
    note: str = Field(default="")
    sounds: list[Sound] = Field(default=[])
class AltForm(KoreanBaseModel):
    # Single-field model used by `Sense.form_of`.
    # `word` has no default, so it must always be supplied.
    word: str = Field()
class Sense(KoreanBaseModel):
    # One sense of a word entry. Every field is optional.
    glosses: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    topics: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    examples: list[Example] = Field(default=[])
    note: str = Field(default="")
    form_of: list[AltForm] = Field(default=[])
    pattern: str = Field(
        description="Sentence structure, 문형",
        default="",
    )
class Linkage(KoreanBaseModel):
    # A word linked from an entry (used by the relation lists on
    # `WordEntry`). Only `word` is required.
    word: str = Field()
    sense: str = Field(default="")
    roman: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    sense_index: str = Field(default="")
class Translation(KoreanBaseModel):
    # A translation of the entry into another language.
    # `lang_code`, `lang` and `word` have no default and are required.
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(
        description="Translation language name",
    )
    word: str = Field(
        description="Translation term",
    )
    roman: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    sense: str = Field(default="")
class Form(KoreanBaseModel):
    # A word form together with its tags. All fields are optional.
    form: str = Field(default="")
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
class WordEntry(KoreanBaseModel):
    # Top-level record for one extracted word.
    #
    # Per the pydantic docs, a subclass `model_config` is merged with the
    # parent's, so the settings from KoreanBaseModel remain in effect; only
    # the schema title is added here.
    model_config = ConfigDict(title="Korean Wiktionary")

    # Required identification fields; empty strings are rejected
    # (min_length=1).
    word: str = Field(description="Word string", min_length=1)
    lang_code: str = Field(description="Wiktionary language code", min_length=1)
    lang: str = Field(description="Localized language name", min_length=1)
    pos: str = Field(description="Part of speech type", min_length=1)

    pos_title: str = ""
    senses: list[Sense] = []
    categories: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    sounds: list[Sound] = []

    # Relations to other words; all of them hold `Linkage` items.
    proverbs: list[Linkage] = []
    derived: list[Linkage] = []
    related: list[Linkage] = []
    synonyms: list[Linkage] = []
    antonyms: list[Linkage] = []

    translations: list[Translation] = []
    etymology_texts: list[str] = []
    note: str = ""
    forms: list[Form] = []
    # exclude=True: this field is left out when the model is dumped.
    pattern: str = Field(
        default="", description="Sentence structure, 문형", exclude=True
    )

    # Word relations as well, so they use `Linkage` like the lists above.
    # `Translation` would require `lang_code`/`lang` and, with
    # extra="forbid", reject Linkage-only fields such as `sense_index`.
    idioms: list[Linkage] = []
    hyponyms: list[Linkage] = []