Coverage for src/wiktextract/extractor/ko/models.py: 100%
97 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-03 05:44 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-03 05:44 +0000
1from pydantic import BaseModel, ConfigDict, Field
class KoreanBaseModel(BaseModel):
    # Common base for every model in this module; it only carries the
    # pydantic configuration that the subclasses inherit.
    model_config = ConfigDict(
        # Unknown keys are rejected instead of being silently dropped.
        extra="forbid",
        # Values must already have the declared type (no coercion).
        strict=True,
        # Attribute assignment after construction is validated as well.
        validate_assignment=True,
        # Declared default values go through validation too.
        validate_default=True,
    )
class Sound(KoreanBaseModel):
    # Pronunciation data. Every field is optional and defaults to an
    # empty value.
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")

    # One URL slot per audio format -- presumably all derived from
    # ``audio``; confirm against the code that fills them.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []

    hangul: str = ""
    roman: str = ""
    other: str = ""
class Example(KoreanBaseModel):
    # Usage example; ``Sense.examples`` holds a list of these.
    #
    # The ``bold_*_offsets`` fields hold pairs of integers -- presumably
    # (start, end) offsets of bold spans inside the matching text field;
    # confirm against the extractor that fills them.
    text: str = ""
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = ""
    bold_translation_offsets: list[tuple[int, int]] = []
    ref: str = ""
    roman: str = ""
    bold_roman_offsets: list[tuple[int, int]] = []
    ruby: list[tuple[str, ...]] = Field(
        default=[], description="Japanese Kanji and furigana"
    )
    # ``tags`` is declared exactly once. Declaring it a second time lower
    # in the class body is redundant: the later annotation just overwrites
    # this one while the field keeps this position.
    tags: list[str] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    note: str = ""
    sounds: list[Sound] = []
    raw_tags: list[str] = []
class AltForm(KoreanBaseModel):
    # Element type of ``Sense.form_of``.
    # ``word`` has no default, so it must be supplied on construction.
    word: str
class Classifier(KoreanBaseModel):
    # Element type of ``Sense.classifiers``; all fields are optional.
    classifier: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []
class Sense(KoreanBaseModel):
    # One sense of a word entry; ``WordEntry.senses`` holds a list of these.
    # Every field is optional and defaults to an empty value.
    glosses: list[str] = []

    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []

    examples: list[Example] = []
    note: str = ""
    form_of: list[AltForm] = []
    pattern: str = Field("", description="Sentence structure, 문형")
    classifiers: list[Classifier] = []
class Linkage(KoreanBaseModel):
    # A related term; used as the element type of the relation lists on
    # ``WordEntry`` (``synonyms``, ``antonyms``, ``derived`` and so on).
    # Only ``word`` is required.
    word: str

    sense: str = ""
    roman: str = ""

    raw_tags: list[str] = []
    tags: list[str] = []

    sense_index: str = ""
class Translation(KoreanBaseModel):
    # A translation of the entry into another language.
    # The first three fields have no default and are therefore required.
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(description="Translation language name")
    word: str = Field(description="Translation term")

    # Optional details.
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    sense: str = ""
class Form(KoreanBaseModel):
    # Element type of ``WordEntry.forms``; all fields are optional.
    form: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []

    roman: str = ""
class WordEntry(KoreanBaseModel):
    # Top-level record for one extracted word entry.
    #
    # Only ``title`` is set here; pydantic merges a subclass
    # ``model_config`` with the parent's, so the settings declared on
    # ``KoreanBaseModel`` still apply.
    model_config = ConfigDict(title="Korean Wiktionary")

    # Required, non-empty identification fields.
    word: str = Field(description="Word string", min_length=1)
    lang_code: str = Field(description="Wiktionary language code", min_length=1)
    lang: str = Field(description="Localized language name", min_length=1)
    pos: str = Field(description="Part of speech type", min_length=1)

    pos_title: str = ""
    senses: list[Sense] = []
    categories: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    sounds: list[Sound] = []

    # Relations to other terms; all of them share the ``Linkage`` shape.
    proverbs: list[Linkage] = []
    derived: list[Linkage] = []
    related: list[Linkage] = []
    synonyms: list[Linkage] = []
    antonyms: list[Linkage] = []

    translations: list[Translation] = []
    etymology_texts: list[str] = []
    note: str = ""
    forms: list[Form] = []
    # ``exclude=True`` keeps this field out of the serialized output.
    pattern: str = Field(
        default="", description="Sentence structure, 문형", exclude=True
    )

    # ``idioms`` and ``hyponyms`` are term relations like the ones above,
    # so they use ``Linkage``. ``Translation`` would force every item to
    # carry ``lang_code`` and ``lang`` and would reject ``sense_index``.
    # NOTE(review): confirm that the code filling these two lists builds
    # ``Linkage`` objects.
    idioms: list[Linkage] = []
    hyponyms: list[Linkage] = []
    redirects: list[str] = []
    literal_meaning: str = ""
    anagrams: list[Linkage] = []