Coverage for src/wiktextract/extractor/th/models.py: 100%
117 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from pydantic import BaseModel, ConfigDict, Field
class ThaiBaseModel(BaseModel):
    # Shared base class: the models defined below derive from it and
    # therefore inherit this validation configuration.
    model_config = ConfigDict(
        strict=True,  # no implicit type coercion of input values
        extra="forbid",  # unknown fields raise a validation error
        validate_default=True,  # default values are validated too
        validate_assignment=True,  # re-validate on attribute assignment
    )
class Sound(ThaiBaseModel):
    # Pronunciation record. Every field has an empty default, so an
    # instance can be created without arguments and filled in later.
    zh_pron: str = Field(
        default="",
        description="Chinese word pronunciation",
    )
    ipa: str = Field(
        default="",
        description="International Phonetic Alphabet",
    )
    audio: str = Field(
        default="",
        description="Audio file name",
    )

    # One URL field per audio format (going by the field names).
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []

    # Remaining free-text pronunciation attributes.
    homophone: str = ""
    other: str = ""
    roman: str = ""
    rhymes: str = ""
    enpr: str = ""
class Example(ThaiBaseModel):
    # Example sentence. Only `text` is required; everything else has an
    # empty default.
    text: str
    # Each `bold_*_offsets` field holds (int, int) pairs for the text field
    # declared just before it — presumably start/end positions of bold
    # spans; confirm against the extractor code.
    bold_text_offsets: list[tuple[int, int]] = []

    translation: str = ""
    bold_translation_offsets: list[tuple[int, int]] = []

    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []

    roman: str = Field(
        description="Romanization of the example sentence",
        default="",
    )
    bold_roman_offsets: list[tuple[int, int]] = []

    ref: str = Field(
        description="Source of the sentence, like book title and page number",
        default="",
    )
    ruby: list[tuple[str, ...]] = Field(
        description="Japanese Kanji and furigana",
        default=[],
    )

    tags: list[str] = []
    raw_tags: list[str] = []
    # `exclude=True` keeps this field out of serialized output.
    categories: list[str] = Field(default=[], exclude=True)
    sounds: list[Sound] = []
class AltForm(ThaiBaseModel):
    # Word reference used by `Sense.form_of` and `Sense.alt_of`.
    word: str  # required
    roman: str = ""  # optional, empty by default
class Classifier(ThaiBaseModel):
    # Classifier entry, used by both `Sense.classifiers` and
    # `WordEntry.classifiers`. All fields are optional.
    classifier: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
class Sense(ThaiBaseModel):
    # One sense of a word entry. Every field defaults to an empty list.
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    categories: list[str] = []
    examples: list[Example] = []

    # Both relations share the `AltForm` shape.
    form_of: list[AltForm] = []
    alt_of: list[AltForm] = []

    topics: list[str] = []
    classifiers: list[Classifier] = []
class Form(ThaiBaseModel):
    # Entry of `WordEntry.forms`. Only `form` is required.
    form: str
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
class Translation(ThaiBaseModel):
    # Entry of `WordEntry.translations`.
    # Required fields: `lang_code`, `lang` and `word` have no default.
    lang_code: str = Field(
        description="Wiktionary language code of the translation term"
    )
    lang: str = Field(description="Translation language name")
    word: str = Field(description="Translation term")

    # Optional fields.
    sense: str = Field(description="Translation gloss", default="")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    lit: str = Field(description="Literal translation", default="")
class Linkage(ThaiBaseModel):
    # Linked term; the element type of the `WordEntry` relation lists
    # (synonyms, antonyms, derived, ...). Only `word` is required.
    word: str
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    source: str = ""
    sense: str = ""
class Descendant(ThaiBaseModel):
    # Node of a descendants tree: each node may carry child nodes of the
    # same type in `descendants`.
    # Required fields.
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Language name")
    word: str

    # Optional fields.
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Self-reference, written as a string because the class name is not
    # bound yet while the class body executes.
    descendants: list["Descendant"] = []
    sense: str = ""
class Hyphenation(ThaiBaseModel):
    # Entry of `WordEntry.hyphenations`. All fields default to empty lists.
    parts: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
class WordEntry(ThaiBaseModel):
    # Top-level record for one word entry.
    #
    # Only the model title is set here; pydantic merges a subclass's
    # `model_config` with the parent's, so the validation settings from
    # ThaiBaseModel still apply.
    model_config = ConfigDict(title="Thai Wiktionary")

    # Required, non-empty identification fields.
    word: str = Field(description="Word string", min_length=1)
    lang_code: str = Field(description="Wiktionary language code", min_length=1)
    lang: str = Field(description="Localized language name", min_length=1)
    pos: str = Field(description="Part of speech type", min_length=1)

    # Optional data; every field below defaults to an empty value.
    pos_title: str = ""
    senses: list[Sense] = []
    categories: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    etymology_text: str = ""
    classifiers: list[Classifier] = []
    forms: list[Form] = []
    translations: list[Translation] = []
    antonyms: list[Linkage] = []
    synonyms: list[Linkage] = []
    derived: list[Linkage] = []
    related: list[Linkage] = []
    descendants: list[Descendant] = []
    anagrams: list[Linkage] = []
    # `notes` used to be declared a second time at the end of the class.
    # The redundant declaration was dropped; this one is kept so the
    # field's position in the field order stays the same.
    notes: list[str] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    idioms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    sounds: list[Sound] = []
    hyphenations: list[Hyphenation] = []
    abbreviations: list[Linkage] = []
    proverbs: list[Linkage] = []