Coverage for src / wiktextract / extractor / th / models.py: 100%

122 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-02 00:27 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

# Common base for the models in this file: subclasses inherit the pydantic
# configuration declared here.
# (Documented with comments rather than a docstring because pydantic copies
# a model's docstring into its generated JSON schema.)
class ThaiBaseModel(BaseModel):
    model_config = ConfigDict(
        # Reject input that names a field the model does not declare.
        extra="forbid",
        # Disable pydantic's implicit type coercion.
        strict=True,
        # Validate again whenever an attribute is assigned.
        validate_assignment=True,
        # Run validation on default values as well.
        validate_default=True,
    )

11 

12 

# Pronunciation record. Every field is optional: strings default to ""
# and lists default to empty.
class Sound(ThaiBaseModel):
    zh_pron: str = Field("", description="Chinese word pronunciation")
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")

    # Per-format URL fields (presumably links to renditions of `audio` --
    # confirm against the extractor code).
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []

    homophone: str = ""
    other: str = ""
    roman: str = ""
    rhymes: str = ""
    enpr: str = ""
    hangeul: str = ""

31 

32 

# Example sentence attached to a sense. Only `text` is required.
class Example(ThaiBaseModel):
    text: str

    # Each text field below is followed by a list of integer pairs
    # (presumably start/end offsets of bold spans in that text -- confirm
    # against the code that fills them).
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = ""
    bold_translation_offsets: list[tuple[int, int]] = []
    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []
    roman: str = Field("", description="Romanization of the example sentence")
    bold_roman_offsets: list[tuple[int, int]] = []

    ref: str = Field(
        "",
        description="Source of the sentence, like book title and page number",
    )
    ruby: list[tuple[str, ...]] = Field(
        [], description="Japanese Kanji and furigana"
    )
    tags: list[str] = []
    raw_tags: list[str] = []
    # exclude=True keeps this field out of the serialized output.
    categories: list[str] = Field([], exclude=True)
    sounds: list[Sound] = []

55 

56 

# Word reference used by Sense.form_of and Sense.alt_of.
class AltForm(ThaiBaseModel):
    word: str  # required
    roman: str = ""

60 

61 

# Classifier string together with its tags; all fields are optional.
class Classifier(ThaiBaseModel):
    classifier: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []

66 

67 

# One sense of a word entry. Every field is a list that defaults to empty.
class Sense(ThaiBaseModel):
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    categories: list[str] = []
    examples: list[Example] = []
    # Both of these hold AltForm records.
    form_of: list[AltForm] = []
    alt_of: list[AltForm] = []
    topics: list[str] = []
    classifiers: list[Classifier] = []

78 

79 

# Entry of WordEntry.forms. Only `form` is required.
class Form(ThaiBaseModel):
    form: str
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    # Tuples of strings of any length (same element type as Example.ruby).
    ruby: list[tuple[str, ...]] = []

86 

87 

# Translation of a word. `lang_code`, `lang` and `word` are required
# (their Field() calls supply no default); the rest are optional.
class Translation(ThaiBaseModel):
    lang_code: str = Field(
        description="Wiktionary language code of the translation term"
    )
    lang: str = Field(description="Translation language name")
    word: str = Field(description="Translation term")
    sense: str = Field("", description="Translation gloss")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    lit: str = Field("", description="Literal translation")
    source: str = ""

100 

101 

# Linked word record; WordEntry uses it for its synonyms, antonyms,
# derived, related, and similar list fields. Only `word` is required.
class Linkage(ThaiBaseModel):
    word: str
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    source: str = ""
    sense: str = ""

109 

110 

# Descendant word record. `lang_code`, `lang` and `word` are required.
# The model is recursive: `descendants` holds further Descendant records.
class Descendant(ThaiBaseModel):
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Language name")
    word: str
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Quoted because the class name is not bound until the class statement
    # finishes executing.
    descendants: list["Descendant"] = []
    sense: str = ""
    # NOTE(review): exactly two strings per tuple here, whereas Form.ruby and
    # Example.ruby accept tuple[str, ...] -- confirm the difference is
    # intentional.
    ruby: list[tuple[str, str]] = []

121 

122 

# Hyphenation record: a list of string parts plus tags, all optional.
class Hyphenation(ThaiBaseModel):
    parts: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []

127 

128 

# Top-level word entry record. `word`, `lang_code`, `lang` and `pos` are
# required and must be non-empty (min_length=1); everything else is optional.
class WordEntry(ThaiBaseModel):
    # Sets only the title. Per pydantic's config inheritance this is merged
    # with the config inherited from ThaiBaseModel, not a replacement for it.
    model_config = ConfigDict(title="Thai Wiktionary")

    word: str = Field(description="Word string", min_length=1)
    lang_code: str = Field(
        description="Wiktionary language code", min_length=1
    )
    lang: str = Field(description="Localized language name", min_length=1)
    pos: str = Field(description="Part of speech type", min_length=1)
    pos_title: str = ""

    senses: list[Sense] = []
    categories: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    etymology_texts: list[str] = []
    classifiers: list[Classifier] = []
    forms: list[Form] = []
    translations: list[Translation] = []

    # Field order below is kept as in the original declaration because it
    # determines the order of keys in serialized output.
    antonyms: list[Linkage] = []
    synonyms: list[Linkage] = []
    derived: list[Linkage] = []
    related: list[Linkage] = []
    descendants: list[Descendant] = []
    anagrams: list[Linkage] = []
    notes: list[str] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    idioms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    sounds: list[Sound] = []
    hyphenations: list[Hyphenation] = []
    abbreviations: list[Linkage] = []
    proverbs: list[Linkage] = []
    literal_meaning: str = ""
    redirects: list[str] = []