Coverage for src/wiktextract/extractor/th/models.py: 100%

117 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

class ThaiBaseModel(BaseModel):
    # Common base class: it declares no fields, only the pydantic
    # configuration inherited by the models that subclass it.
    model_config = ConfigDict(
        validate_default=True,  # default values are validated as well
        validate_assignment=True,  # re-validate when an attribute is set
        strict=True,  # strict mode: no implicit type coercion
        extra="forbid",  # undeclared keys are rejected
    )

11 

12 

class Sound(ThaiBaseModel):
    # Pronunciation record. Every field is optional and defaults to an
    # empty string or an empty list.

    # Fields that carry a schema description.
    zh_pron: str = Field("", description="Chinese word pronunciation")
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")

    # Audio URL fields, one per file format suffix in the field name.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []

    # Remaining plain-string pronunciation attributes.
    homophone: str = ""
    other: str = ""
    roman: str = ""
    rhymes: str = ""
    enpr: str = ""

30 

31 

class Example(ThaiBaseModel):
    # Usage example. Only `text` is required; everything else has an
    # empty default.
    # NOTE(review): each `bold_*_offsets` entry is presumably a
    # (start, end) pair into the field declared just before it - confirm
    # against the extractor code that fills them.
    text: str
    bold_text_offsets: list[tuple[int, int]] = []

    translation: str = ""
    bold_translation_offsets: list[tuple[int, int]] = []

    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []

    roman: str = Field("", description="Romanization of the example sentence")
    bold_roman_offsets: list[tuple[int, int]] = []

    ref: str = Field(
        "",
        description="Source of the sentence, like book title and page number",
    )
    ruby: list[tuple[str, ...]] = Field(
        [],
        description="Japanese Kanji and furigana",
    )
    tags: list[str] = []
    raw_tags: list[str] = []
    # exclude=True keeps `categories` out of the serialized output.
    categories: list[str] = Field([], exclude=True)
    sounds: list[Sound] = []

54 

55 

class AltForm(ThaiBaseModel):
    # Item type of the `form_of` and `alt_of` lists on Sense.
    word: str  # required
    roman: str = ""  # optional

59 

60 

class Classifier(ThaiBaseModel):
    # Item type of the `classifiers` lists on Sense and WordEntry.
    # All fields are optional.
    classifier: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []

65 

66 

class Sense(ThaiBaseModel):
    # One sense of a word entry. Every field is a list that defaults to
    # empty.

    # Plain string lists.
    glosses: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    categories: list[str] = []

    # Nested models.
    examples: list[Example] = []
    form_of: list[AltForm] = []
    alt_of: list[AltForm] = []

    topics: list[str] = []
    classifiers: list[Classifier] = []

77 

78 

class Form(ThaiBaseModel):
    # Item type of WordEntry.forms. `form` is the only required field.
    form: str
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""

84 

85 

class Translation(ThaiBaseModel):
    # Item type of WordEntry.translations.

    # Required fields (no default supplied).
    lang_code: str = Field(
        description="Wiktionary language code of the translation term"
    )
    lang: str = Field(description="Translation language name")
    word: str = Field(description="Translation term")

    # Optional fields.
    sense: str = Field("", description="Translation gloss")
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    lit: str = Field("", description="Literal translation")

97 

98 

class Linkage(ThaiBaseModel):
    # Item type shared by the linkage lists on WordEntry (synonyms,
    # antonyms, derived, related, ...). `word` is the only required field.
    word: str
    tags: list[str] = []
    raw_tags: list[str] = []
    roman: str = ""
    source: str = ""
    sense: str = ""

106 

107 

class Descendant(ThaiBaseModel):
    # Item type of WordEntry.descendants. The model is recursive through
    # its own `descendants` field.

    # Required fields.
    lang_code: str = Field(description="Wiktionary language code")
    lang: str = Field(description="Language name")
    word: str

    # Optional fields.
    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Quoted forward reference: the class name is not bound yet while its
    # own body is being evaluated.
    descendants: list["Descendant"] = []
    sense: str = ""

117 

118 

class Hyphenation(ThaiBaseModel):
    # Item type of WordEntry.hyphenations. All fields are optional lists.
    parts: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []

123 

124 

class WordEntry(ThaiBaseModel):
    # Top-level record of this module: one word entry together with its
    # senses, forms, sounds, translations and linkage lists.
    #
    # Only the title is set here; pydantic merges a subclass's
    # `model_config` with the parent's, so the ThaiBaseModel settings
    # still apply.
    model_config = ConfigDict(title="Thai Wiktionary")

    # Required fields; min_length=1 rejects empty strings.
    word: str = Field(description="Word string", min_length=1)
    lang_code: str = Field(description="Wiktionary language code", min_length=1)
    lang: str = Field(description="Localized language name", min_length=1)
    pos: str = Field(description="Part of speech type", min_length=1)

    # Optional fields; each defaults to an empty string or list.
    pos_title: str = ""
    senses: list[Sense] = []
    categories: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    etymology_text: str = ""
    classifiers: list[Classifier] = []
    forms: list[Form] = []
    translations: list[Translation] = []
    antonyms: list[Linkage] = []
    synonyms: list[Linkage] = []
    derived: list[Linkage] = []
    related: list[Linkage] = []
    descendants: list[Descendant] = []
    anagrams: list[Linkage] = []
    # `notes` is declared exactly once, at its original position in the
    # field order. A second identical declaration at the end of the class
    # only re-bound the same annotation and default, so it was removed.
    notes: list[str] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    idioms: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    sounds: list[Sound] = []
    hyphenations: list[Hyphenation] = []
    abbreviations: list[Linkage] = []
    proverbs: list[Linkage] = []