Coverage for src/wiktextract/extractor/de/models.py: 100%

103 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

# Common base class for every model in this module. It carries no fields and
# only pins the pydantic configuration that the subclasses inherit.
class BaseModelWrap(BaseModel):
    model_config = ConfigDict(
        validate_default=True,  # default values go through validation too
        validate_assignment=True,  # re-validate when an attribute is set
        strict=True,  # no implicit type coercion
        extra="forbid",  # unknown fields are rejected
    )

11 

12 

# A word linked to an entry. WordEntry uses this model for its synonyms,
# antonyms, derived terms and the other relation lists.
class Linkage(BaseModelWrap):
    word: str  # required, no default
    sense_index: str = Field(default="")
    note: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])

19 

20 

# One translation of an entry. Every field is optional and defaults to an
# empty value.
class Translation(BaseModelWrap):
    sense: str = Field(
        description="A gloss of the sense being translated",
        default="",
    )
    word: str = Field(description="Translation term", default="")
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
        default="",
    )
    lang: str = Field(description="Localized language name", default="")
    uncertain: bool = Field(
        description="Translation marked as uncertain",
        default=False,
    )
    roman: str = Field(
        description="Transliteration to Roman characters",
        default="",
    )
    sense_index: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    notes: list[str] = Field(description="A list of notes", default=[])

41 

42 

# A usage example together with the bibliographic details of its source.
# Every field is optional and defaults to an empty value.
class Example(BaseModelWrap):
    text: str = Field(description="Example usage sentence", default="")
    translation: str = Field(
        description="German translation of the example sentence",
        default="",
    )
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])

    # Reference / citation data.
    ref: str = Field(description="Raw reference string", default="")
    url: str = Field(
        description="A web link. Not necessarily well-formated.",
        default="",
    )
    author: str = Field(description="Author's name", default="")
    title: str = Field(description="Title of the reference", default="")
    title_complement: str = Field(
        description="Complement to the title",
        default="",
    )
    pages: str = Field(description="Page numbers", default="")
    year: str = Field(description="Year of publication", default="")
    publisher: str = Field(description="Published by", default="")
    editor: str = Field(description="Editor", default="")
    translator: str = Field(description="Translator", default="")
    collection: str = Field(
        description="Name of collection that reference was published in",
        default="",
    )
    volume: str = Field(description="Volume number", default="")
    comment: str = Field(description="Comment on the reference", default="")
    day: str = Field(description="Day of publication", default="")
    month: str = Field(description="Month of publication", default="")
    accessdate: str = Field(
        description="Date of access of online reference",
        default="",
    )
    date: str = Field(description="Date of publication", default="")
    number: str = Field(description="Issue number", default="")
    # Disabled field, kept for reference:
    # chapter: Optional[str] = Field(default=None, description="Chapter name")
    place: str = Field(description="Place of publication", default="")
    edition: str = Field(description="Edition number", default="")
    isbn: str = Field(description="ISBN number", default="")

82 

83 

# Holds a single word; Sense.form_of is a list of these.
class AltForm(BaseModelWrap):
    word: str  # required, no default

86 

87 

# One sense of an entry: its glosses, tags, categories and examples.
class Sense(BaseModelWrap):
    glosses: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    # Example is defined earlier in this module, so no quoted forward
    # reference is needed here.
    examples: list[Example] = Field(
        description="List of examples",
        default=[],
    )
    # Disabled field, kept for reference:
    # subsenses: list["Sense"] = Field(
    #     default=[], description="List of subsenses"
    # )
    sense_index: str = Field(
        description="Sense number used in Wiktionary",
        default="",
    )
    topics: list[str] = Field(default=[])
    form_of: list[AltForm] = Field(default=[])

104 

105 

# Pronunciation data: IPA, audio file name and URLs per audio format, rhymes.
class Sound(BaseModelWrap):
    ipa: str = Field(description="International Phonetic Alphabet", default="")
    # Disabled field, kept for reference:
    # phonetic_transcription: list[str] = Field(
    #     default=[], description="Phonetic transcription, less exact than IPA."
    # )
    audio: str = Field(description="Audio file name", default="")
    # One URL field per audio format.
    wav_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    oga_url: str = ""
    flac_url: str = ""
    lang_code: str = Field(description="Wiktionary language code", default="")
    lang: str = Field(description="Localized language name", default="")
    # Disabled fields, kept for reference:
    # roman: list[str] = Field(
    #     default=[], description="Translitaration to Roman characters"
    # )
    # syllabic: list[str] = Field(
    #     default=[], description="Syllabic transcription"
    # )
    raw_tags: list[str] = Field(
        description="Specifying the variant of the pronunciation",
        default=[],
    )
    tags: list[str] = Field(default=[])
    rhymes: str = Field(default="")

130 

131 

# A single word form with its tags; WordEntry.forms is a list of these.
class Form(BaseModelWrap):
    form: str  # required, no default
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    source: str = Field(default="")
    sense_index: str = Field(default="")

138 

139 

class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the schema title is set here; per pydantic's documented config
    # inheritance, this is merged with the settings from BaseModelWrap.
    model_config = ConfigDict(title="German Wiktionary")

    # Required identification fields (word, lang_code, lang have no default).
    word: str = Field(description="word string")
    pos: str = Field(default="", description="Part of speech type")
    other_pos: list[str] = []
    # Disabled field, kept for reference:
    # pos_title: str = Field(default=None, description="Original POS title")
    # The schema examples below use German values. The previous "es" and
    # "español" looked copied from the Spanish extractor and did not fit a
    # model titled "German Wiktionary" whose language names are localized.
    lang_code: str = Field(
        description="Wiktionary language code", examples=["de"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["Deutsch"]
    )

    # Nested structured data.
    senses: list[Sense] = []
    translations: list[Translation] = []
    sounds: list[Sound] = []

    # Related words, one list per relation type.
    antonyms: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    expressions: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    proverbs: list[Linkage] = []
    synonyms: list[Linkage] = []

    # Entry-level metadata.
    tags: list[str] = []
    raw_tags: list[str] = []
    categories: list[str] = []
    redirects: list[str] = []
    etymology_text: str = ""
    forms: list[Form] = []
    meronyms: list[Linkage] = []
    hyphenation: str = ""