Coverage for src/wiktextract/extractor/de/models.py: 100%

123 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-13 10:14 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

class GermanBaseModel(BaseModel):
    # Common base class for every model in this module; subclasses inherit
    # the validation settings configured here.
    model_config = ConfigDict(
        # Strict mode: input values are not coerced to the annotated type.
        strict=True,
        # Fields that are not declared on the model are rejected.
        extra="forbid",
        # Default values and later attribute assignments are validated too.
        validate_default=True,
        validate_assignment=True,
    )

11 

12 

class Linkage(GermanBaseModel):
    # Element type of the linkage lists on WordEntry (synonyms, antonyms,
    # derived, ...). Only `word` is required; everything else defaults to an
    # empty string or an empty list.
    word: str = Field(...)
    sense_index: str = Field(default="")
    note: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    topics: list[str] = Field(default=[])

20 

21 

class Translation(GermanBaseModel):
    # Element type of WordEntry.translations. Every field has a default, so
    # an instance can be created empty and filled in field by field.
    sense: str = Field(
        "", description="A gloss of the sense being translated"
    )
    word: str = Field("", description="Translation term")
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field("", description="Localized language name")
    uncertain: bool = Field(
        False, description="Translation marked as uncertain"
    )
    roman: str = Field(
        "", description="Transliteration to Roman characters"
    )
    sense_index: str = Field("")
    raw_tags: list[str] = Field([])
    tags: list[str] = Field([])
    notes: list[str] = Field([], description="A list of notes")
    other: str = Field("")

43 

44 

class Example(GermanBaseModel):
    # Element type of Sense.examples: the example text itself plus the
    # bibliographic details of the reference it was taken from. All fields
    # are optional and default to an empty string or an empty list.

    # Example sentence and its translation, each with a list of
    # (int, int) offset pairs for italic spans.
    text: str = Field(default="", description="Example usage sentence")
    italic_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        default="", description="German translation of the example sentence"
    )
    italic_translation_offsets: list[tuple[int, int]] = []
    raw_tags: list[str] = []
    tags: list[str] = []

    # Reference metadata; all values are kept as plain strings.
    ref: str = Field(default="", description="Raw reference string")
    url: str = Field(
        default="", description="A web link. Not necessarily well-formatted."
    )
    author: str = Field(default="", description="Author's name")
    title: str = Field(default="", description="Title of the reference")
    title_complement: str = Field(
        default="", description="Complement to the title"
    )
    pages: str = Field(default="", description="Page numbers")
    year: str = Field(default="", description="Year of publication")
    publisher: str = Field(default="", description="Published by")
    editor: str = Field(default="", description="Editor")
    translator: str = Field(default="", description="Translator")
    collection: str = Field(
        default="",
        description="Name of collection that reference was published in",
    )
    volume: str = Field(default="", description="Volume number")
    comment: str = Field(default="", description="Comment on the reference")
    day: str = Field(default="", description="Day of publication")
    month: str = Field(default="", description="Month of publication")
    accessdate: str = Field(
        default="", description="Date of access of online reference"
    )
    date: str = Field(default="", description="Date of publication")
    number: str = Field(default="", description="Issue number")
    place: str = Field(default="", description="Place of publication")
    edition: str = Field(default="", description="Edition number")
    isbn: str = Field(default="", description="ISBN number")

86 

87 

class AltForm(GermanBaseModel):
    # Wrapper around a single required word; element type of
    # Sense.form_of and Sense.alt_of.
    word: str = Field(...)

90 

91 

class Sense(GermanBaseModel):
    # Element type of WordEntry.senses. Every field defaults to an empty
    # value, so a Sense can be created first and populated afterwards.
    glosses: list[str] = Field([])
    raw_tags: list[str] = Field([])
    tags: list[str] = Field([])
    categories: list[str] = Field([])
    examples: list[Example] = Field([], description="List of examples")
    sense_index: str = Field(
        "", description="Sense number used in Wiktionary"
    )
    topics: list[str] = Field([])
    form_of: list[AltForm] = Field([])
    alt_of: list[AltForm] = Field([])

106 

107 

class Sound(GermanBaseModel):
    # Element type of WordEntry.sounds; all fields are optional.
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL field per audio file format.
    wav_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    oga_url: str = ""
    flac_url: str = ""
    opus_url: str = ""
    raw_tags: list[str] = []
    tags: list[str] = []
    rhymes: str = ""
    # exclude=True keeps this field out of the serialized output.
    categories: list[str] = Field([], exclude=True)

121 

122 

class Form(GermanBaseModel):
    # Element type of WordEntry.forms. `form` is the only required field.
    form: str = Field(...)
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    source: str = Field(default="")
    sense_index: str = Field(default="")
    topics: list[str] = Field(default=[])

130 

131 

class Descendant(GermanBaseModel):
    # Element type of WordEntry.descendants; all fields default to "".
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = Field("")
    roman: str = Field("")
    sense_index: str = Field("")

138 

139 

class Hyphenation(GermanBaseModel):
    # Element type of WordEntry.hyphenations: a list of string parts plus
    # optional tag lists. All fields default to an empty list.
    parts: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])

144 

145 

class WordEntry(GermanBaseModel):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the schema title is set here; the validation settings are those
    # configured on GermanBaseModel.
    model_config = ConfigDict(title="German Wiktionary")

    # Required fields: word, lang_code and lang have no default.
    word: str = Field(description="word string")
    pos: str = Field(default="", description="Part of speech type")
    other_pos: list[str] = []
    pos_title: str = Field(default="", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code", examples=["es"]
    )
    # The example is the German name of the language, to match the
    # "localized" wording of the description and the "es" example above.
    lang: str = Field(
        description="Localized language name of the word", examples=["Spanisch"]
    )
    senses: list[Sense] = []
    translations: list[Translation] = []
    sounds: list[Sound] = []
    antonyms: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    expressions: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    proverbs: list[Linkage] = []
    synonyms: list[Linkage] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    categories: list[str] = []
    redirects: list[str] = []
    etymology_texts: list[str] = []
    forms: list[Form] = []
    meronyms: list[Linkage] = []
    hyphenations: list[Hyphenation] = []
    notes: list[str] = []
    related: list[Linkage] = []
    descendants: list[Descendant] = []