Coverage for src/wiktextract/extractor/de/models.py: 100%

120 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

class GermanBaseModel(BaseModel):
    # Shared base for the models in this module: it declares no fields and
    # only supplies the common pydantic configuration.
    model_config = ConfigDict(
        extra="forbid",  # reject input keys that are not declared fields
        strict=True,  # strict mode: input values are not coerced
        validate_assignment=True,  # validate again on attribute assignment
        validate_default=True,  # run validation on default values too
    )

11 

12 

class Linkage(GermanBaseModel):
    # A term linked from an entry. WordEntry uses this model for its
    # relation lists (synonyms, antonyms, derived, ...).
    word: str  # the only required field: it has no default
    sense_index: str = ""
    note: str = ""
    # NOTE(review): presumably `raw_tags` holds labels as found on the page
    # and `tags` their normalized form -- confirm against the extractor.
    raw_tags: list[str] = []
    tags: list[str] = []

19 

20 

class Translation(GermanBaseModel):
    # One translation of an entry. Every field has a default, so an empty
    # Translation() can be built and filled in afterwards.
    sense: str = Field("", description="A gloss of the sense being translated")
    word: str = Field("", description="Translation term")
    lang_code: str = Field(
        "", description="Wiktionary language code of the translation term"
    )
    lang: str = Field("", description="Localized language name")
    uncertain: bool = Field(
        False, description="Translation marked as uncertain"
    )
    roman: str = Field("", description="Transliteration to Roman characters")
    sense_index: str = ""
    raw_tags: list[str] = []
    tags: list[str] = []
    notes: list[str] = Field([], description="A list of notes")

41 

42 

class Example(GermanBaseModel):
    # A usage example together with the bibliographic details of its
    # reference. All fields are optional strings/lists with empty defaults.

    # --- example text ---
    text: str = Field("", description="Example usage sentence")
    # NOTE(review): presumably (start, end) character offsets of italic
    # spans inside `text` -- confirm against the code that fills them.
    italic_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        "", description="German translation of the example sentence"
    )
    # NOTE(review): presumably the same kind of offsets, for `translation`.
    italic_translation_offsets: list[tuple[int, int]] = []
    raw_tags: list[str] = []
    tags: list[str] = []

    # --- reference metadata ---
    ref: str = Field("", description="Raw reference string")
    url: str = Field(
        "", description="A web link. Not necessarily well-formated."
    )
    author: str = Field("", description="Author's name")
    title: str = Field("", description="Title of the reference")
    title_complement: str = Field("", description="Complement to the title")
    pages: str = Field("", description="Page numbers")
    year: str = Field("", description="Year of publication")
    publisher: str = Field("", description="Published by")
    editor: str = Field("", description="Editor")
    translator: str = Field("", description="Translator")
    collection: str = Field(
        "", description="Name of collection that reference was published in"
    )
    volume: str = Field("", description="Volume number")
    comment: str = Field("", description="Comment on the reference")
    day: str = Field("", description="Day of publication")
    month: str = Field("", description="Month of publication")
    accessdate: str = Field(
        "", description="Date of access of online reference"
    )
    date: str = Field("", description="Date of publication")
    number: str = Field("", description="Issue number")
    # A `chapter` field is intentionally absent: it only ever existed here
    # as a commented-out declaration.
    place: str = Field("", description="Place of publication")
    edition: str = Field("", description="Edition number")
    isbn: str = Field("", description="ISBN number")

84 

85 

class AltForm(GermanBaseModel):
    # Reference to another word; Sense uses it for `form_of` and `alt_of`.
    word: str  # required: no default

88 

89 

class Sense(GermanBaseModel):
    # A single sense of an entry. Every field defaults to empty.
    glosses: list[str] = []
    raw_tags: list[str] = []
    tags: list[str] = []
    categories: list[str] = []
    examples: list["Example"] = Field([], description="List of examples")
    sense_index: str = Field("", description="Sense number used in Wiktionary")
    topics: list[str] = []
    # Both lists point at other words through AltForm.
    form_of: list[AltForm] = []
    alt_of: list[AltForm] = []

104 

105 

class Sound(GermanBaseModel):
    # Pronunciation data for an entry. Every field defaults to empty.
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL slot per audio format.
    wav_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    oga_url: str = ""
    flac_url: str = ""
    opus_url: str = ""
    raw_tags: list[str] = []
    tags: list[str] = []
    rhymes: str = ""
    # exclude=True: the field exists on the model but is left out when the
    # model is serialized.
    categories: list[str] = Field([], exclude=True)

119 

120 

class Form(GermanBaseModel):
    # One form listed for an entry (WordEntry.forms).
    form: str  # required: no default
    tags: list[str] = []
    raw_tags: list[str] = []
    # NOTE(review): meaning of `source` is not visible in this module;
    # presumably it records where the form was taken from -- confirm.
    source: str = ""
    sense_index: str = ""

127 

128 

class Descendant(GermanBaseModel):
    # One item of WordEntry.descendants. Every field defaults to "".
    lang_code: str = Field("", description="Wiktionary language code")
    lang: str = Field("", description="Language name")
    word: str = ""
    roman: str = ""
    sense_index: str = ""

135 

136 

class Hyphenation(GermanBaseModel):
    # One item of WordEntry.hyphenations.
    # NOTE(review): `parts` presumably holds the word split at its
    # hyphenation points, in order -- confirm against the extractor.
    parts: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []

141 

142 

class WordEntry(GermanBaseModel):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the title is set here.
    # NOTE(review): pydantic is expected to merge this with the config
    # inherited from GermanBaseModel rather than replace it -- confirm.
    model_config = ConfigDict(title="German Wiktionary")

    # `word`, `lang_code` and `lang` are required (no default); every other
    # field is optional and defaults to an empty value.
    word: str = Field(description="word string")
    pos: str = Field("", description="Part of speech type")
    other_pos: list[str] = []
    pos_title: str = Field("", description="Original POS title")
    # NOTE(review): the `examples` values on the next two fields look like
    # they were carried over from the Spanish extractor; "español" is not a
    # German-localized language name. Left unchanged because they are part
    # of the generated schema.
    lang_code: str = Field(
        description="Wiktionary language code", examples=["es"]
    )
    lang: str = Field(
        description="Localized language name of the word", examples=["español"]
    )
    senses: list[Sense] = []
    translations: list[Translation] = []
    sounds: list[Sound] = []
    # Relation lists; each item is a Linkage.
    antonyms: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    holonyms: list[Linkage] = []
    expressions: list[Linkage] = []
    coordinate_terms: list[Linkage] = []
    proverbs: list[Linkage] = []
    synonyms: list[Linkage] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    categories: list[str] = []
    redirects: list[str] = []
    etymology_texts: list[str] = []
    forms: list[Form] = []
    meronyms: list[Linkage] = []
    hyphenations: list[Hyphenation] = []
    notes: list[str] = []
    related: list[Linkage] = []
    descendants: list[Descendant] = []