Coverage for src / wiktextract / extractor / de / models.py: 100%

103 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-05 07:46 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

class GermanBaseModel(BaseModel):
    # Common pydantic configuration for the models in this module that
    # subclass it: unknown fields are rejected, values are validated in
    # strict mode, and validation also runs on attribute assignment and on
    # default values.
    model_config = ConfigDict(
        validate_default=True,
        validate_assignment=True,
        strict=True,
        extra="forbid",
    )

11 

12 

class Linkage(GermanBaseModel):
    # A term linked to an entry; WordEntry stores lists of these for its
    # relation fields (synonyms, antonyms, derived, ...).

    # The only required field.
    word: str

    # Optional metadata, empty unless set.
    sense_index: str = Field(default="")
    note: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    topics: list[str] = Field(default=[])

20 

21 

class Translation(GermanBaseModel):
    # One translation record. Every field has a default, so an instance can
    # be created empty and filled in by assignment.
    sense: str = Field(
        default="",
        description="A gloss of the sense being translated",
    )
    word: str = Field(
        default="",
        description="Translation term",
    )
    lang_code: str = Field(
        default="",
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(
        default="",
        description="Localized language name",
    )
    uncertain: bool = Field(
        default=False,
        description="Translation marked as uncertain",
    )
    roman: str = Field(
        default="",
        description="Transliteration to Roman characters",
    )
    sense_index: str = Field(default="")
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    notes: list[str] = Field(
        default=[],
        description="A list of notes",
    )
    other: str = Field(default="")

43 

44 

class Example(GermanBaseModel):
    # A usage example attached to a sense; only `text` is required.
    text: str = Field(description="Example usage sentence")

    # NOTE(review): both offset fields are named "bold_*" but described as
    # "Italic words" -- confirm which one matches what the extractor stores.
    bold_text_offsets: list[tuple[int, int]] = Field(
        default=[],
        description="Italic words",
    )
    translation: str = Field(
        default="",
        description="German translation of the example sentence",
    )
    bold_translation_offsets: list[tuple[int, int]] = Field(
        default=[],
        description="Italic words",
    )

    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    ref: str = Field(
        default="",
        description="Raw reference string",
    )

59 

60 

class AltForm(GermanBaseModel):
    # Holds a single required word; Sense uses it as the item type of its
    # `form_of` and `alt_of` lists.
    word: str

63 

64 

class Sense(GermanBaseModel):
    # One sense of a word entry. Every field is optional and defaults to an
    # empty value.
    glosses: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    examples: list[Example] = Field(
        default=[],
        description="List of examples",
    )
    sense_index: str = Field(
        default="",
        description="Sense number used in Wiktionary",
    )
    topics: list[str] = Field(default=[])

    # References to other words, each wrapped in an AltForm.
    form_of: list[AltForm] = Field(default=[])
    alt_of: list[AltForm] = Field(default=[])

79 

80 

class Sound(GermanBaseModel):
    # Pronunciation data for an entry; all fields are optional.
    ipa: str = Field(
        default="",
        description="International Phonetic Alphabet",
    )
    audio: str = Field(
        default="",
        description="Audio file name",
    )

    # One URL field per audio format, each empty unless set.
    wav_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    oga_url: str = ""
    flac_url: str = ""
    opus_url: str = ""

    raw_tags: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    rhymes: str = Field(default="")

    # exclude=True keeps this field out of the serialized output.
    categories: list[str] = Field(default=[], exclude=True)

94 

95 

class Form(GermanBaseModel):
    # A word form listed under WordEntry.forms; `form` is the only required
    # field.
    form: str

    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    source: str = Field(default="")
    sense_index: str = Field(default="")
    topics: list[str] = Field(default=[])
    pronouns: list[str] = Field(default=[])

104 

105 

class Descendant(GermanBaseModel):
    # An item of WordEntry.descendants; every field defaults to "".
    lang_code: str = Field(
        default="",
        description="Wiktionary language code",
    )
    lang: str = Field(
        default="",
        description="Language name",
    )
    word: str = Field(default="")
    roman: str = Field(default="")
    sense_index: str = Field(default="")

112 

113 

class Hyphenation(GermanBaseModel):
    # An item of WordEntry.hyphenations: a list of parts plus tag lists, all
    # empty by default.
    parts: list[str] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])

118 

119 

class WordEntry(GermanBaseModel):
    """
    WordEntry is a dictionary containing lexical information of a single word
    extracted from Wiktionary with wiktextract.
    """

    # Only the title is set here; pydantic merges it with the configuration
    # inherited from GermanBaseModel.
    model_config = ConfigDict(title="German Wiktionary")

    # --- Identification: `word`, `lang_code` and `lang` are required. ---
    word: str = Field(description="word string")
    pos: str = Field(
        default="",
        description="Part of speech type",
    )
    pos_title: str = Field(
        default="",
        description="Original POS title",
    )
    # NOTE(review): the schema examples below ("es", "español") look copied
    # from another edition's extractor -- confirm they suit this model.
    lang_code: str = Field(
        description="Wiktionary language code",
        examples=["es"],
    )
    lang: str = Field(
        description="Localized language name of the word",
        examples=["español"],
    )

    # --- Nested content; every list below starts out empty. ---
    # Field order is kept as-is because it determines the order of keys in
    # serialized output and in the generated schema.
    senses: list[Sense] = Field(default=[])
    translations: list[Translation] = Field(default=[])
    sounds: list[Sound] = Field(default=[])
    antonyms: list[Linkage] = Field(default=[])
    derived: list[Linkage] = Field(default=[])
    hyponyms: list[Linkage] = Field(default=[])
    hypernyms: list[Linkage] = Field(default=[])
    holonyms: list[Linkage] = Field(default=[])
    expressions: list[Linkage] = Field(default=[])
    coordinate_terms: list[Linkage] = Field(default=[])
    proverbs: list[Linkage] = Field(default=[])
    synonyms: list[Linkage] = Field(default=[])
    tags: list[str] = Field(default=[])
    raw_tags: list[str] = Field(default=[])
    categories: list[str] = Field(default=[])
    redirects: list[str] = Field(default=[])
    etymology_texts: list[str] = Field(default=[])
    forms: list[Form] = Field(default=[])
    meronyms: list[Linkage] = Field(default=[])
    hyphenations: list[Hyphenation] = Field(default=[])
    notes: list[str] = Field(default=[])
    related: list[Linkage] = Field(default=[])
    descendants: list[Descendant] = Field(default=[])