Coverage for src/wiktextract/extractor/es/models.py: 100%

87 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

# Common base class for the models in this module; it contributes nothing
# but the shared pydantic configuration below.
class BaseModelWrap(BaseModel):
    model_config = ConfigDict(
        # Reject fields that are not declared on the model.
        extra="forbid",
        # No implicit type coercion during validation.
        strict=True,
        # Re-run validation when an attribute is assigned after creation.
        validate_assignment=True,
        # Validate declared default values as well.
        validate_default=True,
    )

11 

12 

class Linkage(BaseModelWrap):
    # A single linked term. Only `word` is required; every other field
    # defaults to the empty string.
    word: str
    note: str = ""
    alternative_spelling: str = Field(
        default="",
        description="Alternative spelling of the word",
    )
    sense_index: str = ""
    sense: str = ""

21 

22 

class Translation(BaseModelWrap):
    # One translation of a word. `word`, `lang_code` and `lang` have no
    # default and therefore must be supplied.
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(description="Name of the language of translation")
    sense_index: str = ""
    raw_tags: list[str] = Field(
        default=[],
        description="Tags specifying the translated term, usually gender",
    )
    tags: list[str] = []
    notes: list[str] = Field(
        default=[],
        description="A list of notes",
    )
    roman: str = Field(
        default="",
        description="Transliteration in roman characters",
    )

39 

40 

class TemplateData(BaseModelWrap):
    # Record of one template: its name, its arguments and its expansion.
    # All three fields are optional and default to empty values.
    name: str = Field(
        default="",
        description="Template's name.",
    )
    args: dict[str, str] = Field(
        default={},
        description="Arguments given to the template, if any.",
    )
    expansion: str = Field(
        default="",
        description="The result of expanding the template.",
    )

50 

51 

class Example(BaseModelWrap):
    # An example sentence attached to a sense; `text` is the only required
    # field.
    text: str = Field(description="Example usage sentence")
    translation: str = Field(
        default="",
        description="Spanish translation of the example sentence",
    )
    ref: str = ""
    example_templates: list[TemplateData] = []

59 

60 

class AltForm(BaseModelWrap):
    # Holds a single required word; used as the element type of
    # `Sense.form_of`.
    word: str

63 

64 

class Sense(BaseModelWrap):
    # One word sense: glosses, tags, topics, categories, examples and the
    # forms it refers to. Every field is optional.
    glosses: list[str] = Field(
        default=[],
        # The two literals are concatenated implicitly, so the first one
        # must end with a space to keep the sentences apart.
        description="list of gloss strings for the word sense. "
        "This has been cleaned, and should be no tagging.",
    )
    raw_tags: list[str] = []
    tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = Field(
        default=[],
        description="Category links on the page",
    )
    examples: list[Example] = Field(
        default=[],
        description="List of examples",
    )
    # subsenses: list["Sense"] = Field(
    #     default=[], description="List of subsenses"
    # )
    sense_index: str = Field(
        default="",
        description="Sense number used in Wiktionary",
    )
    form_of: list[AltForm] = []

85 

86 

class Sound(BaseModelWrap):
    # Pronunciation data for a word. Every field is optional; defaults are
    # passed with the `default=` keyword, as in the other models of this
    # module.
    ipa: str = Field(
        default="",
        description="International Phonetic Alphabet",
    )
    audio: str = Field(default="", description="Audio file name")
    wav_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    flac_url: str = ""
    roman: str = Field(
        default="",
        description="Transliteration to Roman characters",
    )
    syllabic: str = Field(default="", description="Syllabic transcription")
    raw_tags: list[str] = Field(
        default=[],
        description="Specifying the variant of the pronunciation",
    )
    tags: list[str] = []
    alternative: str = Field(
        default="",
        description="Alternative spelling with same pronunciation",
    )
    note: str = ""
    not_same_pronunciation: bool = Field(
        default=False,
        description="This is `True` for the 'Variantes' row",
    )
    rhymes: str = ""
    homophone: str = ""

109 

110 

class Form(BaseModelWrap):
    # A word form together with its tags.
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # Declared with `exclude=True`, so it is left out of serialized output.
    row_span: int = Field(default=1, exclude=True)

116 

117 

class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract.
    """  # noqa:E501

    # Sets the model title in addition to the configuration declared on
    # BaseModelWrap.
    model_config = ConfigDict(title="Spanish Wiktionary")

    # --- Required identification fields and part of speech ---
    word: str = Field(description="word string")
    pos: str = Field(default="", description="Part of speech type")
    pos_title: str = Field(default="", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code",
        examples=["es"],
    )
    lang: str = Field(
        description="Localized language name of the word",
        examples=["español"],
    )

    # --- Senses, categories, sounds and translations ---
    senses: list[Sense] = []
    categories: list[str] = Field(
        default=[],
        description="list of non-disambiguated categories for the word",
    )
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # --- Etymology ---
    etymology_text: str = Field(
        default="",
        description="Etymology section as cleaned text.",
    )
    etymology_templates: list[TemplateData] = Field(
        default=[],
        description="Templates and their arguments and expansions from the "
        "etymology section.",
    )
    etymology_number: int = Field(
        default=0,
        description="For words with multiple numbered etymologies, this "
        "contains the number of the etymology under which this entry appeared.",
    )

    # --- Linkage lists (all share the Linkage element type) ---
    antonyms: list[Linkage] = []
    compounds: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    idioms: list[Linkage] = []
    meronyms: list[Linkage] = []
    related: list[Linkage] = []
    synonyms: list[Linkage] = []
    proverbs: list[Linkage] = []

    # --- Remaining entry-level data ---
    tags: list[str] = []
    extra_sounds: dict[str, str] = {}
    forms: list[Form] = []
    hyphenation: str = ""