Coverage for src/wiktextract/extractor/es/models.py: 100%

1from pydantic import BaseModel, ConfigDict, Field

class BaseModelWrap(BaseModel):
    # Shared base class: every model in this file inherits this config.
    # Documented with comments rather than a docstring because pydantic
    # copies a model's docstring into its generated JSON schema.
    model_config = ConfigDict(
        extra="forbid",  # reject fields the model does not declare
        strict=True,  # no implicit type coercion during validation
        validate_assignment=True,  # re-validate when an attribute is set
        validate_default=True,  # default values are validated as well
    )

class Linkage(BaseModelWrap):
    # One linked term. WordEntry keeps lists of these for its relation
    # fields (synonyms, antonyms, derived, ...). Only `word` is required.
    word: str
    note: str = ""
    alternative_spelling: str = Field(
        default="",
        description="Alternative spelling of the word",
    )
    sense_index: str = ""
    sense: str = ""

class Translation(BaseModelWrap):
    # A translated term. `word`, `lang_code` and `lang` declare no default
    # and are therefore required; every other field is optional.
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(description="Name of the language of translation")
    sense_index: str = ""
    raw_tags: list[str] = Field(
        default=[],
        description="Tags specifying the translated term, usually gender",
    )
    tags: list[str] = []
    notes: list[str] = Field(default=[], description="A list of notes")
    roman: str = Field(
        default="",
        description="Transliteration in roman characters",
    )

class TemplateData(BaseModelWrap):
    # Record of one template: its name, its arguments and its expansion.
    # All three fields are optional and default to empty values.
    name: str = Field(default="", description="Template's name.")
    args: dict[str, str] = Field(
        default={},
        description="Arguments given to the template, if any.",
    )
    expansion: str = Field(
        default="", description="The result of expanding the template."
    )

class Example(BaseModelWrap):
    # A usage example attached to a sense. Only `text` is required.
    text: str = Field(description="Example usage sentence")
    translation: str = Field(
        default="",
        description="Spanish translation of the example sentence",
    )
    ref: str = ""
    example_templates: list[TemplateData] = []

class AltForm(BaseModelWrap):
    # Wrapper around a single required string; `Sense.form_of` holds a list
    # of these.
    word: str

class Sense(BaseModelWrap):
    # One word sense: glosses, tags, topics, categories and examples.
    # Every field is optional and defaults to an empty value.
    glosses: list[str] = Field(
        default=[],
        # The two literals are joined by implicit concatenation, so the first
        # one must end with a space to keep the sentences apart.
        description="list of gloss strings for the word sense. "
        "This has been cleaned, and should be no tagging.",
    )
    raw_tags: list[str] = []
    tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = Field(
        default=[], description="Category links on the page"
    )
    examples: list[Example] = Field(default=[], description="List of examples")
    # Nested subsenses are currently disabled:
    # subsenses: list["Sense"] = Field(
    #     default=[], description="List of subsenses"
    # )
    sense_index: str = Field(
        default="", description="Sense number used in Wiktionary"
    )
    form_of: list[AltForm] = []

class Sound(BaseModelWrap):
    # Pronunciation data for an entry. Every field is optional; defaults are
    # passed with the `default=` keyword, as in the other models of this file.
    ipa: str = Field(
        default="", description="International Phonetic Alphabet"
    )
    audio: str = Field(default="", description="Audio file name")
    wav_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    flac_url: str = ""
    roman: str = Field(
        default="", description="Transliteration to Roman characters"
    )
    syllabic: str = Field(default="", description="Syllabic transcription")
    raw_tags: list[str] = Field(
        default=[],
        description="Specifying the variant of the pronunciation",
    )
    tags: list[str] = []
    alternative: str = Field(
        default="",
        description="Alternative spelling with same pronunciation",
    )
    note: str = ""
    not_same_pronunciation: bool = Field(
        default=False,
        description="This is `True` for the 'Variantes' row",
    )
    rhymes: str = ""
    homophone: str = ""

109

110

class Form(BaseModelWrap):
    # A word form together with its tags.
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # `exclude=True` keeps row_span out of the serialized output.
    row_span: int = Field(default=1, exclude=True)

116

117

class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract.
    """  # noqa:E501

    # Only the title is set here; pydantic merges it with the configuration
    # inherited from BaseModelWrap.
    model_config = ConfigDict(title="Spanish Wiktionary")

    # --- Identification: `word`, `lang_code` and `lang` are required ---
    word: str = Field(description="word string")
    pos: str = Field(default="", description="Part of speech type")
    pos_title: str = Field(default="", description="Original POS title")
    lang_code: str = Field(
        description="Wiktionary language code",
        examples=["es"],
    )
    lang: str = Field(
        description="Localized language name of the word",
        examples=["español"],
    )

    # --- Senses, categories, pronunciation and translations ---
    senses: list[Sense] = []
    categories: list[str] = Field(
        default=[],
        description="list of non-disambiguated categories for the word",
    )
    sounds: list[Sound] = []
    translations: list[Translation] = []

    # --- Etymology ---
    etymology_text: str = Field(
        default="",
        description="Etymology section as cleaned text.",
    )
    etymology_templates: list[TemplateData] = Field(
        default=[],
        description="Templates and their arguments and expansions from the "
        "etymology section.",
    )
    etymology_number: int = Field(
        default=0,
        description="For words with multiple numbered etymologies, this "
        "contains the number of the etymology under which this entry appeared.",
    )

    # --- Related terms, one list of Linkage objects per relation ---
    antonyms: list[Linkage] = []
    compounds: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    idioms: list[Linkage] = []
    meronyms: list[Linkage] = []
    related: list[Linkage] = []
    synonyms: list[Linkage] = []
    proverbs: list[Linkage] = []

    # --- Remaining entry-level data ---
    tags: list[str] = []
    extra_sounds: dict[str, str] = {}
    forms: list[Form] = []
    hyphenation: str = ""