Coverage for src/wiktextract/extractor/es/models.py: 100%

96 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from pydantic import BaseModel, ConfigDict, Field 

2 

3 

# Common base for every model in this module.  A `#` comment is used rather
# than a docstring so that nothing extra ends up in generated schema output.
class BaseModelWrap(BaseModel):
    model_config = ConfigDict(
        # No implicit type coercion: values must already have the right type.
        strict=True,
        # Unknown field names are an error instead of being silently dropped.
        extra="forbid",
        # Default values go through validation as well.
        validate_default=True,
        # Setting an attribute after construction re-runs validation.
        validate_assignment=True,
    )

11 

12 

# A term linked to an entry.  Only `word` is required; every other field
# defaults to the empty string.
class Linkage(BaseModelWrap):
    word: str
    note: str = ""
    alternative_spelling: str = Field(
        "", description="Alternative spelling of the word"
    )
    # NOTE(review): `sense_index` / `sense` presumably identify the sense the
    # linked term belongs to -- confirm against the extractor code.
    sense_index: str = ""
    sense: str = ""

21 

22 

# A translation record: the term plus the code and name of its language are
# required, everything else is optional.
class Translation(BaseModelWrap):
    # --- required ---
    word: str = Field(description="Translation term")
    lang_code: str = Field(
        description="Wiktionary language code of the translation term"
    )
    lang: str = Field(description="Name of the language of translation")

    # --- optional (empty string / empty list when absent) ---
    sense_index: str = ""
    raw_tags: list[str] = Field(
        [],
        description="Tags specifying the translated term, usually gender",
    )
    tags: list[str] = []
    notes: list[str] = Field([], description="A list of notes")
    roman: str = Field("", description="Transliteration in roman characters")
    sense: str = ""

40 

41 

# A usage example.  `text` is the only required field.
class Example(BaseModelWrap):
    text: str = Field(description="Example usage sentence")
    # NOTE(review): presumably (start, end) offsets of bold spans inside
    # `text` -- confirm the exact convention with the producer of this field.
    bold_text_offsets: list[tuple[int, int]] = []
    translation: str = Field(
        "", description="Spanish translation of the example sentence"
    )
    # Same pair layout as `bold_text_offsets`, for the `translation` string.
    bold_translation_offsets: list[tuple[int, int]] = []
    ref: str = ""

50 

51 

# Minimal model carrying a single required word; `Sense.form_of` holds a list
# of these.
class AltForm(BaseModelWrap):
    word: str  # required, no default

54 

55 

# One sense of a word entry.  Every field is optional and defaults to an empty
# list or empty string.
class Sense(BaseModelWrap):
    glosses: list[str] = Field(
        default=[],
        # The two literals are concatenated implicitly; the trailing space on
        # the first one keeps the sentences from running together
        # ("...word sense.This has been...") in the generated schema.
        description="list of gloss strings for the word sense. "
        "This has been cleaned, and should be no tagging.",
    )
    raw_tags: list[str] = []
    tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = Field(
        default=[], description="Category links on the page"
    )
    examples: list[Example] = Field(default=[], description="List of examples")
    sense_index: str = Field(
        default="", description="Sense number used in Wiktionary"
    )
    form_of: list[AltForm] = []

73 

74 

# Pronunciation data for an entry.  All fields are optional.
class Sound(BaseModelWrap):
    ipa: str = Field("", description="International Phonetic Alphabet")
    audio: str = Field("", description="Audio file name")
    # One URL field per audio container format; empty when not available.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""
    # Description spelling corrected ("Translitaration" -> "Transliteration");
    # this text is emitted in the JSON schema.
    roman: str = Field("", description="Transliteration to Roman characters")
    syllabic: str = Field("", description="Syllabic transcription")
    raw_tags: list[str] = Field(
        [], description="Specifying the variant of the pronunciation"
    )
    tags: list[str] = []
    alternative: str = Field(
        "", description="Alternative spelling with same pronunciation"
    )
    note: str = ""
    not_same_pronunciation: bool = Field(
        False, description="This is `True` for the 'Variantes' row"
    )
    rhymes: str = ""
    homophone: str = ""

99 

100 

# A word form together with its tags.
class Form(BaseModelWrap):
    form: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    # `exclude=True` keeps this field out of serialized output.
    # NOTE(review): presumably table-parsing bookkeeping -- confirm with the
    # code that sets it.
    row_span: int = Field(default=1, exclude=True)

106 

107 

# Hyphenation data: a list of string parts plus tag lists.  All three fields
# default to an empty list.
class Hyphenation(BaseModelWrap):
    parts: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []

112 

113 

# Attestation record holding one required string field, `date`.
class Attestation(BaseModelWrap):
    date: str  # required, no default

116 

117 

class WordEntry(BaseModelWrap):
    """
    WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract.
    """  # noqa:E501

    # The docstring above is left untouched on purpose: pydantic copies a
    # model's docstring into the generated JSON schema.
    model_config = ConfigDict(title="Spanish Wiktionary")

    # --- identity of the entry ---
    word: str = Field(description="word string")
    pos: str = Field("", description="Part of speech type")
    pos_title: str = Field("", description="Original POS title")
    lang_code: str = Field(
        examples=["es"], description="Wiktionary language code"
    )
    lang: str = Field(
        examples=["español"],
        description="Localized language name of the word",
    )

    # --- main content ---
    senses: list[Sense] = []
    categories: list[str] = Field(
        [], description="list of non-disambiguated categories for the word"
    )
    sounds: list[Sound] = []
    translations: list[Translation] = []
    etymology_text: str = Field(
        "", description="Etymology section as cleaned text."
    )

    # --- linkage lists, all sharing the Linkage model ---
    antonyms: list[Linkage] = []
    compounds: list[Linkage] = []
    derived: list[Linkage] = []
    hyponyms: list[Linkage] = []
    hypernyms: list[Linkage] = []
    idioms: list[Linkage] = []
    meronyms: list[Linkage] = []
    related: list[Linkage] = []
    synonyms: list[Linkage] = []
    proverbs: list[Linkage] = []

    # --- entry-level tags and remaining data ---
    tags: list[str] = []
    raw_tags: list[str] = []
    extra_sounds: dict[str, str] = {}
    forms: list[Form] = []
    hyphenations: list[Hyphenation] = []
    cognates: list[Linkage] = []
    morphologies: list[Linkage] = []
    # Descendants use the Translation model rather than Linkage.
    descendants: list[Translation] = []
    attestations: list[Attestation] = []