Coverage for src/wiktextract/extractor/ko/models.py: 100%

1from pydantic import BaseModel, ConfigDict, Field

class KoreanBaseModel(BaseModel):
    # Shared base for every model in this module; it only carries the
    # validation policy, no fields.
    #   extra="forbid"           -> unknown field names raise an error
    #   strict=True              -> no implicit type coercion
    #   validate_assignment=True -> attribute assignment is validated too
    #   validate_default=True    -> default values are validated as well
    model_config = ConfigDict(
        extra="forbid",
        strict=True,
        validate_assignment=True,
        validate_default=True,
    )

class Sound(KoreanBaseModel):
    # Pronunciation data. Every field is optional and defaults to an empty
    # string or an empty list.
    ipa: str = Field(
        default="",
        description="International Phonetic Alphabet",
    )
    audio: str = Field(
        default="",
        description="Audio file name",
    )

    # One URL field per audio format.
    # NOTE(review): presumably derived from `audio`; confirm in the extractor.
    wav_url: str = ""
    oga_url: str = ""
    ogg_url: str = ""
    mp3_url: str = ""
    opus_url: str = ""
    flac_url: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []

    # NOTE(review): presumably alternative written forms of the
    # pronunciation (Hangul spelling, romanization, anything else); confirm.
    hangul: str = ""
    roman: str = ""
    other: str = ""

class Example(KoreanBaseModel):
    # Usage example attached to a sense. Each text-like field is paired with
    # a `bold_*_offsets` list of integer pairs.
    # NOTE(review): the pairs are presumably (start, end) character offsets
    # of bold spans in the matching text field; confirm in the extractor.
    text: str = ""
    bold_text_offsets: list[tuple[int, int]] = []

    translation: str = ""
    bold_translation_offsets: list[tuple[int, int]] = []

    ref: str = ""

    roman: str = ""
    bold_roman_offsets: list[tuple[int, int]] = []

    ruby: list[tuple[str, ...]] = Field(
        default=[],
        description="Japanese Kanji and furigana",
    )
    tags: list[str] = []

    literal_meaning: str = ""
    bold_literal_offsets: list[tuple[int, int]] = []

    note: str = ""
    sounds: list[Sound] = []

class AltForm(KoreanBaseModel):
    # Holds a single required string; used as the item type of
    # `Sense.form_of`.
    word: str

class Sense(KoreanBaseModel):
    # One sense of a word entry. All fields are optional.
    glosses: list[str] = []

    tags: list[str] = []
    raw_tags: list[str] = []
    topics: list[str] = []
    categories: list[str] = []

    examples: list[Example] = []
    note: str = ""
    form_of: list[AltForm] = []
    pattern: str = Field(
        default="",
        description="Sentence structure, 문형",
    )

class Linkage(KoreanBaseModel):
    # A term linked from a word entry (item type of the entry's
    # `synonyms`, `antonyms`, `derived`, ... lists). Only `word` is required.
    word: str

    sense: str = ""
    roman: str = ""
    raw_tags: list[str] = []
    tags: list[str] = []
    # Kept as a string rather than an int.
    # NOTE(review): presumably because indexes can be non-numeric; confirm.
    sense_index: str = ""

class Translation(KoreanBaseModel):
    # A translation of the entry into another language. The first three
    # fields have no default and are therefore required.
    lang_code: str = Field(
        description="Wiktionary language code of the translation term",
    )
    lang: str = Field(description="Translation language name")
    word: str = Field(description="Translation term")

    roman: str = ""
    tags: list[str] = []
    raw_tags: list[str] = []
    sense: str = ""

class Form(KoreanBaseModel):
    # A form string plus its tags; item type of `WordEntry.forms`.
    form: str = ""

    tags: list[str] = []
    raw_tags: list[str] = []

class WordEntry(KoreanBaseModel):
    # Top-level record for one extracted word. `word`, `lang_code`, `lang`
    # and `pos` are required and must be non-empty; everything else is
    # optional.
    #
    # A `#` comment is used instead of a class docstring on purpose:
    # pydantic copies a model's docstring into the generated JSON schema as
    # its `description`, which would change the published schema.

    # Only overrides the schema title; the validation settings declared on
    # KoreanBaseModel are inherited and merged by pydantic.
    model_config = ConfigDict(title="Korean Wiktionary")

    word: str = Field(description="Word string", min_length=1)
    lang_code: str = Field(description="Wiktionary language code", min_length=1)
    lang: str = Field(description="Localized language name", min_length=1)
    pos: str = Field(description="Part of speech type", min_length=1)
    pos_title: str = ""

    senses: list[Sense] = []
    categories: list[str] = []
    tags: list[str] = []
    raw_tags: list[str] = []
    sounds: list[Sound] = []

    # Linked terms, all sharing the Linkage item type.
    proverbs: list[Linkage] = []
    derived: list[Linkage] = []
    related: list[Linkage] = []
    synonyms: list[Linkage] = []
    antonyms: list[Linkage] = []

    translations: list[Translation] = []
    etymology_texts: list[str] = []
    note: str = ""
    forms: list[Form] = []

    # exclude=True keeps this field out of serialized output.
    # NOTE(review): presumably a scratch value that the extractor copies
    # into each Sense.pattern; confirm against the extractor.
    pattern: str = Field(
        default="", description="Sentence structure, 문형", exclude=True
    )

    # Idioms and hyponyms are linked terms, so they use Linkage like the
    # other linkage lists above. They were previously typed as
    # list[Translation], whose required `lang_code`/`lang` fields do not
    # apply to same-language linked terms.
    idioms: list[Linkage] = []
    hyponyms: list[Linkage] = []