Coverage for src/wiktextract/extractor/id/tags.py: 62%

1from .models import WordEntry

# Raw gloss labels (see the template linked below) mapped to the English tag
# each one is translated to.
GLOSS_TAGS = {
    # https://id.wiktionary.org/wiki/Templat:Istilah
    "cak": "slang",
    "kiasan": "figuratively",
    "mdt": "morpheme",
    "Sas": "literary",
    "arkais": "archaic",
    "klasik": "Classical",
    "nonformal": "informal",
    "akar": "root",
    "akronim": "acronym",
    "kasar": "impolite",
    "hormat": "honorific",
    "generalisasi": "general",
    "neologisme": "neologism",
    "eufemisme": "euphemism",
    "Sunda": "Sundanese",
    "Aceh": "Acehnese",
    "Banjar": "Banjarese",
    "Minangkabau": "Minangkabau",
    "Gorontalo": "Gorontalo",
    "Madura": "Madurese",
    "Batak": "Batak",
}

# Gender/number markers (see the module data page linked below) mapped to
# English tags.
TRANSLATION_TAGS = {
    # https://id.wiktionary.org/wiki/Modul:gender_and_number/data
    "m": "masculine",
    "f": "feminine",
    "n": "neuter",
    "jamak": "plural",
}

# Grammatical-number labels mapped to English tags.
NUM_TAGS = {
    "tunggal": "singular",
    "plural": "plural",
}

# Accent labels mapped to English tags (presumably attached to pronunciation
# data, judging by the name; confirm against the callers).
SOUND_TAGS = {
    "RP": "Received-Pronunciation",
    "US": "US",
}

# Verb voice/mood/transitivity labels mapped to English tags (presumably read
# from part-of-speech headers, judging by the name; confirm against callers).
POS_HEADER_TAGS = {
    "pasif": "passive",
    "transitif": "transitive",
    "imperatif": "imperative",
    "aktif": "active",
}

# Single lookup table combining every tag mapping above.  With dict unpacking
# a later table overrides an earlier one if the same key appears in both.
TAGS = {
    **TRANSLATION_TAGS,
    **GLOSS_TAGS,
    **NUM_TAGS,
    **SOUND_TAGS,
    **POS_HEADER_TAGS,
}

# Subject-field abbreviations mapped to English topic names.  A value is
# either a single string or a list of strings (see "Pang").
TOPICS = {
    "Kim": "chemistry",
    "Mat": "mathematics",
    "Ling": "linguistics",
    "Dok": "medicine",
    "Bio": "biology",
    "Sas": "literature",
    "Mus": "music",
    "Antr": "anthropology",
    "Ars": "architecture",
    "Ark": "archaeology",
    "Psi": "psychology",
    "Isl": "Islam",
    "Geo": "geology",
    "Hin": "Hinduism",
    "Hid": "hydrology",
    "Huk": "law",
    "Kat": "Catholicism",
    "Fis": "physics",
    "Olr": "sports",
    "Dik": "education",
    "Far": "pharmacology",
    "Lay": "shipping",
    "Komp": "computer",
    "Kris": "Christianity",
    "Bot": "botany",
    "Stat": "statistics",
    "Tan": "agriculture",
    "Elek": "electronics",
    "Anat": "anatomy",
    "Adm": "administration",
    "Sen": "arts",
    "Tas": "Sufism",
    "Graf": "printing",
    "Astron": "astronomy",
    "Met": "meteorology",
    "Dag": "commerce",
    "Zool": "zoology",
    "Min": "mineralogy",
    "Kom": "communications",
    "Hut": "forestry",
    "Eko": "economy",
    "Tek": "technology",
    "Mil": "military",
    "Sos": "sociology",
    "Pol": "politics",
    # Disabled entry kept from the original (no translation assigned):
    # "Tern": "",
    "Man": "management",
    "Ikn": "fishing",
    "Fil": "philosophy",
    "Astrol": "astrology",
    "Keu": "finance",
    "Filol": "philology",
    "Metal": "metallurgy",
    "Ent": "entomology",
    "Bud": "Buddhism",
    # Disabled entry kept from the original:
    # "Idt": "industry",
    "Mik": "mycology",
    "Pet": "petrology",
    "Dem": "demography",
    # Disabled entry kept from the original:
    # "Hidm": "hydrometeorology",
    "Film": "film",
    "Tbg": "culinary",
    "Bakt": "bacteriology",
    "Foto": "photography",
    "Pang": ["food", "sciences"],
    "Hindu Bali": "Balinese Hinduism",
}

129

130

def translate_raw_tags(data: WordEntry) -> None:
    """Move recognised entries of ``data.raw_tags`` into ``tags``/``topics``.

    Each raw tag is looked up in ``TAGS`` first and in ``TOPICS`` second.
    On a match the translation is added to ``data.tags`` or ``data.topics``
    respectively (a list-valued translation contributes every item) and the
    raw tag is dropped.  Raw tags that match neither table, or whose target
    attribute is missing on *data*, are kept in ``data.raw_tags`` in their
    original order.

    ``TAGS`` is consulted first, so a key present in both tables (such as
    ``"Sas"``) becomes a tag whenever *data* has a ``tags`` attribute.

    The object is modified in place; nothing is returned.
    """
    # The attribute probes do not depend on the individual raw tag, so do
    # them once instead of on every iteration.
    has_tags = hasattr(data, "tags")
    has_topics = hasattr(data, "topics")

    untranslated = []
    for raw_tag in data.raw_tags:
        if has_tags and raw_tag in TAGS:
            target, translated = data.tags, TAGS[raw_tag]
        elif has_topics and raw_tag in TOPICS:
            target, translated = data.topics, TOPICS[raw_tag]
        else:
            untranslated.append(raw_tag)
            continue

        if isinstance(translated, str):
            target.append(translated)
        elif isinstance(translated, list):
            target.extend(translated)

    data.raw_tags = untranslated