Coverage for src/wiktextract/extractor/id/tags.py: 62%

25 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1from .models import WordEntry 

2 

# Gloss labels mapped to the English tag emitted for them.
# Keys are matched verbatim (case-sensitive) against raw tag strings.
GLOSS_TAGS = {
    # https://id.wiktionary.org/wiki/Templat:Istilah
    "cak": "slang",
    "kiasan": "figuratively",
    "mdt": "morpheme",
    "Sas": "literary",
    "arkais": "archaic",
    "klasik": "Classical",
    "nonformal": "informal",
    "akar": "root",
    "akronim": "acronym",
    "kasar": "impolite",
    "hormat": "honorific",
    "generalisasi": "general",
    "neologisme": "neologism",
    "eufemisme": "euphemism",
    # Language / regional labels.
    "Sunda": "Sundanese",
    "Aceh": "Acehnese",
    "Banjar": "Banjarese",
    "Minangkabau": "Minangkabau",
    "Gorontalo": "Gorontalo",
    "Madura": "Madurese",
    "Batak": "Batak",
}

27 

# Gender/number abbreviations mapped to English tags.
TRANSLATION_TAGS = {
    # https://id.wiktionary.org/wiki/Modul:gender_and_number/data
    "m": "masculine",
    "f": "feminine",
    "n": "neuter",
    "jamak": "plural",
}

35 

# Grammatical-number labels mapped to English tags.
NUM_TAGS = {
    "tunggal": "singular",
    "plural": "plural",
}

40 

# Accent labels mapped to English tags.
# NOTE(review): presumably attached to pronunciation data - confirm with callers.
SOUND_TAGS = {
    "RP": "Received-Pronunciation",
    "US": "US",
}

45 

# Voice/mood/transitivity labels mapped to English tags.
# NOTE(review): the name suggests these come from part-of-speech headers -
# confirm with callers.
POS_HEADER_TAGS = {
    "pasif": "passive",
    "transitif": "transitive",
    "imperatif": "imperative",
    "aktif": "active",
}

52 

# Combined raw-tag -> tag lookup consulted by translate_raw_tags().
# With dict unpacking, a later table would override an earlier one on a
# duplicate key; the five tables currently share no keys.
TAGS = {
    **TRANSLATION_TAGS,
    **GLOSS_TAGS,
    **NUM_TAGS,
    **SOUND_TAGS,
    **POS_HEADER_TAGS,
}

60 

# Subject-field abbreviations mapped to English topic names.
# A value is either a single topic string or a list of topic strings.
TOPICS = {
    "Kim": "chemistry",
    "Mat": "mathematics",
    "Ling": "linguistics",
    "Dok": "medicine",
    "Bio": "biology",
    # NOTE: "Sas" is also a key of GLOSS_TAGS (and therefore of TAGS).
    # translate_raw_tags() checks TAGS first, so this topic is only reached
    # for objects that have no `tags` attribute.
    "Sas": "literature",
    "Mus": "music",
    "Antr": "anthropology",
    "Ars": "architecture",
    "Ark": "archaeology",
    "Psi": "psychology",
    "Isl": "Islam",
    "Geo": "geology",
    "Hin": "Hinduism",
    "Hid": "hydrology",
    "Huk": "law",
    "Kat": "Catholicism",
    "Fis": "physics",
    "Olr": "sports",
    "Dik": "education",
    "Far": "pharmacology",
    "Lay": "shipping",
    "Komp": "computer",
    "Kris": "Christianity",
    "Bot": "botany",
    "Stat": "statistics",
    "Tan": "agriculture",
    "Elek": "electronics",
    "Anat": "anatomy",
    "Adm": "administration",
    "Sen": "arts",
    "Tas": "Sufism",
    "Graf": "printing",
    "Astron": "astronomy",
    "Met": "meteorology",
    "Dag": "commerce",
    "Zool": "zoology",
    "Min": "mineralogy",
    "Kom": "communications",
    "Hut": "forestry",
    "Eko": "economy",
    "Tek": "technology",
    "Mil": "military",
    "Sos": "sociology",
    "Pol": "politics",
    # "Tern": "",
    "Man": "management",
    "Ikn": "fishing",
    "Fil": "philosophy",
    "Astrol": "astrology",
    "Keu": "finance",
    "Filol": "philology",
    "Metal": "metallurgy",
    "Ent": "entomology",
    "Bud": "Buddhism",
    # "Idt": "industry",
    "Mik": "mycology",
    "Pet": "petrology",
    "Dem": "demography",
    # "Hidm": "hydrometeorology",
    "Film": "film",
    "Tbg": "culinary",
    "Bakt": "bacteriology",
    "Foto": "photography",
    # The only entry that expands to more than one topic.
    "Pang": ["food", "sciences"],
    "Hindu Bali": "Balinese Hinduism",
}

129 

130 

def translate_raw_tags(data: WordEntry) -> None:
    """Translate the recognised entries of ``data.raw_tags`` in place.

    Each raw tag is looked up first in ``TAGS`` and then in ``TOPICS``:

    * a ``TAGS`` hit is added to ``data.tags`` (only if ``data`` has a
      ``tags`` attribute);
    * otherwise a ``TOPICS`` hit is added to ``data.topics`` (only if
      ``data`` has a ``topics`` attribute);
    * anything else is kept as a raw tag.

    A mapped value may be a single string (appended) or a list of strings
    (extended). On return ``data.raw_tags`` holds only the raw tags that
    were not translated, in their original order.
    """
    untranslated = []
    for raw_tag in data.raw_tags:
        if raw_tag in TAGS and hasattr(data, "tags"):
            tr_tag = TAGS[raw_tag]
            # Every current TAGS value is a string; the list branch mirrors
            # the TOPICS handling below in case list values are added.
            if isinstance(tr_tag, str):
                data.tags.append(tr_tag)
            elif isinstance(tr_tag, list):
                data.tags.extend(tr_tag)
        elif raw_tag in TOPICS and hasattr(data, "topics"):
            topic = TOPICS[raw_tag]
            if isinstance(topic, str):
                data.topics.append(topic)
            elif isinstance(topic, list):
                data.topics.extend(topic)
        else:
            untranslated.append(raw_tag)
    # Replace rather than mutate, so the list being iterated is never
    # modified during the loop.
    data.raw_tags = untranslated