Coverage for src/wiktextract/extractor/ko/tags.py: 90%

20 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-30 10:31 +0000

1from .models import WordEntry 

2 

3# https://ko.wiktionary.org/wiki/모듈:labels/data/topical 

4# https://ko.wiktionary.org/wiki/모듈:labels/data 

# Korean gloss labels -> English tag.  Every value here is a single string.
# Two labels, "대명동사" and "말고름", are deliberately left unmapped.
GLOSS_TAGS = {
    "인명": "name",
    # register / currency of the word
    "고어": "archaic",
    "구식": "archaic",
    "비유": "metaphoric",
    "사어": "obsolete",  # source comment: "dead language"
    "유아어": "baby-talk",
    # verb valency and literal readings
    "자동사": "intransitive",
    "직역": "literally",
    "타동사": "transitive",
    "드물게": "rare",
    "원래의 의미": "naturally",
    # tone
    "문학적": "literary",
    "해학적": "humorous",
    "완곡적": "euphemistic",
    # noun countability
    "가산": "countable",
    "불가산": "uncountable",
}

25 

# Pronunciation labels -> English tag(s).  A value is either one tag string
# or a list of tag strings that are all applied together.
SOUND_TAGS = {
    # --- 틀:ko-IPA ---
    "Revised Romanization": ["revised", "romanization"],
    "Revised Romanization (translit.)": ["revised", "romanization", "transliteration"],
    "McCune-Reischauer": "McCune-Reischauer",
    "Yale Romanization": ["Yale", "romanization"],
    "표준어/서울": ["SK-Standard", "Seoul"],
    # --- 틀:ja-pron ---
    "도쿄": "Tokyo",
    # --- 틀:발음 듣기 and 틀:IPA ---
    "영국": "UK",
    "미국": "US",
    "영": "UK",
    "미": "US",
    "표준": "standard",
    "남부": "South",
    "북부": "North",
    "고대": "archaic",
    "동부": "East",
    "서부": "West",
    "포르투갈": "Portugal",
    "이집트": "Egypt",
    "시리아": "Syria",
    "브라질": "Brazil",
    "독일": "Germany",
    "현대": "modern",
    "캐나다": "Canada",
    "하노이": "Hanoi",
    "브라질 남부": "Southern-Brazil",
    "벨기에": "Belgium",
    "이란": "Iran",
    "파리": "Paris",
    "모로코": "Morocco",
    "베를린": "Berlin",
    "비격식체": "informal",
    "민난어 장저우": ["Min-Nan", "Zhangzhou"],
}

67 

# Form labels from the 틀:한국어_동사 template -> English tag.
HEADER_TAGS = {
    "부정사형": "infinitive",
    "연결어미형": "sequential",
    "명사형": "noun",
    "사동사": "causative",
}

75 

# Labels found in translation entries; the same labels also occur in
# linkage lists.  Values are one tag string or a list of tag strings.
TRANSLATION_TAGS = {
    "남성": "masculine",
    "여성": "feminine",
    "라틴": "Latin",
    "중성": "neuter",
    # Chinese character forms
    "간체": "Simplified-Chinese",
    "번체": "Traditional-Chinese",
    "번체자": "Traditional-Chinese",
    "오스트리아": "Austria",
    "표준어": "standard",
    # scripts
    "히브리 문자": ["Hebrew", "letter"],
    "아랍 문자": ["Arabic", "letter"],
    # number and aspect
    "복수형": "plural",
    "단수": "singular",
    "복수": "plural",
    "불완료체": "imperfect",
    "완료체": "completive",
    "양성": "masculine",
    "바이에른 방언": ["Bavarian", "dialectal"],
    "광둥어": "Cantonese",
    "오스트레일리아": "Australia",
    "글라골 문자": ["Glagolitic", "letter"],
    "속어": "slang",
    "멕시코 속어": ["Mexico", "slang"],
    "에스파냐": "Spain",
    "가타카나": "katakana",
    "고어": "archaic",
    "쯔놈": "Chu-Nom",
    # parts of speech and usage
    "형용사": "adjective",
    "사투리": "dialectal",
    "약자": "abbreviation",
    "동사": "verb",
    "드문 단어": "rare",
}

111 

# Labels produced by the Chinese templates; merged last into TAGS below.
_ZH_TEMPLATE_TAGS = {
    # Template:zh-forms
    "정체": "Traditional-Chinese",
    # "간체" is also a TRANSLATION_TAGS key, mapped to the same value there.
    "간체": "Simplified-Chinese",
    # Template:zh-x
    "대만 관화": "Taiwanese-Mandarin",
    "표준 중국어": "Standard-Chinese",
    "한어병음": "Pinyin",
    "광저우 광둥어": "Guangzhou-Cantonese",
    "월병": "Jyutping",
}

# The combined lookup table.  Later tables win when a key repeats, so the
# merge order below is significant.
TAGS = {
    **GLOSS_TAGS,
    **SOUND_TAGS,
    **HEADER_TAGS,
    **TRANSLATION_TAGS,
    **_ZH_TEMPLATE_TAGS,
}

127 

# Korean subject labels -> English topic name.
TOPICS = {
    "금융": "finance",
    "광고": "advertising",
    "군사": "military",
    "어류": "fish",
    "물리": "physics",
    "법률": "law",
    "식물": "botany",
    "역사": "history",
    "의류": "clothing",
    "의학": "medicine",
    "전기": "electricity",
    # disabled entry: "조류" -> "birds"
    "지리": "geography",
    "프로그래밍": "programming",
    "컴퓨터": "computer",
    "해부학": "anatomy",
    "정치": "politics",
    "종교": "religion",
    "가톨릭": "Catholicism",
    "축구": "football",
    # disabled entry: "체육" -> "physical-education"
}

151 

152 

def translate_raw_tags(data: WordEntry) -> None:
    """Sort the entries of ``data.raw_tags`` into tags, topics and leftovers.

    Each raw tag is looked up in ``TAGS`` first: a string value is appended
    to ``data.tags`` and a list value is appended element by element.  A raw
    tag not in ``TAGS`` is looked up in ``TOPICS``, but only when ``data``
    has a ``topics`` attribute; a hit is appended to ``data.topics``.
    Anything matched by neither table stays in ``data.raw_tags``, in its
    original relative order.

    The object is modified in place and nothing is returned.
    """
    unresolved = []
    for label in data.raw_tags:
        if label in TAGS:
            mapped = TAGS[label]
            if isinstance(mapped, str):
                data.tags.append(mapped)
            elif isinstance(mapped, list):
                data.tags.extend(mapped)
            # A TAGS hit always consumes the label, even if the mapped value
            # is neither a string nor a list and so adds no tag.
            continue
        if hasattr(data, "topics") and label in TOPICS:
            data.topics.append(TOPICS[label])
            continue
        unresolved.append(label)
    data.raw_tags = unresolved