Coverage for src/wiktextract/extractor/ko/linkage.py: 83%

88 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-12 08:27 +0000

1import re 

2 

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Linkage, WordEntry 

8from .section_titles import LINKAGE_SECTIONS 

9from .tags import translate_raw_tags 

10 

# Names of the ko.wiktionary box templates (파생어 상자, 합성어 상자) whose
# positional arguments are collected by `extract_linkage_template`.
LINKAGE_TEMPLATES = frozenset(["파생어 상자", "합성어 상자"])


def extract_linkage_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
) -> bool:
    """Collect the words listed in a linkage box template.

    Positional parameters 1..40 of a template named in `LINKAGE_TEMPLATES`
    are cleaned with `clean_node`; every non-empty result is appended to
    `word_entry.derived` as a `Linkage`.  Scanning stops at the first
    missing positional parameter.

    Args:
        wxr: Extraction context passed through to `clean_node`.
        word_entry: Entry whose `derived` list receives the new linkages.
        node: Template node to inspect.

    Returns:
        True if at least one linkage was appended, otherwise False.
    """
    # https://ko.wiktionary.org/wiki/틀:파생어_상자
    # https://ko.wiktionary.org/wiki/틀:합성어_상자
    if node.template_name not in LINKAGE_TEMPLATES:
        return False

    # The words are tied to the last gloss of the last sense collected so
    # far.  A missing sense, or a sense without glosses, gives "" instead of
    # raising IndexError.
    sense = ""
    if len(word_entry.senses) > 0 and len(word_entry.senses[-1].glosses) > 0:
        sense = word_entry.senses[-1].glosses[-1]

    added_data = False
    for key in range(1, 41):
        if key not in node.template_parameters:
            break
        word = clean_node(wxr, None, node.template_parameters[key])
        if word != "":
            word_entry.derived.append(Linkage(word=word, sense=sense))
            added_data = True
    return added_data

38 

39 

def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Extract the linkages found under a linkage section heading.

    A "proverbs" section is delegated entirely to `extract_proverb_section`.
    For any other type, every list item found recursively under
    `level_node` is passed to `extract_linkage_list_item`, and every direct
    template child is passed to `extract_linkage_template`; a template named
    "외국어" is additionally passed to `extract_translation_template`.

    Args:
        wxr: Extraction context.
        word_entry: Entry that receives the extracted data.
        level_node: Section node to scan.
        linkage_type: Name of the `word_entry` list to fill.
    """
    if linkage_type == "proverbs":
        extract_proverb_section(wxr, word_entry, level_node)
        return

    # Function-level import, only needed on this path
    # (presumably avoids a circular import -- TODO confirm).
    from .translation import extract_translation_template

    for item_node in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        extract_linkage_list_item(
            wxr, word_entry, item_node, linkage_type, in_linkage_section=True
        )

    for template in level_node.find_child(NodeKind.TEMPLATE):
        extract_linkage_template(wxr, word_entry, template)
        if template.template_name == "외국어":
            extract_translation_template(wxr, word_entry, template)

60 

61 

def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    linkage_type: str,
    in_linkage_section: bool,
) -> None:
    """Extract linkage words from a single list item.

    String children are scanned first:
      * text containing ":" whose prefix is a key of `LINKAGE_SECTIONS`
        switches the target list to the mapped linkage type;
      * otherwise, a parenthesised fragment is remembered as a raw tag.

    Each link child then becomes a `Linkage`.  The remembered raw tag is
    stored as `roman` when it contains a lowercase ASCII letter, as
    `sense_index` when it consists only of digits, and otherwise as a raw
    tag that is run through `translate_raw_tags`.  If the item contains no
    link node at all, its whole cleaned text is used as the word.

    Args:
        wxr: Extraction context passed through to `clean_node`.
        word_entry: Entry whose linkage lists receive the results.
        list_item: List item node to parse.
        linkage_type: Default name of the `word_entry` list to append to.
        in_linkage_section: When True the linkage gets an empty `sense`;
            when False it gets the last gloss of the last sense.
    """
    raw_tag = ""
    is_roman = False
    for child in list_item.children:
        if not isinstance(child, str):
            continue
        if ":" in child:
            l_type_str = child[: child.index(":")].strip()
            if l_type_str in LINKAGE_SECTIONS:
                linkage_type = LINKAGE_SECTIONS[l_type_str]
        else:
            m = re.search(r"\(([^()]+)\)", child)
            if m is not None:
                raw_tag = m.group(1).strip()
                is_roman = re.search(r"[a-z]", raw_tag) is not None

    # Computed once for both branches below.  A missing sense, or a sense
    # without glosses, gives "" instead of raising IndexError.
    sense = ""
    if (
        not in_linkage_section
        and len(word_entry.senses) > 0
        and len(word_entry.senses[-1].glosses) > 0
    ):
        sense = word_entry.senses[-1].glosses[-1]

    for link_node in list_item.find_child(NodeKind.LINK):
        word = clean_node(wxr, None, link_node)
        if word == "":
            continue
        linkage = Linkage(word=word, sense=sense)
        if len(raw_tag) > 0:
            if is_roman:
                linkage.roman = raw_tag
            elif re.fullmatch(r"\d+", raw_tag) is not None:
                linkage.sense_index = raw_tag
            else:
                linkage.raw_tags.append(raw_tag)
                translate_raw_tags(linkage)
        getattr(word_entry, linkage_type).append(linkage)

    # Fallback for plain-text items: no link node anywhere in the item.
    if not list_item.contain_node(NodeKind.LINK):
        word = clean_node(wxr, None, list_item.children)
        if word != "":
            linkage = Linkage(word=word, sense=sense)
            translate_raw_tags(linkage)
            getattr(word_entry, linkage_type).append(linkage)

113 

114 

def extract_proverb_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Extract proverbs from the list items of a proverb section.

    For each list item, the first string child containing ":" splits the
    item: everything before the colon becomes the proverb (`word`) and
    everything after it becomes its explanation (`sense`).  When that gives
    no word, the item's "l" / "연결" templates are handed to
    `extract_l_template` instead.

    Args:
        wxr: Extraction context passed through to `clean_node`.
        word_entry: Entry whose `proverbs` list receives the results.
        level_node: Section node whose list items are scanned recursively.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        proverb = Linkage(word="")
        children = item.children
        for pos, part in enumerate(children):
            if not (isinstance(part, str) and ":" in part):
                continue
            # Split at the first colon only.
            before, _, after = part.partition(":")
            proverb.word = (
                clean_node(wxr, None, children[:pos]) + before.strip()
            )
            proverb.sense = after.strip() + clean_node(
                wxr, None, children[pos + 1 :]
            )
            break

        if proverb.word != "":
            word_entry.proverbs.append(proverb)
            continue

        for template in item.find_child(NodeKind.TEMPLATE):
            if template.template_name in ["l", "연결"]:
                extract_l_template(wxr, word_entry, template, "proverbs")

137 

138 

def extract_l_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    linkage_type: str,
) -> None:
    """Extract one linkage from an "l" / "연결" link template.

    The word is taken from positional parameter 3 when it cleans to
    non-empty text, otherwise from parameter 2.  The sense is taken from the
    first of parameters "t" and 4 that is present.  At most one `Linkage` is
    appended.

    Args:
        wxr: Extraction context passed through to `clean_node`.
        word_entry: Entry whose linkage list receives the result.
        t_node: Link template node to read.
        linkage_type: Name of the `word_entry` list to append to.
    """
    # https://ko.wiktionary.org/wiki/틀:연결
    # https://en.wiktionary.org/wiki/Template:link
    params = t_node.template_parameters
    for word_arg in [3, 2]:
        if word_arg not in params:
            continue
        word = clean_node(wxr, None, params[word_arg])
        if word == "":
            # Parameter 3 is often present but empty, as in
            # {{l|ko|term||gloss}}.  Fall through to parameter 2 rather
            # than giving up on the template.
            continue
        linkage = Linkage(word=word)
        for sense_arg in ["t", 4]:
            if sense_arg in params:
                linkage.sense = clean_node(wxr, None, params[sense_arg])
                break
        getattr(word_entry, linkage_type).append(linkage)
        break