Coverage for src/wiktextract/extractor/ko/linkage.py: 83%

85 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1import re 

2 

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Linkage, WordEntry 

8from .section_titles import LINKAGE_SECTIONS 

9from .tags import translate_raw_tags 

10 

# Names of the Korean Wiktionary word-box templates (틀:파생어_상자 and
# 틀:합성어_상자) whose positional arguments list related words.
LINKAGE_TEMPLATES = frozenset({"파생어 상자", "합성어 상자"})

12 

13 

def extract_linkage_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
) -> None:
    """Collect the words listed in a word-box template into ``derived``.

    Only templates whose name is in ``LINKAGE_TEMPLATES`` are handled:
    https://ko.wiktionary.org/wiki/틀:파생어_상자
    https://ko.wiktionary.org/wiki/틀:합성어_상자

    Positional arguments 1 to 40 are read in order and reading stops at the
    first argument number that is absent. Every argument that cleans to a
    non-empty string is appended to ``word_entry.derived`` as a ``Linkage``
    whose ``sense`` is the last gloss of the last sense collected so far, or
    an empty string when no such gloss exists.

    Args:
        wxr: Extraction context passed through to ``clean_node``.
        word_entry: Entry that receives the new ``Linkage`` items.
        node: Template node to inspect.
    """
    if node.template_name not in LINKAGE_TEMPLATES:
        return

    # The sense is the same for every argument of the template, so it is
    # looked up once. An empty gloss list is treated like "no sense yet"
    # instead of raising IndexError.
    sense = ""
    if len(word_entry.senses) > 0:
        glosses = word_entry.senses[-1].glosses
        if len(glosses) > 0:
            sense = glosses[-1]

    for key in range(1, 41):
        if key not in node.template_parameters:
            # Arguments are expected to be contiguous; stop at the first gap.
            break
        word = clean_node(wxr, None, node.template_parameters[key])
        if word != "":
            word_entry.derived.append(Linkage(word=word, sense=sense))

35 

36 

def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Extract linkage data from one section of a page.

    A ``"proverbs"`` section is delegated entirely to
    ``extract_proverb_section``. For any other linkage type, every list item
    found recursively under ``level_node`` is passed to
    ``extract_linkage_list_item`` (flagged as being inside a linkage
    section), and every direct child template is passed to
    ``extract_linkage_template``; templates named ``외국어`` are additionally
    passed to ``extract_translation_template``.

    Args:
        wxr: Extraction context.
        word_entry: Entry that receives the extracted data.
        level_node: Section node to process.
        linkage_type: Name of the ``word_entry`` list attribute to fill.
    """
    if linkage_type == "proverbs":
        extract_proverb_section(wxr, word_entry, level_node)
        return

    # Imported inside the function, as in the original code
    # (presumably to avoid a circular import - TODO confirm).
    from .translation import extract_translation_template

    for item_node in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        extract_linkage_list_item(
            wxr, word_entry, item_node, linkage_type, True
        )

    for template in level_node.find_child(NodeKind.TEMPLATE):
        extract_linkage_template(wxr, word_entry, template)
        if template.template_name == "외국어":
            extract_translation_template(wxr, word_entry, template)

57 

58 

def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    linkage_type: str,
    in_linkage_section: bool,
) -> None:
    """Extract linkage words from a single list item.

    The plain-text children of the item are scanned first:

    * text containing ``:`` may override ``linkage_type`` when the part
      before the colon is a key of ``LINKAGE_SECTIONS``;
    * otherwise a parenthesised fragment ``(...)`` is remembered as a raw
      tag (the last one found wins).

    Each direct link child then yields one ``Linkage``. The remembered tag is
    stored as ``roman`` if it contains a lowercase ASCII letter, as
    ``sense_index`` if it consists only of digits, and as a raw tag
    otherwise. If the item contains no link node at all, the cleaned text of
    the whole item is used as the word.

    Args:
        wxr: Extraction context passed through to ``clean_node``.
        word_entry: Entry that receives the new ``Linkage`` items.
        list_item: List item node to process.
        linkage_type: Default name of the ``word_entry`` list attribute to
            append to; may be overridden by a ``label:`` prefix in the item.
        in_linkage_section: When true, the linkage is not tied to a gloss and
            ``sense`` is left empty.
    """
    raw_tag = ""
    is_roman = False
    for child in list_item.children:
        if not isinstance(child, str):
            continue
        if ":" in child:
            l_type_str = child[: child.index(":")].strip()
            if l_type_str in LINKAGE_SECTIONS:
                linkage_type = LINKAGE_SECTIONS[l_type_str]
        else:
            m = re.search(r"\(([^()]+)\)", child)
            if m is not None:
                raw_tag = m.group(1).strip()
                is_roman = re.search(r"[a-z]", raw_tag) is not None

    # Shared by every linkage created below. An empty gloss list is treated
    # like "no sense yet" instead of raising IndexError.
    sense = ""
    if not in_linkage_section and len(word_entry.senses) > 0:
        glosses = word_entry.senses[-1].glosses
        if len(glosses) > 0:
            sense = glosses[-1]

    for link_node in list_item.find_child(NodeKind.LINK):
        word = clean_node(wxr, None, link_node)
        if word == "":
            continue
        linkage = Linkage(word=word, sense=sense)
        if len(raw_tag) > 0:
            if is_roman:
                linkage.roman = raw_tag
            elif re.fullmatch(r"\d+", raw_tag) is not None:
                linkage.sense_index = raw_tag
            else:
                linkage.raw_tags.append(raw_tag)
                translate_raw_tags(linkage)
        getattr(word_entry, linkage_type).append(linkage)

    # Fallback for items without any link node: use the item's whole text.
    if not list_item.contain_node(NodeKind.LINK):
        word = clean_node(wxr, None, list_item.children)
        if word != "":
            linkage = Linkage(word=word, sense=sense)
            translate_raw_tags(linkage)
            getattr(word_entry, linkage_type).append(linkage)

110 

111 

def extract_proverb_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Extract proverbs from the list items of a proverb section.

    For each list item found recursively under ``level_node``, the first
    plain-text child containing ``:`` splits the item: everything before the
    colon becomes the proverb's ``word`` and everything after it becomes its
    ``sense``. When that yields no word, each ``l`` / ``연결`` template that
    is a direct child of the item is handed to ``extract_l_template``.

    Args:
        wxr: Extraction context passed through to ``clean_node``.
        word_entry: Entry whose ``proverbs`` list is extended.
        level_node: Section node to process.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        proverb = Linkage(word="")
        children = item.children
        for pos, node in enumerate(children):
            if not (isinstance(node, str) and ":" in node):
                continue
            # Split on the first colon only.
            before, _, after = node.partition(":")
            leading = clean_node(wxr, None, children[:pos])
            proverb.word = leading + before.strip()
            trailing = clean_node(wxr, None, children[pos + 1 :])
            proverb.sense = after.strip() + trailing
            break

        if proverb.word == "":
            for template in item.find_child(NodeKind.TEMPLATE):
                if template.template_name in ["l", "연결"]:
                    extract_l_template(wxr, word_entry, template, "proverbs")
        else:
            word_entry.proverbs.append(proverb)

134 

135 

def extract_l_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    linkage_type: str,
) -> None:
    """Extract one linkage from a link template (``l`` / ``연결``).

    https://ko.wiktionary.org/wiki/틀:연결
    https://en.wiktionary.org/wiki/Template:link

    The word is taken from positional argument 3 when it is present and
    non-empty, otherwise from argument 2. The sense is taken from the ``t``
    argument if present, otherwise from positional argument 4. Nothing is
    appended when neither word argument yields text.

    Args:
        wxr: Extraction context passed through to ``clean_node``.
        word_entry: Entry that receives the new ``Linkage``.
        t_node: Link template node to read.
        linkage_type: Name of the ``word_entry`` list attribute to append to.
    """
    for word_arg in [3, 2]:
        if word_arg not in t_node.template_parameters:
            continue
        word = clean_node(wxr, None, t_node.template_parameters[word_arg])
        if word == "":
            # An empty argument 3 is normal when a positional gloss is given
            # (`{{l|ko|word||gloss}}`), so fall back to argument 2 instead of
            # dropping the linkage.
            continue
        linkage = Linkage(word=word)
        for sense_arg in ["t", 4]:
            if sense_arg in t_node.template_parameters:
                linkage.sense = clean_node(
                    wxr, None, t_node.template_parameters[sense_arg]
                )
                break
        getattr(word_entry, linkage_type).append(linkage)
        break