Coverage for src/wiktextract/extractor/ko/linkage.py: 83%

1import re

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

5from ...page import clean_node

6from ...wxr_context import WiktextractContext

7from .models import Linkage, WordEntry

8from .section_titles import LINKAGE_SECTIONS

9from .tags import translate_raw_tags

# Names of the box templates that list derived/compound words, see
# https://ko.wiktionary.org/wiki/틀:파생어_상자 and
# https://ko.wiktionary.org/wiki/틀:합성어_상자
LINKAGE_TEMPLATES = frozenset({"파생어 상자", "합성어 상자"})

def extract_linkage_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
) -> None:
    """Append the words listed in a linkage box template as derived terms.

    Only templates whose name is in ``LINKAGE_TEMPLATES`` are handled:
    https://ko.wiktionary.org/wiki/틀:파생어_상자
    https://ko.wiktionary.org/wiki/틀:합성어_상자

    Positional arguments 1 to 40 are read in order, stopping at the first
    argument that is missing.  Every argument whose cleaned text is not
    empty is appended to ``word_entry.derived`` as a ``Linkage``.  The
    ``sense`` of each linkage is the last gloss of the last sense of
    ``word_entry``, or ``""`` when no such gloss exists.
    """
    if node.template_name not in LINKAGE_TEMPLATES:
        return

    # Check both lists: a sense that has no gloss must not raise IndexError.
    sense = ""
    if len(word_entry.senses) > 0 and len(word_entry.senses[-1].glosses) > 0:
        sense = word_entry.senses[-1].glosses[-1]

    for key in range(1, 41):
        if key not in node.template_parameters:
            break
        word = clean_node(wxr, None, node.template_parameters[key])
        if word != "":
            word_entry.derived.append(Linkage(word=word, sense=sense))

def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Extract linkage data from one section and store it on `word_entry`.

    A "proverbs" section is delegated to `extract_proverb_section()`.
    For every other linkage type, each list item found anywhere below
    `level_node` is passed to `extract_linkage_list_item()`, and each
    template that is a direct child of `level_node` is passed to
    `extract_linkage_template()`; "외국어" templates are additionally
    passed to `extract_translation_template()`.
    """
    if linkage_type == "proverbs":
        extract_proverb_section(wxr, word_entry, level_node)
        return

    # Function-level import kept from the original code
    # NOTE(review): presumably avoids a circular import - confirm.
    from .translation import extract_translation_template

    for item_node in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        extract_linkage_list_item(
            wxr, word_entry, item_node, linkage_type, True
        )

    for template in level_node.find_child(NodeKind.TEMPLATE):
        extract_linkage_template(wxr, word_entry, template)
        if template.template_name == "외국어":
            extract_translation_template(wxr, word_entry, template)

def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    linkage_type: str,
    in_linkage_section: bool,
) -> None:
    """Convert one list item into `Linkage` objects on `word_entry`.

    The string children of the list item are scanned first:

    - a string containing ":" may override `linkage_type` when the text
      before the first colon is a key of `LINKAGE_SECTIONS`;
    - otherwise text in parentheses is remembered as a raw tag (the last
      match wins).

    Every direct link child then produces one linkage.  The remembered
    raw tag is stored as `roman` when it contains a lowercase ASCII
    letter, as `sense_index` when it consists only of digits, and as a
    raw tag otherwise.  When the list item contains no link node at all,
    its whole cleaned text is used as the word.

    Linkages are appended to the `word_entry` attribute named by
    `linkage_type`.  Outside of a linkage section the linkage `sense` is
    the last gloss of the last sense of `word_entry`; it is `""` inside a
    linkage section or when no such gloss exists.
    """
    raw_tag = ""
    is_roman = False
    for child in list_item.children:
        if not isinstance(child, str):
            continue
        if ":" in child:
            l_type_str = child[: child.index(":")].strip()
            if l_type_str in LINKAGE_SECTIONS:
                linkage_type = LINKAGE_SECTIONS[l_type_str]
        else:
            m = re.search(r"\(([^()]+)\)", child)
            if m is not None:
                raw_tag = m.group(1).strip()
                is_roman = re.search(r"[a-z]", raw_tag) is not None

    # Computed once for both branches below.  Both lists are checked so
    # that a sense without any gloss cannot raise IndexError.
    sense = ""
    if (
        not in_linkage_section
        and len(word_entry.senses) > 0
        and len(word_entry.senses[-1].glosses) > 0
    ):
        sense = word_entry.senses[-1].glosses[-1]

    for link_node in list_item.find_child(NodeKind.LINK):
        word = clean_node(wxr, None, link_node)
        if word == "":
            continue
        linkage = Linkage(word=word, sense=sense)
        if len(raw_tag) > 0:
            if is_roman:
                linkage.roman = raw_tag
            elif re.fullmatch(r"\d+", raw_tag) is not None:
                linkage.sense_index = raw_tag
            else:
                linkage.raw_tags.append(raw_tag)
                # NOTE(review): called only after a raw tag was added;
                # confirm this matches the intended nesting.
                translate_raw_tags(linkage)
        getattr(word_entry, linkage_type).append(linkage)

    if not list_item.contain_node(NodeKind.LINK):
        # No link anywhere in the item: use its whole text as the word.
        word = clean_node(wxr, None, list_item.children)
        if word != "":
            linkage = Linkage(word=word, sense=sense)
            translate_raw_tags(linkage)
            getattr(word_entry, linkage_type).append(linkage)

110

111

def extract_proverb_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Extract proverbs from every list item below `level_node`.

    A list item is split at the first ":" found in its first string
    child that contains one: everything before the colon becomes the
    proverb (`word`), everything after it becomes its explanation
    (`sense`).  Items that yield no proverb text this way are searched
    for "l"/"연결" templates, which are handed to `extract_l_template()`.
    """
    for list_item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        proverb = Linkage(word="")
        for pos, part in enumerate(list_item.children):
            if not isinstance(part, str) or ":" not in part:
                continue
            before, _, after = part.partition(":")
            prefix = clean_node(wxr, None, list_item.children[:pos])
            proverb.word = prefix + before.strip()
            suffix = clean_node(wxr, None, list_item.children[pos + 1 :])
            proverb.sense = after.strip() + suffix
            break  # only the first string with a colon is used

        if proverb.word != "":
            word_entry.proverbs.append(proverb)
            continue

        for t_node in list_item.find_child(NodeKind.TEMPLATE):
            if t_node.template_name in ["l", "연결"]:
                extract_l_template(wxr, word_entry, t_node, "proverbs")

134

135

def extract_l_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    linkage_type: str,
) -> None:
    """Add the word linked by an "l"/"연결" template to `word_entry`.

    https://ko.wiktionary.org/wiki/틀:연결
    https://en.wiktionary.org/wiki/Template:link

    The word is taken from argument 3 and, failing that, from argument 2;
    the first of them whose cleaned text is not empty wins.  The sense is
    taken from the first of the arguments "t" and 4 that is present.  The
    resulting `Linkage` is appended to the `word_entry` attribute named
    by `linkage_type`.  Nothing is added when neither word argument
    yields text.
    """
    params = t_node.template_parameters
    for word_arg in [3, 2]:
        if word_arg not in params:
            continue
        word = clean_node(wxr, None, params[word_arg])
        if word == "":
            # Argument present but empty, e.g. `{{l|ko|word||gloss}}`:
            # try the next candidate instead of dropping the link.
            continue
        linkage = Linkage(word=word)
        for sense_arg in ["t", 4]:
            if sense_arg in params:
                linkage.sense = clean_node(wxr, None, params[sense_arg])
                break
        getattr(word_entry, linkage_type).append(linkage)
        break