Coverage for src/wiktextract/extractor/ko/linkage.py: 83%
88 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-12 08:27 +0000
1import re
3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from .models import Linkage, WordEntry
8from .section_titles import LINKAGE_SECTIONS
9from .tags import translate_raw_tags
# Names of the Korean Wiktionary "derived terms box" (파생어 상자) and
# "compound words box" (합성어 상자) templates.
LINKAGE_TEMPLATES = frozenset({"파생어 상자", "합성어 상자"})
def extract_linkage_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
) -> bool:
    """Collect the words listed in a linkage box template as derived terms.

    Only templates whose name is in ``LINKAGE_TEMPLATES`` are handled:
    https://ko.wiktionary.org/wiki/틀:파생어_상자
    https://ko.wiktionary.org/wiki/틀:합성어_상자

    Positional arguments 1 through 40 are read in order, stopping at the
    first argument number that is absent. Every argument that cleans to a
    non-empty string is appended to ``word_entry.derived``.

    Returns:
        True if at least one linkage was appended, otherwise False.
    """
    if node.template_name not in LINKAGE_TEMPLATES:
        return False

    # The linkage is attached to the most recent gloss of the most recent
    # sense. A sense may carry no glosses, so check both lists before
    # indexing instead of raising IndexError.
    sense = ""
    if len(word_entry.senses) > 0 and len(word_entry.senses[-1].glosses) > 0:
        sense = word_entry.senses[-1].glosses[-1]

    added_data = False
    for key in range(1, 41):
        if key not in node.template_parameters:
            # Arguments are consecutive; the first gap ends the list.
            break
        word = clean_node(wxr, None, node.template_parameters[key])
        if word != "":
            word_entry.derived.append(Linkage(word=word, sense=sense))
            added_data = True
    return added_data
def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Extract the linkages found under a linkage section heading.

    A "proverbs" section is delegated entirely to
    ``extract_proverb_section``. For any other linkage type, every list
    item below the heading is parsed as a linkage list item, and each
    template that is a direct child of the section is offered to
    ``extract_linkage_template``; "외국어" templates are additionally
    passed to the translation extractor.
    """
    if linkage_type == "proverbs":
        extract_proverb_section(wxr, word_entry, level_node)
        return

    # Function-scope import, only needed on the non-proverb path
    # (presumably avoids a circular import with .translation - TODO confirm).
    from .translation import extract_translation_template

    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        extract_linkage_list_item(
            wxr,
            word_entry,
            item,
            linkage_type,
            in_linkage_section=True,
        )

    for template in level_node.find_child(NodeKind.TEMPLATE):
        extract_linkage_template(wxr, word_entry, template)
        if template.template_name == "외국어":
            extract_translation_template(wxr, word_entry, template)
def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    linkage_type: str,
    in_linkage_section: bool,
) -> None:
    """Turn one wikitext list item into ``Linkage`` entries on ``word_entry``.

    The plain-text children of the item are scanned first:

    * text containing ":" may override ``linkage_type`` when the part before
      the colon is a key of ``LINKAGE_SECTIONS``;
    * other text may carry a parenthesised note, which is kept as a raw tag
      (the last such note in the item wins).

    Every link child then becomes a ``Linkage`` appended to the list
    attribute of ``word_entry`` named by ``linkage_type``. A note containing
    a lowercase ASCII letter is stored as ``roman``, an all-digit note as
    ``sense_index``, and anything else as a raw tag. When the item holds no
    link at all, its whole cleaned text is used as the word.

    Args:
        in_linkage_section: When False, the linkage ``sense`` is set to the
            latest gloss of the latest sense; when True it is left empty.
    """
    raw_tag = ""
    is_roman = False
    for child in list_item.children:
        if not isinstance(child, str):
            continue
        if ":" in child:
            l_type_str = child[: child.index(":")].strip()
            if l_type_str in LINKAGE_SECTIONS:
                linkage_type = LINKAGE_SECTIONS[l_type_str]
        else:
            m = re.search(r"\(([^()]+)\)", child)
            if m is not None:
                raw_tag = m.group(1).strip()
                is_roman = re.search(r"[a-z]", raw_tag) is not None

    # Computed once for all linkages of this item. Both lists are checked
    # because a sense without glosses would otherwise raise IndexError.
    sense = ""
    if (
        not in_linkage_section
        and len(word_entry.senses) > 0
        and len(word_entry.senses[-1].glosses) > 0
    ):
        sense = word_entry.senses[-1].glosses[-1]

    for link_node in list_item.find_child(NodeKind.LINK):
        word = clean_node(wxr, None, link_node)
        if word != "":
            linkage = Linkage(word=word, sense=sense)
            if len(raw_tag) > 0:
                if is_roman:
                    linkage.roman = raw_tag
                elif re.fullmatch(r"\d+", raw_tag) is not None:
                    linkage.sense_index = raw_tag
                else:
                    linkage.raw_tags.append(raw_tag)
                    translate_raw_tags(linkage)
            getattr(word_entry, linkage_type).append(linkage)

    # Fallback for items written as plain text without any wiki link.
    if not list_item.contain_node(NodeKind.LINK):
        word = clean_node(wxr, None, list_item.children)
        if word != "":
            linkage = Linkage(word=word, sense=sense)
            translate_raw_tags(linkage)
            getattr(word_entry, linkage_type).append(linkage)
def extract_proverb_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Extract proverbs from every list item below ``level_node``.

    An item is split at the first text child that contains ":": everything
    up to the colon becomes the proverb, everything after it the
    explanation, and the result is appended to ``word_entry.proverbs``.
    Items that yield no proverb this way are searched for "l" / "연결" link
    templates, which are handed to ``extract_l_template``.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        proverb = Linkage(word="")
        for pos, part in enumerate(item.children):
            if not isinstance(part, str) or ":" not in part:
                continue
            # Split only at the first colon of the first matching text node.
            before_colon, _, after_colon = part.partition(":")
            leading = clean_node(wxr, None, item.children[:pos])
            trailing = clean_node(wxr, None, item.children[pos + 1 :])
            proverb.word = leading + before_colon.strip()
            proverb.sense = after_colon.strip() + trailing
            break

        if proverb.word == "":
            # No "proverb: meaning" text found; fall back to link templates.
            for template in item.find_child(NodeKind.TEMPLATE):
                if template.template_name in ["l", "연결"]:
                    extract_l_template(wxr, word_entry, template, "proverbs")
            continue

        word_entry.proverbs.append(proverb)
def extract_l_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    linkage_type: str,
) -> None:
    """Add the word of a link template to ``word_entry``.

    Template documentation:
    https://ko.wiktionary.org/wiki/틀:연결
    https://en.wiktionary.org/wiki/Template:link

    The word is taken from positional argument 3 when it is present and
    non-empty, otherwise from argument 2. The sense is taken from the "t"
    argument if present, otherwise from positional argument 4. At most one
    ``Linkage`` is appended to the list attribute of ``word_entry`` named by
    ``linkage_type``; nothing is appended when no non-empty word is found.
    """
    params = t_node.template_parameters
    for word_arg in (3, 2):
        if word_arg not in params:
            continue
        word = clean_node(wxr, None, params[word_arg])
        if word == "":
            # An argument that is present but empty (as in
            # ``{{l|ko|word||gloss}}``) must not hide the word given in the
            # next candidate argument, so keep looking instead of giving up.
            continue
        linkage = Linkage(word=word)
        for sense_arg in ("t", 4):
            if sense_arg in params:
                linkage.sense = clean_node(wxr, None, params[sense_arg])
                break
        getattr(word_entry, linkage_type).append(linkage)
        break