Coverage for src/wiktextract/extractor/ko/linkage.py: 83%
85 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1import re
3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from .models import Linkage, WordEntry
8from .section_titles import LINKAGE_SECTIONS
9from .tags import translate_raw_tags
# Box templates handled by extract_linkage_template().
LINKAGE_TEMPLATES = frozenset(["파생어 상자", "합성어 상자"])


def extract_linkage_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
) -> None:
    """Append the words of a linkage box template to ``word_entry.derived``.

    Templates whose name is not in ``LINKAGE_TEMPLATES`` are ignored.
    Positional arguments 1 through 40 are read in order, stopping at the
    first one that is missing; arguments that clean to an empty string are
    skipped.  Every new ``Linkage`` gets the last gloss of the last sense
    already stored on ``word_entry`` as its ``sense``, or ``""`` when no
    such gloss exists.
    """
    # https://ko.wiktionary.org/wiki/틀:파생어_상자
    # https://ko.wiktionary.org/wiki/틀:합성어_상자
    if node.template_name not in LINKAGE_TEMPLATES:
        return

    # Check the gloss list as well as the sense list: indexing
    # ``glosses[-1]`` on a sense without glosses would raise IndexError.
    sense = ""
    if len(word_entry.senses) > 0 and len(word_entry.senses[-1].glosses) > 0:
        sense = word_entry.senses[-1].glosses[-1]

    for key in range(1, 41):
        if key not in node.template_parameters:
            break
        word = clean_node(wxr, None, node.template_parameters[key])
        if word != "":
            word_entry.derived.append(Linkage(word=word, sense=sense))
def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Extract the linkage data found under one section node.

    A ``"proverbs"`` section is handed to ``extract_proverb_section``.
    For every other type, each list item found recursively under
    ``level_node`` goes through ``extract_linkage_list_item`` and each
    template returned by ``level_node.find_child`` goes through
    ``extract_linkage_template``; "외국어" templates are additionally
    passed to ``extract_translation_template``.
    """
    if linkage_type == "proverbs":
        extract_proverb_section(wxr, word_entry, level_node)
        return

    # Function-level import kept from the original code
    # (presumably to avoid a circular import -- confirm).
    from .translation import extract_translation_template

    for item_node in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        extract_linkage_list_item(
            wxr, word_entry, item_node, linkage_type, True
        )

    for template in level_node.find_child(NodeKind.TEMPLATE):
        extract_linkage_template(wxr, word_entry, template)
        if template.template_name == "외국어":
            extract_translation_template(wxr, word_entry, template)
def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    linkage_type: str,
    in_linkage_section: bool,
) -> None:
    """Extract linkage words from one list item and add them to ``word_entry``.

    The plain-string children of ``list_item`` are scanned first:

    * a string containing ":" may override ``linkage_type``: the stripped
      text before the first colon is looked up in ``LINKAGE_SECTIONS``;
    * any other string is searched for a parenthesised fragment, which
      becomes the raw tag (a later match replaces an earlier one).

    Every link node returned by ``list_item.find_child`` whose cleaned text
    is non-empty becomes a ``Linkage``.  The raw tag, if any, is stored as
    ``roman`` when it contains a lowercase ASCII letter, as ``sense_index``
    when it consists only of digits, and otherwise appended to ``raw_tags``
    and passed through ``translate_raw_tags``.  When the list item contains
    no link node at all, its whole cleaned text is used as the word.

    ``sense`` is the last gloss of the last sense on ``word_entry`` unless
    ``in_linkage_section`` is true or no such gloss exists, in which case
    it is ``""``.
    """
    raw_tag = ""
    is_roman = False
    for child in list_item.children:
        if not isinstance(child, str):
            continue
        if ":" in child:
            l_type_str = child[: child.index(":")].strip()
            if l_type_str in LINKAGE_SECTIONS:
                linkage_type = LINKAGE_SECTIONS[l_type_str]
        else:
            m = re.search(r"\(([^()]+)\)", child)
            if m is not None:
                raw_tag = m.group(1).strip()
                is_roman = re.search(r"[a-z]", raw_tag) is not None

    # Computed once for both code paths below.  The gloss list is checked
    # too, because ``glosses[-1]`` on a sense without glosses would raise
    # IndexError.
    sense = ""
    if (
        not in_linkage_section
        and len(word_entry.senses) > 0
        and len(word_entry.senses[-1].glosses) > 0
    ):
        sense = word_entry.senses[-1].glosses[-1]

    for link_node in list_item.find_child(NodeKind.LINK):
        word = clean_node(wxr, None, link_node)
        if word == "":
            continue
        linkage = Linkage(word=word, sense=sense)
        if len(raw_tag) > 0:
            if is_roman:
                linkage.roman = raw_tag
            elif re.fullmatch(r"\d+", raw_tag) is not None:
                linkage.sense_index = raw_tag
            else:
                linkage.raw_tags.append(raw_tag)
                translate_raw_tags(linkage)
        getattr(word_entry, linkage_type).append(linkage)

    # No link anywhere in the item: fall back to the item's full text.
    if not list_item.contain_node(NodeKind.LINK):
        word = clean_node(wxr, None, list_item.children)
        if word != "":
            linkage = Linkage(word=word, sense=sense)
            translate_raw_tags(linkage)
            getattr(word_entry, linkage_type).append(linkage)
def extract_proverb_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Collect proverbs from the list items under ``level_node``.

    For each list item, the first plain-string child containing ":" splits
    the item in two: the cleaned nodes before it plus the stripped text
    before the colon form the proverb, and the stripped text after the
    colon plus the cleaned nodes after it form its sense.  If that yields
    an empty word, the item's "l" / "연결" templates are passed to
    ``extract_l_template`` instead.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        proverb = Linkage(word="")
        nodes = item.children
        for pos, node in enumerate(nodes):
            if not (isinstance(node, str) and ":" in node):
                continue
            # partition() splits on the first colon only.
            head, _, tail = node.partition(":")
            proverb.word = clean_node(wxr, None, nodes[:pos]) + head.strip()
            proverb.sense = tail.strip() + clean_node(
                wxr, None, nodes[pos + 1 :]
            )
            break

        if proverb.word != "":
            word_entry.proverbs.append(proverb)
            continue

        for template in item.find_child(NodeKind.TEMPLATE):
            if template.template_name in ["l", "연결"]:
                extract_l_template(wxr, word_entry, template, "proverbs")
def extract_l_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    linkage_type: str,
) -> None:
    """Add the word of a link template to ``word_entry.<linkage_type>``.

    The word is the cleaned text of positional argument 3 or, when that
    argument is missing or cleans to an empty string, of argument 2.
    Nothing is added when neither gives a non-empty word.  The sense is the
    cleaned text of the first argument present among ``t`` and 4, if any.
    """
    # https://ko.wiktionary.org/wiki/틀:연결
    # https://en.wiktionary.org/wiki/Template:link
    params = t_node.template_parameters

    # An empty argument 3 must not hide the word in argument 2, so keep
    # looking instead of giving up on the first empty value.
    word = ""
    for word_arg in (3, 2):
        if word_arg in params:
            word = clean_node(wxr, None, params[word_arg])
            if word != "":
                break
    if word == "":
        return

    linkage = Linkage(word=word)
    for sense_arg in ("t", 4):
        if sense_arg in params:
            linkage.sense = clean_node(wxr, None, params[sense_arg])
            break
    getattr(word_entry, linkage_type).append(linkage)