Coverage for src/wiktextract/extractor/ko/linkage.py: 85%
59 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
1import re
3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from .models import Linkage, WordEntry
8from .section_titles import LINKAGE_SECTIONS
# Names of the box templates whose positional arguments are read as linkage
# words (see 틀:파생어_상자 and 틀:합성어_상자 on ko.wiktionary.org).
LINKAGE_TEMPLATES = frozenset({"파생어 상자", "합성어 상자"})
def extract_linkage_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
) -> None:
    """Append the words listed in a linkage box template to ``word_entry.derived``.

    Only templates named in ``LINKAGE_TEMPLATES`` are handled; any other
    template is ignored.  Positional arguments 1 through 40 are read in order
    and reading stops at the first missing position.  Every non-empty cleaned
    argument becomes a ``Linkage`` whose ``sense`` is the last gloss of the
    last sense in ``word_entry.senses``, or ``""`` when no gloss is available.

    Args:
        wxr: Extraction context, passed through to ``clean_node``.
        word_entry: Entry whose ``derived`` list is extended in place.
        node: Template node to inspect.
    """
    # https://ko.wiktionary.org/wiki/틀:파생어_상자
    # https://ko.wiktionary.org/wiki/틀:합성어_상자
    if node.template_name not in LINKAGE_TEMPLATES:
        return

    # The sense text does not change inside the loop, so compute it once.
    # Both lists are checked so that a sense without glosses yields "" instead
    # of raising IndexError.
    sense = ""
    if word_entry.senses and word_entry.senses[-1].glosses:
        sense = word_entry.senses[-1].glosses[-1]

    for key in range(1, 41):
        if key not in node.template_parameters:
            break
        word = clean_node(wxr, None, node.template_parameters[key])
        if word != "":
            word_entry.derived.append(Linkage(word=word, sense=sense))
def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Extract linkage data from one section node into ``word_entry``.

    A ``"proverbs"`` section is handed to ``extract_proverb_section``.  For
    every other linkage type, each list item found recursively under the
    section goes through ``extract_linkage_list_item`` and each direct
    template child goes through ``extract_linkage_template``; templates named
    ``외국어`` are additionally passed to ``extract_translation_template``.

    Args:
        wxr: Extraction context.
        word_entry: Entry that receives the extracted data.
        level_node: Section node to walk.
        linkage_type: Name of the ``word_entry`` list this section feeds.
    """
    if linkage_type == "proverbs":
        extract_proverb_section(wxr, word_entry, level_node)
        return

    # NOTE(review): imported at call time rather than at module top,
    # presumably to avoid a circular import with .translation; confirm.
    from .translation import extract_translation_template

    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        extract_linkage_list_item(wxr, word_entry, item, linkage_type)

    for template in level_node.find_child(NodeKind.TEMPLATE):
        extract_linkage_template(wxr, word_entry, template)
        if template.template_name == "외국어":
            extract_translation_template(wxr, word_entry, template)
def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    linkage_type: str,
) -> None:
    """Turn the links of one list item into ``Linkage`` entries.

    The plain-text children are scanned first:

    * Text containing ``:`` may override ``linkage_type``: the part before the
      first colon is looked up in ``LINKAGE_SECTIONS``.
    * Other text is searched for a parenthesised fragment, which becomes the
      tag applied to the links of this item.  When several fragments are
      found, the last one wins.

    Each non-empty link is then appended to the ``word_entry`` list named by
    ``linkage_type``.  The tag is stored in ``roman`` when it contains a
    lowercase ASCII letter and in ``raw_tags`` otherwise.

    Args:
        wxr: Extraction context, passed through to ``clean_node``.
        word_entry: Entry whose linkage list is extended in place.
        list_item: List item node to process.
        linkage_type: Default target list name; may be overridden per item.
    """
    raw_tag = ""
    is_roman = False
    for child in list_item.children:
        if not isinstance(child, str):
            continue
        if ":" in child:
            l_type_str = child[: child.index(":")].strip()
            if l_type_str in LINKAGE_SECTIONS:
                linkage_type = LINKAGE_SECTIONS[l_type_str]
        else:
            m = re.search(r"\(([^()]+)\)", child)
            if m is not None:
                raw_tag = m.group(1).strip()
                is_roman = re.search(r"[a-z]", raw_tag) is not None

    # The sense text is the same for every link, so compute it once.  Both
    # lists are checked so that a sense without glosses yields "" instead of
    # raising IndexError.
    sense = ""
    if word_entry.senses and word_entry.senses[-1].glosses:
        sense = word_entry.senses[-1].glosses[-1]

    for link_node in list_item.find_child(NodeKind.LINK):
        word = clean_node(wxr, None, link_node)
        if word == "":
            continue
        linkage = Linkage(word=word, sense=sense)
        if raw_tag != "":
            if is_roman:
                linkage.roman = raw_tag
            else:
                linkage.raw_tags.append(raw_tag)
        getattr(word_entry, linkage_type).append(linkage)
def extract_proverb_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Collect proverbs from the list items of a section.

    Each list item is split at the first ``:`` found in one of its plain-text
    children.  Everything before the colon becomes the proverb (``word``) and
    everything after it becomes its explanation (``sense``).  Items that
    produce an empty ``word``, including items with no colon, are skipped.

    Args:
        wxr: Extraction context, passed through to ``clean_node``.
        word_entry: Entry whose ``proverbs`` list is extended in place.
        level_node: Section node whose list items are walked recursively.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        proverb = Linkage(word="")
        nodes = item.children
        for pos, node in enumerate(nodes):
            if not (isinstance(node, str) and ":" in node):
                continue
            # Split only at the first colon; later colons stay in the sense.
            before, _, after = node.partition(":")
            proverb.word = clean_node(wxr, None, nodes[:pos]) + before.strip()
            proverb.sense = after.strip() + clean_node(
                wxr, None, nodes[pos + 1 :]
            )
            break
        if proverb.word != "":
            word_entry.proverbs.append(proverb)