# Coverage for src/wiktextract/extractor/ko/example.py: 95% (82 statements)
# Report generated by coverage.py v7.6.10 at 2024-12-27 08:07 +0000
1from wikitextprocessor import NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..ruby import extract_ruby
6from ..share import set_sound_file_url_fields
7from .models import Example, Sense, Sound
def extract_example_list_item(
    wxr: WiktextractContext,
    sense: Sense,
    list_item: WikiNode,
    lang_code: str,
    parent_example: Example | None = None,
) -> None:
    """Extract example data from one list item and append it to ``sense``.

    The children of ``list_item`` are scanned in order:

    * a ``lang`` template is handed to ``extract_example_lang_template``;
      nodes that follow it are collected as translation nodes,
    * templates whose name starts with "따옴" or "지봉유설" provide the
      example's ``ref``,
    * "예문", "ux" and "uxi" templates are handed to
      ``extract_ux_template`` and end the scan,
    * ``File:`` links are turned into ``Sound`` entries,
    * a nested list ends the scan,
    * anything else is collected as example text.

    If the resulting example has text it is appended to ``sense.examples``.
    For ``lang_code == "zh"`` a text containing "/" is split once and two
    copies are appended, tagged "Traditional Chinese" and
    "Simplified Chinese" respectively.  Nested list items are then processed
    recursively.

    Args:
        wxr: Extraction context, passed through to the helper functions.
        sense: Sense whose ``examples`` list receives the result.
        list_item: The list item node to process.
        lang_code: Language code of the entry; "zh" enables the
            traditional/simplified split.
        parent_example: Example to keep filling instead of creating a new
            one; used by the recursive call when the parent item produced
            no text.
    """
    example = Example() if parent_example is None else parent_example
    e_text_nodes = []
    e_tr_nodes = []
    after_lang_template = False
    for node in list_item.children:
        if isinstance(node, TemplateNode) and node.template_name == "lang":
            after_lang_template = True
            extract_example_lang_template(wxr, example, node, lang_code)
        elif isinstance(node, TemplateNode) and node.template_name.startswith(
            ("따옴", "지봉유설")
        ):
            # Use the expanded template text as the reference, without the
            # surrounding parentheses/spaces and the "따옴◄" prefix.
            example.ref = (
                clean_node(wxr, None, node).strip("() ").removeprefix("따옴◄")
            )
        elif isinstance(node, TemplateNode) and node.template_name in (
            "예문",
            "ux",
            "uxi",
        ):
            extract_ux_template(wxr, sense, example, node)
            break
        elif after_lang_template:
            # NOTE(review): this branch is tested before the nested-list
            # check below, so a nested list that follows a `lang` template
            # is also collected here - confirm that this is intended.
            e_tr_nodes.append(node)
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            # Nested lists are handled by the recursive loop at the end.
            break
        elif (
            isinstance(node, WikiNode)
            and node.kind == NodeKind.LINK
            and len(node.largs) > 0
            and len(node.largs[0]) > 0
            and isinstance(node.largs[0][0], str)
            and node.largs[0][0].startswith("File:")
        ):
            sound = Sound()
            sound_file = node.largs[0][0].removeprefix("File:").strip()
            set_sound_file_url_fields(wxr, sound_file, sound)
            if sound.audio != "":
                example.sounds.append(sound)
        else:
            e_text_nodes.append(node)

    # Only overwrite fields already set by a template when the loose nodes
    # actually produced some text.
    e_text = clean_node(wxr, sense, e_text_nodes)
    if e_text != "":
        example.text = e_text
    e_tr = clean_node(wxr, sense, e_tr_nodes)
    if e_tr != "":
        example.translation = e_tr

    if example.text:
        if lang_code == "zh" and "/" in example.text:
            for index, text in enumerate(example.text.split("/", 1)):
                new_example = example.model_copy(deep=True)
                # Strip so that "A / B" does not leave stray spaces around
                # either half.
                new_example.text = text.strip()
                new_example.tags.append(
                    "Traditional Chinese"
                    if index == 0
                    else "Simplified Chinese"
                )
                sense.examples.append(new_example)
        else:
            sense.examples.append(example)

    for nested_list in list_item.find_child(NodeKind.LIST):
        for nested_list_item in nested_list.find_child(NodeKind.LIST_ITEM):
            # Keep filling the current example while it has no text;
            # otherwise each nested item starts a new one.
            extract_example_list_item(
                wxr,
                sense,
                nested_list_item,
                lang_code,
                example if example.text == "" else Example(),
            )
def extract_example_lang_template(
    wxr: WiktextractContext,
    example: Example,
    node: TemplateNode,
    lang_code: str,
) -> None:
    """Fill ``example`` from a ``lang`` template.

    Template argument 2 supplies the example text and argument 4 the
    translation.  For ``lang_code == "ja"`` argument 2 is expanded and passed
    through ``extract_ruby`` so that ``example.ruby`` is set and the text is
    built from the nodes ``extract_ruby`` returns.  For ``lang_code == "zh"``
    a text that contains "(" and ends with ")" is split at the first "(":
    the part before it stays as text, the rest (without parentheses) becomes
    ``example.roman``.

    Args:
        wxr: Extraction context used for parsing and cleaning.
        example: Example that is modified in place.
        node: The ``lang`` template node.
        lang_code: Language code of the entry.
    """
    # https://ko.wiktionary.org/wiki/틀:lang
    params = node.template_parameters
    text_arg = params.get(2, "")
    if lang_code == "ja":
        expanded = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(text_arg),
            expand_all=True,
        )
        example.ruby, text_nodes = extract_ruby(wxr, expanded.children)
        example.text = clean_node(wxr, None, text_nodes)
    else:
        example.text = clean_node(wxr, None, text_arg)
    example.translation = clean_node(wxr, None, params.get(4, ""))
    if lang_code == "zh" and "(" in example.text and example.text.endswith(")"):
        # Everything from the first "(" onwards is taken as romanization.
        roman_start_index = example.text.index("(")
        example.roman = example.text[roman_start_index:].strip("() ")
        example.text = example.text[:roman_start_index].strip()
def extract_ux_template(
    wxr: WiktextractContext,
    sense: Sense,
    example: Example,
    t_node: TemplateNode,
) -> None:
    """Fill ``example`` from a "예문"/"ux"/"uxi" template.

    The language code is read from template argument 1.

    * For "ja" the template is expanded and its ``span`` tags are inspected:
      class "Jpan" supplies ruby and text, class "tr" the romanization.
      Argument 4 is the translation, ``lit`` the literal meaning and ``ref``
      the reference.
    * For every other language argument 2 is the text, argument 3 the
      translation, ``footer`` the note, and "출처" (falling back to
      ``source``) the reference.

    An ``example.ref`` that is already set is never overwritten.

    Args:
        wxr: Extraction context used for parsing and cleaning.
        sense: Sense passed to ``clean_node`` for the expanded link nodes.
        example: Example that is modified in place.
        t_node: The usage-example template node.
    """
    # https://ko.wiktionary.org/wiki/틀:ux
    # https://ko.wiktionary.org/wiki/모듈:usex/templates
    params = t_node.template_parameters

    def param_text(key: int | str) -> str:
        """Return the cleaned text of template argument ``key`` ("" if unset)."""
        return clean_node(wxr, None, params.get(key, ""))

    # Cleaned like every other argument so that a padded or non-string
    # value still compares equal to "ja".
    lang_code = param_text(1)
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    if lang_code == "ja":
        for span_tag in expanded_node.find_html_recursively("span"):
            span_class = span_tag.attrs.get("class", "")
            if span_class == "Jpan":
                example.ruby, no_ruby = extract_ruby(wxr, span_tag)
                example.text = clean_node(wxr, None, no_ruby)
            elif span_class == "tr":
                example.roman = clean_node(wxr, None, span_tag)
        example.translation = param_text(4)
        example.literal_meaning = param_text("lit")
        if example.ref == "":
            example.ref = param_text("ref")
    else:
        example.text = param_text(2)
        example.translation = param_text(3)
        example.note = param_text("footer")
        if example.ref == "":
            example.ref = param_text("출처")
        if example.ref == "":
            example.ref = param_text("source")

    # The return value is discarded; the call is made only for what
    # clean_node records on `sense` (presumably category links - confirm).
    for link_node in expanded_node.find_child(NodeKind.LINK):
        clean_node(wxr, sense, link_node)