Coverage for src/wiktextract/extractor/ko/example.py: 95%
67 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
1from wikitextprocessor import NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..ruby import extract_ruby
6from .models import Example, Sense
def extract_example_list_item(
    wxr: WiktextractContext,
    sense: Sense,
    list_item: WikiNode,
    lang_code: str,
    parent_example: Example | None = None,
) -> None:
    """Collect example data from one list item and store it on `sense`.

    The children of `list_item` are scanned in order:

    * a `lang` template is handed to `extract_example_lang_template`, and
      every later non-template-matched child is appended to the translation;
    * templates whose name starts with "따옴" or "지봉유설" provide the
      reference text;
    * "예문", "ux" and "uxi" templates are handed to `extract_ux_template`
      and end the scan;
    * a nested list ends the scan (when no `lang` template was seen);
    * anything else is appended to the example text.

    If the example ends up with text it is appended to `sense.examples`;
    for `lang_code == "zh"` a text containing "/" is split once into two
    copies tagged "Traditional Chinese" and "Simplified Chinese".  Nested
    lists are then processed recursively with the current example passed
    as `parent_example`.

    NOTE(review): nested items reuse the same `Example` object, so it may
    be appended to `sense.examples` more than once -- confirm intended.
    """
    example = parent_example if parent_example is not None else Example()
    seen_lang_template = False

    for child in list_item.children:
        is_template = isinstance(child, TemplateNode)
        if is_template and child.template_name == "lang":
            seen_lang_template = True
            extract_example_lang_template(wxr, example, child, lang_code)
            continue
        if is_template and child.template_name.startswith(
            ("따옴", "지봉유설")
        ):
            ref_text = clean_node(wxr, None, child)
            example.ref = ref_text.strip("() ").removeprefix("따옴◄")
            continue
        if is_template and child.template_name in ("예문", "ux", "uxi"):
            extract_ux_template(wxr, sense, example, child)
            break
        # NOTE(review): this test runs before the nested-list test, so a
        # list that follows a `lang` template is added to the translation.
        if seen_lang_template:
            example.translation += clean_node(wxr, None, child)
            continue
        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            break
        example.text += clean_node(wxr, None, child)

    if example.text:
        if lang_code == "zh" and "/" in example.text:
            # Splitting once on a text known to contain "/" yields exactly
            # two parts, paired here with their script tags.
            script_tags = ("Traditional Chinese", "Simplified Chinese")
            for part, tag in zip(example.text.split("/", 1), script_tags):
                variant = example.model_copy(deep=True)
                variant.text = part
                variant.tags.append(tag)
                sense.examples.append(variant)
        else:
            sense.examples.append(example)

    for sub_list in list_item.find_child(NodeKind.LIST):
        for sub_item in sub_list.find_child(NodeKind.LIST_ITEM):
            extract_example_list_item(
                wxr, sense, sub_item, lang_code, example
            )
def extract_example_lang_template(
    wxr: WiktextractContext,
    example: Example,
    node: TemplateNode,
    lang_code: str,
) -> None:
    """Fill `example` from the arguments of a `lang` template.

    Argument 2 becomes the example text and argument 4 the translation.
    For `lang_code == "ja"` argument 2 is re-parsed with full expansion and
    passed through `extract_ruby`, whose first result is stored in
    `example.ruby` and whose second result is cleaned into the text.
    For `lang_code == "zh"`, a text that contains "(" and ends with ")" is
    cut at the first "(": the tail (stripped of parentheses and spaces)
    becomes `example.roman`, the head becomes the text.
    """
    # https://ko.wiktionary.org/wiki/틀:lang
    text_arg = node.template_parameters.get(2, "")
    if lang_code != "ja":
        example.text = clean_node(wxr, None, text_arg)
    else:
        text_wikitext = wxr.wtp.node_to_wikitext(text_arg)
        parsed_text = wxr.wtp.parse(text_wikitext, expand_all=True)
        example.ruby, plain_nodes = extract_ruby(wxr, parsed_text.children)
        example.text = clean_node(wxr, None, plain_nodes)

    translation_arg = node.template_parameters.get(4, "")
    example.translation = clean_node(wxr, None, translation_arg)

    if lang_code != "zh":
        return
    full_text = example.text
    if "(" not in full_text or not full_text.endswith(")"):
        return
    # "(" is known to be present, so `find` cannot return -1 here.
    paren_at = full_text.find("(")
    example.roman = full_text[paren_at:].strip("() ")
    example.text = full_text[:paren_at].strip()
def extract_ux_template(
    wxr: WiktextractContext,
    sense: Sense,
    example: Example,
    t_node: TemplateNode,
) -> None:
    """Fill `example` from a usage-example template node.

    The language code is read from argument 1.  For "ja" the template is
    expanded and its `span` tags are inspected: class "Jpan" supplies ruby
    and text (via `extract_ruby`), class "tr" supplies the romanization;
    the translation comes from argument 4, the literal meaning from "lit",
    and an empty `example.ref` is filled from "ref".  For any other
    language the text, translation and note come from arguments 2, 3 and
    "footer", and an empty `example.ref` is filled from "출처", then from
    "source" if it is still empty.

    Finally every direct LINK child of the expanded template is passed to
    `clean_node` together with `sense` (presumably to record category
    links on the sense -- TODO confirm against `clean_node`).
    """
    # https://ko.wiktionary.org/wiki/틀:ux
    # https://ko.wiktionary.org/wiki/모듈:usex/templates

    def param_text(key: int | str) -> str:
        # Cleaned text of one template argument, "" when it is absent.
        return clean_node(wxr, None, t_node.template_parameters.get(key, ""))

    lang_code = t_node.template_parameters.get(1, "")
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_node = wxr.wtp.parse(template_wikitext, expand_all=True)

    if lang_code == "ja":
        for span_tag in expanded_node.find_html_recursively("span"):
            css_class = span_tag.attrs.get("class", "")
            if css_class == "Jpan":
                example.ruby, plain_nodes = extract_ruby(wxr, span_tag)
                example.text = clean_node(wxr, None, plain_nodes)
            elif css_class == "tr":
                example.roman = clean_node(wxr, None, span_tag)
        example.translation = param_text(4)
        example.literal_meaning = param_text("lit")
        ref_keys = ("ref",)
    else:
        example.text = param_text(2)
        example.translation = param_text(3)
        example.note = param_text("footer")
        ref_keys = ("출처", "source")

    # A reference that is already set is never overwritten; each key is
    # only consulted while the reference is still empty.
    for ref_key in ref_keys:
        if example.ref == "":
            example.ref = param_text(ref_key)

    for link_node in expanded_node.find_child(NodeKind.LINK):
        clean_node(wxr, sense, link_node)