Coverage for src/wiktextract/extractor/ja/example.py: 98%
50 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from ..ruby import extract_ruby
6from .linkage import process_linkage_list_item
7from .models import Example, Sense, WordEntry
8from .section_titles import LINKAGES
def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sense: Sense,
    list_item: WikiNode,
    parent_list_text: str = "",
) -> None:
    """Extract example data from one list item and attach it to ``sense``.

    The list item is handled in one of three ways:

    * If the nodes before a string child containing ":" clean to a key of
      ``LINKAGES``, the whole item is handed to
      ``process_linkage_list_item`` and nothing else is done.
    * If the item has a nested list and no bold markup outside that list, its
      own text is treated as a shared reference and each descendant list item
      is processed recursively with that text as ``parent_list_text``.
    * Otherwise the item is an example. ``ux``/``uxi`` templates are
      delegated to ``process_ux_template``; without them an ``Example`` is
      built from the item's expanded text and appended to ``sense.examples``.

    Args:
        wxr: Extraction context, used for parsing and node cleaning.
        word_entry: Entry passed through to the linkage processor.
        sense: Sense that receives the extracted examples.
        list_item: The list item node to inspect.
        parent_list_text: Text of an enclosing list item; when non-empty it
            is used as the example's ``ref``.
    """
    # Layout reference:
    # https://ja.wiktionary.org/wiki/Wiktionary:用例#用例を示す形式

    # Linkage check: look at whatever precedes each string child that
    # contains a colon and see whether it names a linkage type.
    for idx, child in enumerate(list_item.children):
        if not isinstance(child, str) or ":" not in child:
            continue
        label = clean_node(wxr, None, list_item.children[:idx])
        if label not in LINKAGES:
            continue
        if len(sense.glosses) > 0:
            first_gloss = sense.glosses[0]
        else:
            first_gloss = ""
        process_linkage_list_item(wxr, word_entry, list_item, "", first_gloss)
        return

    # Look for bold markup in (or as) any direct child that is not a nested
    # list.
    has_bold = False
    for child in list_item.children:
        if not isinstance(child, WikiNode) or child.kind == NodeKind.LIST:
            continue
        if child.contain_node(NodeKind.BOLD) or child.kind == NodeKind.BOLD:
            has_bold = True
            break

    if not has_bold and list_item.contain_node(NodeKind.LIST):
        # No bold text but a nested list: this item's own text (plus any
        # <ref> contents) becomes the reference of the nested items.
        text_parts = [
            clean_node(
                wxr, None, list(list_item.invert_find_child(NodeKind.LIST))
            )
        ]
        for ref_node in list_item.find_html("ref"):
            text_parts.append(clean_node(wxr, None, ref_node.children))
        shared_ref_text = " ".join(text_parts)
        for nested_item in list_item.find_child_recursively(
            NodeKind.LIST_ITEM
        ):
            extract_example_list_item(
                wxr, word_entry, sense, nested_item, shared_ref_text
            )
        return

    # From here on the item has bold text or no nested list.
    found_ux = False
    for template in list_item.find_child(NodeKind.TEMPLATE):
        if template.template_name not in ["ux", "uxi"]:
            continue
        process_ux_template(wxr, template, sense)
        found_ux = True
    if found_ux:
        return

    # Re-parse the item's own content (nested lists excluded) with every
    # template expanded before extracting ruby and text.
    own_nodes = list(list_item.invert_find_child(NodeKind.LIST))
    own_wikitext = wxr.wtp.node_to_wikitext(own_nodes)
    expanded = wxr.wtp.parse(own_wikitext, expand_all=True)
    ruby, no_ruby = extract_ruby(wxr, expanded.children)
    example = Example(text=clean_node(wxr, None, no_ruby), ruby=ruby)

    # Each descendant list item overwrites the translation, so the last one
    # found wins.
    for translation_item in list_item.find_child_recursively(
        NodeKind.LIST_ITEM
    ):
        example.translation = clean_node(
            wxr, None, translation_item.children
        )

    if parent_list_text != "":
        example.ref = parent_list_text
    else:
        # Split a trailing reference off the text at the last occurrence of
        # the first marker that is present.
        marker = next(
            (m for m in ["(", "――"] if m in example.text),
            None,
        )
        if marker is not None:
            split_at = example.text.rindex(marker)
            example.ref = example.text[split_at:]
            example.text = example.text[:split_at].strip()
            for ref_node in expanded.find_html_recursively("ref"):
                example.ref += " " + clean_node(wxr, None, ref_node.children)

    sense.examples.append(example)
def process_ux_template(
    wxr: WiktextractContext, template: TemplateNode, sense: Sense
) -> None:
    """Build an ``Example`` from a ``ux``/``uxi`` template.

    Positional parameter 2 supplies the example text and parameter 3 the
    translation; a missing parameter is treated as an empty string. The
    example is appended to ``sense.examples`` only when its text is
    non-empty.

    Template documentation:
    https://ja.wiktionary.org/wiki/テンプレート:ux
    https://ja.wiktionary.org/wiki/テンプレート:uxi

    Args:
        wxr: Extraction context passed to ``clean_node``.
        template: The ``ux`` or ``uxi`` template node.
        sense: Sense that receives the example.
    """
    params = template.template_parameters
    example = Example(
        text=clean_node(wxr, None, params.get(2, "")),
        translation=clean_node(wxr, None, params.get(3, "")),
    )
    if example.text != "":
        sense.examples.append(example)
    # Clean the whole template against the sense; the result is discarded.
    # NOTE(review): presumably this records categories on the sense - confirm
    # against clean_node.
    clean_node(wxr, sense, template)