Coverage for src/wiktextract/extractor/zh/etymology.py: 87%
36 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from wikitextprocessor.parser import (
2 LEVEL_KIND_FLAGS,
3 NodeKind,
4 TemplateNode,
5 WikiNode,
6)
8from ...page import clean_node
9from ...wxr_context import WiktextractContext
10from .models import Example, WordEntry
def extract_etymology_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Fill etymology text and etymology examples from an etymology section.

    Only the children of ``level_node`` that come before its first child
    matching ``LEVEL_KIND_FLAGS`` are examined.

    Args:
        wxr: Context object forwarded to every helper called here.
        page_data: List of entries; a deep copy of ``base_data`` is appended
            for each ``ja-see``/``ja-see-kango``/``zh-see`` template found.
        base_data: Entry that receives ``etymology_examples`` items and,
            when the cleaned text is non-empty, ``etymology_text``.
        level_node: The section node whose children are processed.
    """
    # Function-scope import kept as in the original.
    # NOTE(review): presumably avoids a circular import - confirm.
    from .example import extract_template_zh_x

    example_templates = ("zh-x", "zh-q")
    skipped_templates = (
        "rfe",  # missing etymology
        "zh-forms",
        "zh-wp",
        "wp",
        "wikipedia",
    )
    redirect_templates = ("ja-see", "ja-see-kango", "zh-see")

    def collect_examples(template: TemplateNode) -> None:
        # Store every example produced from the template on base_data.
        base_data.etymology_examples.extend(
            extract_template_zh_x(wxr, template, Example())
        )
        # Return value is unused; presumably called for its side effects on
        # base_data - confirm against clean_node.
        clean_node(wxr, base_data, template)

    # Stop at the first nested level node; without one, use every child.
    first_sublevel = next(
        iter(level_node.find_child(LEVEL_KIND_FLAGS, True)), None
    )
    section_end = (
        len(level_node.children)
        if first_sublevel is None
        else first_sublevel[0]
    )

    text_nodes = []
    for child in level_node.children[:section_end]:
        is_template = isinstance(child, TemplateNode)
        name = child.template_name if is_template else ""

        if is_template and name in example_templates:
            collect_examples(child)
            continue

        if is_template and name.lower() in skipped_templates:
            # These templates contribute nothing to the etymology data.
            continue

        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            found_example = False
            for nested in child.find_child_recursively(NodeKind.TEMPLATE):
                if nested.template_name in example_templates:
                    found_example = True
                    collect_examples(nested)
            # A list holding no example template is kept as etymology text.
            if not found_example:
                text_nodes.append(child)
            continue

        if is_template and name in redirect_templates:
            from .page import process_soft_redirect_template

            redirect_entry = base_data.model_copy(deep=True)
            page_data.append(redirect_entry)
            process_soft_redirect_template(wxr, child, redirect_entry)
            continue

        text_nodes.append(child)

    etymology_text = clean_node(wxr, base_data, text_nodes)
    if etymology_text:
        base_data.etymology_text = etymology_text