Coverage for src/wiktextract/extractor/zh/etymology.py: 87%

36 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from wikitextprocessor.parser import ( 

2 LEVEL_KIND_FLAGS, 

3 NodeKind, 

4 TemplateNode, 

5 WikiNode, 

6) 

7 

8from ...page import clean_node 

9from ...wxr_context import WiktextractContext 

10from .models import Example, WordEntry 

11 

12 

def extract_etymology_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Populate the etymology fields of ``base_data`` from ``level_node``.

    Only the children of ``level_node`` that come before its first nested
    level node (any ``LEVEL_KIND_FLAGS`` kind) are examined.  Each child is
    handled by the first matching rule:

    * ``zh-x`` / ``zh-q`` templates: their examples are appended to
      ``base_data.etymology_examples``.
    * Templates whose lower-cased name is ``rfe``, ``zh-forms``, ``zh-wp``,
      ``wp`` or ``wikipedia``: skipped.
    * List nodes: every nested ``zh-x`` / ``zh-q`` template contributes
      examples; a list without any such template is kept as etymology text.
    * ``ja-see`` / ``ja-see-kango`` / ``zh-see`` templates: a deep copy of
      ``base_data`` is appended to ``page_data`` and handed to
      ``process_soft_redirect_template``.
    * Anything else is kept as etymology text.

    The kept nodes are cleaned together and, when the result is non-empty,
    stored in ``base_data.etymology_text``.

    Args:
        wxr: Extraction context forwarded to every helper call.
        page_data: Entry list that receives one new entry per soft-redirect
            template found.
        base_data: Entry updated in place.
        level_node: Section node whose leading children are scanned.
    """
    # Function-scope import, as in the original layout (presumably to avoid
    # a circular import between sibling modules -- confirm).
    from .example import extract_template_zh_x

    example_templates = ("zh-x", "zh-q")
    ignored_templates = (
        "rfe",  # missing etymology
        "zh-forms",
        "zh-wp",
        "wp",
        "wikipedia",
    )
    soft_redirect_templates = ("ja-see", "ja-see-kango", "zh-see")

    # Index of the first nested level node; falls back to "all children"
    # when there is none.
    stop_index = next(
        (
            child_index
            for child_index, _ in level_node.find_child(LEVEL_KIND_FLAGS, True)
        ),
        len(level_node.children),
    )

    text_nodes = []
    for node in level_node.children[:stop_index]:
        is_template = isinstance(node, TemplateNode)

        if is_template and node.template_name in example_templates:
            base_data.etymology_examples.extend(
                extract_template_zh_x(wxr, node, Example())
            )
            # Return value is discarded; called for whatever it records on
            # base_data (presumably categories -- confirm).
            clean_node(wxr, base_data, node)
            continue

        if is_template and node.template_name.lower() in ignored_templates:
            continue

        if isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            found_example_template = False
            for nested in node.find_child_recursively(NodeKind.TEMPLATE):
                if nested.template_name not in example_templates:
                    continue
                found_example_template = True
                base_data.etymology_examples.extend(
                    extract_template_zh_x(wxr, nested, Example())
                )
                clean_node(wxr, base_data, nested)
            if not found_example_template:
                text_nodes.append(node)
            continue

        if is_template and node.template_name in soft_redirect_templates:
            # Imported only when this branch is actually reached.
            from .page import process_soft_redirect_template

            redirect_entry = base_data.model_copy(deep=True)
            page_data.append(redirect_entry)
            process_soft_redirect_template(wxr, node, redirect_entry)
            continue

        text_nodes.append(node)

    etymology_text = clean_node(wxr, base_data, text_nodes)
    if etymology_text:
        base_data.etymology_text = etymology_text