Coverage for src/wiktextract/extractor/ja/example.py: 98%

50 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..ruby import extract_ruby 

6from .linkage import process_linkage_list_item 

7from .models import Example, Sense, WordEntry 

8from .section_titles import LINKAGES 

9 

10 

def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sense: Sense,
    list_item: WikiNode,
    parent_list_text: str = "",
) -> None:
    """Extract example data from one list item and attach it to `sense`.

    Format reference:
    https://ja.wiktionary.org/wiki/Wiktionary:用例#用例を示す形式

    The list item is handled in one of three ways:

    * Linkage line: when the nodes in front of a string child containing
      ":" clean to a text found in `LINKAGES`, the whole item is handed to
      `process_linkage_list_item` and nothing else is done.
    * Example line: when a non-list child is or contains a bold node, or
      the item contains no list node. "ux"/"uxi" templates are delegated
      to `process_ux_template`; otherwise the expanded text of the item
      becomes a new `Example` appended to `sense.examples`.
    * Heading line: any other item. Its own text (plus the text of its
      `<ref>` tags) is passed as `parent_list_text` to a recursive call
      for every nested list item.

    Args:
        wxr: Context used for cleaning, parsing and expanding wikitext.
        word_entry: Entry forwarded to the linkage extractor.
        sense: Sense that receives the extracted examples.
        list_item: The list item node to inspect.
        parent_list_text: Text of the enclosing list item; when non-empty
            it is stored as the `ref` of the example.
    """
    # Linkage check: look at what precedes each string child holding ":".
    for child_idx, child in enumerate(list_item.children):
        if not isinstance(child, str) or ":" not in child:
            continue
        label = clean_node(wxr, None, list_item.children[:child_idx])
        if label not in LINKAGES:
            continue
        first_gloss = sense.glosses[0] if len(sense.glosses) > 0 else ""
        process_linkage_list_item(wxr, word_entry, list_item, "", first_gloss)
        return

    has_bold = any(
        child.contain_node(NodeKind.BOLD) or child.kind == NodeKind.BOLD
        for child in list_item.children
        if isinstance(child, WikiNode) and child.kind != NodeKind.LIST
    )

    if not has_bold and list_item.contain_node(NodeKind.LIST):
        # No bold node but a nested list: this item's text is passed down
        # to the nested items as their parent text.
        own_nodes = list(list_item.invert_find_child(NodeKind.LIST))
        heading_text = clean_node(wxr, None, own_nodes) + "".join(
            " " + clean_node(wxr, None, ref_tag.children)
            for ref_tag in list_item.find_html("ref")
        )
        for nested_item in list_item.find_child_recursively(
            NodeKind.LIST_ITEM
        ):
            extract_example_list_item(
                wxr, word_entry, sense, nested_item, heading_text
            )
        return

    # From here on: has a bold node or doesn't have a list child node.
    ux_templates = [
        template
        for template in list_item.find_child(NodeKind.TEMPLATE)
        if template.template_name in ("ux", "uxi")
    ]
    for template in ux_templates:
        process_ux_template(wxr, template, sense)
    if ux_templates:
        return

    # Re-parse the item without its nested lists, with templates expanded.
    own_wikitext = wxr.wtp.node_to_wikitext(
        list(list_item.invert_find_child(NodeKind.LIST))
    )
    expanded = wxr.wtp.parse(own_wikitext, expand_all=True)
    ruby, no_ruby = extract_ruby(wxr, expanded.children)
    example = Example(text=clean_node(wxr, None, no_ruby), ruby=ruby)

    # Every nested list item overwrites the translation; the last one wins.
    for nested_item in list_item.find_child_recursively(NodeKind.LIST_ITEM):
        example.translation = clean_node(wxr, None, nested_item.children)

    if parent_list_text:
        example.ref = parent_list_text
    else:
        # Only the first marker present in the text is used; the text is
        # split at that marker's last occurrence.
        marker = next(
            (m for m in ("(", "――") if m in example.text), None
        )
        if marker is not None:
            before, sep, after = example.text.rpartition(marker)
            example.ref = sep + after
            example.text = before.strip()
            for ref_tag in expanded.find_html_recursively("ref"):
                example.ref += " " + clean_node(wxr, None, ref_tag.children)

    sense.examples.append(example)

88 

89 

def process_ux_template(
    wxr: WiktextractContext, template: TemplateNode, sense: Sense
) -> None:
    """Build an example from a "ux"/"uxi" template and add it to `sense`.

    Template documentation:
    https://ja.wiktionary.org/wiki/テンプレート:ux
    https://ja.wiktionary.org/wiki/テンプレート:uxi

    Template parameter 2 is cleaned into the example text and parameter 3
    into its translation. The example is appended to `sense.examples`
    only when the text is not empty.

    Args:
        wxr: Context passed through to `clean_node`.
        template: The template node to read parameters from.
        sense: Sense that receives the example.
    """
    params = template.template_parameters
    text = clean_node(wxr, None, params.get(2, ""))
    translation = clean_node(wxr, None, params.get(3, ""))
    example = Example(text=text, translation=translation)
    if text:
        sense.examples.append(example)
    # Always run the whole template through clean_node with the sense,
    # whether or not an example was added.
    # NOTE(review): presumably this collects categories onto the sense;
    # confirm against clean_node.
    clean_node(wxr, sense, template)