Coverage for src/wiktextract/extractor/es/example.py: 91%

61 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from wikitextprocessor.parser import ( 

2 NodeKind, 

3 TemplateNode, 

4 WikiNode, 

5 WikiNodeChildrenList, 

6) 

7 

8from ...page import clean_node 

9from ...wxr_context import WiktextractContext 

10from .models import Example, Sense, TemplateData 

11 

12 

def process_ejemplo_template(
    wxr: WiktextractContext,
    sense_data: Sense,
    template_node: TemplateNode,
):
    """Turn one "ejemplo" template into an ``Example`` on ``sense_data``.

    The template is expanded and its ``<span>`` tags are inspected by their
    ``class`` attribute: "cita" supplies the example text (and the reference
    when its last child is a URL node), "trad" supplies the translation and
    "ref" supplies the reference.  When no text is found that way, the first
    positional template argument is used instead.  Nothing is appended if the
    text is still empty.
    """
    # https://es.wiktionary.org/wiki/Plantilla:ejemplo
    # https://es.wiktionary.org/wiki/Módulo:ejemplo
    example = Example(text="")
    expanded = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(template_node), expand_all=True
    )
    for span in expanded.find_html_recursively("span"):
        css_class = span.attrs.get("class")
        if css_class == "cita":
            # Only split off a trailing URL when something precedes it.
            trailing = span.children[-1] if len(span.children) > 1 else None
            if isinstance(trailing, WikiNode) and trailing.kind == NodeKind.URL:
                example.text = clean_node(wxr, None, span.children[:-1])
                example.ref = clean_node(wxr, None, trailing)
            else:
                example.text = clean_node(wxr, None, span)
        elif css_class == "trad":
            translation = clean_node(wxr, None, span)
            example.translation = translation.removeprefix("Traducción: ")
        elif css_class == "ref":
            example.ref = clean_node(wxr, None, span)

    if not example.text:
        # Fall back to the raw first positional argument of the template.
        first_arg = template_node.template_parameters.get(1, "")
        example.text = clean_node(wxr, None, first_arg)

    if not example.text:
        return

    # Record the template's name, cleaned arguments and cleaned expansion.
    template_data = TemplateData(expansion=clean_node(wxr, None, expanded))
    template_data.name = template_node.template_name
    for key, raw_value in template_node.template_parameters.items():
        template_data.args[str(key)] = clean_node(wxr, None, raw_value)
    example.example_templates.append(template_data)
    sense_data.examples.append(example)

59 

60 

def extract_example(
    wxr: WiktextractContext,
    sense_data: Sense,
    nodes: WikiNodeChildrenList,
):
    """Extract examples for ``sense_data`` from a sequence of nodes.

    "ejemplo" templates are handed to ``process_ejemplo_template``.  A URL
    node becomes the ``ref`` of the most recently added example.  Every other
    node is collected; if the sense has no examples once all nodes have been
    seen, the collected nodes are cleaned into a single plain-text example.
    If the sense already has examples, the leftover nodes are only reported
    through ``wxr.wtp.debug``.
    """
    text_nodes: WikiNodeChildrenList = []
    for node in nodes:
        if isinstance(node, WikiNode) and node.kind == NodeKind.TEMPLATE:
            if node.template_name == "ejemplo":
                process_ejemplo_template(wxr, sense_data, node)
            else:
                text_nodes.append(node)
        elif isinstance(node, WikiNode) and node.kind == NodeKind.URL:
            # The URL is dropped when there is no example to attach it to.
            if sense_data.examples:
                sense_data.examples[-1].ref = clean_node(wxr, None, node)
        else:
            text_nodes.append(node)

    if not text_nodes:
        return

    if sense_data.examples:
        wxr.wtp.debug(
            f"Unprocessed nodes from example group: {text_nodes}",
            sortid="extractor/es/example/extract_example/87",
        )
        return

    # Skip empty text (e.g. whitespace-only nodes), matching the non-empty
    # checks in the other example extractors of this module.
    text = clean_node(wxr, None, text_nodes)
    if text:
        sense_data.examples.append(Example(text=text))

87 

88 

def process_example_list(
    wxr: WiktextractContext,
    sense_data: Sense,
    list_item: WikiNode,
):
    """Extract examples from the list items nested under ``list_item``.

    Each nested list item yields one example built from its children.
    Templates whose name starts with "cita " provide the example's ``ref``,
    "referencia incompleta" templates are skipped, and everything else is
    cleaned into the example text.  If the sense has no examples afterwards,
    the text of ``list_item`` itself (minus a leading "Ejemplo: ") is used.
    """
    for nested_item in list_item.find_child_recursively(NodeKind.LIST_ITEM):
        example = Example(text="")
        kept_children: WikiNodeChildrenList = []
        for child in nested_item.children:
            if isinstance(child, TemplateNode):
                name = child.template_name
                # "cita *" templates are obsolete
                if name.startswith("cita "):
                    example.ref = clean_node(wxr, None, child)
                    continue
                # ignore empty ref template
                if name == "referencia incompleta":
                    continue
            kept_children.append(child)
        example.text = clean_node(wxr, None, kept_children)
        if example.text:
            sense_data.examples.append(example)

    # If no example was found in sublists,
    # assume example is in list_item.children directly.
    # NOTE(review): this checks all examples of the sense, so examples added
    # before this call also suppress the fallback - confirm this is intended.
    if sense_data.examples:
        return
    fallback_text = clean_node(wxr, None, list_item.children)
    fallback_text = fallback_text.removeprefix("Ejemplo: ")
    if fallback_text:
        sense_data.examples.append(Example(text=fallback_text))