Coverage for src/wiktextract/extractor/es/conjugation.py: 89%

62 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from dataclasses import dataclass 

2 

3from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Form, WordEntry 

8from .tags import translate_raw_tags 

9 

10 

def extract_conjugation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: WikiNode,
) -> None:
    """Handle every template found in a conjugation section.

    Each node yielded by ``level_node.find_child(NodeKind.TEMPLATE)`` is
    forwarded, together with ``wxr`` and ``word_entry``, to
    ``process_conjugation_template``.
    """
    section_templates = level_node.find_child(NodeKind.TEMPLATE)
    for conj_template in section_templates:
        process_conjugation_template(wxr, word_entry, conj_template)

18 

19 

def process_conjugation_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    template_node: TemplateNode,
) -> None:
    """Dispatch a conjugation template to its handler.

    Only templates whose name contains ``"es.v.conj."`` are handled; they
    are passed to ``process_es_v_conj_template``.  Any other template is
    left untouched.
    """
    name = template_node.template_name
    if "es.v.conj." not in name:
        return
    process_es_v_conj_template(wxr, word_entry, template_node)

27 

28 

@dataclass
class SpanHeader:
    """A table column header together with the columns it covers."""

    # Cleaned text of the header cell.
    text: str
    # Column index at which the header starts.
    index: int
    # Number of columns the header covers (filled from the cell's colspan).
    span: int

34 

35 

# An all-header row is skipped by process_es_v_conj_template when the text of
# its first header cell starts with one of these prefixes.
IGNORE_ES_V_ROW_PREFIXES = ("Modo ", "Tiempos ")
# Cells whose cleaned text equals one of these values are skipped entirely.
IGNORE_ES_V_HEADERS = {
    "número:",
    "persona:",
}

41 

42 

def process_es_v_conj_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    template_node: TemplateNode,
) -> None:
    """Collect verb forms from the table produced by an ``es.v.conj`` template.

    The template is expanded and re-parsed, and the first table in the
    result is walked row by row:

    * A row made only of header cells either is skipped (first header starts
      with one of ``IGNORE_ES_V_ROW_PREFIXES``), resets the collected column
      headers (row with a single non-empty child), or contributes new column
      headers with their column index and span.
    * A row made only of data cells is skipped.
    * In any other row a header cell sets the row header, and every line of
      every data cell becomes a ``Form``.  The form is tagged with the row
      header (split on ``" o "``) and with each column header whose span
      covers the cell, passed through ``translate_raw_tags`` and appended to
      ``word_entry.forms``.

    Args:
        wxr: Extraction context used to expand wikitext and clean nodes.
        word_entry: Entry whose ``forms`` list receives the extracted forms.
        template_node: The conjugation template node to expand.
    """
    # https://es.wiktionary.org/wiki/Plantilla:es.v.conj
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(template_node), expand_all=True
    )
    # Only the first table is used, so take it lazily instead of building a
    # list of every table.
    table_node = next(expanded_node.find_child(NodeKind.TABLE), None)
    if table_node is None:
        return

    col_headers: list[SpanHeader] = []
    for row in table_node.find_child(NodeKind.TABLE_ROW):
        row_header = ""
        has_header_cell = row.contain_node(NodeKind.TABLE_HEADER_CELL)
        all_header_row = not row.contain_node(NodeKind.TABLE_CELL)
        if has_header_cell and all_header_row:
            first_header = next(row.find_child(NodeKind.TABLE_HEADER_CELL))
            first_header_text = clean_node(wxr, None, first_header)
            if first_header_text.startswith(IGNORE_ES_V_ROW_PREFIXES):
                continue  # ignore personal pronouns row
            if len(list(row.filter_empty_str_child())) == 1:  # new table
                col_headers.clear()
                continue
        if not all_header_row and not has_header_cell:
            continue  # ignore end notes

        col_header_index = 0
        col_cell_index = 0
        for cell in row.find_child(
            NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL
        ):
            cell_text = clean_node(wxr, None, cell)
            # A malformed colspan must not abort the whole table: fall back
            # to 1 for non-numeric or non-positive values, which is the
            # default HTML applies to such attributes.
            try:
                colspan = int(cell.attrs.get("colspan", "1"))
            except ValueError:
                colspan = 1
            if colspan < 1:
                colspan = 1

            if cell_text == "" or cell_text in IGNORE_ES_V_HEADERS:
                # NOTE(review): a skipped cell advances neither column
                # counter -- confirm this is intended for empty data cells.
                continue

            if cell.kind == NodeKind.TABLE_HEADER_CELL:
                if all_header_row:
                    col_headers.append(
                        SpanHeader(cell_text, col_header_index, colspan)
                    )
                else:
                    row_header = cell_text
                col_header_index += colspan
                continue

            # Data cell: the tags depend only on the row header and on the
            # cell's column, so compute them once for all lines of the cell.
            row_tags = row_header.split(" o ") if row_header != "" else []
            col_tags = [
                col_head.text
                for col_head in col_headers
                if col_head.index
                <= col_cell_index
                < col_head.index + col_head.span
            ]
            for line in cell_text.splitlines():
                form = Form(form=line)
                form.raw_tags.extend(row_tags)
                form.raw_tags.extend(col_tags)
                if form.form != "":
                    translate_raw_tags(form)
                    word_entry.forms.append(form)
            col_cell_index += colspan