Coverage for src/wiktextract/extractor/es/conjugation.py: 89%

1from dataclasses import dataclass

3from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode

5from ...page import clean_node

6from ...wxr_context import WiktextractContext

7from .models import Form, WordEntry

8from .tags import translate_raw_tags

def extract_conjugation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: WikiNode
) -> None:
    """Run conjugation extraction over the templates of a section node.

    Every node yielded by ``level_node.find_child(NodeKind.TEMPLATE)`` is
    passed, together with ``wxr`` and ``word_entry``, to
    ``process_conjugation_template``. Nothing is returned; any results are
    written by the callee.
    """
    templates = level_node.find_child(NodeKind.TEMPLATE)
    for t_node in templates:
        process_conjugation_template(wxr, word_entry, t_node)

def process_conjugation_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    template_node: TemplateNode,
) -> None:
    """Dispatch a template node to the matching conjugation handler.

    Only templates whose name contains the substring ``"es.v.conj."`` are
    handled (by ``process_es_v_conj_template``); any other template is
    silently ignored.
    """
    if "es.v.conj." not in template_node.template_name:
        return
    process_es_v_conj_template(wxr, word_entry, template_node)

@dataclass
class SpanHeader:
    """A column header of a conjugation table and the columns it covers."""

    # Cleaned text of the header cell.
    text: str
    # Column position at which the header starts.
    index: int
    # Number of columns covered, taken from the cell's ``colspan``.
    span: int

# A header-only table row is skipped when the text of its first header cell
# starts with one of these prefixes (a tuple, so it can be given directly to
# ``str.startswith``).
IGNORE_ES_V_ROW_PREFIXES = ("Modo ", "Tiempos ")

# Cells whose cleaned text is exactly one of these labels are skipped and do
# not advance any column counter.
IGNORE_ES_V_HEADERS = {"número:", "persona:"}

def process_es_v_conj_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    template_node: TemplateNode,
) -> None:
    """Collect verb forms from the table produced by an ``es.v.conj.*`` template.

    The template is converted back to wikitext, re-parsed with full
    expansion, and the first table found among the direct results is walked
    row by row. Each non-empty line of a data cell becomes a ``Form`` that
    is tagged with the row header and with every column header whose span
    covers the cell, then appended to ``word_entry.forms``.

    Returns ``None``; if the expansion yields no table nothing is added.
    """
    # https://es.wiktionary.org/wiki/Plantilla:es.v.conj
    template_wikitext = wxr.wtp.node_to_wikitext(template_node)
    root = wxr.wtp.parse(template_wikitext, expand_all=True)
    tables = list(root.find_child(NodeKind.TABLE))
    if not tables:
        return

    # Column headers of the sub-table currently being read; reset whenever a
    # single-cell header row announces a new sub-table.
    column_spans = []
    for row in tables[0].find_child(NodeKind.TABLE_ROW):
        row_label = ""
        header_only = not row.contain_node(NodeKind.TABLE_CELL)
        if row.contain_node(NodeKind.TABLE_HEADER_CELL) and header_only:
            leading = next(row.find_child(NodeKind.TABLE_HEADER_CELL))
            leading_text = clean_node(wxr, None, leading)
            if leading_text.startswith(IGNORE_ES_V_ROW_PREFIXES):
                # Rows introduced by "Modo "/"Tiempos " carry no usable tags.
                continue
            if len(list(row.filter_empty_str_child())) == 1:
                # A lone header cell starts a new sub-table.
                column_spans.clear()
                continue
        if row.contain_node(NodeKind.TABLE_CELL) and not row.contain_node(
            NodeKind.TABLE_HEADER_CELL
        ):
            # Data cells without any header: end notes, nothing to extract.
            continue

        # Header cells and data cells are counted independently so that the
        # position of a data cell can be matched against header spans.
        header_pos = 0
        cell_pos = 0
        wanted_kinds = NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL
        for cell in row.find_child(wanted_kinds):
            text = clean_node(wxr, None, cell)
            width = int(cell.attrs.get("colspan", "1"))
            if text == "" or text in IGNORE_ES_V_HEADERS:
                # Skipped cells deliberately leave both counters untouched.
                continue

            if cell.kind == NodeKind.TABLE_HEADER_CELL:
                if header_only:
                    column_spans.append(SpanHeader(text, header_pos, width))
                else:
                    row_label = text
                header_pos += width
                continue

            # Data cell: one form per line of cell text.
            for line in text.splitlines():
                form = Form(form=line)
                if row_label != "":
                    # The row header may hold several labels joined by
                    # " o " (presumably Spanish "or" — confirm).
                    form.raw_tags.extend(row_label.split(" o "))
                form.raw_tags.extend(
                    head.text
                    for head in column_spans
                    if head.index <= cell_pos < head.index + head.span
                )

                if form.form != "":
                    translate_raw_tags(form)
                    word_entry.forms.append(form)
            cell_pos += width