Coverage for src/wiktextract/extractor/ku/form_table.py

1import re

2from dataclasses import dataclass

4from wikitextprocessor import NodeKind, TemplateNode

6from ...page import clean_node

7from ...wxr_context import WiktextractContext

8from .models import Form, WordEntry

9from .tags import translate_raw_tags

10from .tewandin import extract_tewandin_page

def extract_ku_tewîn_nav_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Extract forms from an expanded "ku-tewîn-nav" table template.

    The template is expanded and re-parsed; every table found in the result
    is walked row by row and one `Form` is appended to `word_entry.forms`
    for each non-empty line of each data cell that differs from the page
    title.

    Tags attached to each form:

    * gender: positional template argument 2, "mê" -> "feminine",
      "nêr" -> "masculine", anything else -> no gender tag;
    * definiteness: a row with a single child that is a header cell whose
      text ends with " nebinavkirî" / " binavkirî" switches the shared tag
      to "indefinite" / "definite" for the rows that follow;
    * raw tags: the row header (header cell in a row that also has data
      cells) and the column header at the same data-cell index, when
      available. Raw tags are then passed through `translate_raw_tags`.
    """
    # https://ku.wiktionary.org/wiki/Şablon:ku-tewîn-nav
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    gender_tags = []
    gender_arg = clean_node(wxr, None, t_node.template_parameters.get(2, ""))
    if gender_arg == "mê":
        gender_tags = ["feminine"]
    elif gender_arg == "nêr":
        gender_tags = ["masculine"]

    for table_node in expanded_node.find_child(NodeKind.TABLE):
        row_header = ""
        col_headers = []
        shared_tags = []
        for row in table_node.find_child(NodeKind.TABLE_ROW):
            # Index of the current data cell within this row; used to pick
            # the matching column header.
            col_index = 0
            for cell in row.find_child(
                NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL
            ):
                if cell.kind == NodeKind.TABLE_HEADER_CELL:
                    header_str = clean_node(wxr, None, cell)
                    if len(row.children) == 1:
                        # Section title row: only updates the shared
                        # definiteness tag, never a row/column header.
                        if header_str.endswith(" nebinavkirî"):
                            shared_tags = ["indefinite"]
                        elif header_str.endswith(" binavkirî"):
                            shared_tags = ["definite"]
                    elif row.contain_node(NodeKind.TABLE_CELL):
                        # Header next to data cells labels the row.
                        row_header = header_str
                    elif header_str not in ["Rewş", ""]:
                        # Header-only row: column titles, except the
                        # "Rewş" corner cell and empty cells.
                        col_headers.append(header_str)
                elif len(row.children) == 1:
                    # A lone data cell carries no forms.
                    continue
                else:
                    for form_str in clean_node(wxr, None, cell).splitlines():
                        if form_str not in ["", wxr.wtp.title]:
                            form = Form(
                                form=form_str, tags=gender_tags + shared_tags
                            )
                            if row_header != "":
                                form.raw_tags.append(row_header)
                            if col_index < len(col_headers):
                                form.raw_tags.append(col_headers[col_index])
                            translate_raw_tags(form)
                            word_entry.forms.append(form)
                    # NOTE(review): advanced once per data cell (not per
                    # form line) so it lines up with col_headers — confirm
                    # against the upstream file.
                    col_index += 1

@dataclass
class TableHeader:
    """A table header cell's text together with its position and span."""

    # Cleaned text of the header cell.
    text: str
    # Index of the row where the header starts.
    row_index: int = 0
    # Number of rows the header covers, starting at row_index.
    rowspan: int = 0
    # Index of the column where the header starts.
    col_index: int = 0
    # Number of columns the header covers, starting at col_index.
    colspan: int = 0

def extract_ku_tewîn_lk_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Extract forms from an expanded "ku-tewîn-lk" table template.

    The template is expanded and re-parsed; every table found in the result
    is walked row by row.

    Rows with a single child are section rows and never produce forms:

    * text ending with " gerguhêz)" / " negerguhêz)" sets the shared tag
      to "transitive" / "intransitive" and resets the header state;
    * text starting with "Rehê dema " only resets the header state;
    * text starting with "Formên din:" passes the rest of the text
      (stripped) to `extract_tewandin_page`.

    In other rows, header cells and the first data cell are recorded as row
    headers (header cells keep their numeric "rowspan" attribute, 1 when it
    is missing or not a number). Every later non-empty data cell is split
    on "/" and each non-empty part that differs from the page title becomes
    a `Form` appended to `word_entry.forms`. A form receives as raw tags
    the text of every recorded header whose row span covers the current
    row; raw tags are then passed through `translate_raw_tags`.
    """
    # https://ku.wiktionary.org/wiki/Şablon:ku-tewîn-lk
    other_forms_prefix = "Formên din:"
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    for table_node in expanded_node.find_child(NodeKind.TABLE):
        row_index = 0
        shared_tags = []
        row_headers = []
        for row in table_node.find_child(NodeKind.TABLE_ROW):
            if len(row.children) == 1:
                row_str = clean_node(wxr, None, row.children)
                clear_values = False
                if row_str.endswith(" gerguhêz)"):
                    shared_tags = ["transitive"]
                    clear_values = True
                elif row_str.endswith(" negerguhêz)"):
                    shared_tags = ["intransitive"]
                    clear_values = True
                elif row_str.startswith("Rehê dema "):
                    clear_values = True
                elif row_str.startswith(other_forms_prefix):
                    # Slice by the prefix length instead of a hard-coded
                    # offset so the two cannot drift apart.
                    extract_tewandin_page(
                        wxr,
                        word_entry,
                        row_str[len(other_forms_prefix) :].strip(),
                    )
                if clear_values:
                    # A new section starts: row numbering and the headers
                    # collected so far no longer apply.
                    row_index = 0
                    row_headers.clear()
                # Section rows never hold forms and do not advance
                # row_index.
                continue

            for header_cell in row.find_child(NodeKind.TABLE_HEADER_CELL):
                rowspan = 1
                rowspan_str = header_cell.attrs.get("rowspan", "1")
                if re.fullmatch(r"\d+", rowspan_str):
                    rowspan = int(rowspan_str)
                row_headers.append(
                    TableHeader(
                        text=clean_node(wxr, None, header_cell),
                        rowspan=rowspan,
                        row_index=row_index,
                    )
                )

            for col_index, cell in enumerate(
                row.find_child(NodeKind.TABLE_CELL)
            ):
                cell_str = clean_node(wxr, None, cell)
                if cell_str == "":
                    continue
                if col_index == 0:
                    # The first data cell labels the row.
                    row_headers.append(
                        TableHeader(
                            text=cell_str, rowspan=1, row_index=row_index
                        )
                    )
                else:
                    for form_str in cell_str.split("/"):
                        form_str = form_str.strip()
                        if form_str not in ["", wxr.wtp.title]:
                            # Give each form its own tag list so later
                            # per-form tag edits cannot leak into the
                            # shared list.
                            form = Form(form=form_str, tags=list(shared_tags))
                            for header in row_headers:
                                if (
                                    row_index >= header.row_index
                                    and row_index
                                    < header.row_index + header.rowspan
                                ):
                                    form.raw_tags.append(header.text)
                            translate_raw_tags(form)
                            word_entry.forms.append(form)

            row_index += 1

Coverage for src/wiktextract/extractor/ku/form_table.py: 92%

99 statements