Coverage for src/wiktextract/extractor/ku/tewandin.py: 94%

72 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1import re 

2from itertools import count 

3 

4from wikitextprocessor.parser import LEVEL_KIND_FLAGS, NodeKind, TemplateNode 

5 

6from ...page import clean_node 

7from ...wxr_context import WiktextractContext 

8from .models import Form, WordEntry 

9from .tags import translate_raw_tags 

10 

11 

def extract_tewandin_page(
    wxr: WiktextractContext, word_entry: WordEntry, title: str
) -> None:
    """Extract inflected forms from the page named ``title``.

    The page is fetched from namespace 106 and parsed.  Every template that
    is a direct child of the root, and every template that is a direct child
    of a level (heading) node under the root, is handed to
    ``extract_tewandin_template``.  Nothing happens when the page, or its
    body, is missing.

    Args:
        wxr: Extraction context; ``wxr.wtp`` is used to fetch and parse.
        word_entry: Entry passed through to the template extractors.
        title: Title of the page to read; it is also passed on as the
            source-page name.
    """
    # NOTE(review): 106 is presumably the "Tewandin" namespace id — confirm.
    page = wxr.wtp.get_page(title, 106)
    if page is None or page.body is None:
        return
    root = wxr.wtp.parse(page.body)
    # Use one explicit de-duplication set per page instead of relying on the
    # callee's default, so duplicates are dropped within this page only and
    # no state carries over from one page to the next.
    form_set: set[str] = set()
    for t_node in root.find_child(NodeKind.TEMPLATE):
        extract_tewandin_template(
            wxr, word_entry, t_node, title, form_set=form_set
        )
    for level_node in root.find_child(LEVEL_KIND_FLAGS):
        for t_node in level_node.find_child(NodeKind.TEMPLATE):
            extract_tewandin_template(
                wxr, word_entry, t_node, title, form_set=form_set
            )

24 

25 

def extract_tewandin_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    source_page: str,
    tab_name: str = "",
    form_set: "set[str] | None" = None,
) -> None:
    """Dispatch an inflection template to its extractor by template name.

    ``ku-tewandin`` templates go to ``extract_ku_tewandin_template`` together
    with ``tab_name`` and ``form_set``.  ``etîket tewandin`` templates go to
    ``extract_etîket_tewandin_template``, which receives neither of those
    two arguments.  Templates with any other name are ignored.

    Args:
        wxr: Extraction context.
        word_entry: Entry passed through to the extractor.
        t_node: Template node to dispatch.
        source_page: Name of the page the template came from.
        tab_name: Optional tab label forwarded to the ``ku-tewandin``
            extractor.
        form_set: Forms already collected, used for de-duplication.  When
            omitted a fresh empty set is created for this call.
    """
    # A ``set()`` literal in the signature would be built once at definition
    # time and shared by every call that omits the argument, leaking seen
    # forms between unrelated calls; build it per call instead.
    if form_set is None:
        form_set = set()
    if t_node.template_name == "ku-tewandin":
        extract_ku_tewandin_template(
            wxr,
            word_entry,
            t_node,
            source_page,
            tab_name=tab_name,
            form_set=form_set,
        )
    elif t_node.template_name == "etîket tewandin":
        extract_etîket_tewandin_template(wxr, word_entry, t_node, source_page)

45 

46 

def extract_ku_tewandin_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    source_page: str,
    tab_name: str = "",
    form_set: "set[str] | None" = None,
) -> None:
    """Extract forms from the tables produced by a ``ku-tewandin`` template.

    The template is serialised back to wikitext, parsed with
    ``expand_all=True``, and every table found in the result is walked row
    by row.  Each non-empty data cell becomes a ``Form`` whose raw tags are,
    in order: ``tab_name`` (if not empty), the row header (if any), and the
    text of every column header whose span covers the cell's column index.
    The form is appended to ``word_entry.forms`` unless it equals
    ``wxr.wtp.title`` or is already present in ``form_set``.

    Args:
        wxr: Extraction context.
        word_entry: Entry whose ``forms`` list is extended.
        t_node: The ``ku-tewandin`` template node.
        source_page: Stored as ``source`` on every created form.
        tab_name: Optional label added as the first raw tag.
        form_set: Forms already collected; updated in place.  When omitted
            a fresh empty set is created for this call.
    """
    # https://ku.wiktionary.org/wiki/Şablon:ku-tewandin
    from .form_table import TableHeader

    # The previous default ``{}`` was a dict, so ``form_set.add`` raised
    # AttributeError whenever the argument was omitted.
    if form_set is None:
        form_set = set()

    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    # Return value is discarded; presumably called for its side effects on
    # ``word_entry`` — TODO confirm.
    clean_node(wxr, word_entry, expanded_node)
    for table in expanded_node.find_child_recursively(NodeKind.TABLE):
        col_headers = []
        last_row_has_data_cell = False
        for row in table.find_child(NodeKind.TABLE_ROW):
            row_header = ""
            col_index = 0
            for cell in row.find_child(
                NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL
            ):
                cell_str = clean_node(wxr, None, cell)
                if cell_str == "":
                    # Empty cells are skipped and do not advance col_index.
                    continue
                colspan = 1
                colspan_str = cell.attrs.get("colspan", "1")
                if re.fullmatch(r"\d+", colspan_str):
                    colspan = int(colspan_str)
                if cell.kind == NodeKind.TABLE_HEADER_CELL:
                    if row.contain_node(NodeKind.TABLE_CELL):
                        # Header cell in a row that also has data cells:
                        # it labels the row.
                        row_header = cell_str
                    else:
                        # Header-only row: column headers.  The first such
                        # header after a data row starts a new group.
                        # NOTE(review): nesting reconstructed from a
                        # flattened listing — confirm that the col_index
                        # advance and flag reset belong to this branch only.
                        if last_row_has_data_cell:
                            col_headers.clear()
                        col_headers.append(
                            TableHeader(
                                text=cell_str,
                                col_index=col_index,
                                colspan=colspan,
                            )
                        )
                        col_index += colspan
                        last_row_has_data_cell = False
                else:
                    last_row_has_data_cell = True
                    form = Form(form=cell_str, source=source_page)
                    if tab_name != "":
                        form.raw_tags.append(tab_name)
                    if row_header != "":
                        form.raw_tags.append(row_header)
                    for header in col_headers:
                        if (
                            col_index >= header.col_index
                            and col_index < header.col_index + header.colspan
                        ):
                            form.raw_tags.append(header.text)
                    if form.form != wxr.wtp.title and form.form not in form_set:
                        translate_raw_tags(form)
                        word_entry.forms.append(form)
                        form_set.add(form.form)
                    col_index += colspan

111 

112 

def extract_etîket_tewandin_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    source_page: str,
) -> None:
    """Extract forms from each tab of an ``etîket tewandin`` template.

    Tabs are numbered from 1: ``etîket<N>`` holds the tab label and
    ``naverok<N>`` the tab content.  Iteration stops at the first missing
    ``etîket<N>``.  Each template that is a direct child of the parsed tab
    content is passed to ``extract_tewandin_template`` with the cleaned
    label as ``tab_name``.  One de-duplication set is shared by all tabs of
    this template.

    Args:
        wxr: Extraction context.
        word_entry: Entry passed through to the nested extractors.
        t_node: The ``etîket tewandin`` template node.
        source_page: Name of the page the template came from.
    """
    # https://ku.wiktionary.org/wiki/Şablon:etîket_tewandin
    form_set = set()
    params = t_node.template_parameters
    for num in count(1):
        tab_name_arg = f"etîket{num}"
        if tab_name_arg not in params:
            break
        content_arg = f"naverok{num}"
        if content_arg not in params:
            # A label without matching content used to raise KeyError;
            # there is nothing to extract for this tab, so move on.
            continue
        tab_name = clean_node(wxr, None, params[tab_name_arg])
        tab_arg = wxr.wtp.parse(wxr.wtp.node_to_wikitext(params[content_arg]))
        for node in tab_arg.find_child(NodeKind.TEMPLATE):
            extract_tewandin_template(
                wxr,
                word_entry,
                node,
                source_page,
                tab_name=tab_name,
                form_set=form_set,
            )