Coverage for src/wiktextract/extractor/tr/inflection.py: 96%

71 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from dataclasses import dataclass 

2 

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Form, WordEntry 

8from .tags import translate_raw_tags 

9 

10 

def extract_inflection_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract forms from the inflection templates directly under a section.

    Every template child of ``level_node`` whose name is "tr-ad-tablo" or
    "tr-eylem-tablo", or starts with "tr-çekim-ad-", is handed to
    ``extract_tr_ad_tablo_template``; other templates are ignored.
    """
    exact_names = ("tr-ad-tablo", "tr-eylem-tablo")
    for template in level_node.find_child(NodeKind.TEMPLATE):
        name = template.template_name
        if name not in exact_names and not name.startswith("tr-çekim-ad-"):
            continue
        extract_tr_ad_tablo_template(wxr, word_entry, template)

20 

21 

@dataclass
class SpanHeader:
    """A table header cell plus the range of rows or columns it applies to."""

    # Cleaned text of the header cell.
    text: str
    # First row or column index covered by the header.
    index: int
    # Number of consecutive rows or columns covered, starting at ``index``.
    span: int

27 

28 

def _parse_span(cell, attr_name: str) -> int:
    """Read an integer span attribute (``colspan``/``rowspan``) from a cell.

    A missing attribute, or a value that is not a valid integer literal,
    yields 1 so that malformed table markup cannot abort the extraction.
    """
    try:
        return int(cell.attrs.get(attr_name, "1"))
    except ValueError:
        return 1


def _headers_at(headers: "list[SpanHeader]", position: int) -> "list[str]":
    """Return the texts of all *headers* whose span covers *position*."""
    return [
        header.text
        for header in headers
        if header.index <= position < header.index + header.span
    ]


def extract_tr_ad_tablo_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Append the forms found in an expanded inflection-table template.

    The template is re-parsed with everything expanded and each table in the
    result is walked row by row. Header cells in rows that have no data cell
    are recorded as column headers; header cells in rows that do have data
    cells are recorded as row headers. Every non-empty line of a data cell
    becomes a ``Form`` appended to ``word_entry.forms``. Its raw tags are the
    table-wide raw tags followed by the column headers and then the row
    headers covering the cell's position; ``translate_raw_tags`` is called on
    the form before it is appended.

    For templates whose name starts with "tr-çekim-ad-", the first cell of a
    header-only row is stored as a table-wide raw tag instead of a column
    header, and forms of the second table receive the "possessive" tag.
    """
    is_cekim_template = t_node.template_name.startswith("tr-çekim-ad-")
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    # NOTE(review): the result is discarded, so this is called only for its
    # effect on word_entry (presumably category collection) - confirm
    # against clean_node.
    for link_node in expanded_node.find_child(NodeKind.LINK):
        clean_node(wxr, word_entry, link_node)

    for table_index, table in enumerate(
        expanded_node.find_child_recursively(NodeKind.TABLE)
    ):
        last_row_has_data = False
        col_headers = []
        row_headers = []
        row_index = 0
        table_tags = []
        table_raw_tags = []
        if is_cekim_template and table_index == 1:
            table_tags.append("possessive")

        for row in table.find_child(NodeKind.TABLE_ROW):
            col_index = 0
            row_has_data = row.contain_node(NodeKind.TABLE_CELL)
            if not row_has_data and not row.contain_node(
                NodeKind.TABLE_HEADER_CELL
            ):
                # Rows without any cell do not count towards row_index.
                continue

            for cell_index, cell in enumerate(
                row.find_child(NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL)
            ):
                cell_text = clean_node(wxr, None, cell)
                if cell_text == "":
                    # Empty cells are skipped without advancing col_index.
                    continue

                if cell.kind == NodeKind.TABLE_HEADER_CELL:
                    if row_has_data:
                        # Header inside a data row: a row header. It does
                        # not advance col_index, so data cells are counted
                        # from column 0.
                        row_headers.append(
                            SpanHeader(
                                cell_text,
                                row_index,
                                _parse_span(cell, "rowspan"),
                            )
                        )
                        continue

                    if last_row_has_data and cell_index == 0:
                        # A header-only row following data rows starts a
                        # new sub-table: forget the previous headers.
                        col_headers.clear()
                        row_headers.clear()
                        table_raw_tags.clear()
                        row_index = 0
                    colspan = _parse_span(cell, "colspan")
                    if is_cekim_template and cell_index == 0:
                        table_raw_tags.append(cell_text)
                    else:
                        col_headers.append(
                            SpanHeader(cell_text, col_index, colspan)
                        )
                    col_index += colspan
                elif cell.kind == NodeKind.TABLE_CELL:
                    if cell_text == "—":
                        # A lone dash yields no form but still occupies a
                        # column.
                        col_index += 1
                        continue
                    for line in cell_text.splitlines():
                        word = line.strip()
                        if word == "":
                            continue
                        # Pass fresh copies so the per-form header tags
                        # appended below can never leak into the shared
                        # table-level lists (harmless if Form already
                        # copies its inputs).
                        form = Form(
                            form=word,
                            tags=list(table_tags),
                            raw_tags=list(table_raw_tags),
                        )
                        form.raw_tags.extend(
                            _headers_at(col_headers, col_index)
                        )
                        form.raw_tags.extend(
                            _headers_at(row_headers, row_index)
                        )
                        translate_raw_tags(form)
                        word_entry.forms.append(form)
                    col_index += 1

            row_index += 1
            last_row_has_data = row_has_data