Coverage for src/wiktextract/extractor/tr/inflection.py: 96%

1from dataclasses import dataclass

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode

5from ...page import clean_node

6from ...wxr_context import WiktextractContext

7from .models import Form, WordEntry

8from .tags import translate_raw_tags

def extract_inflection_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract inflected forms from the table templates of a section.

    Every template node yielded by ``level_node.find_child`` whose name is
    ``tr-ad-tablo`` or ``tr-eylem-tablo``, or starts with ``tr-çekim-ad-``,
    is passed to ``extract_tr_ad_tablo_template``. All other templates are
    ignored.

    Args:
        wxr: Extraction context, forwarded unchanged to the table extractor.
        word_entry: Entry forwarded to the table extractor, which appends
            the extracted forms to it.
        level_node: Section node whose template children are scanned.
    """
    exact_names = ("tr-ad-tablo", "tr-eylem-tablo")
    for template in level_node.find_child(NodeKind.TEMPLATE):
        name = template.template_name
        is_table_template = name in exact_names or name.startswith(
            "tr-çekim-ad-"
        )
        if not is_table_template:
            continue
        extract_tr_ad_tablo_template(wxr, word_entry, template)

@dataclass
class SpanHeader:
    """A table header cell together with the cell range it applies to.

    A header applies to position ``p`` when
    ``index <= p < index + span``.
    """

    # Cleaned text of the header cell.
    text: str
    # First column (for column headers) or row (for row headers) covered.
    index: int
    # Number of consecutive positions covered, taken from colspan/rowspan.
    span: int

def _span_attr(cell, attr_name: str) -> int:
    """Return a cell's ``colspan``/``rowspan`` attribute as an integer.

    Args:
        cell: Table cell node exposing an ``attrs`` mapping.
        attr_name: Attribute to read, e.g. ``"colspan"`` or ``"rowspan"``.

    Returns:
        The parsed integer, or ``1`` when the attribute is missing or is not
        a valid integer literal (malformed markup must not abort extraction).
    """
    try:
        return int(cell.attrs.get(attr_name, "1"))
    except (TypeError, ValueError):
        return 1


def _covering_header_texts(headers, position: int) -> list:
    """Return the texts of the ``SpanHeader`` items that cover ``position``.

    The order of ``headers`` is preserved in the result.
    """
    return [
        header.text
        for header in headers
        if header.index <= position < header.index + header.span
    ]


def extract_tr_ad_tablo_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Expand an inflection table template and append its forms.

    The template is expanded and re-parsed, then every table found in the
    expansion is walked row by row. Header cells are remembered as column or
    row headers (with their spans). Each non-empty line of each data cell
    becomes a ``Form`` whose ``raw_tags`` are the table-level raw tags
    followed by the texts of the column headers and then the row headers
    covering that cell. ``translate_raw_tags`` is applied to the form before
    it is appended to ``word_entry.forms``.

    Args:
        wxr: Extraction context; ``wxr.wtp`` is used to expand and parse the
            template.
        word_entry: Entry whose ``forms`` list receives the extracted forms.
        t_node: The table template node to process.
    """
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    # The return value is discarded; the call is made with ``word_entry``
    # attached, presumably so clean_node can record link side data such as
    # categories on the entry -- confirm against clean_node.
    for link_node in expanded_node.find_child(NodeKind.LINK):
        clean_node(wxr, word_entry, link_node)

    # Loop-invariant: these templates get special handling below.
    is_cekim_ad = t_node.template_name.startswith("tr-çekim-ad-")

    for table_index, table in enumerate(
        expanded_node.find_child_recursively(NodeKind.TABLE)
    ):
        last_row_has_data = False
        col_headers = []
        row_headers = []
        row_index = 0
        table_tags = []
        table_raw_tags = []
        # The second table of a "tr-çekim-ad-" template is tagged possessive.
        if is_cekim_ad and table_index == 1:
            table_tags.append("possessive")

        for row in table.find_child(NodeKind.TABLE_ROW):
            col_index = 0
            row_has_data = row.contain_node(NodeKind.TABLE_CELL)
            # Skip rows that hold neither data cells nor header cells.
            if not row_has_data and not row.contain_node(
                NodeKind.TABLE_HEADER_CELL
            ):
                continue

            for cell_index, cell in enumerate(
                row.find_child(NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL)
            ):
                cell_text = clean_node(wxr, None, cell)
                if cell_text == "":
                    continue

                if cell.kind == NodeKind.TABLE_HEADER_CELL:
                    if not row_has_data:
                        # Header-only row: its cells are column headers.
                        if last_row_has_data and cell_index == 0:  # new table
                            # A header row right after data rows starts a
                            # fresh sub-table; drop the previous headers.
                            col_headers.clear()
                            row_headers.clear()
                            table_raw_tags.clear()
                            row_index = 0
                        colspan = _span_attr(cell, "colspan")
                        if is_cekim_ad and cell_index == 0:
                            # First header cell applies to the whole table.
                            table_raw_tags.append(cell_text)
                        else:
                            col_headers.append(
                                SpanHeader(cell_text, col_index, colspan)
                            )
                        col_index += colspan
                    else:
                        # Header cell inside a data row: a row header. It
                        # does not advance the data column counter.
                        rowspan = _span_attr(cell, "rowspan")
                        row_headers.append(
                            SpanHeader(cell_text, row_index, rowspan)
                        )
                elif cell.kind == NodeKind.TABLE_CELL:
                    if cell_text == "—":
                        # Placeholder for a missing form: occupy the column
                        # but emit nothing.
                        col_index += 1
                        continue
                    for line in cell_text.splitlines():
                        word = line.strip()
                        if word == "":
                            continue
                        # Copy the table-level lists so header tags added
                        # for this form can never leak into the shared
                        # lists, regardless of whether Form copies its
                        # inputs.
                        form = Form(
                            form=word,
                            tags=list(table_tags),
                            raw_tags=list(table_raw_tags),
                        )
                        form.raw_tags.extend(
                            _covering_header_texts(col_headers, col_index)
                        )
                        form.raw_tags.extend(
                            _covering_header_texts(row_headers, row_index)
                        )
                        translate_raw_tags(form)
                        word_entry.forms.append(form)
                    col_index += 1

            row_index += 1
            last_row_has_data = row_has_data

Coverage for src/wiktextract/extractor/tr/inflection.py: 96%

71 statements