Coverage for src/wiktextract/extractor/tr/inflection.py: 96%
71 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from dataclasses import dataclass
3from wikitextprocessor import LevelNode, NodeKind, TemplateNode
5from ...page import clean_node
6from ...wxr_context import WiktextractContext
7from .models import Form, WordEntry
8from .tags import translate_raw_tags
def extract_inflection_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract forms from every inflection-table template in a section.

    Walks the direct template children of ``level_node`` and hands each
    recognised table template to ``extract_tr_ad_tablo_template``, which
    appends the extracted forms to ``word_entry``.  Recognised templates
    are ``tr-ad-tablo``, ``tr-eylem-tablo`` and any template whose name
    starts with ``tr-çekim-ad-``.  Other templates are ignored.
    """
    for t_node in level_node.find_child(NodeKind.TEMPLATE):
        template_name = t_node.template_name
        if template_name in (
            "tr-ad-tablo",
            "tr-eylem-tablo",
        ) or template_name.startswith("tr-çekim-ad-"):
            extract_tr_ad_tablo_template(wxr, word_entry, t_node)
@dataclass
class SpanHeader:
    """A table header cell together with the range of cells it covers.

    The header applies to every position ``p`` with
    ``index <= p < index + span``.
    """

    # Cleaned text of the header cell; used as a raw tag on matching forms.
    text: str
    # Position (column index or row index) at which the header starts.
    index: int
    # Number of consecutive columns/rows covered (colspan/rowspan value).
    span: int
def _read_span(cell, attr_name: str) -> int:
    """Return a cell's ``colspan``/``rowspan`` value as a positive integer.

    Missing, non-numeric or non-positive values fall back to 1 so that a
    single malformed cell cannot abort extraction of the whole table.
    """
    try:
        span = int(cell.attrs.get(attr_name, "1"))
    except (TypeError, ValueError):
        return 1
    return span if span > 0 else 1


def extract_tr_ad_tablo_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Expand an inflection-table template and append its forms.

    The template is expanded and re-parsed, then every table found in the
    expansion is walked row by row:

    * Header cells in a row without data cells are column headers.  For
      ``tr-çekim-ad-*`` templates the first cell of such a row is stored
      as a table-wide raw tag instead.
    * Header cells in a row that also has data cells are row headers.
    * A header-only row that follows a data row starts a new logical
      table: collected headers and table-wide raw tags are reset.
    * Each non-empty line of a data cell becomes one ``Form`` carrying the
      table-wide tags plus the text of every column/row header whose span
      covers the cell.  A data cell containing only "—" yields no form but
      still occupies a column.

    For ``tr-çekim-ad-*`` templates, forms from the second table (index 1)
    are tagged "possessive".  Forms are appended to ``word_entry.forms``.
    """
    # Loop-invariant: decides both the "possessive" tag and how the first
    # header cell of a header row is interpreted.
    is_noun_declension = t_node.template_name.startswith("tr-çekim-ad-")

    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    for link_node in expanded_node.find_child(NodeKind.LINK):
        # Return value is discarded; the call is made for whatever it
        # records on word_entry (presumably category links - verify
        # against clean_node).
        clean_node(wxr, word_entry, link_node)

    for table_index, table in enumerate(
        expanded_node.find_child_recursively(NodeKind.TABLE)
    ):
        last_row_has_data = False
        col_headers = []
        row_headers = []
        row_index = 0
        table_tags = []
        table_raw_tags = []
        if is_noun_declension and table_index == 1:
            table_tags.append("possessive")

        for row in table.find_child(NodeKind.TABLE_ROW):
            col_index = 0
            row_has_data = row.contain_node(NodeKind.TABLE_CELL)
            if not row_has_data and not row.contain_node(
                NodeKind.TABLE_HEADER_CELL
            ):
                # Row has neither data nor header cells: nothing to read.
                continue

            for cell_index, cell in enumerate(
                row.find_child(NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL)
            ):
                cell_text = clean_node(wxr, None, cell)
                if cell_text == "":
                    # Empty cells are skipped without advancing col_index.
                    continue

                if cell.kind == NodeKind.TABLE_HEADER_CELL:
                    if not row_has_data:
                        if last_row_has_data and cell_index == 0:
                            # A header row right after data rows begins a
                            # new logical table inside the same markup.
                            col_headers.clear()
                            row_headers.clear()
                            table_raw_tags.clear()
                            row_index = 0
                        colspan = _read_span(cell, "colspan")
                        if is_noun_declension and cell_index == 0:
                            table_raw_tags.append(cell_text)
                        else:
                            col_headers.append(
                                SpanHeader(cell_text, col_index, colspan)
                            )
                        col_index += colspan
                    else:
                        # Row headers do not advance col_index, so column
                        # positions count data cells only.
                        rowspan = _read_span(cell, "rowspan")
                        row_headers.append(
                            SpanHeader(cell_text, row_index, rowspan)
                        )
                elif cell.kind == NodeKind.TABLE_CELL:
                    if cell_text == "—":
                        # Placeholder for a missing form: occupies a column
                        # but produces no Form.
                        col_index += 1
                        continue
                    for line in cell_text.splitlines():
                        word = line.strip()
                        if not word:
                            continue
                        # Pass copies so the header tags appended below can
                        # never leak into the shared table-level lists
                        # (a no-op if Form already copies its inputs).
                        form = Form(
                            form=word,
                            tags=list(table_tags),
                            raw_tags=list(table_raw_tags),
                        )
                        for col_head in col_headers:
                            if (
                                col_head.index
                                <= col_index
                                < col_head.index + col_head.span
                            ):
                                form.raw_tags.append(col_head.text)
                        for row_head in row_headers:
                            if (
                                row_head.index
                                <= row_index
                                < row_head.index + row_head.span
                            ):
                                form.raw_tags.append(row_head.text)
                        translate_raw_tags(form)
                        word_entry.forms.append(form)
                    col_index += 1

            row_index += 1
            last_row_has_data = row_has_data