Coverage for src/wiktextract/extractor/ku/descendant.py: 84%
84 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 08:12 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-04 08:12 +0000
1from wikitextprocessor import (
2 HTMLNode,
3 LevelNode,
4 NodeKind,
5 TemplateNode,
6 WikiNode,
7)
9from ...page import clean_node
10from ...wxr_context import WiktextractContext
11from .models import Descendant, WordEntry
12from .tags import translate_raw_tags
def extract_descendant_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract descendant data from every list below a section node.

    Each list item found directly under a list child of ``level_node`` is
    handed to ``extract_desc_list_item`` with an empty parent list, i.e. it
    is treated as a top-level item.
    """
    top_level_items = (
        item
        for child_list in level_node.find_child(NodeKind.LIST)
        for item in child_list.find_child(NodeKind.LIST_ITEM)
    )
    for item in top_level_items:
        extract_desc_list_item(wxr, word_entry, [], item)
23def extract_desc_list_item(
24 wxr: WiktextractContext,
25 word_entry: WordEntry,
26 parent_data: list[Descendant],
27 list_item: WikiNode,
28) -> None:
29 desc_list = []
30 for node in list_item.children:
31 if isinstance(node, TemplateNode):
32 desc = None
33 if node.template_name == "dû":
34 desc = extract_dû_template(wxr, word_entry, node, parent_data)
35 elif node.template_name == "dardû":
36 desc = extract_dardû_template(
37 wxr, word_entry, node, parent_data
38 )
39 elif node.template_name == "g" and len(desc_list) > 0: 39 ↛ 43line 39 didn't jump to line 43 because the condition on line 39 was always true
40 desc = extract_g_template(
41 wxr, word_entry, node, desc_list[-1], parent_data
42 )
43 if desc is not None:
44 desc_list.append(desc)
45 elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
46 for child_list_item in node.find_child(NodeKind.LIST_ITEM):
47 extract_desc_list_item(
48 wxr, word_entry, desc_list, child_list_item
49 )
def extract_dû_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    parent_descs: list[Descendant],
) -> Descendant | None:
    """Build a ``Descendant`` from a "dû" template node.

    The template is expanded and parsed, then scanned by
    ``extract_expanded_dû_template``. The language code is taken from the
    first template argument.

    Returns the new descendant after appending it to every entry of
    ``parent_descs`` (or to ``word_entry.descendants`` when there are no
    parents), or ``None`` when no word was found.
    """
    template_text = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_text, expand_all=True)
    first_arg = t_node.template_parameters.get(1, "")
    desc = Descendant(
        word="", lang_code=clean_node(wxr, None, first_arg), lang="unknown"
    )
    extract_expanded_dû_template(wxr, desc, parent_descs, expanded)

    if desc.word == "":
        return None

    if parent_descs:
        for parent in parent_descs:
            parent.descendants.append(desc)
    else:
        word_entry.descendants.append(desc)
    return desc
def extract_expanded_dû_template(
    wxr: WiktextractContext,
    desc: Descendant,
    parent_descs: list[Descendant],
    expanded_node: WikiNode,
) -> None:
    """Fill ``desc`` from the direct children of an expanded template node.

    - The text before the first ":" in a string child becomes ``desc.lang``
      while the language is still the "unknown" placeholder.
    - A ``<span>`` whose ``lang`` attribute ends in "-Latn" supplies
      ``desc.roman``.
    - Any other ``<span>`` with a non-empty ``lang`` attribute supplies
      ``desc.word``; its ``lang`` value is also used as ``desc.lang_code``
      when no code has been set yet.
    - A ``<span class="mention-gloss">`` without ``lang`` supplies
      ``desc.sense``.

    ``parent_descs`` is not used here; it is kept so existing callers keep
    working.
    """
    for node in expanded_node.children:
        if isinstance(node, str):
            if ":" in node and desc.lang == "unknown":
                desc.lang = node.partition(":")[0].strip()
            continue
        if not (isinstance(node, HTMLNode) and node.tag == "span"):
            continue

        span_lang = node.attrs.get("lang", "")
        span_class = node.attrs.get("class", "")
        if span_lang.endswith("-Latn"):
            desc.roman = clean_node(wxr, None, node)
        elif span_lang != "":
            desc.word = clean_node(wxr, None, node)
            # Callers in this file initialise lang_code either with the
            # "unknown" placeholder or with the cleaned first template
            # argument, which is "" when that argument is missing. Both
            # mean "not set", so fall back to the span's lang for either.
            if desc.lang_code in ("", "unknown"):
                desc.lang_code = span_lang
        elif span_class == "mention-gloss":
            desc.sense = clean_node(wxr, None, node)
def extract_dardû_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    parent_descs: list[Descendant],
) -> Descendant | None:
    """Build a ``Descendant`` tree from a "dardû" template node.

    The expanded template is scanned like a "dû" template for the main
    descendant. Every ``<dd>`` element found anywhere in the expansion is
    then scanned for a child descendant, which is appended to the main
    descendant's ``descendants`` when it has a word.

    Returns the main descendant after appending it to every entry of
    ``parent_descs`` (or to ``word_entry.descendants`` when there are no
    parents), or ``None`` when the main descendant has no word.
    """
    template_text = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_text, expand_all=True)
    first_arg = t_node.template_parameters.get(1, "")
    desc = Descendant(
        word="", lang_code=clean_node(wxr, None, first_arg), lang="unknown"
    )
    extract_expanded_dû_template(wxr, desc, parent_descs, expanded)

    for dd_element in expanded.find_html_recursively("dd"):
        nested = Descendant(word="", lang_code="unknown", lang="unknown")
        extract_expanded_dû_template(wxr, nested, [desc], dd_element)
        if nested.word == "":
            continue
        desc.descendants.append(nested)

    if desc.word == "":
        return None

    if parent_descs:
        for parent in parent_descs:
            parent.descendants.append(desc)
    else:
        word_entry.descendants.append(desc)
    return desc
def extract_g_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    previous_desc: Descendant,
    parent_descs: list[Descendant],
) -> Descendant | None:
    """Build a ``Descendant`` from a "g" template that follows another one.

    The word comes from template argument 2, falling back to "cuda"; the
    romanization from "tr" and the sense from "w". Language name and code
    are copied from ``previous_desc``. Raw tags are the non-empty, non-"?"
    texts of ``<abbr>`` elements inside ``<span class="gender">`` elements
    of the expanded template.

    Returns the new descendant after translating its raw tags and appending
    it to every entry of ``parent_descs`` (or to ``word_entry.descendants``
    when there are no parents), or ``None`` when the word is empty.
    """
    template_text = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_text, expand_all=True)
    raw_tags = [
        tag_text
        for gender_span in expanded.find_html(
            "span", attr_name="class", attr_value="gender"
        )
        for abbr in gender_span.find_html("abbr")
        if (tag_text := clean_node(wxr, None, abbr)) not in ("", "?")
    ]

    params = t_node.template_parameters
    word = clean_node(wxr, None, params.get(2, params.get("cuda", "")))
    roman = clean_node(wxr, None, params.get("tr", ""))
    sense = clean_node(wxr, None, params.get("w", ""))
    desc = Descendant(
        word=word,
        lang=previous_desc.lang,
        lang_code=previous_desc.lang_code,
        roman=roman,
        sense=sense,
        raw_tags=raw_tags,
    )

    if desc.word == "":
        return None

    translate_raw_tags(desc)
    if parent_descs:
        for parent in parent_descs:
            parent.descendants.append(desc)
    else:
        word_entry.descendants.append(desc)
    return desc