Coverage for src/wiktextract/extractor/pt/etymology.py: 89%
39 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from wikitextprocessor.parser import (
2 LEVEL_KIND_FLAGS,
3 LevelNode,
4 NodeKind,
5 TemplateNode,
6 WikiNode,
7)
9from ...page import clean_node
10from ...wxr_context import WiktextractContext
11from .models import Attestation, WordEntry
def extract_etymology_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
) -> None:
    """Extract etymology data from a section and attach it to word entries.

    Walks the direct children of ``level_node``. Runs of ordinary nodes are
    cleaned into one text each, every item of a list node becomes its own
    text, and a "datação" template is turned into attestations through
    ``extract_defdate_template``. The collected texts, categories and
    attestations are appended to every entry in ``page_data`` that has the
    same ``lang_code`` as the last entry. Nothing is returned; ``page_data``
    entries are modified in place.
    """
    collected_cats = {}
    pending_nodes = []
    etymology_texts = []
    attestations = []

    def flush_pending() -> None:
        # Turn the buffered nodes into a single text, dropping any leading
        # ":" and space characters, then empty the buffer.
        text = clean_node(wxr, collected_cats, pending_nodes).lstrip(": ")
        if text:
            etymology_texts.append(text)
        pending_nodes.clear()

    for child in level_node.children:
        is_wiki_node = isinstance(child, WikiNode)
        if is_wiki_node and child.kind in LEVEL_KIND_FLAGS:
            # Stop at the first child whose kind is in LEVEL_KIND_FLAGS
            # (presumably a nested section heading - confirm in parser).
            break
        if is_wiki_node and child.kind == NodeKind.LIST:
            # Text gathered before the list is emitted first, then each
            # list item contributes a separate text.
            flush_pending()
            for item in child.find_child(NodeKind.LIST_ITEM):
                item_text = clean_node(wxr, collected_cats, item.children)
                if item_text:
                    etymology_texts.append(item_text)
            continue
        if (
            isinstance(child, TemplateNode)
            and child.template_name == "datação"
        ):
            # A later "datação" template replaces the result of an earlier
            # one rather than adding to it.
            attestations = extract_defdate_template(
                wxr, collected_cats, child
            )
            continue
        pending_nodes.append(child)

    if pending_nodes:
        flush_pending()

    if not page_data:
        return

    target_lang = page_data[-1].lang_code
    new_categories = collected_cats.get("categories", [])
    for entry in page_data:
        if entry.lang_code != target_lang:
            continue
        entry.etymology_texts.extend(etymology_texts)
        entry.categories.extend(new_categories)
        entry.attestations.extend(attestations)
def extract_defdate_template(
    wxr: WiktextractContext, cats: dict[str, list[str]], t_node: TemplateNode
) -> list[Attestation]:
    """Build attestation data from a "datação" template node.

    The template is cleaned to plain text with ``clean_node``; a leading
    "(Datação:" and a trailing ")" are removed and the remainder is stripped
    of surrounding whitespace. Returns a list holding one ``Attestation``
    with that text as its ``date``, or an empty list when nothing is left.
    """
    expanded = clean_node(wxr, cats, t_node)
    without_prefix = expanded.removeprefix("(Datação:")
    without_wrapper = without_prefix.removesuffix(")")
    date = without_wrapper.strip()
    return [Attestation(date=date)] if date else []