Coverage for src/wiktextract/extractor/el/etymology.py: 70%
37 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-03 05:44 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-03 05:44 +0000
1from typing import cast
3from wikitextprocessor import NodeKind, TemplateNode, WikiNode
4from wikitextprocessor.parser import LEVEL_KIND_FLAGS
6from wiktextract import WiktextractContext
7from wiktextract.page import clean_node
9from .models import WordEntry
10from .parse_utils import (
11 POSReturns,
12 find_sections,
13)
14from .pos import extract_form_of_templates
15from .pronunciation import process_pron
16from .section_titles import Heading, POSName
def process_etym(
    wxr: WiktextractContext,
    base_data: WordEntry,
    node: WikiNode,
    title: str,
    num: int,
) -> tuple[int, POSReturns]:
    """Handle one etymology section and collect its nested sections.

    The section's own content (every child that is not a sub-heading) is
    scanned for form-of templates and then cleaned into
    ``base_data.etymology_text``.  The sub-headings are classified by
    ``find_sections``: POS headings are queued for the caller together with
    a deep copy of ``base_data``, and pronunciation headings are handed to
    ``process_pron``, whose results are appended to the same queue.

    Args:
        wxr: Extraction context; its ``wtp`` is told about the subsection.
        base_data: Entry data for this section; modified in place.
        node: The etymology section node.
        title: Heading title of this etymology section.
        num: Section number to start counting from.

    Returns:
        A pair ``(section_num, sublevels)`` where ``section_num`` is the
        largest number seen among ``num`` and the POS/pronunciation
        sub-headings, and ``sublevels`` is the list of queued entries.
    """
    # Children that are not sub-headings form the etymology body; the
    # sub-headings themselves are assumed to be POS (or similar) sections.
    own_nodes = list(node.invert_find_child(LEVEL_KIND_FLAGS))
    child_sections = list(node.find_child(LEVEL_KIND_FLAGS))
    collected: POSReturns = []

    wxr.wtp.start_subsection(title)

    highest_num = num

    # Form-of templates may sit directly in the body or inside list items.
    for idx, child in enumerate(own_nodes):
        if isinstance(child, TemplateNode):
            extract_form_of_templates(wxr, base_data, child, own_nodes, idx)
        if not isinstance(child, WikiNode) or child.kind != NodeKind.LIST:
            continue
        for item in child.find_child_recursively(NodeKind.LIST_ITEM):
            for item_idx, item_child in enumerate(item.children):
                if not isinstance(item_child, TemplateNode):
                    continue
                extract_form_of_templates(
                    wxr, base_data, item_child, item.children, item_idx
                )

    # Greek Wiktionary rarely uses dedicated etymology templates, so the
    # etymology is simply the cleaned text of the section body.
    cleaned = clean_node(wxr, base_data, own_nodes)
    etym_text = cleaned.lstrip(":#").strip()
    if etym_text:
        base_data.etymology_text = etym_text

    for (
        heading_type,
        pos_or_section,
        sub_title,
        sub_tags,
        sub_num,
        sub_node,
    ) in find_sections(wxr, child_sections):
        if heading_type == Heading.POS:
            if sub_num > highest_num:
                highest_num = sub_num
            # SAFETY: for a POS heading, find_sections guarantees that
            # "pos_or_section" is a POSName.
            pos_name = cast(POSName, pos_or_section)
            # Each POS gets its own snapshot of the data gathered so far.
            snapshot = base_data.model_copy(deep=True)
            collected.append(
                (pos_name, sub_title, sub_tags, sub_num, sub_node, snapshot)
            )
        elif heading_type == Heading.Pron:
            if sub_num > highest_num:
                highest_num = sub_num
            # NOTE(review): the number returned by process_pron is not
            # folded back into the running maximum -- confirm intended.
            _, pron_entries = process_pron(
                wxr, sub_node, base_data, sub_title, highest_num
            )
            collected.extend(pron_entries)

    return highest_num, collected