Coverage for src/wiktextract/extractor/sv/page.py: 88%
30 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from typing import Any
3from mediawiki_langcodes import name_to_code
4from wikitextprocessor.parser import LEVEL_KIND_FLAGS, LevelNode, NodeKind
6from ...page import clean_node
7from ...wxr_context import WiktextractContext
8from .models import Sense, WordEntry
9from .pos import extract_pos_section
10from .section_titles import POS_DATA
def parse_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
) -> None:
    """Process one section heading and then each of its nested sections.

    The heading text is obtained by cleaning ``level_node.largs`` and is
    registered as the current subsection on ``wxr.wtp``. When that text is
    a key of ``POS_DATA`` the node is passed to ``extract_pos_section``.
    Child level nodes are always visited afterwards, whether or not the
    heading matched.

    Args:
        wxr: Extraction context; its ``wtp`` processor tracks the
            current subsection.
        page_data: List of entries for the page, forwarded to
            ``extract_pos_section`` and to the recursive calls.
        base_data: Entry template for the enclosing language section,
            forwarded unchanged.
        level_node: The heading node to process.
    """
    heading = clean_node(wxr, None, level_node.largs)
    wxr.wtp.start_subsection(heading)
    if heading in POS_DATA:
        extract_pos_section(wxr, page_data, base_data, level_node, heading)

    # Descend into nested headings of any level.
    for child_section in level_node.find_child(LEVEL_KIND_FLAGS):
        parse_section(wxr, page_data, base_data, child_section)
def parse_page(
    wxr: WiktextractContext, page_title: str, page_text: str
) -> list[dict[str, Any]]:
    """Parse one page's wikitext into a list of serialized word entries.

    Every level-2 heading is treated as a language section: its cleaned
    heading text becomes the language name, and each child section is
    handed to ``parse_section`` together with a base ``WordEntry`` for
    that language. Entries that end up without any senses receive a
    single ``Sense`` tagged ``"no-gloss"``.

    Args:
        wxr: Extraction context whose ``wtp`` processor parses the page.
        page_title: Title passed to ``wxr.wtp.start_page``.
        page_text: Raw wikitext of the page.

    Returns:
        One ``model_dump(exclude_defaults=True)`` dict per collected entry.
    """
    # style guide
    # https://sv.wiktionary.org/wiki/Wiktionary:Stilguide
    wxr.wtp.start_page(page_title)
    root = wxr.wtp.parse(page_text, pre_expand=True)
    entries: list[WordEntry] = []

    for language_node in root.find_child(NodeKind.LEVEL2):
        language_name = clean_node(wxr, None, language_node.largs)
        # Fall back to "unknown" when the lookup yields nothing truthy.
        language_code = name_to_code(language_name, "sv") or "unknown"
        wxr.wtp.start_section(language_name)
        template_entry = WordEntry(
            word=wxr.wtp.title,
            lang_code=language_code,
            lang=language_name,
            pos="unknown",
        )
        for section_node in language_node.find_child(LEVEL_KIND_FLAGS):
            parse_section(wxr, entries, template_entry, section_node)

    # Make sure every entry carries at least one sense.
    for entry in entries:
        if not entry.senses:
            entry.senses.append(Sense(tags=["no-gloss"]))

    return [entry.model_dump(exclude_defaults=True) for entry in entries]