Coverage for src/wiktextract/extractor/sv/page.py: 88%

30 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from typing import Any 

2 

3from mediawiki_langcodes import name_to_code 

4from wikitextprocessor.parser import LEVEL_KIND_FLAGS, LevelNode, NodeKind 

5 

6from ...page import clean_node 

7from ...wxr_context import WiktextractContext 

8from .models import Sense, WordEntry 

9from .pos import extract_pos_section 

10from .section_titles import POS_DATA 

11 

12 

def parse_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
) -> None:
    """Process one heading node, then every heading node nested inside it.

    The cleaned heading text is registered as the current subsection.  If
    that text is a key of ``POS_DATA`` the node is handed to
    ``extract_pos_section`` together with ``page_data`` and ``base_data``.
    Child heading nodes (those matching ``LEVEL_KIND_FLAGS``) are then
    processed recursively with the same ``page_data`` and ``base_data``.
    """
    section_title = clean_node(wxr, None, level_node.largs)
    wxr.wtp.start_subsection(section_title)

    is_pos_heading = section_title in POS_DATA
    if is_pos_heading:
        extract_pos_section(
            wxr, page_data, base_data, level_node, section_title
        )

    # Descend into sub-headings regardless of whether this one was a POS.
    for child_section in level_node.find_child(LEVEL_KIND_FLAGS):
        parse_section(wxr, page_data, base_data, child_section)

26 

27 

def parse_page(
    wxr: WiktextractContext, page_title: str, page_text: str
) -> list[dict[str, Any]]:
    """Parse the wikitext of one page and return its entries as dicts.

    Each level-2 heading is treated as a language section: its cleaned
    title becomes ``lang`` and ``name_to_code(..., "sv")`` supplies
    ``lang_code`` (``"unknown"`` when the lookup yields a falsy value).
    Every heading below it is passed to ``parse_section``.  Entries that
    end up without senses receive a single ``Sense`` tagged ``no-gloss``.
    The result is each entry's ``model_dump(exclude_defaults=True)``.
    """
    # Style guide: https://sv.wiktionary.org/wiki/Wiktionary:Stilguide
    wxr.wtp.start_page(page_title)
    root = wxr.wtp.parse(page_text, pre_expand=True)
    entries: list[WordEntry] = []

    for language_node in root.find_child(NodeKind.LEVEL2):
        language_name = clean_node(wxr, None, language_node.largs)
        language_code = name_to_code(language_name, "sv")
        if not language_code:
            language_code = "unknown"
        wxr.wtp.start_section(language_name)

        # Template entry handed to every section under this language.
        language_base = WordEntry(
            word=wxr.wtp.title,
            lang_code=language_code,
            lang=language_name,
            pos="unknown",
        )
        for section_node in language_node.find_child(LEVEL_KIND_FLAGS):
            parse_section(wxr, entries, language_base, section_node)

    # Guarantee at least one sense per entry before serialising.
    for entry in entries:
        if not entry.senses:
            entry.senses.append(Sense(tags=["no-gloss"]))

    return [entry.model_dump(exclude_defaults=True) for entry in entries]