Coverage for src/wiktextract/extractor/ja/etymology.py: 93%

25 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from wikitextprocessor.parser import LEVEL_KIND_FLAGS, LevelNode, NodeKind 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from .models import WordEntry 

6 

7 

def extract_etymology_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
) -> None:
    """Collect etymology texts and categories from an etymology section.

    Each item of the lists that are direct children of ``level_node`` is
    cleaned (ignoring its nested lists) and becomes one etymology text.
    When that yields nothing, all children of ``level_node`` other than
    sub-level nodes are cleaned together into a single text instead.

    The texts and any categories gathered by ``clean_node`` are stored on
    ``base_data``.  When ``level_node`` is not a level-3 node, they are also
    stored on every entry of ``page_data`` that has the same ``lang_code``
    as ``base_data`` and no etymology texts yet.

    Args:
        wxr: Context passed through to ``clean_node``.
        page_data: Entries extracted so far; may be updated in place.
        base_data: Entry that receives the etymology texts and categories.
        level_node: The section node to read the etymology from.
    """
    etymology_texts: list[str] = []
    cats: dict = {}
    for list_node in level_node.find_child(NodeKind.LIST):
        # don't use `find_child_recursively` to avoid lists in subsection
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            text = clean_node(
                wxr, cats, list(list_item.invert_find_child(NodeKind.LIST))
            )
            if text:
                etymology_texts.append(text)

    if not etymology_texts:
        # No usable list items: fall back to the section's own content,
        # leaving out child sections.
        text = clean_node(
            wxr, cats, list(level_node.invert_find_child(LEVEL_KIND_FLAGS))
        )
        if text:
            etymology_texts.append(text)

    # Run the direct link children through `clean_node` only for the
    # categories it records in `cats`; the returned text is discarded.
    for link in level_node.find_child(NodeKind.LINK):
        clean_node(wxr, cats, link)

    categories = cats.get("categories", [])
    # Assign copies so that no two entries share one mutable list object.
    base_data.etymology_texts = list(etymology_texts)
    base_data.categories.extend(categories)

    if level_node.kind != NodeKind.LEVEL3:  # under POS section
        for data in page_data:
            if (
                data.lang_code == base_data.lang_code
                and not data.etymology_texts
            ):
                data.etymology_texts = list(etymology_texts)
                data.categories.extend(categories)