Coverage for src/wiktextract/extractor/ja/etymology.py: 93%

25 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-17 08:19 +0000

1from wikitextprocessor.parser import LEVEL_KIND_FLAGS, LevelNode, NodeKind 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from .models import WordEntry 

6 

7 

def extract_etymology_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
) -> None:
    """Collect the etymology texts of a section and store them on entries.

    Each list item found in a list that is a direct child of ``level_node``
    contributes one text (its nested lists are left out). If that yields
    nothing, the section's children that are not themselves level nodes are
    cleaned together into a single text. Links that are direct children of
    the section are also passed through ``clean_node`` so that whatever it
    records in the category dict is kept.

    The texts and the ``"categories"`` collected along the way are written
    to ``base_data``. When ``level_node`` is not a level-3 node (i.e. the
    section sits under a POS section), they are additionally written to
    every entry of ``page_data`` that has the same ``lang_code`` as
    ``base_data`` and no etymology texts yet.

    Args:
        wxr: Context object forwarded to ``clean_node``.
        page_data: Entries already extracted for the page; may be updated.
        base_data: Entry that always receives the extracted data.
        level_node: The etymology section node.
    """
    cats = {}
    texts = []

    # Only direct child lists are visited (no `find_child_recursively`), so
    # lists belonging to subsections are not picked up.
    for list_node in level_node.find_child(NodeKind.LIST):
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            # Everything in the item except its nested lists.
            item_nodes = list(
                list_item.invert_find_child(
                    NodeKind.LIST, include_empty_str=True
                )
            )
            item_text = clean_node(wxr, cats, item_nodes)
            if item_text:
                texts.append(item_text)

    if not texts:
        # No list item produced text: fall back to all children of the
        # section that are not level (subsection) nodes.
        section_nodes = list(
            level_node.invert_find_child(
                LEVEL_KIND_FLAGS, include_empty_str=True
            )
        )
        section_text = clean_node(wxr, cats, section_nodes)
        if section_text:
            texts.append(section_text)

    # The returned text is discarded; these calls are made only for what
    # `clean_node` stores in `cats`.
    for link_node in level_node.find_child(NodeKind.LINK):
        clean_node(wxr, cats, link_node)

    categories = cats.get("categories", [])
    base_data.etymology_texts = texts
    base_data.categories.extend(categories)

    if level_node.kind == NodeKind.LEVEL3:
        return

    # under POS section
    for entry in page_data:
        if entry.lang_code != base_data.lang_code:
            continue
        if len(entry.etymology_texts) > 0:
            # Keep etymology texts that were already set on this entry.
            continue
        entry.etymology_texts = texts
        entry.categories.extend(categories)

53 data.categories.extend(cats.get("categories", []))