Coverage for src/wiktextract/extractor/it/linkage.py: 92%

45 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from wikitextprocessor import LevelNode, NodeKind, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from .models import Linkage, WordEntry 

6 

7 

def extract_linkage_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Parse a linkage section and store the result on matching entries.

    Every list item found in the lists directly under ``level_node`` is
    parsed; items of a ``"proverbs"`` section go through
    ``extract_proverb_list_item``, all others through
    ``extract_linkage_list_item``.  The collected linkages are appended to
    the ``linkage_type`` attribute of each entry in ``page_data`` whose
    ``lang_code`` equals the ``lang_code`` of the last entry.

    Args:
        wxr: Context handed on to the list item parsers.
        page_data: Entries gathered so far; updated in place.
        level_node: Section node whose child lists are scanned.
        linkage_type: Name of the list attribute on the entries to extend.
    """
    if linkage_type == "proverbs":
        parse_item = extract_proverb_list_item
    else:
        parse_item = extract_linkage_list_item

    collected = [
        linkage
        for list_node in level_node.find_child(NodeKind.LIST)
        for list_item in list_node.find_child(NodeKind.LIST_ITEM)
        for linkage in parse_item(wxr, list_item)
    ]

    for entry in page_data:
        same_language = entry.lang_code == page_data[-1].lang_code
        if same_language:
            target_list = getattr(entry, linkage_type)
            target_list.extend(collected)

26 

27 

28def extract_linkage_list_item( 

29 wxr: WiktextractContext, list_item: WikiNode 

30) -> list[Linkage]: 

31 raw_tags = [] 

32 linkages = [] 

33 for node in list_item.children: 

34 if isinstance(node, WikiNode): 

35 match node.kind: 

36 case NodeKind.LINK: 

37 node_str = clean_node(wxr, None, node) 

38 if node_str != "": 38 ↛ 33line 38 didn't jump to line 33 because the condition on line 38 was always true

39 linkages.append( 

40 Linkage(word=node_str, raw_tags=raw_tags) 

41 ) 

42 raw_tags.clear() 

43 case NodeKind.TEMPLATE | NodeKind.ITALIC: 43 ↛ 33line 43 didn't jump to line 33 because the pattern on line 43 always matched

44 node_str = clean_node(wxr, None, node) 

45 if node_str.startswith("(") and node_str.endswith(")"): 45 ↛ 33line 45 didn't jump to line 33 because the condition on line 45 was always true

46 raw_tags.append(node_str.strip("()")) 

47 elif isinstance(node, str): 47 ↛ 33line 47 didn't jump to line 33 because the condition on line 47 was always true

48 for word_str in node.split(","): 

49 word_str = word_str.strip() 

50 if word_str.startswith("(") and word_str.endswith(")"): 

51 raw_tags.append(word_str.strip("()")) 

52 elif word_str != "": 

53 linkages.append(Linkage(word=word_str, raw_tags=raw_tags)) 

54 raw_tags.clear() 

55 

56 return linkages 

57 

58 

def extract_proverb_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Linkage]:
    """Parse one list item of a proverbs section.

    The cleaned text of an italic child becomes the proverb itself.  The
    first plain-text child containing a colon starts the explanation: the
    text after that colon, together with all following children, is cleaned
    and stored as the sense, and scanning stops there.

    Args:
        wxr: Context passed through to ``clean_node``.
        list_item: List item node whose children are scanned.

    Returns:
        A one-element list holding the proverb, or an empty list when no
        proverb text was found.
    """
    entry = Linkage(word="")
    children = list_item.children
    for position, child in enumerate(children):
        if isinstance(child, WikiNode):
            if child.kind == NodeKind.ITALIC:
                entry.word = clean_node(wxr, None, child)
            continue
        if isinstance(child, str) and ":" in child:
            # Everything after the first colon, plus the remaining nodes of
            # the item, makes up the explanation.
            after_colon = child.partition(":")[2]
            sense_nodes = [after_colon, *children[position + 1 :]]
            entry.sense = clean_node(wxr, None, sense_nodes)
            break

    if entry.word == "":
        return []
    return [entry]