Coverage for src/wiktextract/extractor/it/linkage.py: 74%

59 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1from wikitextprocessor import LevelNode, NodeKind, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from .models import Form, Linkage, WordEntry 

6from .tags import translate_raw_tags 

7 

8 

def extract_linkage_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Collect linkages from the lists under a section and attach them.

    Every list item found under ``level_node`` is parsed into ``Linkage``
    objects: with the proverb parser when ``linkage_type`` is ``"proverbs"``,
    otherwise with the generic linkage parser.  Each result is passed through
    ``translate_raw_tags`` and the whole batch is appended to the attribute
    named ``linkage_type`` on every entry of ``page_data`` whose ``lang_code``
    equals that of the last entry.
    """
    # The parser depends only on the section type, so pick it once.
    if linkage_type == "proverbs":
        parse_item = extract_proverb_list_item
    else:
        parse_item = extract_linkage_list_item

    collected = [
        linkage
        for wiki_list in level_node.find_child(NodeKind.LIST)
        for item in wiki_list.find_child(NodeKind.LIST_ITEM)
        for linkage in parse_item(wxr, item)
    ]

    for linkage in collected:
        translate_raw_tags(linkage)

    # Nothing to attach to; also keeps the [-1] lookup below safe.
    if not page_data:
        return

    target_lang = page_data[-1].lang_code
    for entry in page_data:
        if entry.lang_code == target_lang:
            getattr(entry, linkage_type).extend(collected)

30 

31 

def extract_linkage_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Linkage]:
    """Parse one list item into a list of ``Linkage`` objects.

    Link nodes and comma-separated plain-text pieces become linkage words.
    Parenthesised text, either from a template/italic node or from a plain
    text piece, is queued as a raw tag and handed to the next word that is
    emitted, after which the queue is emptied.
    """

    def is_parenthesized(text: str) -> bool:
        # True when the text is wrapped as "(...)".
        return text.startswith("(") and text.endswith(")")

    pending_tags = []
    results = []

    def emit(word: str) -> None:
        # NOTE(review): the same list object is passed to Linkage and then
        # cleared; this presumably relies on the model copying the list on
        # construction -- confirm against the Linkage model definition.
        results.append(Linkage(word=word, raw_tags=pending_tags))
        pending_tags.clear()

    for child in list_item.children:
        if isinstance(child, WikiNode):
            if child.kind == NodeKind.LINK:
                text = clean_node(wxr, None, child)
                if text != "":
                    emit(text)
            elif child.kind in (NodeKind.TEMPLATE, NodeKind.ITALIC):
                text = clean_node(wxr, None, child)
                if is_parenthesized(text):
                    pending_tags.append(text.strip("()"))
            continue

        if not isinstance(child, str):
            continue

        # Plain text: each comma-separated piece is either a tag or a word.
        for piece in child.split(","):
            piece = piece.strip()
            if is_parenthesized(piece):
                pending_tags.append(piece.strip("()"))
            elif piece != "":
                emit(piece)

    return results

60 

61 

def extract_proverb_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Linkage]:
    """Parse one proverb list item into at most one ``Linkage``.

    The cleaned text of an italic child becomes the proverb's word.  The
    first plain-text child containing ``":"`` ends the scan: everything after
    that colon, together with all following children, is cleaned and stored
    as the sense.  Returns an empty list when no word was found.
    """
    entry = Linkage(word="")
    children = list_item.children

    for position, child in enumerate(children):
        if isinstance(child, WikiNode) and child.kind == NodeKind.ITALIC:
            entry.word = clean_node(wxr, None, child)
        elif isinstance(child, str) and ":" in child:
            # Keep only the text after the first colon of this string.
            _, _, after_colon = child.partition(":")
            remainder = children[position + 1 :]
            entry.sense = clean_node(wxr, None, [after_colon] + remainder)
            break

    if entry.word == "":
        return []
    return [entry]

77 

78 

def extract_form_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Collect link texts from a section's lists and attach them as forms.

    Each link node directly under a list item of ``level_node`` is cleaned to
    text; non-empty results are wrapped in ``Form`` and appended to the
    ``forms`` of every entry in ``page_data`` whose ``lang_code`` equals that
    of the last entry.
    """
    # Lazy generator: each link is cleaned just before its Form is built.
    link_texts = (
        clean_node(wxr, None, link)
        for wiki_list in level_node.find_child(NodeKind.LIST)
        for item in wiki_list.find_child(NodeKind.LIST_ITEM)
        for link in item.find_child(NodeKind.LINK)
    )
    new_forms = [Form(form=text) for text in link_texts if text != ""]

    # Nothing to attach to; also keeps the [-1] lookup below safe.
    if not page_data:
        return

    target_lang = page_data[-1].lang_code
    for entry in page_data:
        if entry.lang_code == target_lang:
            entry.forms.extend(new_forms)