Coverage for src/wiktextract/extractor/id/linkage.py: 90%

69 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1from itertools import count 

2 

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Form, Linkage, WordEntry 

8from .tags import translate_raw_tags 

9 

10 

def extract_syn_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    l_type: str,
) -> None:
    """Add the terms listed in a template to one linkage list of the entry.

    Positional arguments are read starting at argument 2 and stopping at the
    first missing position.  Each argument whose cleaned text is non-empty
    becomes a ``Linkage`` appended to the ``word_entry`` attribute named by
    ``l_type``.  The linkage's ``sense`` is the first gloss of the entry's
    most recent sense, or ``""`` when there is no sense or it has no gloss.
    """
    params = t_node.template_parameters
    arg_pos = 2
    while arg_pos in params:
        term = clean_node(wxr, None, params[arg_pos])
        arg_pos += 1
        if term == "":
            # Empty arguments are skipped but do not end the scan.
            continue
        senses = word_entry.senses
        if len(senses) > 0 and len(senses[-1].glosses) > 0:
            gloss = senses[-1].glosses[0]
        else:
            gloss = ""
        getattr(word_entry, l_type).append(Linkage(word=term, sense=gloss))

31 

32 

def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    l_type: str,
) -> None:
    """Process every list item found in the lists directly under a section.

    Each ``LIST_ITEM`` child of each ``LIST`` child of ``level_node`` is
    handed to ``extract_linkage_list_item`` together with ``l_type``.
    """
    # Lazy generator: items are visited in the same order as nested loops.
    items = (
        item
        for list_node in level_node.find_child(NodeKind.LIST)
        for item in list_node.find_child(NodeKind.LIST_ITEM)
    )
    for item in items:
        extract_linkage_list_item(wxr, word_entry, item, l_type)

42 

43 

def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    l_type: str,
) -> None:
    """Collect the linkages written in one list item and store them.

    Children of ``list_item`` are scanned in order:

    * a link node contributes a linkage with its cleaned text as the word;
    * ``qualifier``/``q``/``qual`` templates contribute a raw tag;
    * ``l``, ``m`` and ``alter`` templates contribute linkages through their
      dedicated extractor functions;
    * a string containing ``":"`` sets the sense to the cleaned text of
      everything after the first colon, including all following children.

    Every collected linkage then receives the sense and all raw tags, and is
    passed to ``translate_raw_tags``.  When ``l_type`` ends with ``"forms"``
    the linkages are converted to ``Form`` objects appended to
    ``word_entry.forms`` (skipping words equal to the page title, and tagging
    ``"alternative"`` for ``"alt_forms"``); otherwise they are appended to the
    ``word_entry`` attribute named by ``l_type``.
    """
    qualifiers = []
    collected = []
    gloss = ""
    children = list_item.children
    for pos, child in enumerate(children):
        if isinstance(child, WikiNode) and child.kind == NodeKind.LINK:
            term = clean_node(wxr, None, child)
            if term != "":
                collected.append(Linkage(word=term))
        elif isinstance(child, TemplateNode):
            name = child.template_name
            if name in ("qualifier", "q", "qual"):
                # Drop parentheses at either end of the expanded text.
                qualifier = clean_node(wxr, None, child).strip("()")
                if qualifier != "":
                    qualifiers.append(qualifier)
            elif name in ("l", "m"):
                parse = extract_l_template if name == "l" else extract_m_template
                entry = parse(wxr, child)
                if entry.word != "":
                    collected.append(entry)
            elif name == "alter":
                collected.extend(extract_alter_template(wxr, child))
        elif isinstance(child, str) and ":" in child:
            # Text after the first colon plus all later children form the
            # sense.  NOTE(review): the scan continues afterwards, so links
            # or templates after the colon are also collected as linkages
            # and a later ":" string replaces the sense — confirm intended.
            after_colon = child.partition(":")[2]
            gloss = clean_node(wxr, None, [after_colon, *children[pos + 1 :]])

    for entry in collected:
        entry.sense = gloss
        entry.raw_tags.extend(qualifiers)
        translate_raw_tags(entry)

    if not l_type.endswith("forms"):
        getattr(word_entry, l_type).extend(collected)
        return

    is_alt_forms = l_type == "alt_forms"
    for entry in collected:
        if entry.word == wxr.wtp.title:
            # A form identical to the page title is not recorded.
            continue
        if is_alt_forms:
            entry.tags.append("alternative")
        word_entry.forms.append(
            Form(form=entry.word, raw_tags=entry.raw_tags, tags=entry.tags)
        )

98 

99 

def extract_l_template(
    wxr: WiktextractContext, t_node: TemplateNode
) -> Linkage:
    """Build a ``Linkage`` from an ``l`` template.

    The word is the cleaned text of positional argument 2, or of ``""`` when
    that argument is absent.
    """
    term_arg = t_node.template_parameters.get(2, "")
    term = clean_node(wxr, None, term_arg)
    return Linkage(word=term)

106 

107 

def extract_m_template(
    wxr: WiktextractContext, t_node: TemplateNode
) -> Linkage:
    """Build a ``Linkage`` from an ``m`` template.

    The word comes from positional argument 3 when present, otherwise from
    argument 2, otherwise it is empty.  The cleaned ``t`` argument is stored
    in the linkage's ``roman`` field.
    """
    params = t_node.template_parameters
    # Argument 3 takes precedence over argument 2 whenever it is supplied.
    term_arg = params[3] if 3 in params else params.get(2, "")
    term = clean_node(wxr, None, term_arg)
    # NOTE(review): "t" is mapped to `roman` here — confirm that this is the
    # intended target field for that template argument.
    t_text = clean_node(wxr, None, params.get("t", ""))
    return Linkage(word=term, roman=t_text)

122 

123 

def extract_alter_template(
    wxr: WiktextractContext, t_node: TemplateNode
) -> list[Linkage]:
    """Return one ``Linkage`` per non-empty term of an ``alter`` template.

    Positional arguments are read from argument 2 up to the first missing
    position; arguments whose cleaned text is empty are skipped.
    """
    params = t_node.template_parameters
    results = []
    arg_pos = 2
    while arg_pos in params:
        term = clean_node(wxr, None, params[arg_pos])
        if term != "":
            results.append(Linkage(word=term))
        arg_pos += 1
    return results