Coverage for src/wiktextract/extractor/nl/linkage.py: 83%

91 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1import re 

2 

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Linkage, WordEntry 

8 

9 

def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Collect the linkages found directly under a linkage section.

    Walks the direct children of ``level_node`` in document order and appends
    ``Linkage`` objects to the list attribute of ``word_entry`` whose name is
    ``linkage_type``.

    Templates update the running state or are delegated:

    * ``intens``: sets the raw tags to ``["intensivering"]`` and, when its
      second parameter is all digits, the current sense index.
    * ``L-top``: reads the sense index from a ``[N]`` marker in its second
      parameter and uses the text after the marker as the sense; without a
      marker the whole parameter is the sense.
    * ``L-bottom``: resets sense and sense index.
    * ``nld-*``: handled by ``extract_nld_template``.
    * ``expr`` / ``fras``: handled by ``extract_expr_template``.

    Plain link nodes become linkages carrying the current sense, sense index
    and raw tags; list nodes are handed item by item to
    ``extract_linkage_list_item`` (which receives the sense and sense index,
    but not the raw tags).

    Args:
        wxr: Extraction context, passed through to ``clean_node`` and helpers.
        word_entry: Entry whose ``linkage_type`` list is appended to.
        level_node: The section node whose children are scanned.
        linkage_type: Name of the list attribute on ``word_entry``.
    """
    sense_index = 0
    sense = ""
    # Once set by an "intens" template the raw tags stay in effect for every
    # later direct link in this section; nothing below resets them.
    raw_tags = []
    for node in level_node.children:
        if isinstance(node, TemplateNode):
            if node.template_name == "intens":
                # https://nl.wiktionary.org/wiki/Sjabloon:intens
                raw_tags = ["intensivering"]
                # Render the parameter with clean_node (as the "L-top" branch
                # does) instead of calling str.strip() on the raw value, which
                # only works when the parameter is a plain string.
                s_index_str = clean_node(
                    wxr, None, node.template_parameters.get(2, "")
                ).strip()
                if re.fullmatch(r"\d+", s_index_str):
                    sense_index = int(s_index_str)
            elif node.template_name == "L-top":
                second_arg = clean_node(
                    wxr, None, node.template_parameters.get(2, "")
                )
                m = re.search(r"\[(\d+)\]", second_arg)
                if m is not None:
                    sense_index = int(m.group(1))
                    # Only the text after the "[N]" marker is kept as sense.
                    sense = second_arg[m.end() :].strip()
                else:
                    sense = second_arg
            elif node.template_name == "L-bottom":
                sense = ""
                sense_index = 0
            elif node.template_name.startswith("nld-"):
                extract_nld_template(wxr, word_entry, node, linkage_type)
            elif node.template_name in ["expr", "fras"]:
                extract_expr_template(wxr, word_entry, node, linkage_type)
        elif isinstance(node, WikiNode):
            if node.kind == NodeKind.LINK:
                word = clean_node(wxr, None, node)
                if word != "":
                    getattr(word_entry, linkage_type).append(
                        Linkage(
                            word=word,
                            sense=sense,
                            sense_index=sense_index,
                            raw_tags=raw_tags,
                        )
                    )
            elif node.kind == NodeKind.LIST:
                for list_item in node.find_child(NodeKind.LIST_ITEM):
                    extract_linkage_list_item(
                        wxr,
                        word_entry,
                        list_item,
                        linkage_type,
                        sense,
                        sense_index,
                    )

66 

67 

def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    linkage_type: str,
    sense: str,
    sense_index: int,
) -> None:
    """Extract linkages from one list item of a linkage section.

    Scans the direct children of ``list_item`` in order:

    * a string containing ``[N]`` sets the sense index used for the links
      that follow in this item;
    * otherwise a string starting with ``=`` supplies a sense: it is used for
      the links that follow and is also written onto the most recently
      appended linkage, if there is one;
    * a link node with non-empty text is appended as a ``Linkage``;
    * an ``expr`` or ``fras`` template is delegated to
      ``extract_expr_template``.

    Args:
        wxr: Extraction context, passed through to ``clean_node`` and helpers.
        word_entry: Entry whose ``linkage_type`` list is appended to.
        list_item: The list-item node to scan.
        linkage_type: Name of the list attribute on ``word_entry``.
        sense: Sense text inherited from the enclosing section.
        sense_index: Sense number inherited from the enclosing section.
    """
    for node in list_item.children:
        if isinstance(node, str):
            m = re.search(r"\[(\d+)\]", node)
            if m is not None:
                sense_index = int(m.group(1))
            elif node.strip().startswith("="):
                sense = node.strip().removeprefix("=").strip()
                linkage_list = getattr(word_entry, linkage_type)
                # The "= gloss" text follows the link it describes, so patch
                # the linkage that was appended last.
                if len(linkage_list) > 0:
                    linkage_list[-1].sense = sense
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LINK:
            word = clean_node(wxr, None, node)
            if word != "":
                getattr(word_entry, linkage_type).append(
                    Linkage(word=word, sense=sense, sense_index=sense_index)
                )
        elif isinstance(node, TemplateNode) and node.template_name in (
            "expr",
            "fras",
        ):
            # extract_expr_template handles both template names, and the
            # section-level caller dispatches both; do the same here.
            extract_expr_template(wxr, word_entry, node, linkage_type)

94 

95 

def extract_nld_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    linkage_type: str,
) -> None:
    """Extract linkages from an expanded ``nld-*`` list template.

    The template is expanded and re-parsed. The sense index comes from the
    first positional parameter when it is all digits (otherwise 0). The sense
    is taken from the links inside the first italic node of the expansion
    (the last such link wins; empty when there is none). Every non-empty link
    that is a direct child of a list item in the expansion is appended as a
    ``Linkage`` to the ``linkage_type`` list of ``word_entry``.
    """
    # https://nl.wiktionary.org/wiki/Sjabloon:nld-rashonden
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_node = wxr.wtp.parse(template_wikitext, expand_all=True)

    index_text = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    sense_index = int(index_text) if re.fullmatch(r"\d+", index_text) else 0

    # Only the first italic node is inspected for the sense text.
    sense = ""
    first_italic = next(
        iter(expanded_node.find_child_recursively(NodeKind.ITALIC)), None
    )
    if first_italic is not None:
        for sense_link in first_italic.find_child(NodeKind.LINK):
            sense = clean_node(wxr, None, sense_link)

    for item in expanded_node.find_child_recursively(NodeKind.LIST_ITEM):
        for word_link in item.find_child(NodeKind.LINK):
            word = clean_node(wxr, None, word_link)
            if word == "":
                continue
            linkage = Linkage(word=word, sense_index=sense_index, sense=sense)
            getattr(word_entry, linkage_type).append(linkage)

125 

126 

def extract_expr_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    linkage_type: str,
) -> None:
    """Extract one linkage from an ``expr`` or ``fras`` template.

    For ``expr`` the word is positional parameter 1 and the sense is
    parameter 2; for any other template name (``fras`` in practice) they are
    parameters 2 and 3. The sense index is read from the ``n`` parameter when
    it is all digits, and is overridden by a leading number (optionally in
    square brackets) at the start of the word, which is then stripped from
    the word. Nothing is appended when the remaining word is empty.
    """
    # https://nl.wiktionary.org/wiki/Sjabloon:expr
    # https://nl.wiktionary.org/wiki/Sjabloon:fras
    params = t_node.template_parameters
    index_text = clean_node(wxr, None, params.get("n", ""))
    if re.fullmatch(r"\d+", index_text) is None:
        sense_index = 0
    else:
        sense_index = int(index_text)

    if t_node.template_name == "expr":
        word_arg, sense_arg = 1, 2
    else:
        word_arg, sense_arg = 2, 3
    sense = clean_node(wxr, None, params.get(sense_arg, ""))
    word = clean_node(wxr, None, params.get(word_arg, ""))

    # Some pages put the sense number in front of the word instead of using
    # the "n" argument; honour it and drop it from the word.
    leading_index = re.match(r"\[?(\d+)\]?", word)
    if leading_index is not None:
        sense_index = int(leading_index.group(1))
        word = word[leading_index.end() :].strip()

    if word == "":
        return
    linkage = Linkage(word=word, sense=sense, sense_index=sense_index)
    getattr(word_entry, linkage_type).append(linkage)

153 

154 

def extract_fixed_preposition_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Add each list item of the section to ``word_entry.derived``.

    Every list item found anywhere below ``level_node`` is rendered to text
    with ``clean_node``; non-empty results are appended as ``Linkage``
    objects tagged ``"prepositional"``.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        text = clean_node(wxr, None, item.children)
        if not text:
            continue
        linkage = Linkage(word=text, tags=["prepositional"])
        word_entry.derived.append(linkage)