Coverage for src/wiktextract/extractor/es/gloss.py: 73%

72 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1import re 

2 

3from wikitextprocessor.parser import ( 

4 NodeKind, 

5 TemplateNode, 

6 WikiNode, 

7 WikiNodeChildrenList, 

8) 

9 

10from ...page import clean_node 

11from ...wxr_context import WiktextractContext 

12from .models import AltForm, Sense, WordEntry 

13from .sense_data import process_sense_data_list 

14from .tags import translate_raw_tags 

15 

16 

def extract_gloss(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    list_node: WikiNode,
) -> None:
    """Create one ``Sense`` per list item and append it to the last entry.

    For every ``LIST_ITEM`` child of ``list_node`` that has a non-empty
    ``definition``:

    * the cleaned text of the definition nodes (nested lists excluded)
      becomes the gloss;
    * the cleaned text of the item's ``children`` is treated as a note:
      leading digits become ``sense_index`` and the remainder is split
      into raw tags;
    * templates whose name starts with ``"f."`` or ``"forma "`` are passed
      to ``process_forma_template``;
    * nested lists found in the definition are handed to
      ``process_sense_data_list`` after the sense has been stored.

    Args:
        wxr: Extraction context, passed to ``clean_node`` and used for
            debug messages.
        page_data: Word entries collected so far; the new senses are
            appended to ``page_data[-1].senses``.
        list_node: The list node whose items are processed.
    """
    for list_item in list_node.find_child(NodeKind.LIST_ITEM):
        # Items without a definition part carry no gloss; skip them.
        if not list_item.definition:
            continue

        gloss_data = Sense()
        definition: WikiNodeChildrenList = []
        other: WikiNodeChildrenList = []

        for node in list_item.definition:
            if isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
                # Nested lists are processed separately below so that
                # their text does not end up in the gloss.
                other.append(node)
            else:
                definition.append(node)
            if isinstance(
                node, TemplateNode
            ) and node.template_name.startswith(("f.", "forma ")):
                process_forma_template(wxr, gloss_data, node)

        gloss = clean_node(wxr, gloss_data, definition)
        if gloss:
            gloss_data.glosses.append(gloss)

        gloss_note = clean_node(wxr, gloss_data, list_item.children)
        index_match = re.match(r"\d+", gloss_note)
        if index_match is not None:
            gloss_data.sense_index = index_match.group(0)
            tag_string = gloss_note[index_match.end() :].strip()
        else:
            tag_string = gloss_note.strip()

        # Split tags by comma or by the standalone word "y" ("and").
        # The word boundaries keep tags that merely contain the letter
        # "y" (e.g. "Paraguay", "Uruguay") in one piece.
        for tag in re.split(r",|\by\b", tag_string):
            tag = (
                tag.strip()
                .removesuffix(".")
                .removesuffix("Main")
                .removeprefix("Main")
            )
            if tag:
                gloss_data.raw_tags.append(tag)

        translate_raw_tags(gloss_data)
        page_data[-1].senses.append(gloss_data)

        for node in other:
            if isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
                process_sense_data_list(wxr, page_data[-1], node)
            else:
                wxr.wtp.debug(
                    f"Found nodes that are not part of definition: {node}",
                    sortid="extractor/es/gloss/extract_gloss/46",
                )

76 

77 

def process_uso_template(
    wxr: WiktextractContext, sense: Sense, template: TemplateNode
) -> None:
    """Turn the positional arguments of a "uso" template into sense tags.

    Each positional (integer-keyed) argument is cleaned to text. Values
    found in ``USO_TAGS`` are added to ``sense.tags`` (a string mapping is
    appended, a list mapping is extended); values not found there are
    appended to ``sense.raw_tags``. Named arguments are ignored.
    """
    # https://es.wiktionary.org/wiki/Plantilla:uso
    from .tags import USO_TAGS

    for key, raw_value in template.template_parameters.items():
        if not isinstance(key, int):
            continue
        label = clean_node(wxr, None, raw_value)
        if label not in USO_TAGS:
            sense.raw_tags.append(label)
            continue
        mapped = USO_TAGS[label]
        if isinstance(mapped, str):
            sense.tags.append(mapped)
        elif isinstance(mapped, list):
            sense.tags.extend(mapped)

    # Expanding the whole template against the sense saves category links.
    clean_node(wxr, sense, template)

97 

98 

def process_ambito_template(
    wxr: WiktextractContext, sense: Sense, template: TemplateNode
) -> None:
    """Turn the positional arguments of an "ámbito" template into tags.

    Each positional (integer-keyed) argument is cleaned to text and, when
    it is a key of ``AMBITO_TAGS``, its mapping is added to ``sense.tags``
    (a string is appended, a list is extended). Values without a mapping
    and named arguments are ignored.
    """
    # https://es.wiktionary.org/wiki/Plantilla:ámbito
    # location data
    from .tags import AMBITO_TAGS

    for key, raw_value in template.template_parameters.items():
        if not isinstance(key, int):
            continue
        place = clean_node(wxr, None, raw_value)
        if place not in AMBITO_TAGS:
            continue
        mapped = AMBITO_TAGS[place]
        if isinstance(mapped, str):
            sense.tags.append(mapped)
        elif isinstance(mapped, list):
            sense.tags.extend(mapped)

    # Expanding the whole template against the sense saves category links.
    clean_node(wxr, sense, template)

117 

118 

def process_forma_template(
    wxr: WiktextractContext, sense: Sense, template: TemplateNode
) -> None:
    """Record the word a form-of template points at in ``sense.form_of``.

    The first positional parameter is cleaned to text; if it is empty,
    nothing is recorded. Otherwise it is appended as an ``AltForm``, and
    when the template also has a ``pronominal`` or ``pronom`` parameter a
    second ``AltForm`` with ``"se"`` appended to the word is added.
    """
    # https://es.wiktionary.org/wiki/Plantilla:forma_verbo
    params = template.template_parameters
    lemma = clean_node(wxr, None, params.get(1, ""))
    if lemma == "":
        return

    sense.form_of.append(AltForm(word=lemma))
    if any(name in params for name in ("pronominal", "pronom")):
        sense.form_of.append(AltForm(word=lemma + "se"))