Coverage for src/wiktextract/extractor/it/pos.py: 98%

54 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from .example import extract_example_list_item 

6from .models import Sense, WordEntry 

7from .section_titles import POS_DATA 

8from .tag_form_line import extract_tag_form_line_nodes 

9from .tags import translate_raw_tags 

10 

# Names of templates that start a sub-section inside a part-of-speech
# section.  `extract_pos_section` compares `TemplateNode.template_name`
# against this set; the comparison is case-sensitive, which is why some
# names are listed in both capitalised and lower-case form.
POS_SUBSECTION_TEMPLATES = frozenset(
    [
        # https://it.wiktionary.org/wiki/Categoria:Template_per_i_verbi
        "-participio passato-",
        "-participio presente-",
        "Ausiliare",
        "Deponente",
        "Intransitivo",
        "Medio",
        "Passivo",
        "Reciproco",
        "Riflessivo",
        "riflessivo",
        "Transitivo",
        # https://it.wiktionary.org/wiki/Categoria:Template_vocabolo
        "Attivo",
        "attivo",
        "Inpr",
        "inpr",
        "Riflpr",
    ]
)

33 

34 

def add_new_pos_data(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
    pos_title: str,
) -> None:
    """Append a new entry for ``pos_title`` to ``page_data``.

    The new entry is a deep copy of ``base_data``, so later changes to it do
    not affect ``base_data`` or entries created earlier.  Its ``pos_title``,
    ``pos`` and ``tags`` are filled in from ``POS_DATA[pos_title]``.

    Args:
        wxr: Extraction context passed through to ``clean_node``.
        page_data: List of entries for the page; the new entry is appended.
        base_data: Template entry that is deep-copied.
        level_node: Section heading node whose direct link children are
            passed through ``clean_node``.
        pos_title: Section title; used as the key into ``POS_DATA``.

    Note:
        The entry is appended before ``POS_DATA`` is consulted, so a failed
        lookup leaves a partially filled entry at the end of ``page_data``.
    """
    entry = base_data.model_copy(deep=True)
    page_data.append(entry)
    entry.pos_title = pos_title
    pos_data = POS_DATA[pos_title]
    entry.pos = pos_data["pos"]
    entry.tags.extend(pos_data.get("tags", []))
    # The return value is discarded on purpose; presumably `clean_node`
    # records category links on the entry as a side effect -- TODO confirm.
    for link_node in level_node.find_child(NodeKind.LINK):
        clean_node(wxr, entry, link_node)

49 

50 

def extract_pos_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
    pos_title: str,
) -> None:
    """Extract the senses of one part-of-speech section into ``page_data``.

    A new entry is always created first.  The direct children of
    ``level_node`` are then scanned in order:

    * a list whose marker consists of ``#`` at both ends is a gloss list;
      each of its items is handed to ``extract_gloss_list_item``;
    * a template named in ``POS_SUBSECTION_TEMPLATES`` starts a sub-section;
      if the current entry already has senses a further entry is created,
      and the cleaned template text becomes a raw tag of the current entry.

    Finally the nodes located before the first gloss list are passed to
    ``extract_tag_form_line_nodes`` together with the last entry.

    Args:
        wxr: Extraction context.
        page_data: List of entries for the page; extended in place.
        base_data: Template entry copied for every new entry.
        level_node: The part-of-speech section node.
        pos_title: Section title; used as the key into ``POS_DATA``.
    """
    add_new_pos_data(wxr, page_data, base_data, level_node, pos_title)
    # Defaults to "no gloss list found": the slice below then covers all
    # children of the section.
    first_gloss_list_index = len(level_node.children)
    for index, node in enumerate(level_node.children):
        if (
            isinstance(node, WikiNode)
            and node.kind == NodeKind.LIST
            and node.sarg.startswith("#")
            and node.sarg.endswith("#")
        ):
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                # `page_data[-1]` is re-read on every use because a
                # sub-section template may have appended a newer entry.
                extract_gloss_list_item(wxr, page_data[-1], list_item)
            if index < first_gloss_list_index:
                first_gloss_list_index = index
        elif (
            isinstance(node, TemplateNode)
            and node.template_name in POS_SUBSECTION_TEMPLATES
        ):
            # Only open a new entry when the current one already holds
            # senses; otherwise the sub-section tag is attached to it.
            if len(page_data[-1].senses) > 0:
                add_new_pos_data(
                    wxr, page_data, base_data, level_node, pos_title
                )
            raw_tag = clean_node(wxr, page_data[-1], node).strip("= \n")
            page_data[-1].raw_tags.append(raw_tag)

    extract_tag_form_line_nodes(
        wxr, page_data[-1], level_node.children[:first_gloss_list_index]
    )

85 

86 

def extract_gloss_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
) -> None:
    """Build a ``Sense`` from one gloss list item and add it to the entry.

    Children of ``list_item`` are handled as follows:

    * a template whose cleaned text is wrapped in parentheses becomes a raw
      tag (without the outer parentheses); any other template contributes
      its cleaned text to the gloss;
    * a nested list ending in ``:`` supplies the translation of the most
      recent example when that example has none yet;
    * any other nested list ending in ``*`` or ``:`` supplies examples;
      nested lists with a different marker are ignored;
    * every remaining node is part of the gloss text.

    The sense is appended to ``word_entry.senses`` only when the resulting
    gloss text is not empty.

    Args:
        wxr: Extraction context.
        word_entry: Entry that receives the sense; its ``lang_code`` is
            forwarded to ``extract_example_list_item``.
        list_item: The gloss list item node.
    """
    gloss_nodes = []
    sense = Sense()
    for node in list_item.children:
        if isinstance(node, TemplateNode):
            t_str = clean_node(wxr, sense, node)
            if t_str.startswith("(") and t_str.endswith(")"):
                # Remove only the outer pair: `str.strip("()")` would also
                # eat parentheses that belong to the tag text itself,
                # e.g. "(a (b))" -> "a (b".
                sense.raw_tags.append(t_str[1:-1])
            else:
                gloss_nodes.append(t_str)
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            if (
                node.sarg.endswith(":")
                and len(sense.examples) > 0
                and sense.examples[-1].translation == ""
            ):
                # With several items, each one overwrites the previous
                # translation, so the last item wins.
                for tr_list_item in node.find_child(NodeKind.LIST_ITEM):
                    sense.examples[-1].translation = clean_node(
                        wxr, sense, tr_list_item.children
                    )
            elif node.sarg.endswith(("*", ":")):
                for example_list_item in node.find_child(NodeKind.LIST_ITEM):
                    extract_example_list_item(
                        wxr, sense, example_list_item, word_entry.lang_code
                    )
        else:
            gloss_nodes.append(node)
    gloss_str = clean_node(wxr, sense, gloss_nodes)
    if gloss_str != "":
        sense.glosses.append(gloss_str)
        translate_raw_tags(sense)
        word_entry.senses.append(sense)