Coverage for src/wiktextract/extractor/fr/translation.py: 92%

75 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from typing import Optional 

2 

3from mediawiki_langcodes import code_to_name 

4from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode 

5 

6from ...page import clean_node 

7from ...wxr_context import WiktextractContext 

8from ..ruby import extract_ruby 

9from .models import Translation, WordEntry 

10from .tags import translate_raw_tags 

11 

12 

def extract_translation(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Walk a translation section and process its templates and lists.

    Templates that are direct children of ``level_node`` are handed to
    ``process_translation_templates`` without a current translation. Inside
    every list item, templates and italic nodes are processed in document
    order so that tags can be attached to the translation created just
    before them. ``base_data`` is accepted but not read by this function.
    """
    shared_data = Translation()
    for section_child in level_node.filter_empty_str_child():
        if not isinstance(section_child, WikiNode):
            continue

        if section_child.kind == NodeKind.TEMPLATE:
            # box-level template such as "trad-début", which carries the sense
            process_translation_templates(
                wxr, section_child, page_data, shared_data, None
            )
            continue

        if section_child.kind != NodeKind.LIST:
            continue

        for list_item in section_child.find_child(NodeKind.LIST_ITEM):
            # both are tracked per list item and start out empty
            last_wiki_node = None
            current_translation = None
            for item_child in list_item.filter_empty_str_child():
                if not isinstance(item_child, WikiNode):
                    continue
                if item_child.kind == NodeKind.TEMPLATE:
                    current_translation = process_translation_templates(
                        wxr,
                        item_child,
                        page_data,
                        shared_data,
                        current_translation,
                    )
                elif item_child.kind == NodeKind.ITALIC:
                    process_italic_node(
                        wxr, item_child, last_wiki_node, current_translation
                    )
                # remembered so an italic node can check what preceded it
                last_wiki_node = item_child

57 

58 

def process_italic_node(
    wxr: WiktextractContext,
    italic_node: WikiNode,
    previous_node: Optional[WikiNode],
    translation_data: Optional[Translation],
) -> None:
    """Turn parenthesised italic text after a "trad" template into a raw tag.

    The cleaned text of ``italic_node`` is used only when it starts with
    "(" and ends with ")", ``previous_node`` is a template whose name starts
    with "trad", and ``translation_data`` is not ``None``. The parentheses
    are stripped, and a non-empty result is appended to
    ``translation_data.raw_tags`` before ``translate_raw_tags`` is called
    on the translation.
    """
    italic_text = clean_node(wxr, None, italic_node)

    if not (italic_text.startswith("(") and italic_text.endswith(")")):
        return
    if previous_node is None or previous_node.kind != NodeKind.TEMPLATE:
        return
    if not previous_node.template_name.startswith("trad"):
        return
    if translation_data is None:
        return

    raw_tag = italic_text.strip("()")
    if raw_tag:
        translation_data.raw_tags.append(raw_tag)
        translate_raw_tags(translation_data)

79 

80 

def process_translation_templates(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    page_data: list[WordEntry],
    base_translation_data: Translation,
    translation_data: Optional[Translation],
) -> Optional[Translation]:
    """Handle one template found in a translation section.

    Behaviour depends on the template name:

    - "trad-fin": ignored.
    - "trad-début": stores the sense text (parameter 1) and the sense index
      (parameter 2) on ``base_translation_data``.
    - "T": stores the language code (parameter 1) and the cleaned template
      text as language name on ``base_translation_data``.
    - names starting with "trad": builds a new ``Translation`` from a deep
      copy of ``base_translation_data`` and appends it to
      ``page_data[-1].translations`` when its word is not empty.
    - any other template: when ``translation_data`` is not ``None``, the
      cleaned template text (without parentheses) is added to its raw tags.

    Returns the translation that later nodes of the same list item should
    attach tags to: the new translation for a "trad*" template, ``None``
    for "trad-fin" and for a "trad*" template without its required second
    parameter, and the ``translation_data`` argument otherwise.
    """
    template_name = template_node.template_name
    params = template_node.template_parameters

    if template_name == "trad-fin":
        # ignore translation end template
        return None
    elif template_name == "trad-début":
        # translation box start: https://fr.wiktionary.org/wiki/Modèle:trad-début
        base_translation_data.sense = clean_node(wxr, None, params.get(1, ""))
        sense_index_str = params.get(2, "0")
        # Always overwrite the index: `base_translation_data` is shared by
        # every box of the section, so leaving it untouched when the
        # parameter cannot be parsed would pair the new sense text with the
        # previous box's index. 0 is the value used for a missing parameter.
        sense_index = 0
        if (
            isinstance(sense_index_str, str)
            and sense_index_str.strip().isdecimal()
        ):
            sense_index = int(sense_index_str)
        base_translation_data.sense_index = sense_index
    elif template_name == "T":
        # Translation language: https://fr.wiktionary.org/wiki/Modèle:T
        base_translation_data.lang_code = params.get(1, "")
        base_translation_data.lang = clean_node(
            wxr, page_data[-1], template_node
        )
    elif template_name.startswith("trad"):
        # Translation term: https://fr.wiktionary.org/wiki/Modèle:trad
        if 2 not in params:  # required parameter
            return None
        translation_data = base_translation_data.model_copy(deep=True)
        # the "dif" parameter takes precedence over the second parameter
        term_nodes = params.get("dif", params.get(2))
        if base_translation_data.lang_code == "ja":
            # expand the term first so ruby can be split from the word
            expanded_term_nodes = wxr.wtp.parse(
                wxr.wtp.node_to_wikitext(term_nodes), expand_all=True
            )
            ruby_data, node_without_ruby = extract_ruby(
                wxr, expanded_term_nodes.children
            )
            translation_data.ruby = ruby_data
            translation_data.word = clean_node(wxr, None, node_without_ruby)
        else:
            translation_data.word = clean_node(wxr, None, term_nodes)
        translation_data.roman = clean_node(
            wxr, None, params.get("tr", params.get("R", ""))
        )
        # traditional writing of Chinese and Korean word
        translation_data.traditional_writing = clean_node(
            wxr, None, params.get("tradi", "")
        )
        tag_characters = params.get(3, "")
        # Parameter values are not guaranteed to be plain strings; only a
        # string can be read character by character as tag codes.
        if isinstance(tag_characters, str):
            for tag_character in tag_characters:
                if tag_character in TRAD_TAGS:
                    translation_data.tags.append(TRAD_TAGS[tag_character])
        # fall back to the template's own language code when no "T"
        # template provided one
        if translation_data.lang_code == "":
            translation_data.lang_code = params.get(1, "")
        if translation_data.lang == "":
            translation_data.lang = code_to_name(
                translation_data.lang_code, "fr"
            ).capitalize()
        if translation_data.word:
            page_data[-1].translations.append(translation_data)
    elif translation_data is not None:
        # any other template after a translation is treated as a tag
        tag = clean_node(wxr, None, template_node).strip("()")
        if tag:
            translation_data.raw_tags.append(tag)
            translate_raw_tags(translation_data)
    return translation_data

160 

161 

# Single-character codes accepted in the third parameter of the "trad"
# template, mapped to tag names.
# https://fr.wiktionary.org/wiki/Modèle:trad
TRAD_TAGS: dict[str, str] = {
    # gender
    "m": "masculine",
    "f": "feminine",
    "n": "neuter",
    "c": "common",
    # number
    "s": "singular",
    "p": "plural",
    "d": "dual",
    # animacy
    "a": "animate",
    "i": "inanimate",
}