Coverage for src/wiktextract/extractor/fr/translation.py: 92%

74 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from mediawiki_langcodes import code_to_name 

2from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode 

3 

4from ...page import clean_node 

5from ...wxr_context import WiktextractContext 

6from ..ruby import extract_ruby 

7from .models import Translation, WordEntry 

8from .tags import translate_raw_tags 

9 

10 

def extract_translation(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Extract translations from the children of a translation section.

    Template children directly under ``level_node`` (for example
    "trad-début") update the data shared by all following translations.
    Each list item is then scanned: templates are handed to
    ``process_translation_templates`` and italic nodes to
    ``process_italic_node``.

    Args:
        wxr: Extraction context passed through to the helpers.
        page_data: Word entries collected so far; results are stored on
            the last one.
        base_data: Entry used as the storage target when ``page_data`` is
            still empty.
        level_node: The section node whose children are scanned.
    """
    # The template helper reads and writes ``page_data[-1]``.  Without a
    # fallback an empty ``page_data`` raises IndexError, so route the data
    # to ``base_data`` in that case.
    target_entries = page_data if len(page_data) > 0 else [base_data]
    base_translation_data = Translation()

    for level_node_child in level_node.filter_empty_str_child():
        if not isinstance(level_node_child, WikiNode):
            continue

        if level_node_child.kind == NodeKind.TEMPLATE:
            # get sense from "trad-début" template
            process_translation_templates(
                wxr,
                level_node_child,
                target_entries,
                base_translation_data,
                None,
            )
        elif level_node_child.kind == NodeKind.LIST:
            for list_item_node in level_node_child.find_child(
                NodeKind.LIST_ITEM
            ):
                # Both values are tracked per list item only.
                previous_node = None
                translation_data = None
                for child_node in list_item_node.filter_empty_str_child():
                    if not isinstance(child_node, WikiNode):
                        continue
                    if child_node.kind == NodeKind.TEMPLATE:
                        translation_data = process_translation_templates(
                            wxr,
                            child_node,
                            target_entries,
                            base_translation_data,
                            translation_data,
                        )
                    elif child_node.kind == NodeKind.ITALIC:
                        process_italic_node(
                            wxr,
                            child_node,
                            previous_node,
                            translation_data,
                        )
                    # Only WikiNode children are remembered, matching the
                    # ``WikiNode | None`` type expected by the italic helper.
                    previous_node = child_node

55 

56 

def process_italic_node(
    wxr: WiktextractContext,
    italic_node: WikiNode,
    previous_node: WikiNode | None,
    translation_data: Translation | None,
) -> None:
    """Add parenthesised italic text after a "trad" template as a raw tag.

    The cleaned text of ``italic_node`` is used only when it starts with
    "(" and ends with ")", the previous node is a template whose name
    starts with "trad", and a current translation exists.  The text is
    stripped of parentheses, appended to ``translation_data.raw_tags`` and
    ``translate_raw_tags`` is run on the translation.

    Args:
        wxr: Extraction context used to clean the node.
        italic_node: The italic node to inspect.
        previous_node: The node seen just before ``italic_node``, if any.
        translation_data: The translation that receives the tag, if any.
    """
    italic_text = clean_node(wxr, None, italic_node)

    # Guard clauses keep the same evaluation order as one combined test.
    if not (italic_text.startswith("(") and italic_text.endswith(")")):
        return
    if previous_node is None:
        return
    if previous_node.kind != NodeKind.TEMPLATE:
        return
    if not previous_node.template_name.startswith("trad"):
        return
    if translation_data is None:
        return

    raw_tag = italic_text.strip("()")
    if raw_tag:
        translation_data.raw_tags.append(raw_tag)
        translate_raw_tags(translation_data)

77 

78 

def process_translation_templates(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    page_data: list[WordEntry],
    base_translation_data: Translation,
    translation_data: Translation | None,
) -> Translation | None:
    """Handle one template found in a translation section.

    Behaviour depends on the template name:

    * "trad-fin": ignored, returns ``None``.
    * "trad-début": stores the sense text and, when parameter 2 is a
      decimal string, the sense index on ``base_translation_data``.
    * "T": stores the language code and language name on
      ``base_translation_data``.
    * names starting with "trad": builds a new translation from a deep copy
      of ``base_translation_data`` and appends it to
      ``page_data[-1].translations`` when its word is not empty.  Returns
      ``None`` when the required parameter 2 is missing.
    * any other template: when a current translation exists, its cleaned
      text (without parentheses) is added to that translation as a raw tag.

    Template parameter values are not guaranteed to be plain strings (hence
    the ``isinstance`` check on the sense index), so the language code and
    the tag flags in parameter 3 are passed through ``clean_node`` like the
    other parameters read here.

    Args:
        wxr: Extraction context.
        template_node: The template to process.
        page_data: Word entries; the last one is read and updated, so the
            list must not be empty for "T" and "trad*" templates.
        base_translation_data: Data shared by the translations of the
            current box/language; updated in place.
        translation_data: The translation produced by the previous "trad*"
            template of the same list item, if any.

    Returns:
        The current translation after processing this template, or ``None``.
    """
    template_name = template_node.template_name
    params = template_node.template_parameters

    if template_name == "trad-fin":
        # ignore translation end template
        return None

    if template_name == "trad-début":
        # translation box start: https://fr.wiktionary.org/wiki/Modèle:trad-début
        base_translation_data.sense = clean_node(wxr, None, params.get(1, ""))
        sense_index_str = params.get(2, "0")
        if isinstance(sense_index_str, str) and sense_index_str.isdecimal():
            base_translation_data.sense_index = int(sense_index_str)

    elif template_name == "T":
        # Translation language: https://fr.wiktionary.org/wiki/Modèle:T
        # Clean the code so a non-string parameter value never reaches the
        # model field.
        base_translation_data.lang_code = clean_node(
            wxr, None, params.get(1, "")
        )
        base_translation_data.lang = clean_node(
            wxr, page_data[-1], template_node
        )

    elif template_name.startswith("trad"):
        # Translation term: https://fr.wiktionary.org/wiki/Modèle:trad
        if 2 not in params:  # required parameter
            return None
        translation_data = base_translation_data.model_copy(deep=True)
        # "dif" takes precedence over parameter 2 as the word source.
        term_nodes = params.get("dif", params.get(2))
        if base_translation_data.lang_code == "ja":
            # Expand the term so ruby annotations can be separated from it.
            expanded_term_nodes = wxr.wtp.parse(
                wxr.wtp.node_to_wikitext(term_nodes), expand_all=True
            )
            ruby_data, node_without_ruby = extract_ruby(
                wxr, expanded_term_nodes.children
            )
            translation_data.ruby = ruby_data
            translation_data.word = clean_node(wxr, None, node_without_ruby)
        else:
            translation_data.word = clean_node(wxr, None, term_nodes)
        translation_data.roman = clean_node(
            wxr, None, params.get("tr", params.get("R", ""))
        )
        # traditional writing of Chinese and Korean word
        translation_data.traditional_writing = clean_node(
            wxr, None, params.get("tradi", "")
        )
        if 3 in params:
            # Each known character of parameter 3 maps to one tag; clean
            # the value first so it is always iterated as text.
            for tag_character in clean_node(wxr, None, params[3]):
                if tag_character in TRAD_TAGS:
                    translation_data.tags.append(TRAD_TAGS[tag_character])
        if translation_data.lang_code == "":
            # No preceding "T" template: use this template's own code.
            translation_data.lang_code = clean_node(
                wxr, None, params.get(1, "")
            )
        if translation_data.lang == "":
            translation_data.lang = code_to_name(
                translation_data.lang_code, "fr"
            ).capitalize()
        if len(translation_data.word) > 0:
            page_data[-1].translations.append(translation_data)

    elif translation_data is not None:
        # Any other template after a translation is treated as a tag.
        tag = clean_node(wxr, None, template_node).strip("()")
        if len(tag) > 0:
            translation_data.raw_tags.append(tag)
            translate_raw_tags(translation_data)

    return translation_data

158 

159 

# Single-character flags of the "trad" template mapped to tag names.
# https://fr.wiktionary.org/wiki/Modèle:trad
TRAD_TAGS: dict[str, str] = dict(
    # gender
    m="masculine",
    f="feminine",
    n="neuter",
    c="common",
    # number
    s="singular",
    p="plural",
    d="dual",
    # animacy
    a="animate",
    i="inanimate",
)