Coverage for src/wiktextract/extractor/fr/translation.py: 92%

74 statements  

« prev     ^ index     » next       coverage.py v7.9.0, created at 2025-06-13 07:43 +0000

1from mediawiki_langcodes import code_to_name 

2from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode 

3 

4from ...page import clean_node 

5from ...wxr_context import WiktextractContext 

6from ..ruby import extract_ruby 

7from .models import Translation, WordEntry 

8from .tags import translate_raw_tags 

9 

10 

def extract_translation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: WikiNode
) -> None:
    """Walk a translation section and hand its nodes to the processors.

    Template nodes that are direct children of the section are passed to
    ``process_translation_templates`` with no current translation. For
    each list item of a list child, templates and italic nodes are
    processed in order while tracking the translation returned by the
    latest template call and the preceding ``WikiNode``.

    Args:
        wxr: Extraction context forwarded to the helper functions.
        page_data: Word entries of the page; forwarded to
            ``process_translation_templates``.
        level_node: The section node whose children are scanned.
    """
    # One Translation object is shared by the whole section; the template
    # processor mutates it (sense, language) and copies it per term.
    shared_data = Translation()
    for section_child in level_node.filter_empty_str_child():
        if not isinstance(section_child, WikiNode):
            continue

        if section_child.kind == NodeKind.TEMPLATE:
            # e.g. "trad-début", which carries the sense of the box
            process_translation_templates(
                wxr, section_child, page_data, shared_data, None
            )
            continue

        if section_child.kind != NodeKind.LIST:
            continue

        for list_item in section_child.find_child(NodeKind.LIST_ITEM):
            # Both trackers are reset for every list item.
            last_wiki_node = None
            current_translation = None
            for item_child in list_item.filter_empty_str_child():
                if not isinstance(item_child, WikiNode):
                    continue
                if item_child.kind == NodeKind.TEMPLATE:
                    current_translation = process_translation_templates(
                        wxr,
                        item_child,
                        page_data,
                        shared_data,
                        current_translation,
                    )
                elif item_child.kind == NodeKind.ITALIC:
                    process_italic_node(
                        wxr, item_child, last_wiki_node, current_translation
                    )
                # Remember every WikiNode, whatever its kind, so the italic
                # handler can inspect what directly preceded it.
                last_wiki_node = item_child

52 

53 

def process_italic_node(
    wxr: WiktextractContext,
    italic_node: WikiNode,
    previous_node: WikiNode | None,
    translation_data: Translation | None,
) -> None:
    """Record parenthesised italic text after a "trad*" template as a raw tag.

    Nothing happens unless the cleaned italic text starts with "(" and
    ends with ")", the preceding node is a template whose name starts
    with "trad", and a translation is available to receive the tag.

    Args:
        wxr: Extraction context passed to ``clean_node``.
        italic_node: The italic node whose text may be a tag.
        previous_node: The node seen just before ``italic_node``, if any.
        translation_data: The translation to tag, if any; mutated in place.
    """
    italic_text = clean_node(wxr, None, italic_node)
    if not (italic_text.startswith("(") and italic_text.endswith(")")):
        return
    if previous_node is None or previous_node.kind != NodeKind.TEMPLATE:
        return
    if not previous_node.template_name.startswith("trad"):
        return
    if translation_data is None:
        return

    # strip() removes every leading/trailing parenthesis, not just one pair
    raw_tag = italic_text.strip("()")
    if raw_tag:
        translation_data.raw_tags.append(raw_tag)
        translate_raw_tags(translation_data)

74 

75 

def process_translation_templates(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    page_data: list[WordEntry],
    base_translation_data: Translation,
    translation_data: Translation | None,
) -> Translation | None:
    """Handle one template found in a translation section.

    Dispatches on the template name:

    * ``trad-fin``: ignored.
    * ``trad-début``: stores the sense text (parameter 1) and, when it is
      a decimal string, the sense index (parameter 2, default "0") on
      ``base_translation_data``.
    * ``T``: stores the language code (parameter 1) and the cleaned
      template text as language name on ``base_translation_data``.
    * any other name starting with ``trad``: builds a new translation
      from a deep copy of ``base_translation_data`` and appends it to
      ``page_data[-1].translations`` when its word is not empty.
    * anything else: when ``translation_data`` is not None, the cleaned
      template text (outer parentheses stripped) is added to it as a raw
      tag.

    Args:
        wxr: Extraction context used for cleaning and parsing nodes.
        template_node: The template to process.
        page_data: Word entries of the page; the last one receives new
            translations.
        base_translation_data: Section-wide defaults; mutated by the
            ``trad-début`` and ``T`` branches.
        translation_data: The translation produced by the previous
            template of the same list item, if any.

    Returns:
        None for ``trad-fin`` and for a ``trad*`` template without
        parameter 2; the newly built translation for a ``trad*``
        template; otherwise ``translation_data`` as passed in.
    """
    name = template_node.template_name

    if name == "trad-fin":
        # translation box end: nothing to extract
        return None

    if name == "trad-début":
        # translation box start: https://fr.wiktionary.org/wiki/Modèle:trad-début
        params = template_node.template_parameters
        base_translation_data.sense = clean_node(wxr, None, params.get(1, ""))
        raw_index = params.get(2, "0")
        if isinstance(raw_index, str) and raw_index.isdecimal():
            base_translation_data.sense_index = int(raw_index)
        return translation_data

    if name == "T":
        # translation language: https://fr.wiktionary.org/wiki/Modèle:T
        params = template_node.template_parameters
        base_translation_data.lang_code = params.get(1, "")
        base_translation_data.lang = clean_node(
            wxr, page_data[-1], template_node
        )
        return translation_data

    if name.startswith("trad"):
        # translation term: https://fr.wiktionary.org/wiki/Modèle:trad
        params = template_node.template_parameters
        if 2 not in params:  # required parameter
            return None

        entry = base_translation_data.model_copy(deep=True)
        # "dif" takes precedence over the second positional parameter
        positional_term = params.get(2)
        term_nodes = params.get("dif", positional_term)

        if base_translation_data.lang_code == "ja":
            # Japanese terms are expanded so ruby can be split from the word
            expanded_root = wxr.wtp.parse(
                wxr.wtp.node_to_wikitext(term_nodes), expand_all=True
            )
            ruby_data, node_without_ruby = extract_ruby(
                wxr, expanded_root.children
            )
            entry.ruby = ruby_data
            entry.word = clean_node(wxr, None, node_without_ruby)
        else:
            entry.word = clean_node(wxr, None, term_nodes)

        # "tr" takes precedence over "R" for the romanization
        fallback_roman = params.get("R", "")
        entry.roman = clean_node(wxr, None, params.get("tr", fallback_roman))
        # traditional writing of Chinese and Korean word
        entry.traditional_writing = clean_node(
            wxr, None, params.get("tradi", "")
        )

        if 3 in params:
            # each element of parameter 3 that is a TRAD_TAGS key adds a tag
            entry.tags.extend(
                TRAD_TAGS[flag] for flag in params[3] if flag in TRAD_TAGS
            )

        # Fall back to the template's own language code, then derive the
        # language name from the code, when the copied defaults are empty.
        if entry.lang_code == "":
            entry.lang_code = params.get(1, "")
        if entry.lang == "":
            entry.lang = code_to_name(entry.lang_code, "fr").capitalize()

        if entry.word:
            page_data[-1].translations.append(entry)
        return entry

    if translation_data is not None:
        # any other template after a translation is treated as a raw tag
        raw_tag = clean_node(wxr, None, template_node).strip("()")
        if raw_tag:
            translation_data.raw_tags.append(raw_tag)
            translate_raw_tags(translation_data)
    return translation_data

155 

156 

# Single-letter flags accepted by the "trad" template mapped to tag names.
# See https://fr.wiktionary.org/wiki/Modèle:trad
TRAD_TAGS: dict[str, str] = {
    # gender
    "m": "masculine",
    "f": "feminine",
    "n": "neuter",
    "c": "common",
    # number
    "s": "singular",
    "p": "plural",
    "d": "dual",
    # animacy
    "a": "animate",
    "i": "inanimate",
}