Coverage for src/wiktextract/extractor/fr/translation.py: 92%

1from mediawiki_langcodes import code_to_name

2from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode

4from ...page import clean_node

5from ...wxr_context import WiktextractContext

6from ..ruby import extract_ruby

7from .models import Translation, WordEntry

8from .tags import translate_raw_tags

def extract_translation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: WikiNode
) -> None:
    """Walk a translation section and hand its nodes to the template handlers.

    Templates that are direct children of ``level_node`` are processed with
    no current translation. Inside every list item, templates are processed
    in order and italic nodes are passed to ``process_italic_node`` together
    with the preceding wiki node and the current translation.
    """
    shared_data = Translation()
    for section_child in level_node.filter_empty_str_child():
        if not isinstance(section_child, WikiNode):
            continue

        if section_child.kind == NodeKind.TEMPLATE:
            # get sense from "trad-début" template
            process_translation_templates(
                wxr, section_child, page_data, shared_data, None
            )
            continue

        if section_child.kind != NodeKind.LIST:
            continue

        for item in section_child.find_child(NodeKind.LIST_ITEM):
            # Both values are tracked per list item and start out empty.
            last_wiki_node = None
            current = None
            for item_child in item.filter_empty_str_child():
                if not isinstance(item_child, WikiNode):
                    continue
                if item_child.kind == NodeKind.TEMPLATE:
                    current = process_translation_templates(
                        wxr, item_child, page_data, shared_data, current
                    )
                elif item_child.kind == NodeKind.ITALIC:
                    process_italic_node(
                        wxr, item_child, last_wiki_node, current
                    )
                last_wiki_node = item_child

def process_italic_node(
    wxr: WiktextractContext,
    italic_node: WikiNode,
    previous_node: WikiNode | None,
    translation_data: Translation | None,
) -> None:
    """Add italic text after a "trad" template as a tag.

    The cleaned italic text is used only when it is wrapped in parentheses,
    the preceding node is a template whose name starts with "trad", and a
    current translation exists. The parentheses are stripped and a non-empty
    remainder is appended to ``translation_data.raw_tags``, after which
    ``translate_raw_tags`` is called on the translation.
    """
    text = clean_node(wxr, None, italic_node)

    if not (text.startswith("(") and text.endswith(")")):
        return
    if previous_node is None or previous_node.kind != NodeKind.TEMPLATE:
        return
    if not previous_node.template_name.startswith("trad"):
        return
    if translation_data is None:
        return

    raw_tag = text.strip("()")
    if raw_tag:
        translation_data.raw_tags.append(raw_tag)
        translate_raw_tags(translation_data)

def process_translation_templates(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    page_data: list[WordEntry],
    base_translation_data: Translation,
    translation_data: Translation | None,
) -> Translation | None:
    """Handle one template found in a translation section.

    Depending on the template name this either updates the shared
    ``base_translation_data`` ("trad-début", "T"), creates a new translation
    and appends it to ``page_data[-1].translations`` (names starting with
    "trad"), or adds the template's text as a raw tag to the current
    translation (any other template).

    Args:
        wxr: Extraction context passed through to ``clean_node`` and used to
            parse Japanese terms.
        template_node: The template to process.
        page_data: Word entries; new translations go to the last entry.
        base_translation_data: Sense and language shared by the following
            translations. Mutated in place.
        translation_data: The translation most recently created in the
            current list item, or ``None``.

    Returns:
        The current translation after processing: a new object for a "trad*"
        template, the unchanged argument for "trad-début", "T" and other
        templates, and ``None`` for "trad-fin" or a "trad*" template that
        lacks its second parameter.
    """
    name = template_node.template_name
    params = template_node.template_parameters

    if name == "trad-fin":
        # ignore translation end template
        return None

    if name == "trad-début":
        # translation box start: https://fr.wiktionary.org/wiki/Modèle:trad-début
        sense_parameter = params.get(1, "")
        base_translation_data.sense = clean_node(wxr, None, sense_parameter)
        sense_index_str = params.get(2, "0")
        if isinstance(sense_index_str, str) and sense_index_str.isdecimal():
            base_translation_data.sense_index = int(sense_index_str)
        else:
            # A new box has started, so the index of the previous box must
            # not leak into it. Use the same value as for a missing
            # parameter.
            base_translation_data.sense_index = 0
    elif name == "T":
        # Translation language: https://fr.wiktionary.org/wiki/Modèle:T
        base_translation_data.lang_code = params.get(1, "")
        base_translation_data.lang = clean_node(
            wxr, page_data[-1], template_node
        )
    elif name.startswith("trad"):
        # Translation term: https://fr.wiktionary.org/wiki/Modèle:trad
        if 2 not in params:  # required parameter
            return None
        # Start from a copy so the shared sense/language data stays intact.
        translation_data = base_translation_data.model_copy(deep=True)
        # "dif" takes precedence over the second positional parameter.
        term_nodes = params.get("dif", params.get(2))
        if base_translation_data.lang_code == "ja":
            # Expand the term so ruby annotations can be separated from it.
            expanded_term_nodes = wxr.wtp.parse(
                wxr.wtp.node_to_wikitext(term_nodes), expand_all=True
            )
            ruby_data, node_without_ruby = extract_ruby(
                wxr, expanded_term_nodes.children
            )
            translation_data.ruby = ruby_data
            translation_data.word = clean_node(wxr, None, node_without_ruby)
        else:
            translation_data.word = clean_node(wxr, None, term_nodes)
        # "tr" takes precedence over "R" for the romanization.
        translation_data.roman = clean_node(
            wxr, None, params.get("tr", params.get("R", ""))
        )
        # traditional writing of Chinese and Korean word
        translation_data.traditional_writing = clean_node(
            wxr, None, params.get("tradi", "")
        )
        if 3 in params:
            # Each element of the third parameter is looked up in TRAD_TAGS.
            for tag_character in params[3]:
                if tag_character in TRAD_TAGS:
                    translation_data.tags.append(TRAD_TAGS[tag_character])
        # Fall back to the template's own language code when no "T"
        # template provided one.
        if translation_data.lang_code == "":
            translation_data.lang_code = params.get(1, "")
        if translation_data.lang == "":
            translation_data.lang = code_to_name(
                translation_data.lang_code, "fr"
            ).capitalize()
        if len(translation_data.word) > 0:
            page_data[-1].translations.append(translation_data)
    elif translation_data is not None:
        # Any other template after a translation is stored as a raw tag.
        tag = clean_node(wxr, None, template_node).strip("()")
        if len(tag) > 0:
            translation_data.raw_tags.append(tag)
            translate_raw_tags(translation_data)

    return translation_data

155

156

# Maps the single-letter flags looked up from the third positional parameter
# of a "trad" template to the tag appended to a translation.
# https://fr.wiktionary.org/wiki/Modèle:trad
TRAD_TAGS: dict[str, str] = dict(
    [
        ("m", "masculine"),
        ("f", "feminine"),
        ("n", "neuter"),
        ("c", "common"),
        ("s", "singular"),
        ("p", "plural"),
        ("d", "dual"),
        ("a", "animate"),
        ("i", "inanimate"),
    ]
)

168}