Coverage for src/wiktextract/extractor/fr/translation.py: 92%
74 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from mediawiki_langcodes import code_to_name
2from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode
4from ...page import clean_node
5from ...wxr_context import WiktextractContext
6from ..ruby import extract_ruby
7from .models import Translation, WordEntry
8from .tags import translate_raw_tags
def extract_translation(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Walk a translation section and hand its nodes to the processors.

    Templates that are direct children of ``level_node`` are passed to
    ``process_translation_templates`` with no current translation. For each
    list item of each child list, templates and italic nodes are processed
    in document order; the translation returned by the latest template call
    is threaded through the following calls of the same list item.

    Args:
        wxr: Extraction context forwarded to the processors.
        page_data: Word entries of the page, forwarded to
            ``process_translation_templates``.
        base_data: Not read by this function.
        level_node: Section node whose non-empty children are scanned.
    """
    # State shared by every translation of this section; the template
    # processor both reads and updates it.
    shared_data = Translation()
    for section_child in level_node.filter_empty_str_child():
        if not isinstance(section_child, WikiNode):
            continue
        if section_child.kind == NodeKind.TEMPLATE:
            # get sense from "trad-début" template
            process_translation_templates(
                wxr, section_child, page_data, shared_data, None
            )
            continue
        if section_child.kind != NodeKind.LIST:
            continue
        for list_item in section_child.find_child(NodeKind.LIST_ITEM):
            # Both trackers start fresh for every list item.
            last_node = None
            current = None
            for item_child in list_item.filter_empty_str_child():
                if not isinstance(item_child, WikiNode):
                    continue
                if item_child.kind == NodeKind.TEMPLATE:
                    current = process_translation_templates(
                        wxr, item_child, page_data, shared_data, current
                    )
                elif item_child.kind == NodeKind.ITALIC:
                    process_italic_node(wxr, item_child, last_node, current)
                # Remembered so an italic node can check what preceded it.
                last_node = item_child
def process_italic_node(
    wxr: WiktextractContext,
    italic_node: WikiNode,
    previous_node: WikiNode | None,
    translation_data: Translation | None,
) -> None:
    """Add italic text that follows a "trad" template as a raw tag.

    The cleaned text of ``italic_node`` is used only when it starts with
    "(" and ends with ")", the previous node is a template whose name starts
    with "trad", and a translation is available. The surrounding parentheses
    are stripped and, if anything remains, the text is appended to
    ``translation_data.raw_tags`` and ``translate_raw_tags`` is called.

    Args:
        wxr: Extraction context passed to ``clean_node``.
        italic_node: The italic node to read the tag text from.
        previous_node: The node processed just before ``italic_node`` in the
            same list item, or ``None``.
        translation_data: The translation to tag, or ``None``.
    """
    text = clean_node(wxr, None, italic_node)
    if not (text.startswith("(") and text.endswith(")")):
        return
    if previous_node is None or previous_node.kind != NodeKind.TEMPLATE:
        return
    if not previous_node.template_name.startswith("trad"):
        return
    if translation_data is None:
        return

    raw_tag = text.strip("()")
    if raw_tag:
        translation_data.raw_tags.append(raw_tag)
        translate_raw_tags(translation_data)
def process_translation_templates(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    page_data: list[WordEntry],
    base_translation_data: Translation,
    translation_data: Translation | None,
) -> Translation | None:
    """Dispatch one template of a translation section by its name.

    * "trad-fin": ignored.
    * "trad-début": stores the sense text and sense index on
      ``base_translation_data``.
    * "T": stores the language code and name on ``base_translation_data``.
    * names starting with "trad": builds a new translation from a copy of
      ``base_translation_data`` and appends it to
      ``page_data[-1].translations`` when its word is not empty.
    * anything else: the cleaned template text is added as a raw tag to
      ``translation_data`` when one is given.

    Args:
        wxr: Extraction context.
        template_node: The template to process.
        page_data: Word entries of the page; only the last one is used.
        base_translation_data: Shared section state (sense, language);
            updated in place by "trad-début" and "T".
        translation_data: The translation most recently built in the current
            list item, or ``None``.

    Returns:
        The translation that following nodes should attach to: the newly
        built one for "trad*" templates, ``None`` for "trad-fin" and for a
        "trad*" template lacking its term parameter, and otherwise the
        ``translation_data`` argument unchanged.
    """
    name = template_node.template_name
    if name == "trad-fin":
        # ignore translation end template
        return None
    if name == "trad-début":
        _apply_translation_box_start(wxr, template_node, base_translation_data)
    elif name == "T":
        _apply_translation_language(
            wxr, template_node, page_data, base_translation_data
        )
    elif name.startswith("trad"):
        return _extract_translation_term(
            wxr, template_node, page_data, base_translation_data
        )
    elif translation_data is not None:
        _append_template_raw_tag(wxr, template_node, translation_data)
    return translation_data


def _apply_translation_box_start(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    base_translation_data: Translation,
) -> None:
    """Copy sense text and sense index of "trad-début" to the shared state."""
    # translation box start: https://fr.wiktionary.org/wiki/Modèle:trad-début
    params = template_node.template_parameters
    base_translation_data.sense = clean_node(wxr, None, params.get(1, ""))
    index_param = params.get(2, "0")
    if isinstance(index_param, str) and index_param.isdecimal():
        base_translation_data.sense_index = int(index_param)
    else:
        # Do not let the index of an earlier translation box leak into this
        # one; 0 is the value an absent parameter already produces.
        base_translation_data.sense_index = 0


def _apply_translation_language(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    page_data: list[WordEntry],
    base_translation_data: Translation,
) -> None:
    """Copy language code and name of a "T" template to the shared state."""
    # Translation language: https://fr.wiktionary.org/wiki/Modèle:T
    # The code goes through clean_node like every other parameter read here,
    # so a parameter value that is not a plain string is reduced to text.
    base_translation_data.lang_code = clean_node(
        wxr, None, template_node.template_parameters.get(1, "")
    )
    base_translation_data.lang = clean_node(wxr, page_data[-1], template_node)


def _extract_translation_term(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    page_data: list[WordEntry],
    base_translation_data: Translation,
) -> Translation | None:
    """Build a translation from a "trad*" template and store it if non-empty.

    Returns ``None`` when the term parameter (2) is missing, otherwise the
    new translation, whether or not it was appended to the page data.
    """
    # Translation term: https://fr.wiktionary.org/wiki/Modèle:trad
    params = template_node.template_parameters
    if 2 not in params:  # required parameter
        return None

    translation = base_translation_data.model_copy(deep=True)
    # "dif", when given, takes precedence over the term parameter.
    term_nodes = params.get("dif", params.get(2))
    if base_translation_data.lang_code == "ja":
        # Expand the term so ruby annotations can be split from the word.
        expanded_term = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(term_nodes), expand_all=True
        )
        ruby_data, node_without_ruby = extract_ruby(
            wxr, expanded_term.children
        )
        translation.ruby = ruby_data
        translation.word = clean_node(wxr, None, node_without_ruby)
    else:
        translation.word = clean_node(wxr, None, term_nodes)

    translation.roman = clean_node(
        wxr, None, params.get("tr", params.get("R", ""))
    )
    # traditional writing of Chinese and Korean word
    translation.traditional_writing = clean_node(
        wxr, None, params.get("tradi", "")
    )
    if 3 in params:
        # Each element of the parameter is looked up separately; for a
        # string value that means one lookup per character.
        for tag_character in params[3]:
            if tag_character in TRAD_TAGS:
                translation.tags.append(TRAD_TAGS[tag_character])

    # Fall back to the template's own language code when no "T" template
    # supplied one, and derive the French language name from the code.
    if translation.lang_code == "":
        translation.lang_code = clean_node(wxr, None, params.get(1, ""))
    if translation.lang == "":
        translation.lang = code_to_name(
            translation.lang_code, "fr"
        ).capitalize()

    if translation.word:
        page_data[-1].translations.append(translation)
    return translation


def _append_template_raw_tag(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    translation_data: Translation,
) -> None:
    """Add the cleaned text of any other template as a raw tag."""
    raw_tag = clean_node(wxr, None, template_node).strip("()")
    if raw_tag:
        translation_data.raw_tags.append(raw_tag)
        translate_raw_tags(translation_data)
# Single-letter markers looked up from the third parameter of "trad"
# templates, mapped to the tag names stored on a translation.
# https://fr.wiktionary.org/wiki/Modèle:trad
TRAD_TAGS: dict[str, str] = {
    # gender
    "m": "masculine",
    "f": "feminine",
    "n": "neuter",
    "c": "common",
    # number
    "s": "singular",
    "p": "plural",
    "d": "dual",
    # animacy
    "a": "animate",
    "i": "inanimate",
}