Coverage for src/wiktextract/extractor/fr/translation.py: 92%
75 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-25 10:11 +0000
1from typing import Optional
3from mediawiki_langcodes import code_to_name
4from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode
6from ...page import clean_node
7from ...wxr_context import WiktextractContext
8from ..ruby import extract_ruby
9from .models import Translation, WordEntry
10from .tags import translate_raw_tags
def extract_translation(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Walk a translation section and hand its templates to the helpers.

    Template children found directly under ``level_node`` are passed to
    ``process_translation_templates`` with no current translation. For
    list children, every list item is scanned: its templates are passed to
    ``process_translation_templates`` and its italic nodes to
    ``process_italic_node``.

    A single ``Translation`` object is shared by all of those calls, so
    whatever the helpers store on it stays visible to later calls.

    ``base_data`` is accepted but not read by this function.
    """
    shared_translation = Translation()
    for section_child in level_node.filter_empty_str_child():
        if not isinstance(section_child, WikiNode):
            continue

        if section_child.kind == NodeKind.TEMPLATE:
            # get sense from "trad-début" template
            process_translation_templates(
                wxr, section_child, page_data, shared_translation, None
            )
            continue

        if section_child.kind != NodeKind.LIST:
            continue

        for list_item in section_child.find_child(NodeKind.LIST_ITEM):
            # Both trackers start afresh for every list item.
            last_wiki_node = None
            current_translation = None
            for item_child in list_item.filter_empty_str_child():
                if not isinstance(item_child, WikiNode):
                    continue
                if item_child.kind == NodeKind.TEMPLATE:
                    current_translation = process_translation_templates(
                        wxr,
                        item_child,
                        page_data,
                        shared_translation,
                        current_translation,
                    )
                elif item_child.kind == NodeKind.ITALIC:
                    process_italic_node(
                        wxr, item_child, last_wiki_node, current_translation
                    )
                # Remember the node so a following italic node can check
                # what came right before it.
                last_wiki_node = item_child
def process_italic_node(
    wxr: WiktextractContext,
    italic_node: WikiNode,
    previous_node: Optional[WikiNode],
    translation_data: Optional[Translation],
) -> None:
    """Record parenthesised italic text after a "trad" template as a tag.

    The cleaned text of ``italic_node`` is used only when it starts with
    "(" and ends with ")", ``previous_node`` is a template whose name
    starts with "trad", and ``translation_data`` is not ``None``. The
    surrounding parentheses are stripped; a non-empty remainder is appended
    to ``translation_data.raw_tags`` and ``translate_raw_tags`` is then
    called on ``translation_data``. In every other case nothing is changed.
    """
    italic_text = clean_node(wxr, None, italic_node)

    if not (italic_text.startswith("(") and italic_text.endswith(")")):
        return
    if previous_node is None or translation_data is None:
        return
    if previous_node.kind != NodeKind.TEMPLATE:
        return
    if not previous_node.template_name.startswith("trad"):
        return

    stripped_text = italic_text.strip("()")
    if stripped_text:
        translation_data.raw_tags.append(stripped_text)
        translate_raw_tags(translation_data)
def process_translation_templates(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    page_data: list[WordEntry],
    base_translation_data: Translation,
    translation_data: Optional[Translation],
) -> Optional[Translation]:
    """Handle one template found in a translation section.

    Behaviour depends on the template name:

    * "trad-fin": ignored, returns ``None``.
    * "trad-début": stores the sense text (parameter 1) and, when
      parameter 2 is a decimal string, the sense index on
      ``base_translation_data``; returns ``translation_data`` unchanged.
    * "T": stores the language code (parameter 1) and the cleaned template
      text as language name on ``base_translation_data``; returns
      ``translation_data`` unchanged.
    * any other name starting with "trad": returns ``None`` if parameter 2
      is missing; otherwise builds a new translation from a deep copy of
      ``base_translation_data``, appends it to
      ``page_data[-1].translations`` when its word is non-empty, and
      returns it.
    * anything else: when ``translation_data`` is not ``None``, the
      cleaned template text (parentheses stripped) is added to it as a raw
      tag; returns ``translation_data``.
    """
    name = template_node.template_name
    params = template_node.template_parameters

    if name == "trad-fin":
        # ignore translation end template
        return None

    if name == "trad-début":
        # translation box start: https://fr.wiktionary.org/wiki/Modèle:trad-début
        base_translation_data.sense = clean_node(wxr, None, params.get(1, ""))
        raw_sense_index = params.get(2, "0")
        if isinstance(raw_sense_index, str) and raw_sense_index.isdecimal():
            base_translation_data.sense_index = int(raw_sense_index)
        return translation_data

    if name == "T":
        # Translation language: https://fr.wiktionary.org/wiki/Modèle:T
        base_translation_data.lang_code = params.get(1, "")
        base_translation_data.lang = clean_node(
            wxr, page_data[-1], template_node
        )
        return translation_data

    if not name.startswith("trad"):
        # Any other template is treated as a tag for the current translation.
        if translation_data is not None:
            raw_tag = clean_node(wxr, None, template_node).strip("()")
            if raw_tag:
                translation_data.raw_tags.append(raw_tag)
                translate_raw_tags(translation_data)
        return translation_data

    # Translation term: https://fr.wiktionary.org/wiki/Modèle:trad
    if 2 not in params:  # required parameter
        return None

    new_translation = base_translation_data.model_copy(deep=True)
    # "dif" takes precedence over the second positional parameter.
    term_source = params["dif"] if "dif" in params else params[2]
    if base_translation_data.lang_code == "ja":
        # Expand the term so ruby markup can be separated from the word.
        expanded_term = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(term_source), expand_all=True
        )
        ruby_data, term_without_ruby = extract_ruby(
            wxr, expanded_term.children
        )
        new_translation.ruby = ruby_data
        new_translation.word = clean_node(wxr, None, term_without_ruby)
    else:
        new_translation.word = clean_node(wxr, None, term_source)

    # "tr" takes precedence over "R" for the romanization.
    roman_fallback = params.get("R", "")
    roman_source = params.get("tr", roman_fallback)
    new_translation.roman = clean_node(wxr, None, roman_source)

    # traditional writing of Chinese and Korean word
    tradi_source = params.get("tradi", "")
    new_translation.traditional_writing = clean_node(wxr, None, tradi_source)

    # Each element of the third parameter is looked up as a tag code.
    new_translation.tags.extend(
        TRAD_TAGS[tag_code]
        for tag_code in params.get(3, "")
        if tag_code in TRAD_TAGS
    )

    # Fall back to the template's own language code, then derive the name.
    if new_translation.lang_code == "":
        new_translation.lang_code = params.get(1, "")
    if new_translation.lang == "":
        language_name = code_to_name(new_translation.lang_code, "fr")
        new_translation.lang = language_name.capitalize()

    if new_translation.word:
        page_data[-1].translations.append(new_translation)
    return new_translation
# Single-character codes from the "trad" template mapped to tag names.
# Template documentation: https://fr.wiktionary.org/wiki/Modèle:trad
TRAD_TAGS: dict[str, str] = dict(
    m="masculine",
    f="feminine",
    n="neuter",
    c="common",
    s="singular",
    p="plural",
    d="dual",
    a="animate",
    i="inanimate",
)