Coverage for src/wiktextract/extractor/fr/translation.py: 92%
74 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
1from mediawiki_langcodes import code_to_name
2from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode
4from ...page import clean_node
5from ...wxr_context import WiktextractContext
6from ..ruby import extract_ruby
7from .models import Translation, WordEntry
8from .tags import translate_raw_tags
def extract_translation_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: WikiNode
) -> None:
    """Walk a translation section and hand its nodes to the processors.

    A single ``Translation`` object is created here and shared by every
    ``process_translation_templates`` call, so that values set by one
    template are visible while later templates of the section are processed.

    Template children of the section itself are processed with no current
    translation (this is where the "trad-début" template supplies the
    sense).  For each list item of a list child, the template and italic
    children are visited in order; the value returned for the latest
    template is kept as the current translation, and italic nodes are
    passed to ``process_italic_node`` together with the preceding node.
    """
    shared_data = Translation()

    for section_child in level_node.filter_empty_str_child():
        if not isinstance(section_child, WikiNode):
            continue

        if section_child.kind == NodeKind.TEMPLATE:
            # section-level template, e.g. "trad-début" carrying the sense
            process_translation_templates(
                wxr, section_child, page_data, shared_data, None
            )
            continue

        if section_child.kind != NodeKind.LIST:
            continue

        for list_item in section_child.find_child(NodeKind.LIST_ITEM):
            # both values are tracked per list item only
            preceding_node = None
            current_translation = None
            for item_child in list_item.filter_empty_str_child():
                if not isinstance(item_child, WikiNode):
                    continue
                if item_child.kind == NodeKind.TEMPLATE:
                    current_translation = process_translation_templates(
                        wxr,
                        item_child,
                        page_data,
                        shared_data,
                        current_translation,
                    )
                elif item_child.kind == NodeKind.ITALIC:
                    process_italic_node(
                        wxr, item_child, preceding_node, current_translation
                    )
                # remembered for every WikiNode child, whatever its kind
                preceding_node = item_child
def process_italic_node(
    wxr: WiktextractContext,
    italic_node: WikiNode,
    previous_node: WikiNode | None,
    translation_data: Translation | None,
) -> None:
    """Turn parenthesised italic text after a "trad" template into a tag.

    The cleaned text of ``italic_node`` is added to
    ``translation_data.raw_tags`` (followed by ``translate_raw_tags``) only
    when all of the following hold:

    * the text starts with "(" and ends with ")",
    * ``previous_node`` is a template node whose name starts with "trad",
    * ``translation_data`` is not ``None``,
    * the text is not empty once the parentheses are stripped.

    Otherwise the function does nothing.
    """
    italic_text = clean_node(wxr, None, italic_node)

    if not (italic_text.startswith("(") and italic_text.endswith(")")):
        return
    if previous_node is None or previous_node.kind != NodeKind.TEMPLATE:
        return
    if not previous_node.template_name.startswith("trad"):
        return
    if translation_data is None:
        return

    raw_tag = italic_text.strip("()")
    if raw_tag:
        translation_data.raw_tags.append(raw_tag)
        translate_raw_tags(translation_data)
def process_translation_templates(
    wxr: WiktextractContext,
    template_node: TemplateNode,
    page_data: list[WordEntry],
    base_translation_data: Translation,
    translation_data: Translation | None,
) -> Translation | None:
    """Process one template found in a translation section.

    Behaviour depends on the template name:

    * "trad-fin": ignored, returns ``None``.
    * "trad-début": stores the cleaned first parameter as
      ``base_translation_data.sense`` and, when the second parameter is a
      decimal string (default "0"), stores it as ``sense_index``.
    * "T": stores the first parameter as ``base_translation_data.lang_code``
      and the cleaned template as ``base_translation_data.lang``.
    * any other name starting with "trad": builds a new translation from a
      deep copy of ``base_translation_data`` and appends it to
      ``page_data[-1].translations`` when its word is not empty.  Returns
      ``None`` instead when parameter 2 is missing.
    * anything else: when ``translation_data`` is not ``None``, the cleaned
      template text (parentheses stripped) is added to its raw tags.

    Returns the newly built translation for a "trad" template, ``None`` in
    the two cases noted above, and the ``translation_data`` argument
    unchanged in every other case.
    """
    name = template_node.template_name

    if name == "trad-fin":
        # closing template of the translation box, nothing to extract
        return None

    if name == "trad-début":
        # translation box start:
        # https://fr.wiktionary.org/wiki/Modèle:trad-début
        box_params = template_node.template_parameters
        base_translation_data.sense = clean_node(
            wxr, None, box_params.get(1, "")
        )
        index_param = box_params.get(2, "0")
        if isinstance(index_param, str) and index_param.isdecimal():
            base_translation_data.sense_index = int(index_param)
        return translation_data

    if name == "T":
        # translation language: https://fr.wiktionary.org/wiki/Modèle:T
        base_translation_data.lang_code = (
            template_node.template_parameters.get(1, "")
        )
        base_translation_data.lang = clean_node(
            wxr, page_data[-1], template_node
        )
        return translation_data

    if name.startswith("trad"):
        # translation term: https://fr.wiktionary.org/wiki/Modèle:trad
        params = template_node.template_parameters
        if 2 not in params:  # required parameter
            return None

        new_translation = base_translation_data.model_copy(deep=True)
        # prefer the "dif" parameter over the second positional one
        term = params.get("dif", params.get(2))

        if base_translation_data.lang_code == "ja":
            # expand the term so ruby can be split from the word
            expanded = wxr.wtp.parse(
                wxr.wtp.node_to_wikitext(term), expand_all=True
            )
            ruby, term_without_ruby = extract_ruby(wxr, expanded.children)
            new_translation.ruby = ruby
            new_translation.word = clean_node(wxr, None, term_without_ruby)
        else:
            new_translation.word = clean_node(wxr, None, term)

        romanization = params.get("tr", params.get("R", ""))
        new_translation.roman = clean_node(wxr, None, romanization)
        # traditional writing of Chinese and Korean word
        new_translation.traditional_writing = clean_node(
            wxr, None, params.get("tradi", "")
        )

        if 3 in params:
            # each element of the third parameter is looked up in TRAD_TAGS
            new_translation.tags.extend(
                TRAD_TAGS[code] for code in params[3] if code in TRAD_TAGS
            )

        # fall back to the template's own language code / derived name when
        # the shared data did not provide them
        if new_translation.lang_code == "":
            new_translation.lang_code = params.get(1, "")
        if new_translation.lang == "":
            language_name = code_to_name(new_translation.lang_code, "fr")
            new_translation.lang = language_name.capitalize()

        if new_translation.word:
            page_data[-1].translations.append(new_translation)
        return new_translation

    if translation_data is not None:
        # any other template is treated as a tag of the current translation
        raw_tag = clean_node(wxr, None, template_node).strip("()")
        if raw_tag:
            translation_data.raw_tags.append(raw_tag)
            translate_raw_tags(translation_data)

    return translation_data
# Single-character codes looked up for each element of the third parameter
# of a "trad" template, mapped to the tag stored on the translation.
# Template documentation: https://fr.wiktionary.org/wiki/Modèle:trad
TRAD_TAGS: dict[str, str] = {
    # gender
    "m": "masculine",
    "f": "feminine",
    "n": "neuter",
    "c": "common",
    # number
    "s": "singular",
    "p": "plural",
    "d": "dual",
    # animacy
    "a": "animate",
    "i": "inanimate",
}