Coverage for src/wiktextract/extractor/it/translation.py: 91%
55 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1import re
3from mediawiki_langcodes import name_to_code
4from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
6from ...page import clean_node
7from ...wxr_context import WiktextractContext
8from .models import Translation, WordEntry
def extract_translation_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
) -> None:
    """Extract the translations of one section and attach them to entries.

    The direct children of ``level_node`` are scanned in order.  A ``Trad1``
    template sets the current sense from its first positional parameter;
    every list that follows is parsed item by item with that sense until
    another ``Trad1`` template replaces it.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        page_data: Entries parsed so far for this page; mutated in place.
        level_node: Section node whose children hold the translation lists.

    Returns:
        None.  The collected translations and categories are appended to
        every entry in ``page_data`` whose ``lang_code`` equals that of the
        last entry.
    """
    # https://it.wiktionary.org/wiki/Aiuto:Traduzioni
    sense = ""
    translations = []
    # Handed to clean_node while the sense text is cleaned; its "categories"
    # key (if any) is copied to the entries below.
    cats = {}
    for node in level_node.children:
        if isinstance(node, TemplateNode) and node.template_name == "Trad1":
            sense = clean_node(wxr, cats, node.template_parameters.get(1, ""))
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                translations.extend(
                    extract_translation_list_item(wxr, list_item, sense)
                )

    # An empty page_data never reaches the page_data[-1] lookup because the
    # loop body is not executed at all.
    for data in page_data:
        if data.lang_code == page_data[-1].lang_code:
            data.translations.extend(translations)
            data.categories.extend(cats.get("categories", []))
# Gender abbreviations that may appear in italics after a translation link,
# mapped to the tag stored on the translation.
TR_GENDER_TAGS: dict[str, str] = {
    "c": "common",
    "f": "feminine",
    "m": "masculine",
    "n": "neuter",
}
43def extract_translation_list_item(
44 wxr: WiktextractContext, list_item: WikiNode, sense: str
45) -> list[Translation]:
46 translations = []
47 lang_name = "unknown"
48 lang_code = "unknown"
49 before_colon = True
50 for index, node in enumerate(list_item.children):
51 if before_colon and isinstance(node, str) and ":" in node:
52 before_colon = False
53 lang_name = clean_node(
54 wxr,
55 None,
56 list_item.children[:index] + [node[: node.index(":")]],
57 )
58 for n in list_item.children[:index]:
59 if isinstance(n, TemplateNode):
60 lang_code = n.template_name
61 break
62 if lang_code == "unknown":
63 new_lang_code = name_to_code(lang_name, "it")
64 if new_lang_code != "": 64 ↛ 50line 64 didn't jump to line 50 because the condition on line 64 was always true
65 lang_code = new_lang_code
66 elif not before_colon and isinstance(node, WikiNode):
67 match node.kind:
68 case NodeKind.LINK:
69 word = clean_node(wxr, None, node)
70 if word != "": 70 ↛ 50line 70 didn't jump to line 50 because the condition on line 70 was always true
71 translations.append(
72 Translation(
73 word=word,
74 sense=sense,
75 lang=lang_name,
76 lang_code=lang_code,
77 )
78 )
79 case NodeKind.ITALIC: 79 ↛ 50line 79 didn't jump to line 50 because the pattern on line 79 always matched
80 raw_tag = clean_node(wxr, None, node)
81 if raw_tag in TR_GENDER_TAGS and len(translations) > 0: 81 ↛ 83line 81 didn't jump to line 83 because the condition on line 81 was always true
82 translations[-1].tags.append(TR_GENDER_TAGS[raw_tag])
83 elif raw_tag != "" and len(translations) > 0:
84 translations[-1].raw_tags.append(raw_tag)
85 elif not before_colon and isinstance(node, str):
86 m = re.search(r"\((.+)\)", node)
87 if m is not None and len(translations) > 0:
88 translations[-1].roman = m.group(1)
90 return translations