Coverage for src/wiktextract/extractor/it/translation.py: 91%

1import re

3from mediawiki_langcodes import name_to_code

4from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

6from ...page import clean_node

7from ...wxr_context import WiktextractContext

8from .models import Translation, WordEntry

def extract_translation_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
) -> None:
    """Collect the translations found under a translation section.

    Walks the direct children of ``level_node`` in order:

    * a ``Trad1`` template sets the current sense gloss from its first
      positional argument (empty string when the argument is absent); the
      gloss stays in effect until the next ``Trad1`` template;
    * every list item of a list node is passed to
      ``extract_translation_list_item`` together with the current gloss.

    The gathered translations, plus whatever ``clean_node`` stored under the
    ``"categories"`` key while cleaning the glosses, are appended to every
    entry of ``page_data`` whose ``lang_code`` equals that of the last entry.
    Nothing is returned; ``page_data`` entries are modified in place.

    Reference: https://it.wiktionary.org/wiki/Aiuto:Traduzioni
    """
    sense = ""
    translations = []
    cats = {}
    for node in level_node.children:
        if isinstance(node, TemplateNode) and node.template_name == "Trad1":
            sense = clean_node(wxr, cats, node.template_parameters.get(1, ""))
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                translations.extend(
                    extract_translation_list_item(wxr, list_item, sense)
                )

    # ``page_data[-1]`` is only evaluated inside the loop, so an empty
    # ``page_data`` is a no-op rather than an IndexError.
    for data in page_data:
        if data.lang_code == page_data[-1].lang_code:
            data.translations.extend(translations)
            data.categories.extend(cats.get("categories", []))

# One-letter gender abbreviations mapped to the tag name stored in a
# translation's ``tags`` list.
TR_GENDER_TAGS: dict[str, str] = {
    "c": "common",
    "f": "feminine",
    "m": "masculine",
    "n": "neuter",
}

43def extract_translation_list_item(

44 wxr: WiktextractContext, list_item: WikiNode, sense: str

45) -> list[Translation]:

46 translations = []

47 lang_name = "unknown"

48 lang_code = "unknown"

49 before_colon = True

50 for index, node in enumerate(list_item.children):

51 if before_colon and isinstance(node, str) and ":" in node:

52 before_colon = False

53 lang_name = clean_node(

54 wxr,

55 None,

56 list_item.children[:index] + [node[: node.index(":")]],

57 )

58 for n in list_item.children[:index]:

59 if isinstance(n, TemplateNode):

60 lang_code = n.template_name

61 break

62 if lang_code == "unknown":

63 new_lang_code = name_to_code(lang_name, "it")

64 if new_lang_code != "": 64 ↛ 50line 64 didn't jump to line 50 because the condition on line 64 was always true

65 lang_code = new_lang_code

66 elif not before_colon and isinstance(node, WikiNode):

67 match node.kind:

68 case NodeKind.LINK:

69 word = clean_node(wxr, None, node)

70 if word != "": 70 ↛ 50line 70 didn't jump to line 50 because the condition on line 70 was always true

71 translations.append(

72 Translation(

73 word=word,

74 sense=sense,

75 lang=lang_name,

76 lang_code=lang_code,

77 )

78 )

79 case NodeKind.ITALIC: 79 ↛ 50line 79 didn't jump to line 50 because the pattern on line 79 always matched

80 raw_tag = clean_node(wxr, None, node)

81 if raw_tag in TR_GENDER_TAGS and len(translations) > 0: 81 ↛ 83line 81 didn't jump to line 83 because the condition on line 81 was always true

82 translations[-1].tags.append(TR_GENDER_TAGS[raw_tag])

83 elif raw_tag != "" and len(translations) > 0:

84 translations[-1].raw_tags.append(raw_tag)

85 elif not before_colon and isinstance(node, str):

86 m = re.search(r"\((.+)\)", node)

87 if m is not None and len(translations) > 0:

88 translations[-1].roman = m.group(1)

90 return translations