Coverage for src/wiktextract/extractor/it/translation.py: 91%

55 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1import re 

2 

3from mediawiki_langcodes import name_to_code 

4from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

5 

6from ...page import clean_node 

7from ...wxr_context import WiktextractContext 

8from .models import Translation, WordEntry 

9 

10 

def extract_translation_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
) -> None:
    """Gather the translations of one section and attach them to the page.

    The direct children of ``level_node`` are visited in order. A ``Trad1``
    template sets the sense (its first positional argument, cleaned) that
    is applied to the translations following it; every list item of every
    list node is handed to ``extract_translation_list_item``.

    The collected translations, together with any categories gathered
    while cleaning the ``Trad1`` argument, are appended to each entry of
    ``page_data`` whose ``lang_code`` equals that of the last entry.

    Reference: https://it.wiktionary.org/wiki/Aiuto:Traduzioni
    """
    current_sense = ""
    collected = []
    category_data = {}

    for child in level_node.children:
        is_sense_template = (
            isinstance(child, TemplateNode) and child.template_name == "Trad1"
        )
        if is_sense_template:
            current_sense = clean_node(
                wxr, category_data, child.template_parameters.get(1, "")
            )
            continue
        if not isinstance(child, WikiNode) or child.kind != NodeKind.LIST:
            continue
        for item in child.find_child(NodeKind.LIST_ITEM):
            collected.extend(
                extract_translation_list_item(wxr, item, current_sense)
            )

    new_categories = category_data.get("categories", [])
    for entry in page_data:
        # Only entries sharing the language of the most recent entry
        # receive the translations.
        if entry.lang_code != page_data[-1].lang_code:
            continue
        entry.translations.extend(collected)
        entry.categories.extend(new_categories)

33 

34 

# Gender abbreviations that may appear as italic text after a translation,
# mapped to the tag stored on the translation.
TR_GENDER_TAGS: dict[str, str] = {
    "c": "common",
    "f": "feminine",
    "m": "masculine",
    "n": "neuter",
}

41 

42 

43def extract_translation_list_item( 

44 wxr: WiktextractContext, list_item: WikiNode, sense: str 

45) -> list[Translation]: 

46 translations = [] 

47 lang_name = "unknown" 

48 lang_code = "unknown" 

49 before_colon = True 

50 for index, node in enumerate(list_item.children): 

51 if before_colon and isinstance(node, str) and ":" in node: 

52 before_colon = False 

53 lang_name = clean_node( 

54 wxr, 

55 None, 

56 list_item.children[:index] + [node[: node.index(":")]], 

57 ) 

58 for n in list_item.children[:index]: 

59 if isinstance(n, TemplateNode): 

60 lang_code = n.template_name 

61 break 

62 if lang_code == "unknown": 

63 new_lang_code = name_to_code(lang_name, "it") 

64 if new_lang_code != "": 64 ↛ 50line 64 didn't jump to line 50 because the condition on line 64 was always true

65 lang_code = new_lang_code 

66 elif not before_colon and isinstance(node, WikiNode): 

67 match node.kind: 

68 case NodeKind.LINK: 

69 word = clean_node(wxr, None, node) 

70 if word != "": 70 ↛ 50line 70 didn't jump to line 50 because the condition on line 70 was always true

71 translations.append( 

72 Translation( 

73 word=word, 

74 sense=sense, 

75 lang=lang_name, 

76 lang_code=lang_code, 

77 ) 

78 ) 

79 case NodeKind.ITALIC: 79 ↛ 50line 79 didn't jump to line 50 because the pattern on line 79 always matched

80 raw_tag = clean_node(wxr, None, node) 

81 if raw_tag in TR_GENDER_TAGS and len(translations) > 0: 81 ↛ 83line 81 didn't jump to line 83 because the condition on line 81 was always true

82 translations[-1].tags.append(TR_GENDER_TAGS[raw_tag]) 

83 elif raw_tag != "" and len(translations) > 0: 

84 translations[-1].raw_tags.append(raw_tag) 

85 elif not before_colon and isinstance(node, str): 

86 m = re.search(r"\((.+)\)", node) 

87 if m is not None and len(translations) > 0: 

88 translations[-1].roman = m.group(1) 

89 

90 return translations