Coverage for src/wiktextract/extractor/ru/translation.py: 90%

63 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from mediawiki_langcodes import name_to_code 

2from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode 

3 

4from ...page import clean_node 

5from ...wxr_context import WiktextractContext 

6from .models import Translation, WordEntry 

7from .tags import translate_raw_tags 

8 

9 

def extract_translations(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level3_node: WikiNode,
):
    """Collect translations from the templates directly under a section.

    Only direct child templates named "перев-блок" are handled; each one is
    passed to `process_translate_block_template`, which appends the results
    to `word_entry.translations`.
    """
    for template_node in level3_node.find_child(NodeKind.TEMPLATE):
        # Skip every template that is not a translation block.
        if template_node.template_name != "перев-блок":
            continue
        process_translate_block_template(wxr, word_entry, template_node)

18 

19 

def process_translate_block_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    template_node: TemplateNode,
) -> None:
    """Extract translations from one "перев-блок" template.

    The template is expanded and re-parsed, and every list item found in the
    expansion is turned into a `Translation`.  The template's first
    positional parameter is used as the sense of every translation in the
    block.  Items that end up without a word or without a language name are
    dropped.
    """
    # https://ru.wiktionary.org/wiki/Шаблон:перев-блок
    template_wikitext = wxr.wtp.node_to_wikitext(template_node)
    expanded_template = wxr.wtp.parse(template_wikitext, expand_all=True)
    sense = clean_node(wxr, None, template_node.template_parameters.get(1, ""))

    for list_item in expanded_template.find_child_recursively(
        NodeKind.LIST_ITEM
    ):
        translation = Translation(word="", lang="", sense=sense)

        for child in list_item.children:
            if isinstance(child, str):
                # Plain text outside of any tag is kept as a raw tag once the
                # surrounding punctuation and whitespace are removed.
                raw_tag = child.strip(" ():\n")
                if len(raw_tag) > 0:
                    translation.raw_tags.append(raw_tag)
                continue
            if not isinstance(child, WikiNode):
                continue

            if child.kind == NodeKind.LINK:
                # The link text is the language name.
                translation.lang = clean_node(wxr, None, child)
            elif child.kind == NodeKind.HTML:
                if child.tag == "sub":
                    translation.lang_code = clean_node(
                        wxr, None, child.children
                    )
                elif child.tag == "sup":
                    # language index
                    title = child.attrs.get("title", "")
                    if len(title) > 0:
                        translation.raw_tags.append(title)
                elif child.tag == "span":
                    # The span holds the translated word(s) and their tags.
                    process_translate_list_span_tag(
                        wxr, word_entry, translation, child
                    )

        if translation.word == "" or translation.lang == "":
            continue
        if translation.lang_code == "":
            # No <sub> code was present: look the code up by language name.
            translation.lang_code = name_to_code(translation.lang, "ru")
        translate_raw_tags(translation)
        word_entry.translations.append(translation)

60 

61 

def process_translate_list_span_tag(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    translation: Translation,
    span_node: HTMLNode,
) -> None:
    """Fill `translation` from the children of a `<span>` in a list item.

    Links set the translated word; nested `<span>`/`<i>` tags and italic
    nodes are collected as raw tags; a parenthesised string is stored as the
    romanization.

    A string ending in "," or ";" closes the current translation: when both
    the word and the language are set, a deep copy is appended to
    `word_entry.translations`, and the per-word fields (`word`, `roman`,
    `tags`, `raw_tags`) are then cleared for the next word.  The final word
    of the span is not appended here; it is left in `translation`.
    """
    for node in span_node.children:
        if isinstance(node, WikiNode):
            if node.kind == NodeKind.LINK:
                translation.word = clean_node(wxr, None, node)
            elif isinstance(node, HTMLNode) and node.tag in ["span", "i"]:
                # gender tag
                tag = clean_node(wxr, None, node)
                if len(tag) > 0:
                    translation.raw_tags.append(tag)
            elif node.kind == NodeKind.ITALIC:
                translation.raw_tags.append(clean_node(wxr, None, node))
        elif isinstance(node, str):
            # convert escaped characters like "&nbsp;"
            text = clean_node(wxr, None, node)
            if text.endswith((",", ";")):
                # this list item has multiple translation words
                # Strip both accepted separators; without ";" here a
                # romanization such as "(roman);" would never match the
                # parenthesis check below and would be silently dropped.
                stripped_text = text.strip(",;: ")
                if stripped_text.startswith("(") and stripped_text.endswith(
                    ")"
                ):
                    translation.roman = stripped_text.strip("()")
                if translation.word != "" and translation.lang != "":
                    if translation.lang_code == "":
                        translation.lang_code = name_to_code(
                            translation.lang, "ru"
                        )
                    translate_raw_tags(translation)
                    # Deep copy: `translation` is reused for the next word.
                    word_entry.translations.append(
                        translation.model_copy(deep=True)
                    )
                # Reset the per-word fields; language and sense carry over.
                translation.word = ""
                translation.roman = ""
                translation.tags = []
                translation.raw_tags = []
            elif text.startswith("(") and text.endswith(")"):
                translation.roman = text.strip("()")