Coverage for src/wiktextract/extractor/ru/translation.py: 90%

1from mediawiki_langcodes import name_to_code

2from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode

4from ...page import clean_node

5from ...wxr_context import WiktextractContext

6from .models import Translation, WordEntry

7from .tags import translate_raw_tags

def extract_translations(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level3_node: WikiNode,
):
    """Collect translations from the translation section of a page.

    Every direct child template of ``level3_node`` named "перев-блок" is
    handed to ``process_translate_block_template``, which appends the
    translations it finds to ``word_entry.translations``. Other templates
    are ignored.
    """
    # Lazy filter: templates are still processed one at a time, in
    # document order, exactly as they are yielded by ``find_child``.
    translation_blocks = (
        candidate
        for candidate in level3_node.find_child(NodeKind.TEMPLATE)
        if candidate.template_name == "перев-блок"
    )
    for block_template in translation_blocks:
        process_translate_block_template(wxr, word_entry, block_template)

def process_translate_block_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    template_node: TemplateNode,
) -> None:
    """Extract translations from one expanded "перев-блок" template.

    The template is converted back to wikitext and re-parsed with all
    templates expanded. Each list item found in the expansion yields one
    ``Translation`` (the span handler may append further ones itself when
    an item lists several words). Template parameter ``1`` is cleaned and
    used as the ``sense`` of every translation from this block.
    """
    # https://ru.wiktionary.org/wiki/Шаблон:перев-блок
    block_wikitext = wxr.wtp.node_to_wikitext(template_node)
    expanded_root = wxr.wtp.parse(block_wikitext, expand_all=True)
    sense_text = clean_node(
        wxr, None, template_node.template_parameters.get(1, "")
    )

    for item in expanded_root.find_child_recursively(NodeKind.LIST_ITEM):
        entry = Translation(word="", lang="", sense=sense_text)

        for child in item.children:
            if isinstance(child, str):
                # Plain text between nodes: whatever is left after
                # dropping spaces, parentheses, colons and newlines is
                # kept as a raw tag.
                leftover = child.strip(" ():\n")
                if leftover:
                    entry.raw_tags.append(leftover)
                continue
            if not isinstance(child, WikiNode):
                continue

            if child.kind == NodeKind.LINK:
                # A link directly inside the list item is the language name.
                entry.lang = clean_node(wxr, None, child)
                continue
            if child.kind != NodeKind.HTML:
                continue

            if child.tag == "sub":
                # The <sub> element's text is stored as the language code.
                entry.lang_code = clean_node(wxr, None, child.children)
            elif child.tag == "sup":
                # language index
                index_title = child.attrs.get("title", "")
                if index_title:
                    entry.raw_tags.append(index_title)
            elif child.tag == "span":
                process_translate_list_span_tag(wxr, word_entry, entry, child)

        # Only keep entries that ended up with both a word and a language.
        if entry.word == "" or entry.lang == "":
            continue
        if entry.lang_code == "":
            # No code given in the markup: look it up from the Russian name.
            entry.lang_code = name_to_code(entry.lang, "ru")
        translate_raw_tags(entry)
        word_entry.translations.append(entry)

def process_translate_list_span_tag(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    translation: Translation,
    span_node: HTMLNode,
) -> None:
    """Fill ``translation`` from the ``<span>`` holding the translated words.

    Children of ``span_node`` are handled in order:

    * a link sets ``translation.word``;
    * a nested ``<span>``/``<i>`` element or an italic node contributes a
      raw tag;
    * a string wrapped in parentheses sets ``translation.roman``;
    * a string ending with "," or ";" marks the end of one word in a list
      item that contains several. The translation collected so far is
      appended to ``word_entry.translations`` as a deep copy, and the
      per-word fields (``word``, ``roman``, ``tags``, ``raw_tags``) are
      reset so the next word starts clean. ``lang``, ``lang_code`` and
      ``sense`` are kept for the following words.

    The last word of the span is NOT appended here; it is left in
    ``translation`` for the caller to finish.
    """
    for node in span_node.children:
        if isinstance(node, WikiNode):
            if node.kind == NodeKind.LINK:
                translation.word = clean_node(wxr, None, node)
            elif isinstance(node, HTMLNode) and node.tag in ["span", "i"]:
                # gender tag
                tag = clean_node(wxr, None, node)
                if len(tag) > 0:
                    translation.raw_tags.append(tag)
            elif node.kind == NodeKind.ITALIC:
                translation.raw_tags.append(clean_node(wxr, None, node))
        elif isinstance(node, str):
            # Run the raw string through clean_node so escaped characters
            # (presumably HTML entities such as a non-breaking space) are
            # converted before the text is inspected.
            text = clean_node(wxr, None, node)
            if text.endswith((",", ";")):
                # this list item has multiple translation words
                # Strip BOTH separators accepted above. Leaving ";" out
                # made "(roman);" fail the parenthesis test below, so the
                # romanization was silently dropped.
                stripped_text = text.strip(",;: ")
                if stripped_text.startswith("(") and stripped_text.endswith(
                    ")"
                ):
                    translation.roman = stripped_text.strip("()")
                if translation.word != "" and translation.lang != "":
                    if translation.lang_code == "":
                        translation.lang_code = name_to_code(
                            translation.lang, "ru"
                        )
                    translate_raw_tags(translation)
                    # Deep copy: the same object keeps being mutated for
                    # the next word of this list item.
                    word_entry.translations.append(
                        translation.model_copy(deep=True)
                    )
                # Reset the per-word state, even when nothing was appended.
                translation.word = ""
                translation.roman = ""
                translation.tags = []
                translation.raw_tags = []
            elif text.startswith("(") and text.endswith(")"):
                translation.roman = text.strip("()")