Coverage for src/wiktextract/extractor/nl/translation.py: 92%

1import re

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

5from ...page import clean_node

6from ...wxr_context import WiktextractContext

7from .models import Translation, WordEntry

8from .tags import LIST_ITEM_TAG_TEMPLATES

def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract all translations found directly under a translation section.

    The direct template and list children of ``level_node`` are walked in
    document order. Each ``trans-top`` template sets the sense gloss (and,
    when its first argument starts with ``<digits>.``, the sense number)
    that is applied to the list items that follow it, until the next
    ``trans-top`` template.

    Results are appended to ``word_entry.translations`` by
    ``extract_translation_list_item``; nothing is returned.
    """
    sense = ""
    sense_index = 0
    for node in level_node.find_child(NodeKind.TEMPLATE | NodeKind.LIST):
        if isinstance(node, TemplateNode) and node.template_name == "trans-top":
            first_arg = clean_node(
                wxr, None, node.template_parameters.get(1, "")
            )
            # A header such as "1. some gloss" carries the sense number
            # before the gloss text.
            m = re.match(r"(\d+)\.", first_arg)
            if m is not None:
                sense_index = int(m.group(1))
                sense = first_arg[m.end() :].strip()
            else:
                sense = first_arg
                # An unnumbered header must not inherit the number of a
                # previous "trans-top"; fall back to the initial value.
                sense_index = 0
        elif node.kind == NodeKind.LIST:
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, list_item, sense, sense_index
                )

def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
    sense_index: int,
) -> None:
    """Extract the translations contained in one translation list item.

    The text before the first ``:`` found in a string child is used as the
    language name. After the colon:

    * each ``trad`` template outside parentheses with a non-empty second
      argument appends a ``Translation`` to ``word_entry.translations``;
    * a template listed in ``LIST_ITEM_TAG_TEMPLATES`` adds its mapped tag
      to the most recent translation of this list item;
    * text enclosed in parentheses is stored as ``roman`` on the most
      recent translation of this list item;
    * nested lists are processed recursively with the same ``sense`` and
      ``sense_index``.

    Nothing is returned; ``word_entry`` is modified in place.
    """
    before_colon = True
    lang_name = ""
    brackets = 0  # current parenthesis nesting depth in the item text
    roman_str = ""
    # Most recent translation created by *this* call. Tags and
    # romanizations are attached only to it, so they can never end up on a
    # translation that belongs to an earlier list item (another language).
    last_translation = None
    for index, node in enumerate(list_item.children):
        if before_colon and isinstance(node, str) and ":" in node:
            before_colon = False
            # Everything up to the colon (earlier nodes plus the start of
            # this string) forms the language name.
            lang_name = (
                clean_node(wxr, None, list_item.children[:index])
                + node[: node.index(":")].strip()
            )
        elif not before_colon:
            if brackets == 0 and isinstance(node, TemplateNode):
                if node.template_name == "trad":
                    tr_word = clean_node(
                        wxr, None, node.template_parameters.get(2, "")
                    )
                    if tr_word != "":
                        last_translation = Translation(
                            lang=lang_name,
                            # Template arguments are not guaranteed to be
                            # plain strings; normalise like the word.
                            lang_code=clean_node(
                                wxr, None, node.template_parameters.get(1, "")
                            ),
                            word=tr_word,
                            sense=sense,
                            sense_index=sense_index,
                        )
                        word_entry.translations.append(last_translation)
                elif (
                    node.template_name in LIST_ITEM_TAG_TEMPLATES
                    and last_translation is not None
                ):
                    last_translation.tags.append(
                        LIST_ITEM_TAG_TEMPLATES[node.template_name]
                    )
            elif isinstance(node, str):
                for c in node:
                    if c == "(":
                        brackets += 1
                    elif c == ")":
                        if brackets == 0:
                            # Unmatched closing parenthesis: ignore it so
                            # the depth never goes negative, which would
                            # hide every following "trad" template.
                            continue
                        brackets -= 1
                        if brackets == 0:
                            if last_translation is not None:
                                last_translation.roman = roman_str
                            roman_str = ""
                    elif brackets > 0:
                        roman_str += c
            elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
                for next_list_item in node.find_child(NodeKind.LIST_ITEM):
                    extract_translation_list_item(
                        wxr, word_entry, next_list_item, sense, sense_index
                    )
            elif brackets > 0:
                # Non-string node inside parentheses: its cleaned text is
                # part of the romanization.
                roman_str += clean_node(wxr, None, node)