Coverage for src/wiktextract/extractor/nl/translation.py: 92%

54 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1import re 

2 

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Translation, WordEntry 

8from .tags import LIST_ITEM_TAG_TEMPLATES 

9 

10 

def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Collect the translations of a translation section into ``word_entry``.

    The direct template and list children of ``level_node`` are visited in
    document order.  A ``trans-top`` template sets the sense context for the
    lists that follow it: its first argument is cleaned with ``clean_node``
    and, when it starts with ``<digits>.``, the digits become the sense index
    and the remaining text the sense.  Each list item of every list child is
    handed to ``extract_translation_list_item`` together with the current
    sense context.
    """
    sense = ""
    sense_index = 0
    for node in level_node.find_child(NodeKind.TEMPLATE | NodeKind.LIST):
        is_trans_top = (
            isinstance(node, TemplateNode)
            and node.template_name == "trans-top"
        )
        if is_trans_top:
            first_arg = clean_node(
                wxr, None, node.template_parameters.get(1, "")
            )
            m = re.match(r"(\d+)\.", first_arg)
            if m is not None:
                sense_index = int(m.group(1))
                sense = first_arg[m.end() :].strip()
            else:
                # An unnumbered gloss must not inherit the index of an
                # earlier numbered "trans-top" in the same section.
                sense_index = 0
                sense = first_arg
        elif node.kind == NodeKind.LIST:
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, list_item, sense, sense_index
                )

32 

33 

def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
    sense_index: int,
) -> None:
    """Parse one translation list item and append results to ``word_entry``.

    Everything up to the first ``:`` found in a plain-text child is taken as
    the language name.  After the colon:

    * a ``trad`` template outside parentheses yields a ``Translation`` whose
      word is the cleaned second template argument and whose language code
      is the first argument (empty words are skipped);
    * a template listed in ``LIST_ITEM_TAG_TEMPLATES`` outside parentheses
      adds its mapped tag to the most recently appended translation;
    * text enclosed in parentheses is accumulated and, once the outermost
      parenthesis closes, stored in the ``roman`` field of the most recently
      appended translation;
    * a nested list is processed recursively, item by item, with the same
      ``sense`` and ``sense_index``.
    """
    before_colon = True
    lang_name = ""
    brackets = 0  # parenthesis nesting depth seen so far in plain text
    roman_str = ""  # text gathered while inside parentheses
    for index, node in enumerate(list_item.children):
        if before_colon:
            if isinstance(node, str) and ":" in node:
                before_colon = False
                # Language name = all earlier children plus the text in
                # front of the colon in this string node.
                lang_name = (
                    clean_node(wxr, None, list_item.children[:index])
                    + node[: node.index(":")].strip()
                )
            continue

        if brackets == 0 and isinstance(node, TemplateNode):
            if node.template_name == "trad":
                tr_word = clean_node(
                    wxr, None, node.template_parameters.get(2, "")
                )
                if tr_word != "":
                    word_entry.translations.append(
                        Translation(
                            lang=lang_name,
                            lang_code=node.template_parameters.get(1, ""),
                            word=tr_word,
                            sense=sense,
                            sense_index=sense_index,
                        )
                    )
            elif (
                node.template_name in LIST_ITEM_TAG_TEMPLATES
                and len(word_entry.translations) > 0
            ):
                word_entry.translations[-1].tags.append(
                    LIST_ITEM_TAG_TEMPLATES[node.template_name]
                )
        elif isinstance(node, str):
            for c in node:
                if c == "(":
                    brackets += 1
                elif c == ")":
                    if brackets == 0:
                        # Stray ")" without a matching "(": ignore it so the
                        # depth never goes negative, which would make every
                        # later template in this item be skipped.
                        continue
                    brackets -= 1
                    if brackets == 0:
                        if len(word_entry.translations) > 0:
                            word_entry.translations[-1].roman = roman_str
                        roman_str = ""
                elif brackets > 0:
                    roman_str += c
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for next_list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, next_list_item, sense, sense_index
                )
        elif brackets > 0:
            # Non-text node inside parentheses contributes its cleaned text.
            roman_str += clean_node(wxr, None, node)