Coverage for src/wiktextract/extractor/nl/translation.py: 92%

49 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1import re 

2 

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Translation, WordEntry 

8 

9 

def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Collect the translations of a translation section into ``word_entry``.

    The template and list children of ``level_node`` are visited in document
    order. A ``trans-top`` template sets the sense text (and, when its first
    argument starts with ``"<digits>."``, the sense number) that applies to
    the lists following it. Every list item is then passed to
    ``extract_translation_list_item`` together with that sense information.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        word_entry: Entry that receives the extracted translations.
        level_node: Node of the translation section to scan.
    """
    sense = ""
    sense_index = 0
    for node in level_node.find_child(NodeKind.TEMPLATE | NodeKind.LIST):
        if isinstance(node, TemplateNode) and node.template_name == "trans-top":
            first_arg = clean_node(
                wxr, None, node.template_parameters.get(1, "")
            )
            # A leading "<digits>." is the sense number; the rest is the gloss.
            m = re.match(r"(\d+)\.", first_arg)
            if m is not None:
                sense_index = int(m.group(1))
                sense = first_arg[m.end() :].strip()
            else:
                # Unnumbered header: reset the index so the number of an
                # earlier header is not attached to this table's translations.
                sense_index = 0
                sense = first_arg
        elif node.kind == NodeKind.LIST:
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, list_item, sense, sense_index
                )

31 

32 

# Template names that may follow a translation, mapped to the tag that
# extract_translation_list_item appends to the most recent translation.
TR_TEMPLATES: dict[str, str] = {
    # grammatical gender
    "m": "masculine",
    "f": "feminine",
    "n": "neuter",
    "c": "common",
    # number
    "s": "singular",
    "p": "plural",
    # animacy
    "a": "animate",
    "i": "inanimate",
}

43 

44 

def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
    sense_index: int,
) -> None:
    """Extract the translations found in one list item of a translation list.

    Everything up to the first ``":"`` found in a text child is taken as the
    language name. After that colon:

    * each ``trad`` template outside parentheses appends a ``Translation``
      to ``word_entry.translations``;
    * a template named in ``TR_TEMPLATES`` outside parentheses adds its tag
      to the most recently appended translation;
    * content between ``(`` and its closing ``)`` is accumulated and, when
      the outermost parenthesis closes, stored in the ``roman`` field of the
      most recently appended translation.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        word_entry: Entry whose ``translations`` list is extended/updated.
        list_item: The list item node to scan.
        sense: Sense text recorded on every translation created here.
        sense_index: Sense number recorded on every translation created here.
    """
    before_colon = True
    lang_name = ""
    brackets = 0  # current parenthesis nesting depth, never negative
    roman_str = ""
    for index, node in enumerate(list_item.children):
        if before_colon:
            if isinstance(node, str) and ":" in node:
                before_colon = False
                # Language name = cleaned earlier children plus the text
                # preceding the colon in this string.
                lang_name = (
                    clean_node(wxr, None, list_item.children[:index])
                    + node[: node.index(":")].strip()
                )
            continue

        if brackets == 0 and isinstance(node, TemplateNode):
            if node.template_name == "trad":
                word_entry.translations.append(
                    Translation(
                        lang=lang_name,
                        lang_code=node.template_parameters.get(1, ""),
                        word=clean_node(
                            wxr, None, node.template_parameters.get(2, "")
                        ),
                        sense=sense,
                        sense_index=sense_index,
                    )
                )
            elif node.template_name in TR_TEMPLATES and word_entry.translations:
                word_entry.translations[-1].tags.append(
                    TR_TEMPLATES[node.template_name]
                )
        elif isinstance(node, str):
            for c in node:
                if c == "(":
                    brackets += 1
                elif c == ")":
                    if brackets == 0:
                        # Stray ")" without an opening "(": ignore it. Letting
                        # the depth go negative would make the
                        # `brackets == 0` check above fail for the rest of
                        # the item and silently drop later templates.
                        continue
                    brackets -= 1
                    if brackets == 0:
                        if word_entry.translations:
                            word_entry.translations[-1].roman = roman_str
                        roman_str = ""
                elif brackets > 0:
                    roman_str += c
        elif brackets > 0:
            # Non-text node inside parentheses: keep its cleaned text.
            roman_str += clean_node(wxr, None, node)