Coverage for src/wiktextract/extractor/id/translation.py: 78%

65 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1from mediawiki_langcodes import name_to_code 

2from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

3 

4from ...page import clean_node 

5from ...wxr_context import WiktextractContext 

6from .models import Translation, WordEntry 

7from .tags import translate_raw_tags 

8 

9 

def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Walk a translation section and collect translations from its lists.

    The direct children of ``level_node`` are visited in order.  A
    ``trans-top`` / ``kotak mulai`` / ``kotak awal`` template updates the
    current sense text from its first positional argument; every list that
    follows has each of its list items handed to
    ``extract_translation_list_item`` together with that sense.
    """
    box_start_templates = ("trans-top", "kotak mulai", "kotak awal")
    current_sense = ""
    for child in level_node.children:
        if (
            isinstance(child, TemplateNode)
            and child.template_name in box_start_templates
        ):
            # The sense stays in effect for all following lists until the
            # next box-start template replaces it.
            current_sense = clean_node(
                wxr, None, child.template_parameters.get(1, "")
            )
            continue
        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for item in child.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, item, current_sense
                )

24 

25 

def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
) -> None:
    """Extract the translations found in one list item.

    The language name is taken from everything in the item that precedes
    the first ``:`` (with a leading ``"bahasa "`` removed) and is mapped to
    a language code via ``name_to_code``.  Translation templates, qualifier
    templates, plain links and nested lists among the item's children are
    then processed in document order.  Results are appended to
    ``word_entry.translations``.
    """
    translation_templates = ("t", "t+", "trad-", "trad+", "t-simple")
    qualifier_templates = ("qualifier", "q", "qual", "f", "n", "p")

    lang_name = "unknown"
    lang_code = "unknown"
    for position, child in enumerate(list_item.children):
        if (
            isinstance(child, str)
            and ":" in child
            and lang_name == "unknown"
        ):
            # Language name = cleaned text of all earlier children plus the
            # part of this string that comes before the first colon.
            leading_text = clean_node(
                wxr, None, list_item.children[:position]
            )
            before_colon, _, _ = child.partition(":")
            parsed_name = leading_text + before_colon.strip()
            parsed_name = parsed_name.removeprefix("bahasa ").strip()
            lang_name = parsed_name if parsed_name != "" else "unknown"
            if lang_name != "unknown":
                found_code = name_to_code(lang_name, "id")
                lang_code = "unknown" if found_code == "" else found_code
            continue

        if isinstance(child, TemplateNode):
            template_name = child.template_name
            if template_name in translation_templates:
                extract_t_template(wxr, word_entry, child, lang_name, sense)
                continue
            if template_name in qualifier_templates:
                extract_qualifier_template(wxr, word_entry, child)
                continue

        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LINK and lang_name != "unknown":
            # A bare link is only treated as a translation once the
            # language of this list item is known.
            linked_word = clean_node(wxr, None, child)
            if linked_word != "":
                word_entry.translations.append(
                    Translation(
                        word=linked_word,
                        lang=lang_name,
                        lang_code=lang_code,
                        sense=sense,
                    )
                )
        elif child.kind == NodeKind.LIST:
            # Nested list: each sub-item is processed on its own and
            # determines its own language.
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, nested_item, sense
                )

84 

85 

def extract_t_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    lang_name: str,
    sense: str,
) -> None:
    """Build a ``Translation`` from a translation template.

    The language code comes from the template's first positional argument
    (``"unknown"`` when empty).  The template is expanded and its HTML is
    inspected: the first ``<span>`` whose ``lang`` attribute equals the
    language code supplies the word, a ``<span class="tr Latn">`` supplies
    the romanization, and every ``<abbr>`` contributes a raw tag.  The
    ``lit`` argument supplies the literal meaning.  The translation is
    appended to ``word_entry.translations`` only when a word was found.
    """
    params = t_node.template_parameters
    lang_code = clean_node(wxr, None, params.get(1, ""))
    if lang_code == "":
        lang_code = "unknown"

    translation = Translation(
        word="", lang=lang_name, lang_code=lang_code, sense=sense
    )
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)

    for span in expanded.find_html_recursively("span"):
        span_attrs = span.attrs
        # Only the first matching span sets the word; later spans can
        # still provide the romanization.
        if translation.word == "" and span_attrs.get("lang") == lang_code:
            translation.word = clean_node(wxr, None, span)
        elif span_attrs.get("class", "") == "tr Latn":
            translation.roman = clean_node(wxr, None, span)

    translation.lit = clean_node(wxr, None, params.get("lit", ""))
    translation.raw_tags.extend(
        clean_node(wxr, None, abbr)
        for abbr in expanded.find_html_recursively("abbr")
    )

    if translation.word == "":
        return

    translate_raw_tags(translation)
    word_entry.translations.append(translation)
    # Top-level links of the expansion are cleaned with the word entry as
    # the target instead of None.
    for link in expanded.find_child(NodeKind.LINK):
        clean_node(wxr, word_entry, link)

119 

120 

def extract_qualifier_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Attach a qualifier template's text to the latest translation.

    The cleaned template text has surrounding parentheses and spaces
    stripped and is split on commas; each non-empty piece is appended to
    the ``raw_tags`` of the last entry in ``word_entry.translations``, which
    is then passed to ``translate_raw_tags``.  Nothing is changed when there
    is no translation yet.
    """
    qualifier_text = clean_node(wxr, None, t_node).strip("() ")
    if len(word_entry.translations) == 0:
        return

    latest = word_entry.translations[-1]
    for piece in qualifier_text.split(","):
        tag = piece.strip()
        if tag != "":
            latest.raw_tags.append(tag)
    translate_raw_tags(latest)