Coverage for src/wiktextract/extractor/tr/translation.py: 64%

76 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-12 08:27 +0000

1from mediawiki_langcodes import name_to_code 

2from wikitextprocessor.parser import ( 

3 LEVEL_KIND_FLAGS, 

4 LevelNode, 

5 NodeKind, 

6 TemplateNode, 

7 WikiNode, 

8) 

9 

10from ...page import clean_node 

11from ...wxr_context import WiktextractContext 

12from .models import Translation, WordEntry 

13from .tags import translate_raw_tags 

14 

15 

def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    sense: str = "",
    from_trans_see: bool = False,
    source: str = "",
) -> None:
    """Walk a translation section and add its translations to *word_entry*.

    The direct children of *level_node* are handled as follows:

    * an "üst" / "trans-top" template (name compared case-insensitively)
      sets the current sense from its first positional parameter;
    * a list has each of its items passed to
      ``extract_translation_list_item``;
    * a translation-redirect template is passed to
      ``extract_trans_see_template``.

    Args:
        wxr: Extraction context.
        word_entry: Entry whose ``translations`` list is filled in.
        level_node: Section node holding the translation markup.
        sense: Initial sense gloss for the translations.
        from_trans_see: True when the section was reached through a
            redirect template.  In that case a non-empty *sense* is never
            overwritten by a header template and redirect templates are
            not followed again.
        source: Passed through unchanged to the list-item extractor.
    """
    header_names = ("üst", "trans-top")
    redirect_names = (
        "çeviri yönlendirme",
        "Türk dilleri-yönlendirme",
        "tercüme-yönlendirme",
        "çeviri-yönlendirme",
        "tercüme yönlendirme",
    )

    for child in level_node.children:
        if isinstance(child, TemplateNode):
            # Checked on every iteration on purpose: `sense` may have been
            # changed by an earlier header template in this same loop.
            may_set_sense = sense == "" or not from_trans_see
            if child.template_name.lower() in header_names and may_set_sense:
                sense = clean_node(
                    wxr, None, child.template_parameters.get(1, "")
                )
            elif child.template_name in redirect_names and not from_trans_see:
                extract_trans_see_template(wxr, word_entry, child)
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for item in child.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, item, sense, source
                )

49 

50 

def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
    source: str,
) -> None:
    """Extract the translations found in one list item.

    The language name is taken from everything before the last ":" of the
    first string child that contains a colon.  After that, "ç" / "çeviri"
    templates are passed to ``extract_çeviri_template``, nested lists are
    processed recursively, and plain links that follow the colon are
    stored as translations directly.

    Args:
        wxr: Extraction context.
        word_entry: Entry whose ``translations`` list is filled in.
        list_item: The list item node to scan.
        sense: Sense gloss attached to every translation found.
        source: Source value attached to every translation found.
    """
    lang_name = "unknown"
    seen_colon = False

    for position, child in enumerate(list_item.children):
        if isinstance(child, str):
            # Only the first colon-bearing string names the language.
            if ":" in child and lang_name == "unknown":
                before_colon = child[: child.rindex(":")]
                name_nodes = list_item.children[:position] + [before_colon]
                lang_name = clean_node(wxr, None, name_nodes).strip(": ")
                seen_colon = True
            continue

        if isinstance(child, TemplateNode) and child.template_name in (
            "ç",
            "çeviri",
        ):
            extract_çeviri_template(
                wxr, word_entry, child, sense, lang_name, source
            )
            continue

        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, nested_item, sense, source
                )
        elif seen_colon and child.kind == NodeKind.LINK:
            linked_word = clean_node(wxr, None, child)
            if linked_word == "":
                continue
            translation = Translation(
                word=linked_word,
                lang=lang_name or "unknown",
                lang_code=name_to_code(lang_name, "tr") or "unknown",
                sense=sense,
                source=source,
            )
            word_entry.translations.append(translation)

96 

97 

def extract_çeviri_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense: str,
    lang_name: str,
    source: str,
) -> None:
    """Build a translation from one "ç" / "çeviri" template.

    The template is expanded and the resulting HTML is inspected:

    * the first ``<span>`` whose ``lang`` attribute equals the template's
      first parameter supplies the translated word;
    * every non-empty ``<abbr>`` text becomes a raw tag;
    * the first ``<span>`` with class "tr" or "tr Latn" supplies the
      romanization.

    The translation is appended to ``word_entry.translations`` only when a
    word was found.

    Args:
        wxr: Extraction context.
        word_entry: Entry whose ``translations`` list is filled in.
        t_node: The translation template node.
        sense: Sense gloss for the translation.
        lang_name: Language name; "unknown" is used when it is empty.
        source: Source value for the translation.
    """
    code = clean_node(wxr, None, t_node.template_parameters.get(1, "unknown"))
    template_text = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_text, expand_all=True)

    translation = Translation(
        word="",
        lang_code=code,
        lang=lang_name or "unknown",
        sense=sense,
        source=source,
    )

    word_span = next(
        iter(expanded.find_html("span", attr_name="lang", attr_value=code)),
        None,
    )
    if word_span is not None:
        translation.word = clean_node(wxr, None, word_span)

    abbr_texts = (
        clean_node(wxr, None, abbr)
        for abbr in expanded.find_html_recursively("abbr")
    )
    translation.raw_tags.extend(text for text in abbr_texts if text != "")

    roman_span = next(
        (
            span
            for span in expanded.find_html("span")
            if span.attrs.get("class", "") in ("tr", "tr Latn")
        ),
        None,
    )
    if roman_span is not None:
        translation.roman = clean_node(wxr, None, roman_span)

    if translation.word != "":
        translate_raw_tags(translation)
        word_entry.translations.append(translation)

    # Run the expanded output through clean_node with the entry attached;
    # presumably this records data such as categories on the entry
    # (TODO confirm against clean_node).
    clean_node(wxr, word_entry, expanded)

137 

138 

def extract_trans_see_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Follow a translation-redirect template to the pages it points at.

    The first positional parameter is the sense.  Parameters 2 to 10, read
    up to the first missing index, are the target page titles.  When
    parameter 2 is absent, the first parameter is used as the only target
    title as well.  For each target, any "#fragment" is dropped, the page
    is loaded and parsed, and its "Çeviriler" section (if any) is
    extracted into *word_entry* with the target title as ``source``.

    A target page that cannot be loaded is skipped, so the remaining
    targets are still processed.

    Args:
        wxr: Extraction context.
        word_entry: Entry that receives the redirected translations.
        t_node: The redirect template node.
    """
    # https://tr.wiktionary.org/wiki/Şablon:çeviri_yönlendirme
    params = t_node.template_parameters
    sense = clean_node(wxr, None, params.get(1, ""))

    page_titles = []
    if 2 in params:
        for index in range(2, 11):
            if index not in params:
                break
            page_titles.append(clean_node(wxr, None, params[index]))
    else:
        # Without explicit targets, the sense parameter names the page.
        page_titles.append(sense)

    for page_title in page_titles:
        # Drop an optional "#section" fragment from the title.
        page_title = page_title.partition("#")[0]
        page_body = wxr.wtp.get_page_body(page_title, 0)
        if page_body is None:
            # One missing page must not abort the remaining targets.
            continue
        root = wxr.wtp.parse(page_body)
        target_node = find_subpage_section(wxr, root, "Çeviriler")
        if target_node is not None:
            extract_translation_section(
                wxr,
                word_entry,
                target_node,
                sense=sense,
                from_trans_see=True,
                source=page_title,
            )

173 

174 

def find_subpage_section(
    wxr: WiktextractContext, root: WikiNode, target_section: str
) -> WikiNode | None:
    """Return the first section node under *root* titled *target_section*.

    Level nodes are searched recursively.  A node's title is the cleaned
    text of its ``largs`` and must equal *target_section* exactly.
    Returns ``None`` when no section matches.
    """
    matching_sections = (
        candidate
        for candidate in root.find_child_recursively(LEVEL_KIND_FLAGS)
        if clean_node(wxr, None, candidate.largs) == target_section
    )
    return next(matching_sections, None)

182 return None