Coverage for src/wiktextract/extractor/tr/translation.py: 64%

1from mediawiki_langcodes import name_to_code

2from wikitextprocessor.parser import (

3 LEVEL_KIND_FLAGS,

4 LevelNode,

5 NodeKind,

6 TemplateNode,

7 WikiNode,

10from ...page import clean_node

11from ...wxr_context import WiktextractContext

12from .models import Translation, WordEntry

13from .tags import translate_raw_tags

def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    sense: str = "",
    from_trans_see: bool = False,
    source: str = "",
) -> None:
    """Walk a translation section and collect its translations.

    Direct children of ``level_node`` are handled in order:

    * a "üst"/"trans-top" template (name compared case-insensitively)
      replaces the current sense with its first argument, unless a
      non-empty sense was supplied together with ``from_trans_see``;
    * a list node has each of its list items passed to
      ``extract_translation_list_item``;
    * a translation-redirect template is passed to
      ``extract_trans_see_template``, but only when this call is not
      itself the result of following such a redirect.

    Args:
        wxr: Extraction context.
        word_entry: Entry handed on to the item/redirect extractors.
        level_node: The section node whose children are scanned.
        sense: Initial sense (gloss) for the translations that follow.
        from_trans_see: True when invoked while following a redirect
            template to another page.
        source: Forwarded unchanged to the list item extractor.
    """
    header_templates = ("üst", "trans-top")
    redirect_templates = (
        "çeviri yönlendirme",
        "Türk dilleri-yönlendirme",
        "tercüme-yönlendirme",
        "çeviri-yönlendirme",
        "tercüme yönlendirme",
    )
    for child in level_node.children:
        is_template = isinstance(child, TemplateNode)

        # A sense supplied by a redirect template takes precedence over
        # the headers found on the redirected-to page.
        may_update_sense = sense == "" or not from_trans_see
        if (
            is_template
            and child.template_name.lower() in header_templates
            and may_update_sense
        ):
            sense = clean_node(
                wxr, None, child.template_parameters.get(1, "")
            )
            continue

        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for item in child.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, item, sense, source
                )
            continue

        # Redirects are followed one level deep only.
        if (
            is_template
            and child.template_name in redirect_templates
            and not from_trans_see
        ):
            extract_trans_see_template(wxr, word_entry, child)

def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
    source: str,
) -> None:
    """Extract the translations found in a single list item.

    The language name is taken from everything that precedes the last
    ":" of the first string child containing a colon.  After that,
    "ç"/"çeviri" templates are passed to ``extract_çeviri_template``
    and plain link nodes are recorded directly as translations.  Nested
    lists are processed recursively with the same sense and source.

    Args:
        wxr: Extraction context.
        word_entry: Entry whose ``translations`` list is appended to.
        list_item: The list item node to scan.
        sense: Sense (gloss) stored on each translation.
        source: Source value stored on each translation.
    """
    lang_name = "unknown"
    seen_colon = False
    children = list_item.children
    for position, child in enumerate(children):
        if isinstance(child, str):
            # Only the first colon-bearing string defines the language.
            if lang_name == "unknown" and ":" in child:
                before_colon = child[: child.rindex(":")]
                lang_name = clean_node(
                    wxr, None, children[:position] + [before_colon]
                ).strip(": ")
                seen_colon = True
            continue

        if isinstance(child, TemplateNode) and child.template_name in (
            "ç",
            "çeviri",
        ):
            extract_çeviri_template(
                wxr, word_entry, child, sense, lang_name, source
            )
            continue

        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, nested_item, sense, source
                )
        elif seen_colon and child.kind == NodeKind.LINK:
            # A bare link after the language label is a translation
            # written without a template.
            word = clean_node(wxr, None, child)
            if word != "":
                translation = Translation(
                    word=word,
                    lang=lang_name or "unknown",
                    lang_code=name_to_code(lang_name, "tr") or "unknown",
                    sense=sense,
                    source=source,
                )
                word_entry.translations.append(translation)

def extract_çeviri_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense: str,
    lang_name: str,
    source: str,
) -> None:
    """Build a translation from an expanded "ç"/"çeviri" template.

    The template is expanded and the result is inspected for:

    * the first top-level ``<span>`` whose ``lang`` attribute equals the
      template's first argument -> translated word;
    * every ``<abbr>`` at any depth -> raw tags (empty text skipped);
    * the first top-level ``<span>`` whose class is exactly "tr" or
      "tr Latn" -> romanization.

    The translation is appended to ``word_entry.translations`` only if a
    word was found.

    Args:
        wxr: Extraction context.
        word_entry: Entry receiving the translation.
        t_node: The template node to expand.
        sense: Sense (gloss) stored on the translation.
        lang_name: Language name; "unknown" is used if it is empty.
        source: Source value stored on the translation.
    """
    code_arg = t_node.template_parameters.get(1, "unknown")
    lang_code = clean_node(wxr, None, code_arg)
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)
    translation = Translation(
        word="",
        lang_code=lang_code,
        lang=lang_name or "unknown",
        sense=sense,
        source=source,
    )

    word_span = next(
        iter(
            expanded.find_html(
                "span", attr_name="lang", attr_value=lang_code
            )
        ),
        None,
    )
    if word_span is not None:
        translation.word = clean_node(wxr, None, word_span)

    abbr_texts = (
        clean_node(wxr, None, abbr)
        for abbr in expanded.find_html_recursively("abbr")
    )
    translation.raw_tags.extend(text for text in abbr_texts if text != "")

    roman_span = next(
        (
            span
            for span in expanded.find_html("span")
            if span.attrs.get("class", "") in ("tr", "tr Latn")
        ),
        None,
    )
    if roman_span is not None:
        translation.roman = clean_node(wxr, None, roman_span)

    if translation.word != "":
        translate_raw_tags(translation)
        word_entry.translations.append(translation)

    # Runs regardless of whether a word was found; this is the only
    # clean_node call here that is given the entry itself.
    # NOTE(review): presumably this collects categories from the
    # expansion onto word_entry - confirm against clean_node.
    clean_node(wxr, word_entry, expanded)

137

138

def extract_trans_see_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Follow a translation-redirect template to the pages it names.

    The first template argument is the sense.  Arguments 2 to 10, read
    until the first missing one, are the titles of the pages holding the
    translations; when argument 2 is absent the first argument doubles
    as the page title.  For every title the page body is loaded, its
    "Çeviriler" section is located and the translations in it are
    extracted into ``word_entry`` with ``source`` set to the page title.

    A title whose page cannot be loaded is skipped, so one missing page
    does not discard the translations of the titles listed after it.

    Args:
        wxr: Extraction context.
        word_entry: Entry receiving the translations.
        t_node: The redirect template node.
    """
    # https://tr.wiktionary.org/wiki/Şablon:çeviri_yönlendirme
    params = t_node.template_parameters
    sense = clean_node(wxr, None, params.get(1, ""))
    page_titles = []
    if 2 in params:
        for index in range(2, 11):
            if index not in params:
                break
            page_titles.append(clean_node(wxr, None, params[index]))
    else:
        # No explicit target: the sense text is also the page title.
        page_titles.append(sense)

    for page_title in page_titles:
        # Drop a "#section" anchor; only the page name is looked up.
        if "#" in page_title:
            page_title = page_title[: page_title.index("#")]
        page_body = wxr.wtp.get_page_body(page_title, 0)
        if page_body is None:
            # Previously this returned, silently skipping every
            # remaining title after the first missing page.
            continue
        root = wxr.wtp.parse(page_body)
        target_node = find_subpage_section(wxr, root, "Çeviriler")
        if target_node is not None:
            extract_translation_section(
                wxr,
                word_entry,
                target_node,
                sense=sense,
                from_trans_see=True,
                source=page_title,
            )

173

174

def find_subpage_section(
    wxr: WiktextractContext, root: WikiNode, target_section: str
) -> WikiNode | None:
    """Return the first section under ``root`` titled ``target_section``.

    Level nodes are searched recursively.  A node matches when the
    cleaned text of its ``largs`` equals ``target_section`` exactly.
    ``None`` is returned when no section matches.
    """
    matching_sections = (
        section
        for section in root.find_child_recursively(LEVEL_KIND_FLAGS)
        if clean_node(wxr, None, section.largs) == target_section
    )
    return next(matching_sections, None)

Coverage for src/wiktextract/extractor/tr/translation.py: 64%

76 statements