Coverage for src/wiktextract/extractor/tr/translation.py: 95%

50 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from mediawiki_langcodes import name_to_code 

2from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

3 

4from ...page import clean_node 

5from ...wxr_context import WiktextractContext 

6from .models import Translation, WordEntry 

7from .tags import translate_raw_tags 

8 

9 

def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Extract the translations listed under a translation section.

    The direct children of ``level_node`` are walked once, in document
    order. A ``üst`` / ``trans-top`` template (name compared
    case-insensitively) supplies a sense gloss from its first positional
    parameter. Every list met afterwards is extracted with the gloss that
    is current at that point, until another such template replaces it.
    Lists that come before any such template get an empty gloss.

    Args:
        wxr: Extraction context, forwarded to ``clean_node`` and to
            ``extract_translation_list_item``.
        word_entry: Entry forwarded to ``extract_translation_list_item``.
        level_node: Section node whose direct children are scanned.
    """
    sense = ""
    for node in level_node.children:
        if isinstance(node, TemplateNode):
            if node.template_name.lower() in ("üst", "trans-top"):
                # Track the gloss in document order so that each list is
                # paired with the template that precedes it, not with the
                # last template of the whole section.
                sense = clean_node(
                    wxr, None, node.template_parameters.get(1, "")
                )
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, list_item, sense
                )

23 

24 

def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
) -> None:
    """Extract the translations found in a single list item.

    The children of ``list_item`` are scanned in order:

    * The first string child containing ``:`` fixes the language name:
      everything in the item up to the last colon of that string is
      cleaned and stripped of colons and spaces.
    * ``ç`` / ``çeviri`` templates are handed to
      ``extract_çeviri_template`` with the language name known so far.
    * Nested lists are processed recursively, item by item, with the same
      ``sense``.
    * Link nodes that appear after the colon are recorded as translations
      whose language code is looked up from the language name.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        word_entry: Entry whose ``translations`` list is appended to.
        list_item: The list item node to scan.
        sense: Sense gloss attached to every translation produced here.
    """
    lang_name = "unknown"
    after_colon = False
    for index, node in enumerate(list_item.children):
        if isinstance(node, str) and ":" in node and lang_name == "unknown":
            # The language name may be spread over several nodes (for
            # example a link followed by ": "), so clean everything that
            # precedes the colon together.
            lang_name = clean_node(
                wxr,
                None,
                list_item.children[:index] + [node[: node.rindex(":")]],
            ).strip(": ")
            after_colon = True
        elif isinstance(node, TemplateNode) and node.template_name in [
            "ç",
            "çeviri",
        ]:
            extract_çeviri_template(wxr, word_entry, node, sense, lang_name)
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for child_list_item in node.find_child(NodeKind.LIST_ITEM):
                extract_translation_list_item(
                    wxr, word_entry, child_list_item, sense
                )
        elif (
            after_colon
            and isinstance(node, WikiNode)
            and node.kind == NodeKind.LINK
        ):
            # Links before the colon belong to the language name; only
            # links after it are translations.
            word = clean_node(wxr, None, node)
            if word != "":
                word_entry.translations.append(
                    Translation(
                        word=word,
                        lang=lang_name or "unknown",
                        lang_code=name_to_code(lang_name, "tr") or "unknown",
                        # Keep the gloss on link-based translations too,
                        # as the template-based path already does.
                        sense=sense,
                    )
                )

65 

66 

def extract_çeviri_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense: str,
    lang_name: str,
) -> None:
    """Build one translation from a ``ç`` / ``çeviri`` template node.

    The template is expanded and re-parsed, then the expansion is read:

    * the first top-level ``<span>`` whose ``lang`` attribute equals the
      template's first parameter gives the translated word;
    * every ``<abbr>`` anywhere in the expansion with non-empty text
      contributes a raw tag;
    * the first top-level ``<span>`` whose class is exactly ``tr`` or
      ``tr Latn`` gives the romanization.

    The translation is appended to ``word_entry.translations`` only when a
    word was found.

    Args:
        wxr: Extraction context used for expanding, parsing and cleaning.
        word_entry: Entry that receives the translation.
        t_node: The template node to expand.
        sense: Sense gloss stored on the translation.
        lang_name: Language name stored on the translation; an empty
            value is replaced by ``"unknown"``.
    """
    code = clean_node(wxr, None, t_node.template_parameters.get(1, "unknown"))
    template_text = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_text, expand_all=True)
    translation = Translation(
        word="",
        lang_code=code,
        lang=lang_name or "unknown",
        sense=sense,
    )

    # Only the first matching span is used for the word.
    word_span = next(
        iter(expanded.find_html("span", attr_name="lang", attr_value=code)),
        None,
    )
    if word_span is not None:
        translation.word = clean_node(wxr, None, word_span)

    abbr_texts = (
        clean_node(wxr, None, abbr)
        for abbr in expanded.find_html_recursively("abbr")
    )
    translation.raw_tags.extend(text for text in abbr_texts if text != "")

    # Only the first span carrying one of the two class values is used.
    roman_span = next(
        (
            span
            for span in expanded.find_html("span")
            if span.attrs.get("class", "") in ("tr", "tr Latn")
        ),
        None,
    )
    if roman_span is not None:
        translation.roman = clean_node(wxr, None, roman_span)

    if translation.word != "":
        translate_raw_tags(translation)
        word_entry.translations.append(translation)

    # Unlike the calls above (which pass None), this one cleans the whole
    # expansion with word_entry attached; it runs whether or not a word
    # was found.
    clean_node(wxr, word_entry, expanded)