Coverage for src/wiktextract/extractor/cs/translation.py: 93%

36 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-17 08:19 +0000

1from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from .models import Translation, WordEntry 

6from .tags import translate_raw_tags 

7 

8 

def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: WikiNode
):
    """Process every "Překlady" template found in a translation section.

    Walks the list nodes that are children of ``level_node`` and numbers
    their list items starting at 1. The counter is shared by all lists in
    the section, so it keeps growing from one list to the next. Each
    "Překlady" template with at least one parameter is passed to
    ``extract_překlady_template`` together with the number of the list
    item that contains it.
    """
    item_number = 0
    for translation_list in level_node.find_child(NodeKind.LIST):
        for item in translation_list.find_child(NodeKind.LIST_ITEM):
            item_number += 1
            for template in item.find_child(NodeKind.TEMPLATE):
                if template.template_name != "Překlady":
                    continue
                # A template without parameters is skipped.
                if len(template.template_parameters) == 0:
                    continue
                extract_překlady_template(
                    wxr, word_entry, template, item_number
                )

24 

25 

def extract_překlady_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense_index: int,
):
    """Expand one "Překlady" template and store its translations.

    The template is converted back to wikitext, re-parsed with full
    expansion, and the resulting HTML is scanned:

    * the text of the last ``<dfn>`` tag becomes the sense gloss,
    * inside each ``<li>``, the first text node ending in ":" names the
      language,
    * each ``<span class="translation-item">`` yields one ``Translation``,
    * each ``<abbr class="genus">`` adds its ``title`` as a raw tag to the
      most recently collected translation.

    The collected translations are appended to ``word_entry.translations``.
    """
    # https://cs.wiktionary.org/wiki/Šablona:Překlady
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_node = wxr.wtp.parse(template_wikitext, expand_all=True)

    # If several <dfn> tags exist, the last one wins.
    sense = ""
    for dfn_tag in expanded_node.find_html_recursively("dfn"):
        sense = clean_node(wxr, None, dfn_tag)

    translations = []
    for li_tag in expanded_node.find_html_recursively("li"):
        lang_name = "unknown"
        for child in li_tag.children:
            if isinstance(child, str):
                # Only the first "<name>:" text in the item sets the language.
                if lang_name == "unknown" and child.strip().endswith(":"):
                    lang_name = child.strip().removesuffix(":") or "unknown"
                continue
            if not isinstance(child, HTMLNode):
                continue

            if child.tag == "span":
                css_classes = child.attrs.get("class", "").split()
                if "translation-item" not in css_classes:
                    continue
                word = clean_node(wxr, None, child)
                if word == "":
                    continue
                translation = Translation(
                    word=word,
                    lang=lang_name,
                    lang_code=child.attrs.get("lang", "unknown"),
                    sense=sense,
                    sense_index=sense_index,
                )
                translations.append(translation)
            elif child.tag == "abbr":
                css_classes = child.attrs.get("class", "").split()
                if "genus" not in css_classes:
                    continue
                raw_tag = child.attrs.get("title", "")
                if raw_tag == "" or len(translations) == 0:
                    continue
                # The marker belongs to the translation collected just before it.
                latest = translations[-1]
                latest.raw_tags.append(raw_tag)
                translate_raw_tags(latest)

    word_entry.translations.extend(translations)
    # NOTE(review): presumably run so clean_node can record page-level data
    # (e.g. categories) on word_entry; confirm against clean_node.
    clean_node(wxr, word_entry, expanded_node)