Coverage for src/wiktextract/extractor/cs/translation.py: 93%

1from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from .models import Translation, WordEntry

6from .tags import translate_raw_tags

def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: WikiNode
):
    """Extract translations from each "Překlady" template in a section.

    Every list item yielded by the lists under ``level_node`` is numbered
    consecutively starting at 1; the numbering runs on across lists and
    also counts items that hold no matching template. That number is
    forwarded as the sense index of the translations found in the item.
    """
    list_items = (
        list_item
        for list_node in level_node.find_child(NodeKind.LIST)
        for list_item in list_node.find_child(NodeKind.LIST_ITEM)
    )
    for item_number, list_item in enumerate(list_items, start=1):
        for template in list_item.find_child(NodeKind.TEMPLATE):
            if template.template_name != "Překlady":
                continue
            # A template without any argument has nothing to extract.
            if len(template.template_parameters) == 0:
                continue
            extract_překlady_template(
                wxr, word_entry, template, item_number
            )

def extract_překlady_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense_index: int,
):
    """Expand one "Překlady" template and store its translations.

    The template is converted back to wikitext, re-parsed with full
    expansion, and the resulting HTML is scanned:

    * the text of the last ``<dfn>`` tag becomes the sense of every
      translation,
    * inside each ``<li>``, the first text child ending with ":" names
      the language,
    * each ``<span class="translation-item">`` yields one translation,
      with the language code read from its ``lang`` attribute,
    * each ``<abbr class="genus">`` adds its ``title`` as a raw tag to
      the most recently collected translation.

    The collected translations are appended to ``word_entry.translations``.
    """
    # https://cs.wiktionary.org/wiki/Šablona:Překlady
    template_text = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_text, expand_all=True)

    gloss = ""
    for dfn in expanded.find_html_recursively("dfn"):
        gloss = clean_node(wxr, None, dfn)

    found = []
    for li in expanded.find_html_recursively("li"):
        language = "unknown"
        for child in li.children:
            if isinstance(child, str):
                # Only the first "<name>:" text sets the language name.
                if language == "unknown":
                    text = child.strip()
                    if text.endswith(":"):
                        language = text.removesuffix(":") or "unknown"
                continue
            if not isinstance(child, HTMLNode):
                continue

            if child.tag == "span":
                css_classes = child.attrs.get("class", "").split()
                if "translation-item" not in css_classes:
                    continue
                word = clean_node(wxr, None, child)
                if word != "":
                    found.append(
                        Translation(
                            word=word,
                            lang=language,
                            lang_code=child.attrs.get("lang", "unknown"),
                            sense=gloss,
                            sense_index=sense_index,
                        )
                    )
            elif child.tag == "abbr":
                css_classes = child.attrs.get("class", "").split()
                if "genus" not in css_classes:
                    continue
                title = child.attrs.get("title", "")
                if title == "" or len(found) == 0:
                    continue
                # The abbreviation qualifies the translation before it.
                latest = found[-1]
                latest.raw_tags.append(title)
                translate_raw_tags(latest)

    word_entry.translations.extend(found)
    # Second pass over the whole expansion, this time with the entry
    # passed in; presumably so clean_node can record page-level data
    # such as categories on it (confirm against clean_node).
    clean_node(wxr, word_entry, expanded)

Coverage for src/wiktextract/extractor/cs/translation.py: 93%

36 statements