Coverage for src/wiktextract/extractor/id/translation.py: 78%

1from mediawiki_langcodes import name_to_code

2from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

4from ...page import clean_node

5from ...wxr_context import WiktextractContext

6from .models import Translation, WordEntry

7from .tags import translate_raw_tags

def extract_translation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Collect translations from the children of a translation section.

    The direct children of ``level_node`` are visited in order.  A
    "trans-top", "kotak mulai" or "kotak awal" template sets the current
    sense to the cleaned text of its first positional argument.  Every
    list item of a following list node is passed to
    ``extract_translation_list_item`` together with that sense.
    """
    box_start_templates = ("trans-top", "kotak mulai", "kotak awal")
    current_sense = ""
    for child in level_node.children:
        starts_box = (
            isinstance(child, TemplateNode)
            and child.template_name in box_start_templates
        )
        if starts_box:
            # The first positional argument carries the gloss of the box.
            gloss_arg = child.template_parameters.get(1, "")
            current_sense = clean_node(wxr, None, gloss_arg)
            continue
        if not isinstance(child, WikiNode) or child.kind != NodeKind.LIST:
            continue
        for item in child.find_child(NodeKind.LIST_ITEM):
            extract_translation_list_item(
                wxr, word_entry, item, current_sense
            )

def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
) -> None:
    """Extract the translations found in one list item.

    The language name is everything in the list item up to the first ":"
    found in a plain-text child, with a leading "bahasa " removed; it is
    looked up with ``name_to_code(..., "id")``.  Both name and code fall
    back to "unknown" when they come out empty.  After that:

    * translation templates are delegated to ``extract_t_template``;
    * qualifier-like templates are delegated to
      ``extract_qualifier_template``;
    * a link node, once a language name is known, becomes a
      ``Translation`` appended to ``word_entry.translations``;
    * a nested list is processed recursively with the same ``sense``.
    """
    translation_templates = ("t", "t+", "trad-", "trad+", "t-simple")
    qualifier_templates = ("qualifier", "q", "qual", "f", "n", "p")

    lang_name = "unknown"
    lang_code = "unknown"
    for position, child in enumerate(list_item.children):
        if (
            isinstance(child, str)
            and lang_name == "unknown"
            and ":" in child
        ):
            # Name = cleaned nodes before this text + the text before ":".
            leading_text = clean_node(
                wxr, None, list_item.children[:position]
            )
            before_colon = child.partition(":")[0].strip()
            candidate = leading_text + before_colon
            candidate = candidate.removeprefix("bahasa ").strip()
            lang_name = "unknown" if candidate == "" else candidate
            if lang_name != "unknown":
                found_code = name_to_code(lang_name, "id")
                lang_code = "unknown" if found_code == "" else found_code
        elif (
            isinstance(child, TemplateNode)
            and child.template_name in translation_templates
        ):
            extract_t_template(wxr, word_entry, child, lang_name, sense)
        elif (
            isinstance(child, TemplateNode)
            and child.template_name in qualifier_templates
        ):
            extract_qualifier_template(wxr, word_entry, child)
        elif isinstance(child, WikiNode):
            if child.kind == NodeKind.LINK and lang_name != "unknown":
                # A bare link after the language name is a translation.
                linked_word = clean_node(wxr, None, child)
                if linked_word != "":
                    new_translation = Translation(
                        word=linked_word,
                        lang=lang_name,
                        lang_code=lang_code,
                        sense=sense,
                    )
                    word_entry.translations.append(new_translation)
            elif child.kind == NodeKind.LIST:
                for nested_item in child.find_child(NodeKind.LIST_ITEM):
                    extract_translation_list_item(
                        wxr, word_entry, nested_item, sense
                    )

def extract_t_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    lang_name: str,
    sense: str,
) -> None:
    """Build a ``Translation`` from one translation template.

    The language code is the cleaned first positional argument
    ("unknown" when empty).  The template is expanded and re-parsed, then:

    * the first ``<span>`` whose ``lang`` attribute equals the language
      code supplies the translated word;
    * a ``<span>`` whose ``class`` is exactly "tr Latn" supplies the
      romanization;
    * the ``lit`` template argument supplies the literal meaning;
    * every ``<abbr>`` element contributes a raw tag.

    The translation is appended to ``word_entry.translations`` only when
    a word was found; in that case the direct link children of the
    expanded node are also cleaned against ``word_entry``.
    """
    lang_code = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    if lang_code == "":
        lang_code = "unknown"

    translation = Translation(
        word="", lang=lang_name, lang_code=lang_code, sense=sense
    )
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)

    for span in expanded.find_html_recursively("span"):
        word_missing = translation.word == ""
        if word_missing and span.attrs.get("lang") == lang_code:
            translation.word = clean_node(wxr, None, span)
        elif span.attrs.get("class", "") == "tr Latn":
            translation.roman = clean_node(wxr, None, span)

    literal_arg = t_node.template_parameters.get("lit", "")
    translation.lit = clean_node(wxr, None, literal_arg)

    translation.raw_tags.extend(
        clean_node(wxr, None, abbr)
        for abbr in expanded.find_html_recursively("abbr")
    )

    if translation.word == "":
        # No word was found in the expansion: record nothing.
        return

    translate_raw_tags(translation)
    word_entry.translations.append(translation)
    for link in expanded.find_child(NodeKind.LINK):
        clean_node(wxr, word_entry, link)

119

120

def extract_qualifier_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Attach a qualifier template's text to the latest translation.

    The cleaned template text is stripped of surrounding parentheses and
    spaces and split on commas.  Each non-empty piece is appended to the
    ``raw_tags`` of the last entry in ``word_entry.translations``, which
    is then passed to ``translate_raw_tags``.  Nothing is recorded when
    there is no translation yet.
    """
    qualifier_text = clean_node(wxr, None, t_node).strip("() ")
    if len(word_entry.translations) == 0:
        return

    latest = word_entry.translations[-1]
    for piece in qualifier_text.split(","):
        tag = piece.strip()
        if tag != "":
            latest.raw_tags.append(tag)
    translate_raw_tags(latest)