Coverage for src/wiktextract/extractor/id/example.py: 41%

1from wikitextprocessor import NodeKind, TemplateNode, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from ..share import calculate_bold_offsets

6from .linkage import extract_syn_template

7from .models import Example, Sense, WordEntry

def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sense: Sense,
    list_item: WikiNode,
) -> None:
    """Collect example and linkage data from the children of one list item.

    Children of ``list_item`` are handled in order:

    * ``ux`` / ``uxi`` templates are passed to ``extract_ux_template``,
      which appends to ``sense.examples``.
    * Synonym templates (``sinonim``, ``syn``, ``synonyms``,
      ``synonym of``, ``sinonim dari``) and the ``antonim`` template are
      passed to ``extract_syn_template`` together with ``word_entry`` and
      the linkage field name ``"synonyms"`` or ``"antonyms"``.
    * An italic node is remembered as pending example text.  When a list
      node follows it, one ``Example`` is appended to ``sense.examples``
      per child list item, using the italic text as ``text`` and the
      child list item's content as ``translation``.
    * An italic node that is still pending after the loop becomes an
      ``Example`` without translation.

    Examples whose cleaned text is empty are never appended.

    Args:
        wxr: Context passed through to ``clean_node`` and the other
            helpers.
        word_entry: Entry handed to ``extract_syn_template``.
        sense: Sense whose ``examples`` list receives the results.
        list_item: Node whose ``children`` are scanned.
    """
    italic_node = None
    for node in list_item.children:
        # TemplateNode is tested before the generic WikiNode check, in the
        # same order as the original code.
        if isinstance(node, TemplateNode):
            if node.template_name in ("ux", "uxi"):
                extract_ux_template(wxr, sense, node)
            elif node.template_name in (
                "sinonim",
                "syn",
                "synonyms",
                "synonym of",
                "sinonim dari",
            ):
                extract_syn_template(wxr, word_entry, node, "synonyms")
            elif node.template_name == "antonim":
                extract_syn_template(wxr, word_entry, node, "antonyms")
        elif isinstance(node, WikiNode):
            if node.kind == NodeKind.ITALIC:
                # Keep the node itself: bold offsets are computed from it.
                italic_node = node
            elif node.kind == NodeKind.LIST and italic_node is not None:
                italic_text = clean_node(wxr, None, italic_node)
                if italic_text == "":
                    continue
                for child_list_item in node.find_child(NodeKind.LIST_ITEM):
                    e_data = Example(
                        text=italic_text,
                        translation=clean_node(
                            wxr, sense, child_list_item.children
                        ),
                    )
                    calculate_bold_offsets(
                        wxr,
                        italic_node,
                        e_data.text,
                        e_data,
                        "bold_text_offsets",
                    )
                    calculate_bold_offsets(
                        wxr,
                        child_list_item,
                        e_data.translation,
                        e_data,
                        "bold_translation_offsets",
                    )
                    sense.examples.append(e_data)
                # The italic text has been consumed by this list.
                italic_node = None

    if italic_node is not None:
        italic_text = clean_node(wxr, None, italic_node)
        # An italic node whose text cleans to "" can still be pending here
        # (the in-loop `continue` does not reset it); skip it so that no
        # empty example is stored, matching the guard in
        # extract_ux_template.
        if italic_text != "":
            e_data = Example(text=italic_text)
            calculate_bold_offsets(
                wxr, italic_node, e_data.text, e_data, "bold_text_offsets"
            )
            sense.examples.append(e_data)

def extract_ux_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Build one ``Example`` from an expanded usage-example template.

    The template is converted back to wikitext, parsed with
    ``expand_all=True``, and the resulting tree is searched for ``<i>``
    and ``<span>`` tags.  The tag's ``class`` attribute selects which
    ``Example`` field receives the cleaned tag text:

    * ``<i>``: ``e-example`` -> ``text``, ``e-transliteration`` -> ``roman``
    * ``<span>``: ``e-translation`` -> ``translation``, ``e-literally`` ->
      ``literal_meaning``, ``qualifier-content`` -> appended to
      ``raw_tags`` when non-empty

    ``ref`` is read from the template's ``ref`` parameter.  The example is
    appended to ``sense.examples`` only when its text is non-empty; in
    that case the direct link children of the expanded tree are also run
    through ``clean_node`` with ``sense``.

    Args:
        wxr: Context providing ``wtp`` and passed to the helpers.
        sense: Sense whose ``examples`` list may receive the result.
        t_node: The template node to expand.
    """
    # (class marker, Example field, bold-offsets field); first match wins.
    italic_targets = (
        ("e-example", "text", "bold_text_offsets"),
        ("e-transliteration", "roman", "bold_roman_offsets"),
    )
    span_targets = (
        ("e-translation", "translation", "bold_translation_offsets"),
        ("e-literally", "literal_meaning", "bold_literal_offsets"),
    )

    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    root = wxr.wtp.parse(template_wikitext, expand_all=True)
    example = Example(text="")

    def fill_field(tag, css_class, targets) -> bool:
        """Store the tag text in the first field whose marker matches."""
        for marker, field, offsets_field in targets:
            if marker in css_class:
                setattr(example, field, clean_node(wxr, None, tag))
                calculate_bold_offsets(
                    wxr, tag, getattr(example, field), example, offsets_field
                )
                return True
        return False

    for italic_tag in root.find_html_recursively("i"):
        fill_field(
            italic_tag, italic_tag.attrs.get("class", ""), italic_targets
        )

    for span_tag in root.find_html_recursively("span"):
        css_class = span_tag.attrs.get("class", "")
        if fill_field(span_tag, css_class, span_targets):
            continue
        if "qualifier-content" in css_class:
            label = clean_node(wxr, None, span_tag)
            if label != "":
                example.raw_tags.append(label)

    ref_value = t_node.template_parameters.get("ref", "")
    example.ref = clean_node(wxr, None, ref_value)

    # Nothing is stored when the template produced no example text.
    if example.text == "":
        return
    sense.examples.append(example)
    for link_node in root.find_child(NodeKind.LINK):
        clean_node(wxr, sense, link_node)