Coverage for src/wiktextract/extractor/id/example.py: 41%

61 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1from wikitextprocessor import NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import calculate_bold_offsets 

6from .linkage import extract_syn_template 

7from .models import Example, Sense, WordEntry 

8 

9 

def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sense: Sense,
    list_item: WikiNode,
) -> None:
    """Extract example (and inline linkage) data from one example list item.

    The children of ``list_item`` are handled as follows:

    * ``ux``/``uxi`` templates are passed to ``extract_ux_template``.
    * Synonym templates are passed to ``extract_syn_template`` with the
      ``"synonyms"`` field; the ``antonim`` template with ``"antonyms"``.
    * An italic node is remembered as the example text.  If a nested list
      follows it, one ``Example`` is appended to ``sense.examples`` per
      nested list item, using that item's content as the translation.
    * An italic node still pending after the loop is appended as an example
      without translation.

    Examples whose cleaned text is empty are never appended.
    """
    italic_node = None
    for node in list_item.children:
        if isinstance(node, TemplateNode):
            if node.template_name in ("ux", "uxi"):
                extract_ux_template(wxr, sense, node)
            elif node.template_name in (
                "sinonim",
                "syn",
                "synonyms",
                "synonym of",
                "sinonim dari",
            ):
                extract_syn_template(wxr, word_entry, node, "synonyms")
            elif node.template_name == "antonim":
                extract_syn_template(wxr, word_entry, node, "antonyms")
        elif isinstance(node, WikiNode):
            if node.kind == NodeKind.ITALIC:
                # A later italic node replaces an earlier one that has not
                # been consumed by a nested list yet.
                italic_node = node
            elif node.kind == NodeKind.LIST and italic_node is not None:
                italic_text = clean_node(wxr, None, italic_node)
                if italic_text == "":
                    continue
                for child_list_item in node.find_child(NodeKind.LIST_ITEM):
                    e_data = Example(
                        text=italic_text,
                        translation=clean_node(
                            wxr, sense, child_list_item.children
                        ),
                    )
                    calculate_bold_offsets(
                        wxr,
                        italic_node,
                        e_data.text,
                        e_data,
                        "bold_text_offsets",
                    )
                    calculate_bold_offsets(
                        wxr,
                        child_list_item,
                        e_data.translation,
                        e_data,
                        "bold_translation_offsets",
                    )
                    sense.examples.append(e_data)
                # The italic text has been consumed by the nested list.
                italic_node = None

    if italic_node is not None:
        e_data = Example(text=clean_node(wxr, None, italic_node))
        # Skip empty examples, matching the guard in extract_ux_template;
        # an italic node that cleans to "" would otherwise add a blank
        # example (it stays pending after the `continue` above).
        if e_data.text != "":
            calculate_bold_offsets(
                wxr, italic_node, e_data.text, e_data, "bold_text_offsets"
            )
            sense.examples.append(e_data)

68 

69 

def extract_ux_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Build an ``Example`` from a usage-example template node.

    The template is converted back to wikitext and re-parsed with full
    expansion.  ``<i>`` and ``<span>`` tags in the expanded tree are matched
    by substrings of their ``class`` attribute to fill the example text,
    romanization, translation, literal meaning and raw tags.  The ``ref``
    template parameter is stored as the reference.  The example is appended
    to ``sense.examples`` only when its text is non-empty.  Finally, every
    direct child link of the expanded tree is cleaned with ``sense`` passed
    as the data target.
    """
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    root = wxr.wtp.parse(template_wikitext, expand_all=True)
    example = Example(text="")

    def store(tag, field: str, offsets_field: str) -> None:
        # Save the cleaned tag text in `field`, then record the bold
        # offsets for the stored value in `offsets_field`.
        setattr(example, field, clean_node(wxr, None, tag))
        calculate_bold_offsets(
            wxr, tag, getattr(example, field), example, offsets_field
        )

    for italic_tag in root.find_html_recursively("i"):
        css_class = italic_tag.attrs.get("class", "")
        if "e-example" in css_class:
            store(italic_tag, "text", "bold_text_offsets")
        elif "e-transliteration" in css_class:
            store(italic_tag, "roman", "bold_roman_offsets")

    for span in root.find_html_recursively("span"):
        css_class = span.attrs.get("class", "")
        if "e-translation" in css_class:
            store(span, "translation", "bold_translation_offsets")
        elif "e-literally" in css_class:
            store(span, "literal_meaning", "bold_literal_offsets")
        elif "qualifier-content" in css_class:
            qualifier = clean_node(wxr, None, span)
            if qualifier != "":
                example.raw_tags.append(qualifier)

    ref_arg = t_node.template_parameters.get("ref", "")
    example.ref = clean_node(wxr, None, ref_arg)
    if example.text != "":
        sense.examples.append(example)

    # Clean top-level links with `sense` as the data target.
    for link in root.find_child(NodeKind.LINK):
        clean_node(wxr, sense, link)