Coverage for src/wiktextract/extractor/vi/example.py: 50%

82 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-12 08:27 +0000

1from wikitextprocessor import NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..ruby import extract_ruby 

6from ..share import calculate_bold_offsets 

7from .linkage import ( 

8 GLOSS_LIST_LINKAGE_TEMPLATES, 

9 extract_gloss_list_linkage_template, 

10) 

11from .models import Example, Sense, WordEntry 

12from .tags import translate_raw_tags 

13 

14 

def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sense: Sense,
    list_item: WikiNode,
    ref: str = "",
):
    """Walk one list item and record the usage examples it contains.

    Three kinds of children are handled:

    * an italic node that contains bold text is treated as the example
      sentence, and everything after it in the list item becomes the
      translation; scanning of this list item stops there;
    * a template node is dispatched by name to the usage-example, quotation
      or gloss-list linkage extractor;
    * a nested list is processed recursively, item by item.

    Args:
        wxr: Extraction context passed through to the helpers.
        word_entry: Entry that receives linkage data from gloss-list
            linkage templates.
        sense: Sense whose ``examples`` list is appended to.
        list_item: The list item node to scan.
        ref: Reference text forwarded to nested list items. It is replaced
            by the return value of ``extract_quote_template`` when a
            quotation template is met; this function does not otherwise
            read it.
    """
    ux_template_names = ("ux", "usex", "ux2", "uxi")

    for position, child in enumerate(list_item.children):
        is_wiki_node = isinstance(child, WikiNode)

        # Plain wikitext example: ''sentence with '''headword''''' — translation
        if (
            is_wiki_node
            and child.kind == NodeKind.ITALIC
            and child.contain_node(NodeKind.BOLD)
        ):
            example_text = clean_node(wxr, None, child)
            if example_text == "":
                continue
            example = Example(text=example_text)
            calculate_bold_offsets(
                wxr, child, example_text, example, "bold_text_offsets"
            )
            trailing_nodes = list_item.children[position + 1 :]
            translation = clean_node(wxr, None, trailing_nodes)
            # Drop the dash separator and padding around the translation.
            example.translation = translation.strip("—- \n")
            sense.examples.append(example)
            break

        if isinstance(child, TemplateNode):
            name = child.template_name
            if name in ux_template_names:
                extract_ux_template(wxr, sense, child)
            elif name.startswith(("quote-", "RQ:")):
                ref = extract_quote_template(wxr, sense, child)
            elif name in GLOSS_LIST_LINKAGE_TEMPLATES:
                # Linkage is tied to the gloss of the most recent sense,
                # when the entry has one.
                if len(word_entry.senses) > 0:
                    gloss_text = " ".join(word_entry.senses[-1].glosses)
                else:
                    gloss_text = ""
                extract_gloss_list_linkage_template(
                    wxr,
                    word_entry,
                    child,
                    GLOSS_LIST_LINKAGE_TEMPLATES[name],
                    gloss_text,
                )
            continue

        if is_wiki_node and child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_example_list_item(
                    wxr, word_entry, sense, nested_item, ref
                )

59 

60 

def extract_ux_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
):
    """Expand a usage-example template and store the result on ``sense``.

    The template is expanded and re-parsed, then its HTML is inspected:
    ``<i>`` tags supply the example text and transliteration, ``<span>``
    tags supply the translation, literal meaning and qualifier raw tags,
    chosen by substring match on the tag's ``class`` attribute. The
    example is appended to ``sense.examples`` only when its text is not
    empty. Top-level links of the expansion are always passed to
    ``clean_node`` together with ``sense``.

    Args:
        wxr: Extraction context used for expansion and cleaning.
        sense: Sense that receives the example.
        t_node: The usage-example template node.
    """
    # (class marker, Example field, bold-offsets field), checked in order.
    italic_fields = (
        ("e-example", "text", "bold_text_offsets"),
        ("e-transliteration", "roman", "bold_roman_offsets"),
    )
    span_fields = (
        ("e-translation", "translation", "bold_translation_offsets"),
        ("e-literally", "literal_meaning", "bold_literal_offsets"),
    )

    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    root = wxr.wtp.parse(template_wikitext, expand_all=True)
    example = Example(text="")

    for i_tag in root.find_html_recursively("i"):
        css_class = i_tag.attrs.get("class", "")
        for marker, field, offsets_field in italic_fields:
            if marker in css_class:
                setattr(example, field, clean_node(wxr, None, i_tag))
                calculate_bold_offsets(
                    wxr, i_tag, getattr(example, field), example, offsets_field
                )
                break

    for span_tag in root.find_html_recursively("span"):
        css_class = span_tag.attrs.get("class", "")
        for marker, field, offsets_field in span_fields:
            if marker in css_class:
                setattr(example, field, clean_node(wxr, None, span_tag))
                calculate_bold_offsets(
                    wxr,
                    span_tag,
                    getattr(example, field),
                    example,
                    offsets_field,
                )
                break
        else:
            # Not a translation/literal span: it may be a qualifier label.
            if "qualifier-content" in css_class:
                qualifier = clean_node(wxr, None, span_tag)
                if qualifier != "":
                    example.raw_tags.append(qualifier)

    ref_arg = t_node.template_parameters.get("ref", "")
    example.ref = clean_node(wxr, None, ref_arg)

    if example.text != "":
        translate_raw_tags(example)
        sense.examples.append(example)

    # Cleaning the links with ``sense`` as target runs regardless of
    # whether an example was stored.
    for link_node in root.find_child(NodeKind.LINK):
        clean_node(wxr, sense, link_node)

113 

114 

def extract_quote_template(
    wxr: WiktextractContext,
    sense: Sense,
    t_node: TemplateNode,
) -> str:
    """Extract a quotation template (``quote-*`` / ``RQ:*``) into ``sense``.

    When the template has none of the ``text``, ``passage`` or positional
    ``7`` parameters, it is treated as a bare citation: the cleaned
    template text is returned as the reference and no example is created.

    Otherwise the template is expanded and its HTML is inspected:

    * ``<span class="cited-source">`` (exact class match) gives the
      example's ``ref``;
    * a span whose class contains ``e-quotation`` gives the ruby data and
      the example text;
    * a span whose class contains ``e-translation`` gives the translation;
    * the first ``<i class="e-transliteration">`` gives the romanization.

    The example is appended to ``sense.examples`` only when its text is
    not empty, and the whole expansion is passed to ``clean_node`` with
    ``sense``.

    Args:
        wxr: Extraction context used for expansion and cleaning.
        sense: Sense that receives the example.
        t_node: The quotation template node.

    Returns:
        The cleaned citation text for a citation-only template, otherwise
        an empty string.
    """
    has_quoted_text = any(
        arg in t_node.template_parameters for arg in ("text", "passage", 7)
    )
    if not has_quoted_text:
        # Nothing quoted: the template output itself is the reference.
        return clean_node(wxr, sense, t_node)

    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    example = Example(text="")
    for span_tag in expanded_node.find_html_recursively("span"):
        span_class = span_tag.attrs.get("class", "")
        if span_class == "cited-source":
            example.ref = clean_node(wxr, None, span_tag)
        elif "e-quotation" in span_class:
            example.ruby, node_without_ruby = extract_ruby(wxr, span_tag)
            example.text = clean_node(wxr, None, node_without_ruby)
            calculate_bold_offsets(
                wxr, span_tag, example.text, example, "bold_text_offsets"
            )
        elif "e-translation" in span_class:
            example.translation = clean_node(wxr, None, span_tag)
            # Field name fixed: it was "bold_translation_text", which does
            # not follow the "bold_*_offsets" naming used by every other
            # call, including the translation branch of
            # extract_ux_template.
            calculate_bold_offsets(
                wxr,
                span_tag,
                example.translation,
                example,
                "bold_translation_offsets",
            )

    # Only the first transliteration tag is used.
    for i_tag in expanded_node.find_html_recursively(
        "i", attr_name="class", attr_value="e-transliteration"
    ):
        example.roman = clean_node(wxr, None, i_tag)
        calculate_bold_offsets(
            wxr, i_tag, example.roman, example, "bold_roman_offsets"
        )
        break

    if example.text != "":
        sense.examples.append(example)
    clean_node(wxr, sense, expanded_node)

    return ""