Coverage for src/wiktextract/extractor/ko/example.py: 95%

82 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from wikitextprocessor import NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..ruby import extract_ruby 

6from ..share import set_sound_file_url_fields 

7from .models import Example, Sense, Sound 

8 

9 

def extract_example_list_item(
    wxr: WiktextractContext,
    sense: Sense,
    list_item: WikiNode,
    lang_code: str,
    parent_example: Example | None = None,
) -> None:
    """Build example data from one list item and append it to ``sense``.

    The children of ``list_item`` are scanned in order:

    * a ``lang`` template is handled by ``extract_example_lang_template``;
      every later child that is not one of the templates below is collected
      as translation content,
    * templates whose name starts with "따옴" or "지봉유설" supply the
      example reference,
    * "예문", "ux" and "uxi" templates are handled by
      ``extract_ux_template`` and end the scan,
    * before any ``lang`` template, a nested list ends the scan and a
      ``File:`` link is turned into a sound entry,
    * anything else is collected as example text.

    An example is appended to ``sense.examples`` only when it has text.
    For ``lang_code == "zh"`` a text containing "/" is split once and
    stored as two copies tagged "Traditional Chinese" and
    "Simplified Chinese".  Nested lists are then processed recursively.

    ``parent_example``, when given, is filled in place instead of a new
    ``Example``.
    """
    example = parent_example if parent_example is not None else Example()
    text_nodes = []
    translation_nodes = []
    seen_lang_template = False

    for child in list_item.children:
        if isinstance(child, TemplateNode):
            t_name = child.template_name
            if t_name == "lang":
                seen_lang_template = True
                extract_example_lang_template(wxr, example, child, lang_code)
                continue
            if t_name.startswith(("따옴", "지봉유설")):
                ref_text = clean_node(wxr, None, child)
                example.ref = ref_text.strip("() ").removeprefix("따옴◄")
                continue
            if t_name in ("예문", "ux", "uxi"):
                extract_ux_template(wxr, sense, example, child)
                break
            # Any other template falls through to the generic handling.

        if seen_lang_template:
            translation_nodes.append(child)
            continue

        if not isinstance(child, WikiNode):
            text_nodes.append(child)
            continue

        if child.kind == NodeKind.LIST:
            # Nested lists are handled by the recursive loop further down.
            break

        is_file_link = (
            child.kind == NodeKind.LINK
            and len(child.largs) > 0
            and len(child.largs[0]) > 0
            and isinstance(child.largs[0][0], str)
            and child.largs[0][0].startswith("File:")
        )
        if is_file_link:
            sound = Sound()
            file_name = child.largs[0][0].removeprefix("File:").strip()
            set_sound_file_url_fields(wxr, file_name, sound)
            if sound.audio != "":
                example.sounds.append(sound)
        else:
            text_nodes.append(child)

    # Collected nodes only overwrite values set by a template when they
    # actually produce some text.
    cleaned_text = clean_node(wxr, sense, text_nodes)
    if cleaned_text != "":
        example.text = cleaned_text
    cleaned_translation = clean_node(wxr, sense, translation_nodes)
    if cleaned_translation != "":
        example.translation = cleaned_translation

    if example.text != "":
        if lang_code == "zh" and "/" in example.text:
            # The single split always yields exactly two parts here.
            variant_tags = ("Traditional Chinese", "Simplified Chinese")
            variant_texts = example.text.split("/", 1)
            for variant_text, variant_tag in zip(variant_texts, variant_tags):
                variant = example.model_copy(deep=True)
                variant.text = variant_text
                variant.tags.append(variant_tag)
                sense.examples.append(variant)
        else:
            sense.examples.append(example)

    for sub_list in list_item.find_child(NodeKind.LIST):
        for sub_item in sub_list.find_child(NodeKind.LIST_ITEM):
            # Keep filling the current example while it still has no text;
            # otherwise the nested item starts a fresh one.
            next_example = example if example.text == "" else Example()
            extract_example_list_item(
                wxr, sense, sub_item, lang_code, next_example
            )

88 

89 

def extract_example_lang_template(
    wxr: WiktextractContext,
    example: Example,
    node: TemplateNode,
    lang_code: str,
) -> None:
    """Fill ``example`` from a ``lang`` template node.

    Template argument 2 becomes the example text and argument 4 the
    translation.  For ``lang_code == "ja"`` argument 2 is re-parsed with
    full expansion so ruby data can be separated from the text by
    ``extract_ruby``.  For ``lang_code == "zh"`` a text that ends with ")"
    is cut at its first "(": the part before it stays as the text and the
    rest is stored as the romanization.
    """
    # https://ko.wiktionary.org/wiki/틀:lang
    params = node.template_parameters
    text_arg = params.get(2, "")

    if lang_code == "ja":
        parsed_text = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(text_arg), expand_all=True
        )
        example.ruby, plain_nodes = extract_ruby(wxr, parsed_text.children)
        example.text = clean_node(wxr, None, plain_nodes)
    else:
        example.text = clean_node(wxr, None, text_arg)

    example.translation = clean_node(wxr, None, params.get(4, ""))

    has_trailing_paren = "(" in example.text and example.text.endswith(")")
    if lang_code == "zh" and has_trailing_paren:
        before_paren, _, after_paren = example.text.partition("(")
        example.roman = after_paren.strip("() ")
        example.text = before_paren.strip()

117 

118 

def extract_ux_template(
    wxr: WiktextractContext,
    sense: Sense,
    example: Example,
    t_node: TemplateNode,
) -> None:
    """Fill ``example`` from an "예문"/"ux"/"uxi" style template node.

    The template is expanded once.  When its first argument is "ja", the
    text and ruby come from the expanded ``span`` with class "Jpan", the
    romanization from the ``span`` with class "tr", the translation from
    argument 4 and the literal meaning from ``lit``.  For every other
    language, arguments 2 and 3 are the text and translation and
    ``footer`` is the note.  An already-set ``example.ref`` is never
    overwritten.  Finally, the top-level link nodes of the expansion are
    passed through ``clean_node`` together with ``sense``.
    """
    # https://ko.wiktionary.org/wiki/틀:ux
    # https://ko.wiktionary.org/wiki/모듈:usex/templates
    params = t_node.template_parameters
    lang_code = params.get(1, "")
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )

    if lang_code == "ja":
        for span_tag in expanded_node.find_html_recursively("span"):
            span_class = span_tag.attrs.get("class", "")
            if span_class == "Jpan":
                example.ruby, plain_nodes = extract_ruby(wxr, span_tag)
                example.text = clean_node(wxr, None, plain_nodes)
            elif span_class == "tr":
                example.roman = clean_node(wxr, None, span_tag)
        example.translation = clean_node(wxr, None, params.get(4, ""))
        example.literal_meaning = clean_node(
            wxr, None, params.get("lit", "")
        )
        if example.ref == "":
            example.ref = clean_node(wxr, None, params.get("ref", ""))
    else:
        example.text = clean_node(wxr, None, params.get(2, ""))
        example.translation = clean_node(wxr, None, params.get(3, ""))
        example.note = clean_node(wxr, None, params.get("footer", ""))
        # Try the Korean parameter name first, then the English one.
        for ref_key in ("출처", "source"):
            if example.ref == "":
                example.ref = clean_node(wxr, None, params.get(ref_key, ""))

    # The result is discarded; the call is made only for what clean_node
    # does with ``sense`` (presumably collecting categories - confirm).
    for link_node in expanded_node.find_child(NodeKind.LINK):
        clean_node(wxr, sense, link_node)