Coverage for src/wiktextract/extractor/ko/example.py: 95%

67 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from wikitextprocessor import NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..ruby import extract_ruby 

6from .models import Example, Sense 

7 

8 

def extract_example_list_item(
    wxr: WiktextractContext,
    sense: Sense,
    list_item: WikiNode,
    lang_code: str,
    parent_example: Example | None = None,
) -> None:
    """Build an example from one list item and attach it to ``sense``.

    The children of ``list_item`` are scanned in order:

    * a ``lang`` template is handed to ``extract_example_lang_template``;
      nodes that follow it are appended to the example's translation,
    * a template whose name starts with ``따옴`` or ``지봉유설`` supplies
      the reference text,
    * a ``예문``/``ux``/``uxi`` template is handed to
      ``extract_ux_template`` and ends the scan,
    * a nested list node ends the scan,
    * anything else is appended to the example text.

    An example with non-empty text is appended to ``sense.examples``.  For
    ``lang_code == "zh"`` a text containing ``/`` is split once and stored as
    two copies tagged "Traditional Chinese" and "Simplified Chinese".

    Nested list items are then processed recursively with the *same*
    ``Example`` object, so data gathered here is carried into them.

    Args:
        wxr: Extraction context, passed through to ``clean_node`` and the
            template helpers.
        sense: Sense whose ``examples`` list receives the result.
        list_item: The list-item node to read.
        lang_code: Language code of the entry; only ``"zh"`` is special-cased
            in this function.
        parent_example: Example to keep filling instead of creating a new
            one (used for the recursive call on nested list items).
    """
    example = parent_example if parent_example is not None else Example()
    seen_lang_template = False

    for child in list_item.children:
        # None for every node that is not a template.
        template_name = (
            child.template_name if isinstance(child, TemplateNode) else None
        )

        if template_name == "lang":
            seen_lang_template = True
            extract_example_lang_template(wxr, example, child, lang_code)
            continue

        if template_name is not None and template_name.startswith(
            ("따옴", "지봉유설")
        ):
            cleaned_ref = clean_node(wxr, None, child)
            example.ref = cleaned_ref.strip("() ").removeprefix("따옴◄")
            continue

        if template_name in ("예문", "ux", "uxi"):
            extract_ux_template(wxr, sense, example, child)
            break

        # Tested before the nested-list check: once a `lang` template has
        # been seen, every node not matched above goes into the translation.
        if seen_lang_template:
            example.translation += clean_node(wxr, None, child)
            continue

        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            break

        example.text += clean_node(wxr, None, child)

    if example.text:
        if lang_code == "zh" and "/" in example.text:
            # "/" is present, so splitting once always yields two parts.
            script_tags = ("Traditional Chinese", "Simplified Chinese")
            text_variants = example.text.split("/", 1)
            for script_tag, variant_text in zip(script_tags, text_variants):
                variant = example.model_copy(deep=True)
                variant.text = variant_text
                variant.tags.append(script_tag)
                sense.examples.append(variant)
        else:
            sense.examples.append(example)

    for sub_list in list_item.find_child(NodeKind.LIST):
        for sub_item in sub_list.find_child(NodeKind.LIST_ITEM):
            extract_example_list_item(wxr, sense, sub_item, lang_code, example)

61 

62 

def extract_example_lang_template(
    wxr: WiktextractContext,
    example: Example,
    node: TemplateNode,
    lang_code: str,
) -> None:
    """Fill ``example`` from the arguments of a ``lang`` template.

    Argument 2 becomes the example text and argument 4 the translation.  For
    ``lang_code == "ja"`` argument 2 is expanded and passed through
    ``extract_ruby`` first, so ruby data is stored in ``example.ruby`` and
    only the remaining nodes become the text.  For ``lang_code == "zh"`` a
    text that contains ``(`` and ends with ``)`` is cut at the first ``(``:
    the tail is stored as the romanization, the head stays as the text.

    Args:
        wxr: Extraction context used for parsing and cleaning.
        example: Example object that is updated in place.
        node: The ``lang`` template node.
        lang_code: Language code of the entry.
    """
    # https://ko.wiktionary.org/wiki/틀:lang
    text_arg = node.template_parameters.get(2, "")
    if lang_code != "ja":
        example.text = clean_node(wxr, None, text_arg)
    else:
        expanded_arg = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(text_arg), expand_all=True
        )
        example.ruby, plain_nodes = extract_ruby(wxr, expanded_arg.children)
        example.text = clean_node(wxr, None, plain_nodes)

    translation_arg = node.template_parameters.get(4, "")
    example.translation = clean_node(wxr, None, translation_arg)

    has_trailing_parens = "(" in example.text and example.text.endswith(")")
    if lang_code == "zh" and has_trailing_parens:
        # Cut at the first "("; the separator itself would be stripped
        # anyway, so only the tail needs cleaning.
        before_paren, _, after_paren = example.text.partition("(")
        example.roman = after_paren.strip("() ")
        example.text = before_paren.strip()

90 

91 

def extract_ux_template(
    wxr: WiktextractContext,
    sense: Sense,
    example: Example,
    t_node: TemplateNode,
) -> None:
    """Fill ``example`` from a ``예문``/``ux``/``uxi`` usage-example template.

    When the first template argument is ``"ja"``, the text and ruby come from
    the expanded ``<span class="Jpan">`` element and the romanization from
    ``<span class="tr">``; translation, literal meaning and reference are
    read from arguments ``4``, ``lit`` and ``ref``.  Otherwise the text,
    translation and note are read from arguments ``2``, ``3`` and ``footer``,
    and the reference from ``출처``, falling back to ``source``.  An already
    set ``example.ref`` is never overwritten.

    Finally every top-level link node of the expanded template is cleaned
    with ``sense`` as the data target.

    Args:
        wxr: Extraction context used for parsing and cleaning.
        sense: Sense passed to ``clean_node`` for the expanded link nodes.
        example: Example object that is updated in place.
        t_node: The usage-example template node.
    """
    # https://ko.wiktionary.org/wiki/틀:ux
    # https://ko.wiktionary.org/wiki/모듈:usex/templates
    params = t_node.template_parameters

    def cleaned_param(key: int | str) -> str:
        # Clean one template argument; a missing argument yields clean_node("").
        return clean_node(wxr, None, params.get(key, ""))

    lang_code = params.get(1, "")
    expanded_node = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )

    if lang_code == "ja":
        for span_tag in expanded_node.find_html_recursively("span"):
            css_class = span_tag.attrs.get("class", "")
            if css_class == "Jpan":
                example.ruby, plain_nodes = extract_ruby(wxr, span_tag)
                example.text = clean_node(wxr, None, plain_nodes)
            elif css_class == "tr":
                example.roman = clean_node(wxr, None, span_tag)
        example.translation = cleaned_param(4)
        example.literal_meaning = cleaned_param("lit")
        ref_keys = ("ref",)
    else:
        example.text = cleaned_param(2)
        example.translation = cleaned_param(3)
        example.note = cleaned_param("footer")
        ref_keys = ("출처", "source")

    # Try each candidate argument in turn, but only while no reference is set.
    for ref_key in ref_keys:
        if example.ref == "":
            example.ref = cleaned_param(ref_key)

    for link_node in expanded_node.find_child(NodeKind.LINK):
        clean_node(wxr, sense, link_node)