Coverage for src/wiktextract/extractor/it/example.py: 93%

73 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from wikitextprocessor import NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..ruby import extract_ruby 

6from .models import Example, Sense 

7 

8 

def extract_example_list_item(
    wxr: WiktextractContext, sense: Sense, list_item: WikiNode, lang_code: str
) -> None:
    """Collect the example(s) found in one list item into `sense.examples`.

    The children of `list_item` are scanned in order:

    * a `zh-tradsem` template is delegated to `extract_zh_tradsem`;
    * an italic node is the example text, except for "zh"/"ja" where the
      first italic node is stored as `roman` instead;
    * a nested list supplies the translation (the last list item wins);
    * a string containing "-" splits the item: what precedes the dash is
      example text, what follows (minus any `Term` template) is the
      translation, and a `Term` template, if present, supplies `ref`.
      Scanning stops at that string.

    For "zh"/"ja", when nothing else produced an example, the nodes seen
    before the first italic node are expanded, ruby is separated with
    `extract_ruby`, and the remaining text becomes the example.

    Only examples whose text is non-empty are appended to `sense.examples`;
    nothing is returned.
    """
    is_cjk = lang_code in ["zh", "ja"]
    collected = []
    pending_nodes = []  # zh/ja nodes seen before the first italic node
    seen_italic = False
    found_tradsem = False
    roman = ""
    translation = ""
    ref = ""

    for pos, child in enumerate(list_item.children):
        if (
            isinstance(child, TemplateNode)
            and child.template_name == "zh-tradsem"
        ):
            collected.extend(extract_zh_tradsem(wxr, child))
            found_tradsem = True
            continue

        if isinstance(child, WikiNode):
            if child.kind == NodeKind.ITALIC:
                if not is_cjk:
                    collected.append(
                        Example(text=clean_node(wxr, sense, child))
                    )
                elif not seen_italic:
                    roman = clean_node(wxr, sense, child)
                    seen_italic = True
            elif child.kind == NodeKind.LIST:
                for tr_item in child.find_child(NodeKind.LIST_ITEM):
                    translation = clean_node(wxr, sense, tr_item.children)
            elif is_cjk and not seen_italic:
                pending_nodes.append(child)
            continue

        if isinstance(child, str) and "-" in child:
            # Split on the first dash only, like index("-") slicing would.
            before_dash, _, after_dash = child.partition("-")

            term_node = next(
                (
                    tpl
                    for tpl in list_item.find_child(NodeKind.TEMPLATE)
                    if tpl.template_name == "Term"
                ),
                None,
            )
            if term_node is not None:
                ref = clean_node(wxr, None, term_node).strip("()")

            # Everything after the dash is translation, except `Term`.
            trailing = [
                later
                for later in list_item.children[pos + 1 :]
                if not isinstance(later, TemplateNode)
                or later.template_name != "Term"
            ]
            translation = clean_node(
                wxr,
                sense,
                wxr.wtp.node_to_wikitext([after_dash] + trailing),
            )

            if not found_tradsem and len(collected) > 1:
                # Several fragments were collected: replace them with one
                # example built from everything preceding the dash.
                leading = list_item.children[:pos] + [before_dash]
                collected.clear()
                collected.append(
                    Example(text=clean_node(wxr, None, leading))
                )
            break

        if is_cjk and not collected and not seen_italic:
            pending_nodes.append(child)

    if is_cjk and not collected and pending_nodes:
        expanded = wxr.wtp.parse(
            wxr.wtp.node_to_wikitext(pending_nodes), expand_all=True
        )
        cjk_example = Example()
        cjk_example.ruby, remaining_nodes = extract_ruby(
            wxr, expanded.children
        )
        cleaned_text = clean_node(wxr, sense, remaining_nodes)
        cjk_example.text = cleaned_text.replace(" ", "").strip("(")
        collected.append(cjk_example)

    if not found_tradsem and len(collected) > 1:
        # Merge multiple fragments into a single example covering every
        # child of the list item that is not a nested list.
        non_list_children = list(list_item.invert_find_child(NodeKind.LIST))
        collected.clear()
        collected.append(
            Example(text=clean_node(wxr, None, non_list_children))
        )

    for example in collected:
        if roman:
            example.roman = roman
        if translation:
            example.translation = translation
        if ref:
            example.ref = ref
        if example.text != "":
            sense.examples.append(example)

115 

116 

def extract_zh_tradsem(
    wxr: WiktextractContext, t_node: TemplateNode
) -> list[Example]:
    """Build examples from positional arguments 1 and 2 of `zh-tradsem`.

    Template page: https://it.wiktionary.org/wiki/Template:zh-tradsem

    Each argument is cleaned with `clean_node` and stripped of spaces.
    A non-empty first argument yields an example tagged
    "Traditional Chinese"; a non-empty second argument yields one tagged
    "Simplified Chinese". Empty or missing arguments are skipped.
    """
    tag_by_position = {1: "Traditional Chinese", 2: "Simplified Chinese"}
    results = []
    for position, tag in tag_by_position.items():
        raw_value = t_node.template_parameters.get(position, "")
        cleaned = clean_node(wxr, None, raw_value).replace(" ", "")
        if cleaned == "":
            continue
        entry = Example(text=cleaned)
        entry.tags.append(tag)
        results.append(entry)

    return results