Coverage for src/wiktextract/extractor/ku/example.py: 86%

65 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 10:58 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import calculate_bold_offsets 

6from .linkage import LINKAGE_TEMPLATES, extract_nyms_template 

7from .models import Example, Sense, WordEntry 

8 

9 

def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sense: Sense,
    list_item: WikiNode,
) -> None:
    """Collect example data from the children of one list item.

    Template children are dispatched on their template name, nested lists
    are processed recursively with the same ``sense``, and italic nodes
    become plain ``Example`` entries. Results are written into ``sense``
    (and into ``word_entry`` for linkage templates); nothing is returned.
    """
    for child in list_item.children:
        if isinstance(child, TemplateNode):
            name = child.template_name
            if name == "jêder" or name.startswith("jêder-"):
                extract_jêder_template(wxr, sense, child)
            elif name in ["mk", "mînak", "ux", "nimûne", "nim"]:
                extract_nimûne_template(wxr, sense, child)
            elif name in ["deng", "audio"] and len(sense.examples) > 0:
                # Imported here rather than at module level, as in the
                # original code.
                from .sound import extract_deng_template

                # The audio template is attached to the most recently
                # collected example; that example's categories are then
                # copied onto the sense.
                extract_deng_template(wxr, sense.examples[-1], child)
                sense.categories.extend(sense.examples[-1].categories)
            elif name in LINKAGE_TEMPLATES:
                extract_nyms_template(wxr, word_entry, child)
            # Template nodes never fall through to the list/italic checks.
            continue

        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_example_list_item(
                    wxr, word_entry, sense, nested_item
                )
        elif child.kind == NodeKind.ITALIC:
            example = Example(text=clean_node(wxr, None, child))
            calculate_bold_offsets(
                wxr, child, example.text, example, "bold_text_offsets"
            )
            # Italic nodes that clean to an empty string are not stored.
            if example.text != "":
                sense.examples.append(example)

46 

47 

def extract_jêder_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Turn a "jêder" template into an ``Example`` stored on ``sense``.

    The example text, romanization and translation are read from the
    template parameters "jêgirtin", "tr" and "werger". The reference is
    taken from ``<span class="jeder">`` elements of the expanded template.
    The example is appended only when its text is non-empty; the expanded
    template is always passed through ``clean_node`` with ``sense``.
    """
    # https://ku.wiktionary.org/wiki/Şablon:jêder
    expanded = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )

    # (Example field, template parameter, bold-offsets field)
    field_specs = (
        ("text", "jêgirtin", "bold_text_offsets"),
        ("roman", "tr", "bold_roman_offsets"),
        ("translation", "werger", "bold_translation_offsets"),
    )
    params = t_node.template_parameters
    raw_args = {
        field: params.get(param, "") for field, param, _ in field_specs
    }
    example = Example(
        **{
            field: clean_node(wxr, None, raw_arg)
            for field, raw_arg in raw_args.items()
        }
    )

    # Each raw argument is turned back into wikitext and re-parsed, and
    # the resulting tree is what the bold offsets are computed from.
    for field, _, offsets_field in field_specs:
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(raw_args[field])),
            getattr(example, field),
            example,
            offsets_field,
        )

    # If several matching spans exist, the last one determines the ref.
    for ref_span in expanded.find_html(
        "span", attr_name="class", attr_value="jeder"
    ):
        example.ref = clean_node(wxr, None, ref_span).strip("— ()")

    if example.text != "":
        sense.examples.append(example)
    clean_node(wxr, sense, expanded)

91 

92 

def extract_nimûne_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Turn a "nimûne" template into an ``Example`` stored on ``sense``.

    The template is expanded and its HTML is scanned: ``<i>`` tags whose
    class contains "e-example" / "e-transliteration" supply the text and
    romanization, and ``<span>`` tags whose class contains "e-translation"
    supply the translation. The example is appended only when its text is
    non-empty; the expanded template is always passed through
    ``clean_node`` with ``sense``.
    """
    # https://ku.wiktionary.org/wiki/Şablon:nimûne
    expanded = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    example = Example(text="")

    # (class marker, Example field, bold-offsets field); the first marker
    # found in the class attribute wins, mirroring an if/elif chain.
    italic_fields = (
        ("e-example", "text", "bold_text_offsets"),
        ("e-transliteration", "roman", "bold_roman_offsets"),
    )
    for italic_tag in expanded.find_html_recursively("i"):
        tag_class = italic_tag.attrs.get("class", "")
        for marker, field, offsets_field in italic_fields:
            if marker not in tag_class:
                continue
            setattr(example, field, clean_node(wxr, None, italic_tag))
            calculate_bold_offsets(
                wxr,
                italic_tag,
                getattr(example, field),
                example,
                offsets_field,
            )
            break

    for span_tag in expanded.find_html_recursively("span"):
        if "e-translation" not in span_tag.attrs.get("class", ""):
            continue
        example.translation = clean_node(wxr, None, span_tag)
        calculate_bold_offsets(
            wxr,
            span_tag,
            example.translation,
            example,
            "bold_translation_offsets",
        )

    if example.text != "":
        sense.examples.append(example)
    clean_node(wxr, sense, expanded)

127 

128 

def extract_example_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Extract examples from a section into the entry's first sense.

    Every list item of every list directly under ``level_node`` is handed
    to ``extract_example_list_item`` together with ``word_entry.senses[0]``.
    Nothing happens when ``word_entry`` has no senses.
    """
    if len(word_entry.senses) == 0:
        # There is no sense to attach the examples to.
        return

    for list_node in level_node.find_child(NodeKind.LIST):
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            extract_example_list_item(
                wxr, word_entry, word_entry.senses[0], list_item
            )

138 wxr, word_entry, word_entry.senses[0], list_item 

139 )