Coverage for src/wiktextract/extractor/ku/descendant.py: 84%

84 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-04 08:12 +0000

1from wikitextprocessor import ( 

2 HTMLNode, 

3 LevelNode, 

4 NodeKind, 

5 TemplateNode, 

6 WikiNode, 

7) 

8 

9from ...page import clean_node 

10from ...wxr_context import WiktextractContext 

11from .models import Descendant, WordEntry 

12from .tags import translate_raw_tags 

13 

14 

def extract_descendant_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Process every list item found in the lists directly under a section.

    Each ``LIST_ITEM`` child of each ``LIST`` child of ``level_node`` is
    passed to ``extract_desc_list_item`` with an empty parent list, i.e. it
    is treated as a top-level item.
    """
    # Lazy generator: items are produced and processed one at a time, in
    # document order, exactly like two nested ``for`` loops.
    top_level_items = (
        item
        for child_list in level_node.find_child(NodeKind.LIST)
        for item in child_list.find_child(NodeKind.LIST_ITEM)
    )
    for item in top_level_items:
        extract_desc_list_item(wxr, word_entry, [], item)

21 

22 

def extract_desc_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    parent_data: list[Descendant],
    list_item: WikiNode,
) -> None:
    """Extract descendants from one list item and recurse into sub-lists.

    Template children named "dû", "dardû" and "g" are dispatched to their
    dedicated extractors; every non-``None`` result is remembered for this
    item. A "g" template is only handled once at least one descendant has
    been collected from this item, because it is given the most recent one.
    Nested lists are processed recursively with this item's collected
    descendants passed as the parents.
    """
    item_descs = []
    for child in list_item.children:
        if isinstance(child, TemplateNode):
            name = child.template_name
            new_desc = None
            if name == "dû":
                new_desc = extract_dû_template(
                    wxr, word_entry, child, parent_data
                )
            elif name == "dardû":
                new_desc = extract_dardû_template(
                    wxr, word_entry, child, parent_data
                )
            elif name == "g" and item_descs:
                new_desc = extract_g_template(
                    wxr, word_entry, child, item_descs[-1], parent_data
                )
            if new_desc is not None:
                item_descs.append(new_desc)
            # Template nodes are never treated as nested lists.
            continue

        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for sub_item in child.find_child(NodeKind.LIST_ITEM):
                extract_desc_list_item(wxr, word_entry, item_descs, sub_item)

50 

51 

def extract_dû_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    parent_descs: list[Descendant],
) -> Descendant | None:
    """Build a ``Descendant`` from a "dû" template node.

    The template is expanded and re-parsed, and the expanded tree is read by
    ``extract_expanded_dû_template``. The language code starts as the
    cleaned first positional template argument.

    Returns the new descendant after attaching it to every entry of
    ``parent_descs`` (or to ``word_entry`` when there are no parents), or
    ``None`` when no word was found in the expansion.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_node = wxr.wtp.parse(wikitext, expand_all=True)
    lang_code = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    desc = Descendant(word="", lang_code=lang_code, lang="unknown")
    extract_expanded_dû_template(wxr, desc, parent_descs, expanded_node)

    if desc.word == "":
        return None

    if parent_descs:
        for parent in parent_descs:
            parent.descendants.append(desc)
    else:
        word_entry.descendants.append(desc)
    return desc

74 

75 

def extract_expanded_dû_template(
    wxr: WiktextractContext,
    desc: Descendant,
    parent_descs: list[Descendant],
    expanded_node: WikiNode,
) -> None:
    """Fill ``desc`` in place from the direct children of ``expanded_node``.

    - The text before the first ":" of a string child becomes ``desc.lang``,
      but only while ``desc.lang`` is still "unknown".
    - A ``<span>`` whose ``lang`` attribute ends with "-Latn" supplies
      ``desc.roman``.
    - A ``<span>`` with any other non-empty ``lang`` supplies ``desc.word``,
      and its ``lang`` value replaces a ``lang_code`` of "unknown".
    - A ``<span>`` without ``lang`` but with class "mention-gloss" supplies
      ``desc.sense``.

    ``parent_descs`` is accepted but not used by this function.
    """
    for child in expanded_node.children:
        if isinstance(child, str):
            if ":" in child and desc.lang == "unknown":
                desc.lang = child.partition(":")[0].strip()
            continue

        if not (isinstance(child, HTMLNode) and child.tag == "span"):
            continue

        lang_attr = child.attrs.get("lang", "")
        if lang_attr.endswith("-Latn"):
            desc.roman = clean_node(wxr, None, child)
        elif lang_attr != "":
            desc.word = clean_node(wxr, None, child)
            if desc.lang_code == "unknown":
                desc.lang_code = lang_attr
        elif child.attrs.get("class", "") == "mention-gloss":
            desc.sense = clean_node(wxr, None, child)

96 

97 

def extract_dardû_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    parent_descs: list[Descendant],
) -> Descendant | None:
    """Build a ``Descendant`` (with nested children) from a "dardû" template.

    The template is expanded and re-parsed. The top level of the expansion
    fills the main descendant; every ``<dd>`` element found anywhere in the
    expansion is read as a child descendant and attached to the main one
    when it yields a word.

    Returns the main descendant after attaching it to every entry of
    ``parent_descs`` (or to ``word_entry`` when there are no parents), or
    ``None`` when the main descendant has no word.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_node = wxr.wtp.parse(wikitext, expand_all=True)
    lang_code = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    desc = Descendant(word="", lang_code=lang_code, lang="unknown")
    extract_expanded_dû_template(wxr, desc, parent_descs, expanded_node)

    for dd_tag in expanded_node.find_html_recursively("dd"):
        # Children start fully "unknown"; language and code are taken from
        # the expanded markup itself.
        nested = Descendant(word="", lang_code="unknown", lang="unknown")
        extract_expanded_dû_template(wxr, nested, [desc], dd_tag)
        if nested.word != "":
            desc.descendants.append(nested)

    if desc.word == "":
        return None

    if parent_descs:
        for parent in parent_descs:
            parent.descendants.append(desc)
    else:
        word_entry.descendants.append(desc)
    return desc

125 

126 

def extract_g_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    previous_desc: Descendant,
    parent_descs: list[Descendant],
) -> Descendant | None:
    """Build a ``Descendant`` from a "g" template that follows another one.

    The word comes from template argument ``2`` (falling back to ``cuda``),
    the romanization from ``tr`` and the sense from ``w``. Language name and
    code are copied from ``previous_desc``. Raw tags are the cleaned texts of
    ``<abbr>`` elements inside ``<span class="gender">`` elements of the
    expanded template, ignoring "" and "?".

    Returns the new descendant after translating its raw tags and attaching
    it to every entry of ``parent_descs`` (or to ``word_entry`` when there
    are no parents), or ``None`` when the word is empty.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_node = wxr.wtp.parse(wikitext, expand_all=True)
    raw_tags = [
        tag_text
        for gender_span in expanded_node.find_html(
            "span", attr_name="class", attr_value="gender"
        )
        for abbr_tag in gender_span.find_html("abbr")
        if (tag_text := clean_node(wxr, None, abbr_tag)) not in ["", "?"]
    ]

    params = t_node.template_parameters
    word = clean_node(wxr, None, params.get(2, params.get("cuda", "")))
    roman = clean_node(wxr, None, params.get("tr", ""))
    sense = clean_node(wxr, None, params.get("w", ""))
    desc = Descendant(
        word=word,
        lang=previous_desc.lang,
        lang_code=previous_desc.lang_code,
        roman=roman,
        sense=sense,
        raw_tags=raw_tags,
    )

    if desc.word == "":
        return None

    translate_raw_tags(desc)
    if parent_descs:
        for parent in parent_descs:
            parent.descendants.append(desc)
    else:
        word_entry.descendants.append(desc)
    return desc