Coverage for src/wiktextract/extractor/cs/pos.py: 92%

1from wikitextprocessor.parser import (

2 LEVEL_KIND_FLAGS,

3 LevelNode,

4 NodeKind,

5 TemplateNode,

6 WikiNode,

9from ...page import clean_node

10from ...wxr_context import WiktextractContext

11from .example import extract_example_list_item

12from .models import AltForm, Sense, WordEntry

13from .section_titles import POS_DATA

14from .tags import translate_raw_tags

def extract_pos_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
    pos_title: str,
) -> None:
    """Start a new word entry for a part-of-speech section.

    A deep copy of ``base_data`` is appended to ``page_data`` and given the
    POS (and optional tags) looked up in ``POS_DATA[pos_title]``;
    ``base_data.pos`` is updated to the same POS. The ``*`` lists directly
    under ``level_node`` are then scanned:

    * If the section contains child sections, every whitespace-separated
      word found in italic nodes of a list item is stored as a raw tag,
      except the literal word "rod".
    * Otherwise the first link with non-empty text in each list item turns
      that item into a "form-of" sense pointing at the linked word, and the
      entry itself is tagged "form-of" (once).

    Finally the collected raw tags are passed to ``translate_raw_tags``.

    Raises:
        KeyError: if ``pos_title`` is not a key of ``POS_DATA``.
    """
    entry = base_data.model_copy(deep=True)
    page_data.append(entry)
    entry.pos_title = pos_title
    pos_data = POS_DATA[pos_title]
    entry.pos = pos_data["pos"]
    base_data.pos = pos_data["pos"]
    entry.tags.extend(pos_data.get("tags", []))
    has_child_section = level_node.contain_node(LEVEL_KIND_FLAGS)

    for list_node in level_node.find_child(NodeKind.LIST):
        if list_node.sarg != "*":
            continue
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            if has_child_section:
                for italic_node in list_item.find_child(NodeKind.ITALIC):
                    italic_str = clean_node(wxr, None, italic_node)
                    for raw_tag in italic_str.split():
                        if raw_tag not in ["", "rod"]:
                            entry.raw_tags.append(raw_tag)
            else:
                for link_node in list_item.find_child(NodeKind.LINK):
                    word = clean_node(wxr, None, link_node)
                    if word != "":
                        entry.senses.append(
                            Sense(
                                glosses=[
                                    clean_node(wxr, None, list_item.children)
                                ],
                                tags=["form-of"],
                                form_of=[AltForm(word=word)],
                            )
                        )
                        # Check the tag list, not the model object: a
                        # membership test on the model itself never
                        # matches a plain string, which previously added
                        # a duplicate "form-of" tag for every list item.
                        if "form-of" not in entry.tags:
                            entry.tags.append("form-of")
                        # Only the first usable link of an item is used.
                        break

    translate_raw_tags(entry)

def extract_sense_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
):
    """Extract glosses from the ``#`` lists directly under ``level_node``.

    Lists whose marker is anything other than exactly ``#`` are ignored.
    Each item of a matching list is handed to ``extract_gloss_list_item``,
    which adds the resulting senses to ``word_entry``.
    """
    numbered_lists = (
        candidate
        for candidate in level_node.find_child(NodeKind.LIST)
        if candidate.sarg == "#"
    )
    for numbered_list in numbered_lists:
        for gloss_item in numbered_list.find_child(NodeKind.LIST_ITEM):
            extract_gloss_list_item(wxr, word_entry, gloss_item)

def extract_gloss_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    parent_sense: Sense | None = None,
):
    """Build a sense from one gloss list item and recurse into sub-lists.

    The sense starts as a deep copy of ``parent_sense`` (or a fresh
    ``Sense`` when there is no parent). Children of ``list_item`` are
    handled as follows:

    * a "Příznaky" template contributes raw tags;
    * an italic node whose cleaned text is wrapped in parentheses
      contributes comma-separated raw tags;
    * any other italic node is part of the gloss; if it contains exactly
      one direct link child, and that link is its last child but not its
      first, the sense is marked "form-of" for the linked word and the
      remaining children of the list item are not processed;
    * nested lists are left out of the gloss text;
    * everything else is part of the gloss.

    The sense is appended to ``word_entry.senses`` only when the gloss text
    is non-empty. Nested ``#...#`` lists are processed recursively with
    this sense as parent; nested lists starting with ``#`` and ending in
    ``:`` or ``*`` are passed to ``extract_example_list_item``.
    """
    if parent_sense is None:
        sense = Sense()
    else:
        sense = parent_sense.model_copy(deep=True)

    gloss_parts = []
    for child in list_item.children:
        if (
            isinstance(child, TemplateNode)
            and child.template_name == "Příznaky"
        ):
            extract_příznaky_template(wxr, sense, child)
            continue
        if not isinstance(child, WikiNode):
            # Plain text always belongs to the gloss.
            gloss_parts.append(child)
            continue
        if child.kind == NodeKind.LIST:
            # Nested lists are handled after the gloss has been built.
            continue
        if child.kind != NodeKind.ITALIC:
            gloss_parts.append(child)
            continue

        italic_text = clean_node(wxr, None, child)
        if italic_text.startswith("(") and italic_text.endswith(")"):
            for piece in italic_text.strip("() ").split(","):
                label = piece.strip()
                if label != "":
                    sense.raw_tags.append(label)
            continue

        has_link = child.contain_node(NodeKind.LINK)
        gloss_parts.append(child)
        if not has_link:
            continue

        indexed_links = list(child.find_child(NodeKind.LINK, with_index=True))
        if len(indexed_links) != 1:
            continue
        link_index, link_node = indexed_links[0]
        if link_index == 0 or link_index != len(child.children) - 1:
            continue
        target_word = clean_node(wxr, None, link_node)
        if target_word != "":
            sense.form_of.append(AltForm(word=target_word))
            sense.tags.append("form-of")
        # Stop scanning this list item once the form-of pattern matched.
        break

    gloss_text = clean_node(wxr, sense, gloss_parts)
    if gloss_text != "":
        sense.glosses.append(gloss_text)
        translate_raw_tags(sense)
        word_entry.senses.append(sense)

    for sub_list in list_item.find_child(NodeKind.LIST):
        marker = sub_list.sarg
        if not marker.startswith("#"):
            continue
        if marker.endswith("#"):
            for sub_item in sub_list.find_child(NodeKind.LIST_ITEM):
                extract_gloss_list_item(wxr, word_entry, sub_item, sense)
        elif marker.endswith((":", "*")):
            for sub_item in sub_list.find_child(NodeKind.LIST_ITEM):
                extract_example_list_item(wxr, sense, sub_item)

129

130

def extract_příznaky_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
):
    """Add the labels rendered by a "Příznaky" template to ``sense.raw_tags``.

    The template is expanded with ``clean_node``, surrounding parentheses
    and spaces are stripped, and the remainder is split on commas. Empty
    pieces are dropped.
    """
    # https://cs.wiktionary.org/wiki/Šablona:Příznaky
    expanded = clean_node(wxr, sense, t_node)
    pieces = (piece.strip() for piece in expanded.strip("() ").split(","))
    sense.raw_tags.extend(label for label in pieces if label != "")

140

141

def extract_note_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
):
    """Store the cleaned text of a note section in ``word_entry.note``.

    The nodes passed to ``clean_node`` are those returned by
    ``level_node.invert_find_child(LEVEL_KIND_FLAGS, True)``.
    """
    note_nodes = list(level_node.invert_find_child(LEVEL_KIND_FLAGS, True))
    note_text = clean_node(wxr, word_entry, note_nodes)
    word_entry.note = note_text

Coverage for src/wiktextract/extractor/cs/pos.py: 92%

84 statements