Coverage for src/wiktextract/extractor/cs/pos.py: 93%

64 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-12 08:27 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from .example import extract_example_list_item 

6from .models import Sense, WordEntry 

7from .section_titles import POS_DATA 

8from .tags import translate_raw_tags 

9 

10 

def extract_pos_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
    pos_title: str,
):
    """Append a new entry for a part-of-speech section to ``page_data``.

    A deep copy of ``base_data`` is pushed onto ``page_data`` and given the
    section title plus the ``pos`` (and optional ``tags``) registered for
    that title in ``POS_DATA``. ``base_data.pos`` is updated as well.

    Every whitespace-separated word found in italic nodes inside the items
    of ``*`` lists directly under ``level_node`` is stored as a raw tag on
    the new entry, except the word "rod". ``translate_raw_tags`` is then
    run on the new entry.

    Raises:
        KeyError: if ``pos_title`` is not a key of ``POS_DATA``. The copied
            entry has already been appended to ``page_data`` at that point.
    """
    page_data.append(base_data.model_copy(deep=True))
    entry = page_data[-1]
    entry.pos_title = pos_title

    pos_info = POS_DATA[pos_title]
    pos_value = pos_info["pos"]
    entry.pos = pos_value
    base_data.pos = pos_value
    entry.tags.extend(pos_info.get("tags", []))

    skipped_words = ("", "rod")
    for list_node in level_node.find_child(NodeKind.LIST):
        # Only "*" lists are inspected; any other list marker is ignored.
        if list_node.sarg == "*":
            for list_item in list_node.find_child(NodeKind.LIST_ITEM):
                for italic_node in list_item.find_child(NodeKind.ITALIC):
                    italic_text = clean_node(wxr, None, italic_node)
                    entry.raw_tags.extend(
                        word
                        for word in italic_text.split()
                        if word not in skipped_words
                    )

    translate_raw_tags(entry)

36 

37 

def extract_sense_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
):
    """Extract glosses from every ``#`` list directly under ``level_node``.

    Each list item of such a list is passed to ``extract_gloss_list_item``
    together with ``word_entry``. Lists with any other marker are skipped.
    """
    numbered_lists = (
        candidate
        for candidate in level_node.find_child(NodeKind.LIST)
        if candidate.sarg == "#"
    )
    for numbered_list in numbered_lists:
        for gloss_item in numbered_list.find_child(NodeKind.LIST_ITEM):
            extract_gloss_list_item(wxr, word_entry, gloss_item)

46 

47 

def extract_gloss_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    parent_sense: Sense | None = None,
):
    """Build a ``Sense`` from one gloss list item and recurse into sublists.

    The sense starts as a deep copy of ``parent_sense`` when one is given,
    otherwise as an empty ``Sense``. Children of ``list_item`` are handled
    as follows:

    * a "Příznaky" template is passed to ``extract_příznaky_template``;
    * an italic node whose cleaned text is wrapped in parentheses is split
      on commas and the non-empty pieces become raw tags;
    * nested lists are left out of the gloss text;
    * everything else (including other italic nodes) is gloss content.

    When the cleaned gloss text is non-empty it is added to the sense,
    ``translate_raw_tags`` is run and the sense is appended to
    ``word_entry.senses``. Nested lists are processed afterwards: markers
    that start and end with ``#`` recurse with this sense as the parent,
    markers that start with ``#`` and end with ``:`` or ``*`` are passed to
    ``extract_example_list_item``.
    """
    if parent_sense is None:
        sense = Sense()
    else:
        sense = parent_sense.model_copy(deep=True)

    gloss_nodes = []
    for child in list_item.children:
        if isinstance(child, TemplateNode) and child.template_name == "Příznaky":
            extract_příznaky_template(wxr, sense, child)
            continue
        if isinstance(child, WikiNode):
            if child.kind == NodeKind.ITALIC:
                italic_text = clean_node(wxr, None, child)
                parenthesized = italic_text.startswith(
                    "("
                ) and italic_text.endswith(")")
                if parenthesized:
                    pieces = italic_text.strip("() ").split(",")
                    sense.raw_tags.extend(
                        piece for piece in map(str.strip, pieces) if piece != ""
                    )
                else:
                    gloss_nodes.append(child)
                continue
            if child.kind == NodeKind.LIST:
                # Sublists are handled after the gloss itself.
                continue
        gloss_nodes.append(child)

    gloss_text = clean_node(wxr, sense, gloss_nodes)
    if gloss_text:
        sense.glosses.append(gloss_text)
        translate_raw_tags(sense)
        word_entry.senses.append(sense)

    for sub_list in list_item.find_child(NodeKind.LIST):
        marker = sub_list.sarg
        if not marker.startswith("#"):
            continue
        if marker.endswith("#"):
            for sub_item in sub_list.find_child(NodeKind.LIST_ITEM):
                extract_gloss_list_item(wxr, word_entry, sub_item, sense)
        elif marker.endswith((":", "*")):
            for sub_item in sub_list.find_child(NodeKind.LIST_ITEM):
                extract_example_list_item(wxr, sense, sub_item)

90 

91 

def extract_příznaky_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
):
    """Add the comma-separated items of a "Příznaky" template as raw tags.

    The template is cleaned to text with ``clean_node``, surrounding
    parentheses and spaces are stripped, and each non-empty, stripped piece
    between commas is appended to ``sense.raw_tags``.
    """
    # https://cs.wiktionary.org/wiki/Šablona:Příznaky
    expanded = clean_node(wxr, sense, t_node)
    pieces = expanded.strip("() ").split(",")
    sense.raw_tags.extend(
        piece for piece in map(str.strip, pieces) if piece != ""
    )