Coverage for src/wiktextract/extractor/de/form.py: 77%

51 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-13 10:14 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from .models import Form, WordEntry 

6from .tags import translate_raw_tags 

7from .utils import extract_sense_index 

8 

9 

def extracrt_form_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    tags: list[str],
) -> None:
    """Append a ``Form`` to ``word_entry.forms`` for each form in the section.

    Every list item found recursively below ``level_node`` is scanned child
    by child:

    * a string child starting with ``"["`` is handed to
      ``extract_sense_index`` and the first returned value becomes the
      sense index used for forms that follow in the same list item;
    * an italic node whose cleaned text ends with ``":"`` contributes a raw
      tag (colon removed, whitespace stripped);
    * a link node, or an ``Arab`` / ``Arabische Schrift`` template, whose
      cleaned text is non-empty becomes a form; the raw tags gathered so
      far are attached to it and then reset;
    * any other template whose cleaned text ends with ``":"`` contributes a
      raw tag as well.

    When a list item yields no form this way, the cleaned text of the whole
    list item is used as a single form.

    ``tags`` is passed unchanged to every created ``Form``.
    """

    def add_form(text: str, sense_index: str, raw_tags: list[str]) -> None:
        # Shared tail of both form-producing paths: build, translate, store.
        new_form = Form(
            form=text,
            tags=tags,
            sense_index=sense_index,
            raw_tags=raw_tags,
        )
        translate_raw_tags(new_form)
        word_entry.forms.append(new_form)

    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        sense_index = ""
        pending_raw_tags = []
        has_form = False

        for node in item.children:
            if isinstance(node, str):
                # Only strings opening with "[" carry a sense index; all
                # other plain strings are ignored in this pass.
                if node.startswith("["):
                    sense_index, _ = extract_sense_index(node)
                continue

            is_wiki_node = isinstance(node, WikiNode)
            is_template = isinstance(node, TemplateNode)

            if is_wiki_node and node.kind == NodeKind.ITALIC:
                label = clean_node(wxr, None, node)
                if label.endswith(":"):
                    pending_raw_tags.append(label[:-1].strip())
            elif (is_wiki_node and node.kind == NodeKind.LINK) or (
                is_template
                and node.template_name in ["Arab", "Arabische Schrift"]
            ):
                form_text = clean_node(wxr, None, node)
                if form_text != "":
                    add_form(form_text, sense_index, pending_raw_tags)
                    # Raw tags apply only to the form that follows them.
                    pending_raw_tags.clear()
                    has_form = True
            elif is_template:
                # Unlike the branches above, this call hands word_entry to
                # clean_node.
                label = clean_node(wxr, word_entry, node)
                if label.endswith(":"):
                    pending_raw_tags.append(label[:-1].strip())

        if has_form:
            continue

        # plain text: no link/script template produced a form
        plain_text = clean_node(wxr, None, item.children)
        if plain_text != "":
            add_form(plain_text, sense_index, pending_raw_tags)

61 

62 

def extract_transcription_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Append transcription forms found in the section's list items.

    For each list item found recursively below ``level_node``, the item's
    children other than nested lists are cleaned to text and split on
    commas. A piece containing ``":"`` is split at its first colon: the
    left part becomes the current raw tag and the right part the
    transcription. The raw tag stays in effect for later pieces of the same
    list item until another piece supplies a new one.

    Each non-empty transcription is stored as a ``Form`` tagged
    ``"transcription"``; when a raw tag is in effect it is attached and
    ``translate_raw_tags`` is applied.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        own_nodes = list(
            item.invert_find_child(NodeKind.LIST, include_empty_str=True)
        )
        item_text = clean_node(wxr, None, own_nodes)

        label = ""
        for piece in item_text.split(","):
            head, colon, tail = piece.partition(":")
            if colon:
                label = head.strip()
                piece = tail
            piece = piece.strip()
            if piece == "":
                continue

            entry = Form(form=piece, tags=["transcription"])
            if label != "":
                entry.raw_tags.append(label)
                translate_raw_tags(entry)
            word_entry.forms.append(entry)