Coverage for src/wiktextract/extractor/de/form.py: 77%

51 statements  

« prev     ^ index     » next       coverage.py v7.9.0, created at 2025-06-13 07:43 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from .models import Form, WordEntry 

6from .tags import translate_raw_tags 

7from .utils import extract_sense_index 

8 

9 

def extracrt_form_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    tags: list[str],
) -> None:
    """Append a ``Form`` to ``word_entry.forms`` for forms found in list items.

    Every ``LIST_ITEM`` node found recursively under ``level_node`` is
    scanned child by child:

    * a string child starting with ``"["`` is passed to
      ``extract_sense_index`` and sets the sense index for the item;
    * an italic node, or a template other than ``Arab`` /
      ``Arabische Schrift``, whose cleaned text ends with ``":"`` queues
      that text (colon removed) as a raw tag for the next form;
    * a link node or an ``Arab`` / ``Arabische Schrift`` template yields a
      form built from its cleaned text.

    If no form was produced from the children of a list item, the cleaned
    text of the whole item is used as a single form instead.

    Args:
        wxr: Context object handed to ``clean_node``.
        word_entry: Entry whose ``forms`` list receives the new forms.
        level_node: Section node whose list items are scanned.
        tags: Tags given to every ``Form`` created here.
    """
    # NOTE: the misspelled function name is kept on purpose; callers use it.
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        sense_index = ""
        pending_raw_tags = []
        form_seen = False

        for node in item.children:
            if isinstance(node, str):
                # Only strings that open with "[" are inspected; any other
                # plain string is skipped here.
                if node.startswith("["):
                    sense_index, _ = extract_sense_index(node)
                continue

            if isinstance(node, WikiNode) and node.kind == NodeKind.ITALIC:
                label = clean_node(wxr, None, node)
                if label.endswith(":"):
                    pending_raw_tags.append(label.removesuffix(":").strip())
                continue

            if (isinstance(node, WikiNode) and node.kind == NodeKind.LINK) or (
                isinstance(node, TemplateNode)
                and node.template_name in ("Arab", "Arabische Schrift")
            ):
                text = clean_node(wxr, None, node)
                if text != "":
                    new_form = Form(
                        form=text,
                        tags=tags,
                        sense_index=sense_index,
                        raw_tags=pending_raw_tags,
                    )
                    translate_raw_tags(new_form)
                    word_entry.forms.append(new_form)
                    # Queued raw tags apply to one form only.
                    pending_raw_tags.clear()
                    form_seen = True
                continue

            if isinstance(node, TemplateNode):
                label = clean_node(wxr, word_entry, node)
                if label.endswith(":"):
                    pending_raw_tags.append(label.removesuffix(":").strip())

        if form_seen:
            continue

        # Plain-text fallback: no link or script template produced a form.
        whole_text = clean_node(wxr, None, item.children)
        if whole_text != "":
            new_form = Form(
                form=whole_text,
                sense_index=sense_index,
                raw_tags=pending_raw_tags,
                tags=tags,
            )
            translate_raw_tags(new_form)
            word_entry.forms.append(new_form)

61 

62 

def extract_transcription_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Append transcription forms found in list items to ``word_entry.forms``.

    For every ``LIST_ITEM`` node found recursively under ``level_node`` the
    cleaned item text is split on commas. A piece of the shape
    ``"label: value"`` sets the current raw tag to ``label``; that raw tag
    stays in effect for the following pieces of the same list item until
    another labelled piece replaces it. Each non-empty value becomes a
    ``Form`` tagged ``"transcription"``.

    Args:
        wxr: Context object handed to ``clean_node``.
        word_entry: Entry whose ``forms`` list receives the new forms.
        level_node: Section node whose list items are scanned.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        # NOTE(review): presumably this yields the item's children that are
        # not LIST nodes, so nested lists are handled by their own list
        # items - confirm against wikitextprocessor.
        own_children = list(item.invert_find_child(NodeKind.LIST))
        item_text = clean_node(wxr, None, own_children)

        label = ""
        for piece in item_text.split(","):
            head, colon, tail = piece.partition(":")
            if colon:
                # Split at the first colon: left side is the raw tag,
                # right side is the transcription itself.
                label = head.strip()
                piece = tail
            piece = piece.strip()
            if piece == "":
                continue

            form = Form(form=piece, tags=["transcription"])
            if label != "":
                form.raw_tags.append(label)
                translate_raw_tags(form)
            word_entry.forms.append(form)