Coverage for src/wiktextract/extractor/de/form.py: 77%

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from .models import Form, WordEntry

6from .tags import translate_raw_tags

7from .utils import extract_sense_index

def extracrt_form_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    tags: list[str],
) -> None:
    """Collect forms from the list items under ``level_node``.

    Every list item found recursively below ``level_node`` is scanned child
    by child:

    * a string child starting with ``[`` is handed to
      ``extract_sense_index``; the first element of its result becomes the
      sense index of the forms that follow in the same list item;
    * an italic node, or any template other than ``Arab`` /
      ``Arabische Schrift``, whose cleaned text ends with ``:`` contributes
      a pending raw tag (the colon is dropped, the rest is stripped);
    * a link node, or an ``Arab`` / ``Arabische Schrift`` template, yields
      the form text; a non-empty text produces a ``Form`` carrying ``tags``,
      the current sense index and the pending raw tags, after which the
      pending raw tags are emptied.

    When a list item produced no form that way, the cleaned text of the
    whole list item is used as the form instead (if it is non-empty).

    Each new ``Form`` is passed through ``translate_raw_tags`` and appended
    to ``word_entry.forms``; nothing is returned.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        sense_index = ""
        pending_raw_tags = []
        form_found = False

        for node in item.children:
            if isinstance(node, str):
                # Plain strings only matter when they open a sense marker.
                if node.startswith("["):
                    sense_index, _ = extract_sense_index(node)
                continue

            is_wiki_node = isinstance(node, WikiNode)
            is_template = isinstance(node, TemplateNode)

            if is_wiki_node and node.kind == NodeKind.ITALIC:
                label = clean_node(wxr, None, node)
                if label.endswith(":"):
                    pending_raw_tags.append(label.removesuffix(":").strip())
            elif (is_wiki_node and node.kind == NodeKind.LINK) or (
                is_template
                and node.template_name in ["Arab", "Arabische Schrift"]
            ):
                text = clean_node(wxr, None, node)
                if text == "":
                    continue
                _append_form(
                    word_entry, text, tags, sense_index, pending_raw_tags
                )
                # NOTE(review): emptying the list right after handing it to
                # Form assumes Form does not keep a reference to this very
                # list object - confirm against the Form model.
                pending_raw_tags.clear()
                form_found = True
            elif is_template:
                label = clean_node(wxr, word_entry, node)
                if label.endswith(":"):
                    pending_raw_tags.append(label.removesuffix(":").strip())

        if form_found:
            continue

        # Plain-text list item: fall back to the text of the whole item.
        plain_text = clean_node(wxr, None, item.children)
        if plain_text != "":
            _append_form(
                word_entry, plain_text, tags, sense_index, pending_raw_tags
            )


def _append_form(
    word_entry: WordEntry,
    form_text: str,
    tags: list[str],
    sense_index,
    raw_tags: list[str],
) -> None:
    """Build a Form, translate its raw tags, and add it to word_entry.forms."""
    new_form = Form(
        form=form_text,
        tags=tags,
        sense_index=sense_index,
        raw_tags=raw_tags,
    )
    translate_raw_tags(new_form)
    word_entry.forms.append(new_form)

def extract_transcription_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Add transcription forms found in the list items under ``level_node``.

    For each list item (searched recursively) the children that are not
    nested lists are cleaned into one text, which is then split on commas.
    A piece containing a colon is divided at its first colon: the part
    before it becomes the current raw tag and the part after it is the
    transcription. The raw tag stays in effect for the following pieces of
    the same list item until another piece with a colon replaces it.

    Every non-empty transcription is appended to ``word_entry.forms`` as a
    ``Form`` tagged ``"transcription"``; nothing is returned.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        own_nodes = list(item.invert_find_child(NodeKind.LIST))
        item_text = clean_node(wxr, None, own_nodes)
        current_label = ""

        for piece in item_text.split(","):
            before, colon, after = piece.partition(":")
            if colon:
                current_label = before.strip()
                piece = after
            transcription = piece.strip()
            if transcription == "":
                continue

            new_form = Form(form=transcription, tags=["transcription"])
            if current_label != "":
                new_form.raw_tags.append(current_label)
                # NOTE(review): translate_raw_tags is kept under the
                # raw-tag check; confirm this nesting against the module.
                translate_raw_tags(new_form)
            word_entry.forms.append(new_form)