Coverage for src/wiktextract/extractor/it/linkage.py: 74%

1from wikitextprocessor import LevelNode, NodeKind, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from .models import Form, Linkage, WordEntry

6from .tags import translate_raw_tags

def extract_linkage_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Collect linkages from the lists under ``level_node`` and store them.

    Every list item found in the direct child lists of ``level_node`` is
    parsed with ``extract_proverb_list_item`` when ``linkage_type`` is
    ``"proverbs"`` and with ``extract_linkage_list_item`` otherwise.  Each
    resulting item is passed to ``translate_raw_tags``; the collected items
    are then appended to the ``linkage_type`` attribute of every entry in
    ``page_data`` whose ``lang_code`` equals that of the last entry.
    """
    # The parser depends only on the section type, so choose it up front.
    if linkage_type == "proverbs":
        parse_item = extract_proverb_list_item
    else:
        parse_item = extract_linkage_list_item

    collected = []
    for list_node in level_node.find_child(NodeKind.LIST):
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            collected += parse_item(wxr, item)

    for linkage in collected:
        translate_raw_tags(linkage)

    if not page_data:
        return
    current_lang = page_data[-1].lang_code
    for entry in page_data:
        if entry.lang_code == current_lang:
            # The same linkage objects are appended to every matching entry.
            getattr(entry, linkage_type).extend(collected)

32def extract_linkage_list_item(

33 wxr: WiktextractContext, list_item: WikiNode

34) -> list[Linkage]:

35 raw_tags = []

36 linkages = []

37 for node in list_item.children:

38 if isinstance(node, WikiNode):

39 match node.kind:

40 case NodeKind.LINK:

41 node_str = clean_node(wxr, None, node)

42 if node_str != "": 42 ↛ 37line 42 didn't jump to line 37 because the condition on line 42 was always true

43 linkages.append(

44 Linkage(word=node_str, raw_tags=raw_tags)

45 )

46 raw_tags.clear()

47 case NodeKind.TEMPLATE | NodeKind.ITALIC: 47 ↛ 37line 47 didn't jump to line 37 because the pattern on line 47 always matched

48 node_str = clean_node(wxr, None, node)

49 if node_str.startswith("(") and node_str.endswith(")"): 49 ↛ 37line 49 didn't jump to line 37 because the condition on line 49 was always true

50 raw_tags.append(node_str.strip("()"))

51 elif isinstance(node, str): 51 ↛ 37line 51 didn't jump to line 37 because the condition on line 51 was always true

52 for word_str in node.split(","):

53 word_str = word_str.strip()

54 if word_str.startswith("(") and word_str.endswith(")"):

55 raw_tags.append(word_str.strip("()"))

56 elif word_str != "":

57 linkages.append(Linkage(word=word_str, raw_tags=raw_tags))

58 raw_tags.clear()

59 return linkages

def extract_proverb_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Linkage]:
    """Parse one list item of a proverbs section.

    The cleaned text of an italic child becomes the word.  At the first
    string child containing ``":"`` the scan stops: the text after that
    colon, together with all following children, is cleaned and stored as
    the sense.

    Returns a one-element list, or an empty list when no word was found.
    """
    entry = Linkage(word="")
    children = list_item.children
    for position, child in enumerate(children):
        is_italic = isinstance(child, WikiNode) and child.kind == NodeKind.ITALIC
        if is_italic:
            entry.word = clean_node(wxr, None, child)
            continue
        if isinstance(child, str) and ":" in child:
            # Keep only what follows the first colon of this string.
            _, _, after_colon = child.partition(":")
            rest = children[position + 1 :]
            entry.sense = clean_node(wxr, None, [after_colon] + rest)
            break

    if entry.word == "":
        return []
    return [entry]

def extract_form_section(
    wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
) -> None:
    """Collect forms from the links in the lists under ``level_node``.

    Each link found directly inside a list item of a direct child list is
    cleaned; non-empty results become ``Form`` objects.  The forms are
    appended to ``forms`` of every entry in ``page_data`` whose
    ``lang_code`` equals that of the last entry.
    """
    link_texts = (
        clean_node(wxr, None, link)
        for list_node in level_node.find_child(NodeKind.LIST)
        for item in list_node.find_child(NodeKind.LIST_ITEM)
        for link in item.find_child(NodeKind.LINK)
    )
    new_forms = [Form(form=text) for text in link_texts if text != ""]

    if not page_data:
        return
    current_lang = page_data[-1].lang_code
    for entry in page_data:
        if entry.lang_code == current_lang:
            # The same form objects are appended to every matching entry.
            entry.forms.extend(new_forms)