Coverage for src/wiktextract/extractor/ku/example.py: 86%

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from ..share import calculate_bold_offsets

6from .linkage import LINKAGE_TEMPLATES, extract_nyms_template

7from .models import Example, Sense, WordEntry

def extract_example_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sense: Sense,
    list_item: WikiNode,
) -> None:
    """Collect example data from the children of one list item.

    Each direct child of ``list_item`` is handled by kind:

    * ``jêder`` / ``jêder-*`` templates go to ``extract_jêder_template``.
    * ``mk``, ``mînak``, ``ux``, ``nimûne`` and ``nim`` templates go to
      ``extract_nimûne_template``.
    * ``deng`` / ``audio`` templates are passed to ``extract_deng_template``
      together with the most recently appended example, but only when
      ``sense`` already has at least one example; that example's
      categories are then appended to ``sense.categories``.
    * Templates named in ``LINKAGE_TEMPLATES`` go to
      ``extract_nyms_template`` together with ``word_entry``.
    * Nested lists are processed recursively, item by item.
    * Italic nodes become a new ``Example`` when their cleaned text is
      not empty.

    Any other child is ignored.
    """
    for child in list_item.children:
        if isinstance(child, TemplateNode):
            name = child.template_name
            if name == "jêder" or name.startswith("jêder-"):
                extract_jêder_template(wxr, sense, child)
            elif name in ("mk", "mînak", "ux", "nimûne", "nim"):
                extract_nimûne_template(wxr, sense, child)
            elif name in ("deng", "audio") and len(sense.examples) > 0:
                # NOTE(review): imported here rather than at module level,
                # presumably to avoid a circular import - confirm.
                from .sound import extract_deng_template

                latest_example = sense.examples[-1]
                extract_deng_template(wxr, latest_example, child)
                sense.categories.extend(latest_example.categories)
            elif name in LINKAGE_TEMPLATES:
                # Also reached by "deng"/"audio" when there is no example
                # yet and the name happens to be a linkage template.
                extract_nyms_template(wxr, word_entry, child)
            continue

        if not isinstance(child, WikiNode):
            continue

        if child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                extract_example_list_item(
                    wxr, word_entry, sense, nested_item
                )
        elif child.kind == NodeKind.ITALIC:
            example = Example(text=clean_node(wxr, None, child))
            calculate_bold_offsets(
                wxr, child, example.text, example, "bold_text_offsets"
            )
            if example.text != "":
                sense.examples.append(example)

def extract_jêder_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Build an ``Example`` from a ``jêder`` (quotation) template.

    The example text, romanization and translation are read from the
    template arguments ``jêgirtin``, ``tr`` and ``werger``; the reference
    is read from ``<span class="jeder">`` elements of the expanded
    template.  The example is appended to ``sense.examples`` only when
    its text is not empty.

    See https://ku.wiktionary.org/wiki/Şablon:jêder
    """
    expanded = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    params = t_node.template_parameters
    raw_text = params.get("jêgirtin", "")
    raw_roman = params.get("tr", "")
    raw_translation = params.get("werger", "")

    example = Example(
        text=clean_node(wxr, None, raw_text),
        roman=clean_node(wxr, None, raw_roman),
        translation=clean_node(wxr, None, raw_translation),
    )

    # Each raw argument is re-parsed so the bold offsets are computed from
    # a node tree; the cleaned string is read from the example at call
    # time, in the same order as the fields were filled.
    offset_jobs = (
        (raw_text, "text", "bold_text_offsets"),
        (raw_roman, "roman", "bold_roman_offsets"),
        (raw_translation, "translation", "bold_translation_offsets"),
    )
    for raw_value, text_field, offsets_field in offset_jobs:
        calculate_bold_offsets(
            wxr,
            wxr.wtp.parse(wxr.wtp.node_to_wikitext(raw_value)),
            getattr(example, text_field),
            example,
            offsets_field,
        )

    # If several matching spans exist, the last one wins.
    for ref_span in expanded.find_html(
        "span", attr_name="class", attr_value="jeder"
    ):
        example.ref = clean_node(wxr, None, ref_span).strip("— ()")

    if example.text != "":
        sense.examples.append(example)

    # Runs whether or not an example was stored.
    # NOTE(review): presumably records categories produced by the
    # expanded template on the sense - confirm against clean_node.
    clean_node(wxr, sense, expanded)

def extract_nimûne_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Build an ``Example`` from a ``nimûne`` (usage example) template.

    The template is expanded and its HTML is scanned twice: ``<i>`` tags
    whose class contains ``e-example`` or ``e-transliteration`` supply
    the text and romanization, then ``<span>`` tags whose class contains
    ``e-translation`` supply the translation.  The example is appended to
    ``sense.examples`` only when its text is not empty.

    See https://ku.wiktionary.org/wiki/Şablon:nimûne
    """
    expanded = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(t_node), expand_all=True
    )
    example = Example(text="")

    for italic_tag in expanded.find_html_recursively("i"):
        css_class = italic_tag.attrs.get("class", "")
        # "e-example" is checked first, so it takes precedence when both
        # markers appear in the same class attribute.
        if "e-example" in css_class:
            text_field, offsets_field = "text", "bold_text_offsets"
        elif "e-transliteration" in css_class:
            text_field, offsets_field = "roman", "bold_roman_offsets"
        else:
            continue
        setattr(example, text_field, clean_node(wxr, None, italic_tag))
        calculate_bold_offsets(
            wxr,
            italic_tag,
            getattr(example, text_field),
            example,
            offsets_field,
        )

    for span_tag in expanded.find_html_recursively("span"):
        if "e-translation" not in span_tag.attrs.get("class", ""):
            continue
        example.translation = clean_node(wxr, None, span_tag)
        calculate_bold_offsets(
            wxr,
            span_tag,
            example.translation,
            example,
            "bold_translation_offsets",
        )

    if example.text != "":
        sense.examples.append(example)

    # Runs whether or not an example was stored.
    # NOTE(review): presumably records categories produced by the
    # expanded template on the sense - confirm against clean_node.
    clean_node(wxr, sense, expanded)

127

128

def extract_example_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Attach the examples of an example section to the first sense.

    Every list item of every list directly under ``level_node`` is passed
    to ``extract_example_list_item`` with ``word_entry.senses[0]`` as the
    target sense.  Nothing happens when ``word_entry`` has no senses.
    """
    if len(word_entry.senses) == 0:
        return

    for example_list in level_node.find_child(NodeKind.LIST):
        for item in example_list.find_child(NodeKind.LIST_ITEM):
            # The first sense is looked up again for every item.
            extract_example_list_item(
                wxr, word_entry, word_entry.senses[0], item
            )

139 )