Coverage for src/wiktextract/extractor/ja/pos.py: 82%
99 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-06-13 07:43 +0000
1from wikitextprocessor.parser import (
2 LEVEL_KIND_FLAGS,
3 LevelNode,
4 NodeKind,
5 TemplateNode,
6 WikiNode,
7)
9from ...page import clean_node
10from ...wxr_context import WiktextractContext
11from ..ruby import extract_ruby
12from .example import extract_example_list_item
13from .header import extract_header_nodes
14from .models import AltForm, Sense, WordEntry
15from .section_titles import POS_DATA
16from .tags import translate_raw_tags
def parse_pos_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
    pos_title: str,
) -> None:
    """Parse one part-of-speech section and append a new entry to page_data.

    A deep copy of ``base_data`` is appended, filled with the POS name/tags
    from ``POS_DATA``, glosses from the section's "#" lists, header forms,
    and conjugation data.  If the section yields neither glosses nor new
    conjugation forms, the entry is removed again.
    """
    # Local import to avoid a circular import with the conjugation module.
    from .conjugation import extract_conjugation_section

    page_data.append(base_data.model_copy(deep=True))
    page_data[-1].pos_title = pos_title
    pos_data = POS_DATA[pos_title]
    page_data[-1].pos = pos_data["pos"]
    page_data[-1].tags.extend(pos_data.get("tags", []))
    gloss_list_start = 0
    for list_index, list_node in level_node.find_child(NodeKind.LIST, True):
        if not list_node.sarg.endswith("#"):  # linkage list
            continue
        # Remember where the first gloss list starts; everything before it
        # belongs to the headword line.
        if gloss_list_start == 0:
            gloss_list_start = list_index
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            process_gloss_list_item(wxr, page_data[-1], list_item)
    extract_header_nodes(
        wxr, page_data[-1], level_node.children[:gloss_list_start]
    )
    old_forms_len = len(page_data[-1].forms)
    extract_conjugation_section(wxr, page_data[-1], level_node)
    # Drop the entry when no gloss list was found and the conjugation
    # section contributed no new forms either.
    if gloss_list_start == 0 and len(page_data[-1].forms) == old_forms_len:
        page_data.pop()
def process_gloss_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item_node: WikiNode,
    parent_gloss: str = "",
) -> None:
    """Extract one gloss list item into a new ``Sense`` on ``word_entry``.

    Tag templates ("context", "タグ", "lb") contribute raw tags instead of
    gloss text; nested "#" lists recurse with this item's gloss text as the
    parent gloss, and nested "*"/":" lists are parsed as examples.
    """
    gloss_nodes = list(
        list_item_node.invert_find_child(NodeKind.LIST, include_empty_str=True)
    )
    sense_data = Sense()
    find_form_of_data(wxr, word_entry, sense_data, list_item_node)
    if len(parent_gloss) > 0:
        sense_data.glosses.append(parent_gloss)
    gloss_only_nodes = []
    for gloss_node in gloss_nodes:
        if isinstance(gloss_node, TemplateNode):
            if gloss_node.template_name in ("context", "タグ"):
                # https://ja.wiktionary.org/wiki/テンプレート:context
                # https://ja.wiktionary.org/wiki/テンプレート:タグ
                for raw_tag in (
                    clean_node(wxr, sense_data, gloss_node)
                    .strip("()")
                    .split(",")
                ):
                    raw_tag = raw_tag.strip()
                    if len(raw_tag) > 0:
                        sense_data.raw_tags.append(raw_tag)
            elif gloss_node.template_name == "wikipedia-s":
                # Keep the link text but drop the superscript "(wp)" marker
                # the template appends.
                expanded_text = clean_node(wxr, None, gloss_node)
                gloss_only_nodes.append(
                    expanded_text.removesuffix("⁽ʷᵖ⁾").strip()
                )
            elif gloss_node.template_name == "wp":
                continue  # bare Wikipedia box; contributes no gloss text
            elif gloss_node.template_name == "lb":
                extract_lb_template(wxr, sense_data, gloss_node)
            else:
                gloss_only_nodes.append(gloss_node)
        else:
            gloss_only_nodes.append(gloss_node)
    # Re-parse with expansion so remaining templates become plain wikitext,
    # then split ruby annotations from the gloss text.
    expanded_gloss = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(gloss_only_nodes), expand_all=True
    )
    ruby, no_ruby = extract_ruby(wxr, expanded_gloss.children)
    gloss_text = clean_node(wxr, sense_data, no_ruby)
    sense_data.ruby = ruby
    if len(gloss_text) > 0:
        sense_data.glosses.append(gloss_text)
        translate_raw_tags(sense_data)
        word_entry.senses.append(sense_data)

    for nest_gloss_list in list_item_node.find_child(NodeKind.LIST):
        if nest_gloss_list.sarg.endswith(("*", ":")):
            # Example sentences attached to this sense.
            for example_list_item in nest_gloss_list.find_child(
                NodeKind.LIST_ITEM
            ):
                extract_example_list_item(
                    wxr, word_entry, sense_data, example_list_item
                )
        elif nest_gloss_list.sarg.endswith("#"):
            # Sub-senses: recurse with this gloss as the parent gloss.
            for nest_list_item in nest_gloss_list.find_child(
                NodeKind.LIST_ITEM
            ):
                process_gloss_list_item(
                    wxr, word_entry, nest_list_item, gloss_text
                )
def find_form_of_data(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sense: Sense,
    list_item_node: WikiNode,
) -> None:
    """Find "form of" links in a gloss list item and record them on ``sense``.

    "... of" templates are expanded and the first non-empty link they
    produce is taken as the base form.  As a fallback, when the entry is
    already tagged "form-of" and nothing was found, the first plain link in
    the list item is used.
    """
    for node in list_item_node.find_child(NodeKind.TEMPLATE):
        if node.template_name.endswith(" of"):
            expanded_node = wxr.wtp.parse(
                wxr.wtp.node_to_wikitext(node), expand_all=True
            )
            for link_node in expanded_node.find_child_recursively(
                NodeKind.LINK
            ):
                form_of = clean_node(wxr, None, link_node)
                if form_of != "":
                    sense.form_of.append(AltForm(word=form_of))
                    break  # only the first link per template
    if "form-of" in word_entry.tags and len(sense.form_of) == 0:
        for link_node in list_item_node.find_child(NodeKind.LINK):
            form_of = clean_node(wxr, None, link_node)
            if form_of != "":
                sense.form_of.append(AltForm(word=form_of))
                break
def extract_note_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Collect usage notes from a note section into ``word_entry.notes``.

    Each list item becomes its own note; when the section has no list at
    all, everything except child subsections is cleaned into a single note.
    """
    found_list_item = False
    for item_node in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        found_list_item = True
        note_text = clean_node(wxr, word_entry, item_node.children)
        if note_text != "":
            word_entry.notes.append(note_text)
    if found_list_item:
        return
    # No list items: fall back to the section body minus subsections.
    body_nodes = list(level_node.invert_find_child(LEVEL_KIND_FLAGS))
    note_text = clean_node(wxr, word_entry, body_nodes)
    if note_text != "":
        word_entry.notes.append(note_text)
def extract_lb_template(
    wxr: WiktextractContext, sense: Sense, t_node: TemplateNode
) -> None:
    """Parse an "lb" (label) template into raw tags on ``sense``.

    The expanded template renders as a parenthesized, comma-separated
    label list; each non-empty part is appended to ``sense.raw_tags``.
    """
    text = clean_node(wxr, sense, t_node).strip("() ")
    for raw_tag in text.split(","):
        raw_tag = raw_tag.strip()
        if raw_tag != "":
            sense.raw_tags.append(raw_tag)