Coverage for src/wiktextract/extractor/ko/linkage.py: 85%

59 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1import re 

2 

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Linkage, WordEntry 

8from .section_titles import LINKAGE_SECTIONS 

9 

# Names of the Korean Wiktionary box templates (틀:파생어_상자 and
# 틀:합성어_상자) that this module treats as linkage templates.
LINKAGE_TEMPLATES = frozenset({"파생어 상자", "합성어 상자"})

11 

12 

def extract_linkage_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    node: TemplateNode,
) -> None:
    """Collect the words listed in a derived/compound word box template.

    Positional arguments 1 through 40 are read in order, stopping at the
    first one that is absent.  Every argument whose cleaned text is
    non-empty is appended to ``word_entry.derived`` as a ``Linkage`` whose
    ``sense`` is the last gloss of the most recent sense, or ``""`` when no
    such gloss exists.  Templates whose name is not in
    ``LINKAGE_TEMPLATES`` are ignored.
    """
    # https://ko.wiktionary.org/wiki/틀:파생어_상자
    # https://ko.wiktionary.org/wiki/틀:합성어_상자
    if node.template_name not in LINKAGE_TEMPLATES:
        return

    # Guard both lists: indexing ``glosses[-1]`` on a sense that has no
    # glosses would raise IndexError.
    # NOTE(review): assumes a sense may be stored without glosses - confirm
    # against the Sense model.
    sense = ""
    if len(word_entry.senses) > 0 and len(word_entry.senses[-1].glosses) > 0:
        sense = word_entry.senses[-1].glosses[-1]

    for key in range(1, 41):
        if key not in node.template_parameters:
            break
        word = clean_node(wxr, None, node.template_parameters[key])
        if word != "":
            word_entry.derived.append(Linkage(word=word, sense=sense))

34 

35 

def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    linkage_type: str,
) -> None:
    """Extract linkage data from one section node into ``word_entry``.

    A ``"proverbs"`` section is handed to ``extract_proverb_section``.  For
    any other ``linkage_type`` every list item found recursively under
    ``level_node`` goes through ``extract_linkage_list_item``, and every
    direct child template goes through ``extract_linkage_template``; a
    template named "외국어" is additionally passed to
    ``extract_translation_template``.
    """
    if linkage_type == "proverbs":
        extract_proverb_section(wxr, word_entry, level_node)
        return

    # Function-local import, only needed on the non-proverb path.
    # NOTE(review): presumably avoids a circular import with .translation -
    # confirm before moving it to module level.
    from .translation import extract_translation_template

    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        extract_linkage_list_item(wxr, word_entry, item, linkage_type)

    for template in level_node.find_child(NodeKind.TEMPLATE):
        extract_linkage_template(wxr, word_entry, template)
        if template.template_name == "외국어":
            extract_translation_template(wxr, word_entry, template)

54 

55 

def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    linkage_type: str,
) -> None:
    """Turn the links of one list item into ``Linkage`` entries.

    String children are scanned first:

    * text containing ":" may override ``linkage_type`` when the part
      before the colon is a key of ``LINKAGE_SECTIONS``;
    * otherwise a parenthesised fragment is remembered as a tag, treated as
      a romanization when it contains a lowercase ASCII letter.

    Each direct link child whose cleaned text is non-empty is then appended
    to the ``word_entry`` attribute named by the (possibly overridden)
    ``linkage_type``.  ``sense`` is the last gloss of the most recent
    sense, or ``""`` when no such gloss exists.
    """
    raw_tag = ""
    is_roman = False
    for child in list_item.children:
        if not isinstance(child, str):
            continue
        if ":" in child:
            l_type_str = child.partition(":")[0].strip()
            if l_type_str in LINKAGE_SECTIONS:
                linkage_type = LINKAGE_SECTIONS[l_type_str]
        else:
            m = re.search(r"\(([^()]+)\)", child)
            if m is not None:
                raw_tag = m.group(1).strip()
                is_roman = re.search(r"[a-z]", raw_tag) is not None

    # Guard both lists: indexing ``glosses[-1]`` on a sense that has no
    # glosses would raise IndexError.
    # NOTE(review): assumes a sense may be stored without glosses - confirm
    # against the Sense model.
    sense = ""
    if len(word_entry.senses) > 0 and len(word_entry.senses[-1].glosses) > 0:
        sense = word_entry.senses[-1].glosses[-1]

    for link_node in list_item.find_child(NodeKind.LINK):
        word = clean_node(wxr, None, link_node)
        if word == "":
            continue
        linkage = Linkage(word=word, sense=sense)
        if len(raw_tag) > 0:
            # The same tag is applied to every link in this list item.
            if is_roman:
                linkage.roman = raw_tag
            else:
                linkage.raw_tags.append(raw_tag)
        getattr(word_entry, linkage_type).append(linkage)

91 

92 

def extract_proverb_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Append one ``Linkage`` per proverb list item to ``word_entry.proverbs``.

    Each list item is split at the first ":" found in a string child.
    Everything before the colon (earlier children plus the text preceding
    it) becomes ``word``; everything after it (the remaining text plus
    later children) becomes ``sense``.  Items that yield an empty ``word``,
    including those with no such colon, are skipped.
    """
    for list_item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        proverb = Linkage(word="")
        children = list_item.children
        for position, part in enumerate(children):
            if not (isinstance(part, str) and ":" in part):
                continue
            before, _, after = part.partition(":")
            proverb.word = clean_node(wxr, None, children[:position])
            proverb.word += before.strip()
            proverb.sense = after.strip()
            proverb.sense += clean_node(wxr, None, children[position + 1 :])
            # Only the first colon-bearing string splits the item.
            break
        if proverb.word != "":
            word_entry.proverbs.append(proverb)