Coverage for src/wiktextract/extractor/ko/translation.py: 93%

52 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1import re 

2 

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Translation, WordEntry 

8from .tags import translate_raw_tags 

9 

10 

def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Collect translations from the "외국어" templates of a section.

    Iterates over the template nodes returned by
    ``level_node.find_child(NodeKind.TEMPLATE)`` and passes each one named
    "외국어" to ``extract_translation_template``; templates with any other
    name are skipped. Results are stored on ``word_entry`` by the callee.
    """
    for template in level_node.find_child(NodeKind.TEMPLATE):
        if template.template_name != "외국어":
            continue
        extract_translation_template(wxr, word_entry, template)

19 

20 

def extract_translation_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense: str = "",
) -> None:
    """Extract translations from a single "외국어" template.

    Template documentation: https://ko.wiktionary.org/wiki/틀:외국어

    The cleaned text of the "덧" parameter, when it is not empty, replaces
    the ``sense`` supplied by the caller. Positional arguments 1 and 2 are
    then each converted back to wikitext and re-parsed, and every list item
    found in the parsed result is handed to
    ``extract_translation_list_item`` together with the effective sense.
    """
    params = t_node.template_parameters
    sense_override = clean_node(wxr, None, params.get("덧", ""))
    sense = sense_override if sense_override != "" else sense

    for position in (1, 2):
        # A missing argument falls back to an empty list, so the same
        # wikitext round trip is performed whether or not it is present.
        wikitext = wxr.wtp.node_to_wikitext(params.get(position, []))
        parsed = wxr.wtp.parse(wikitext)
        for item in parsed.find_child_recursively(NodeKind.LIST_ITEM):
            extract_translation_list_item(wxr, word_entry, item, sense)

36 

37 

def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
) -> None:
    """Extract the translations found in one list item.

    The children of ``list_item`` are handled in order:

    * While no language has been identified, each string child is searched
      for ``(code):``. On a match the code becomes the language code and the
      stripped text before the match becomes the language name.
    * Each LINK node whose cleaned text is not empty is appended to
      ``word_entry.translations`` with the current language and ``sense``.
    * Any later string child containing both "(" and ")" is scanned for
      parenthesized groups. A group is applied to the most recently added
      translation: as ``roman`` if it contains an ASCII lowercase letter,
      otherwise split on "/" into ``raw_tags`` (skipping "" and "-"),
      after which ``translate_raw_tags`` is called on that translation.

    Both language fields stay "unknown" when no ``(code):`` marker is found.
    """
    lang_name = lang_code = "unknown"
    for child in list_item.children:
        is_text = isinstance(child, str)

        if is_text and lang_name == "unknown":
            lang_match = re.search(r"\(([\w-]+)\):", child)
            if lang_match is not None:
                lang_code = lang_match.group(1)
                lang_name = child[: lang_match.start()].strip()
            continue

        if isinstance(child, WikiNode) and child.kind == NodeKind.LINK:
            word = clean_node(wxr, None, child)
            if word != "":
                word_entry.translations.append(
                    Translation(
                        lang=lang_name,
                        lang_code=lang_code,
                        word=word,
                        sense=sense,
                    )
                )
            continue

        if not (is_text and "(" in child and ")" in child):
            continue

        depth = 0
        buffer = ""
        for char in child:
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
                group = buffer.strip()
                if (
                    depth != 0
                    or group == ""
                    or len(word_entry.translations) == 0
                ):
                    continue
                last = word_entry.translations[-1]
                if re.search(r"[a-z]", group):
                    last.roman = group
                else:
                    last.raw_tags.extend(
                        tag
                        for tag in map(str.strip, group.split("/"))
                        if tag not in ["", "-"]
                    )
                    # NOTE(review): this call is assumed to belong to the
                    # raw-tag branch only; confirm against the original
                    # indentation.
                    translate_raw_tags(last)
                # NOTE(review): the buffer reset is assumed to happen only
                # after a group has been applied, so text collected inside
                # nested parentheses is kept until the outermost ")" closes.
                # Confirm against the original indentation.
                buffer = ""
            elif depth > 0:
                buffer += char