Coverage for src/wiktextract/extractor/ko/translation.py: 94%

47 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1import re 

2 

3from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

4 

5from ...page import clean_node 

6from ...wxr_context import WiktextractContext 

7from .models import Translation, WordEntry 

8 

9 

def extract_translation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Process the translation templates found in a section node.

    Every template yielded by ``level_node.find_child(NodeKind.TEMPLATE)``
    whose name is ``외국어`` is handed to ``extract_translation_template``;
    templates with any other name are ignored.
    """
    for template in level_node.find_child(NodeKind.TEMPLATE):
        if template.template_name != "외국어":
            continue
        extract_translation_template(wxr, word_entry, template)

18 

19 

def extract_translation_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    sense: str = "",
) -> None:
    """Extract translations from one ``외국어`` template node.

    The template's ``덧`` parameter is cleaned with ``clean_node``; when the
    result is not empty it replaces the ``sense`` passed by the caller.
    Positional parameters 1 and 2 are each converted back to wikitext,
    parsed again, and every list item found recursively in the parse result
    is passed to ``extract_translation_list_item`` together with the sense.
    """
    # https://ko.wiktionary.org/wiki/틀:외국어
    params = t_node.template_parameters
    template_sense = clean_node(wxr, None, params.get("덧", ""))
    if template_sense != "":
        sense = template_sense
    for position in (1, 2):
        # A missing positional parameter falls back to an empty list.
        wikitext = wxr.wtp.node_to_wikitext(params.get(position, []))
        parsed_root = wxr.wtp.parse(wikitext)
        for item in parsed_root.find_child_recursively(NodeKind.LIST_ITEM):
            extract_translation_list_item(wxr, word_entry, item, sense)

35 

36 

def extract_translation_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    sense: str,
) -> None:
    """Turn one translation list item into ``Translation`` entries.

    The children of ``list_item`` are visited in order:

    * While no language has been recognised, string children are searched
      for a ``(code):`` marker. The captured code becomes ``lang_code`` and
      the stripped text before the marker becomes ``lang``.
    * Each link child whose cleaned text is not empty is appended to
      ``word_entry.translations`` with the current language and ``sense``.
    * Once a language is known, string children containing both ``(`` and
      ``)`` are scanned for parenthesised groups. Each non-empty group is
      attached to the most recently appended translation: as ``roman`` when
      it contains an ASCII lowercase letter, otherwise as a raw tag.
    """
    lang_name = lang_code = "unknown"
    for child in list_item.children:
        if isinstance(child, str):
            if lang_name == "unknown":
                code_match = re.search(r"\((\w+)\):", child)
                if code_match is None:
                    continue
                lang_name = child[: code_match.start()].strip()
                lang_code = code_match.group(1)
            elif "(" in child and ")" in child:
                depth = 0  # current parenthesis nesting level
                buffer = ""  # characters collected inside parentheses
                for char in child:
                    if char == "(":
                        depth += 1
                    elif char == ")":
                        depth -= 1
                        note = buffer.strip()
                        if (
                            depth != 0
                            or note == ""
                            or len(word_entry.translations) == 0
                        ):
                            # Group not finished or nothing to attach;
                            # keep the buffer untouched.
                            continue
                        target = word_entry.translations[-1]
                        if re.search(r"[a-z]", note) is None:
                            target.raw_tags.append(note)
                        else:
                            target.roman = note
                        buffer = ""
                    elif depth > 0:
                        buffer += char
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LINK:
            word = clean_node(wxr, None, child)
            if word == "":
                continue
            translation = Translation(
                lang=lang_name,
                lang_code=lang_code,
                word=word,
                sense=sense,
            )
            word_entry.translations.append(translation)