Coverage for src/wiktextract/extractor/es/etymology.py: 97%

24 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1from typing import cast 

2 

3from wikitextprocessor.parser import ( 

4 LEVEL_KIND_FLAGS, 

5 NodeKind, 

6 TemplateNode, 

7 WikiNode, 

8) 

9 

10from ...page import clean_node 

11from ...wxr_context import WiktextractContext 

12from .models import TemplateData, WordEntry 

13 

14 

def process_etymology_block(
    wxr: WiktextractContext,
    entry: WordEntry,
    level_node: WikiNode,
) -> None:
    """Record etymology templates and etymology text for ``entry``.

    Template references:
    https://es.wiktionary.org/wiki/Plantilla:etimología
    https://es.wiktionary.org/wiki/Plantilla:etimología2

    Every template below ``level_node`` whose name contains "etim" is
    appended to ``entry.etymology_templates``, except those that expand to
    one of the known placeholder notices.

    An etymology template without arguments means the word has no
    etymology information yet.  A template whose only argument is "leng"
    (language) likewise carries no information; it merely marks the word as
    belonging to a language other than Spanish.  In both situations
    ``entry.etymology_text`` is left untouched, so it stays missing.
    """
    # Expansions that only ask editors to contribute; templates producing
    # them are skipped entirely.
    placeholder_expansions = (
        # "Please fill in this etymology, thank you..."
        "Si puedes, incorpórala: ver cómo.",
        "Préstamo no adaptado.",
        "Este lema en este idioma es ampliable. "
        "Retira este aviso si la mayor parte de las acepciones ya están incluidas.",  # noqa:E501
    )

    found_info = False

    for node in level_node.find_child_recursively(NodeKind.TEMPLATE):
        # The cast does nothing at runtime; it only tells the type-checker
        # that ``node`` is a TemplateNode and thus has ``template_name``.
        node = cast(TemplateNode, node)
        name = node.template_name
        if "etim" not in name:
            # Only the main etymology templates (and maybe Plantilla:etim)
            # are worth keeping.
            continue

        entry.etymology_templates = entry.etymology_templates or []

        expansion_text = clean_node(wxr, None, node)
        data = TemplateData(name=name, expansion=expansion_text)
        if data.expansion in placeholder_expansions:
            continue

        params = node.template_parameters
        cleaned_args = {
            str(key): (
                value if isinstance(value, str) else clean_node(wxr, None, value)
            )
            for key, value in params.items()
        }
        # Any parameter key other than "leng" counts as real etymology info.
        found_info = found_info or any(key != "leng" for key in params)

        only_language = len(cleaned_args) == 1 and "leng" in cleaned_args
        if cleaned_args and not only_language:
            data.args = cleaned_args

        entry.etymology_templates.append(data)

    if not found_info:
        return

    non_level_children = list(level_node.invert_find_child(LEVEL_KIND_FLAGS))
    entry.etymology_text = clean_node(wxr, None, non_level_children)