Coverage for src/wiktextract/extractor/pt/etymology.py: 89%

39 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from wikitextprocessor.parser import ( 

2 LEVEL_KIND_FLAGS, 

3 LevelNode, 

4 NodeKind, 

5 TemplateNode, 

6 WikiNode, 

7) 

8 

9from ...page import clean_node 

10from ...wxr_context import WiktextractContext 

11from .models import Attestation, WordEntry 

12 

13 

def extract_etymology_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
) -> None:
    """Collect etymology texts and attestations from an etymology section.

    Walks the direct children of ``level_node`` and stops at the first
    child whose kind is in ``LEVEL_KIND_FLAGS``.  Consecutive non-list
    children are cleaned together into a single text (leading ``:`` and
    spaces stripped); every list item becomes a text of its own.
    ``datação`` templates are converted to attestations instead of being
    added to the text.

    The collected texts, categories and attestations are appended to every
    entry of ``page_data`` whose ``lang_code`` equals the ``lang_code`` of
    the last entry (presumably the language currently being parsed --
    confirm against the caller).

    Args:
        wxr: Context object passed through to ``clean_node``.
        page_data: Word entries gathered so far; matching entries are
            updated in place.
        level_node: The section node whose children are scanned.
    """
    # Filled by ``clean_node``; only the "categories" key is read here.
    cats: dict[str, list[str]] = {}
    e_nodes = []  # children waiting to be cleaned together as one text
    e_texts = []
    attestations = []
    for node in level_node.children:
        if isinstance(node, WikiNode) and node.kind in LEVEL_KIND_FLAGS:
            # A nested level node ends this section's own content.
            break
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            # Emit whatever preceded the list as its own text first, so
            # the order of texts follows the order in the section.
            e_text = clean_node(wxr, cats, e_nodes).lstrip(": ")
            if e_text != "":
                e_texts.append(e_text)
            e_nodes.clear()
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                e_text = clean_node(wxr, cats, list_item.children)
                if e_text != "":
                    e_texts.append(e_text)
        elif isinstance(node, TemplateNode) and node.template_name == "datação":
            # Accumulate instead of rebinding: a second template must not
            # discard the attestations found by an earlier one.
            attestations.extend(extract_defdate_template(wxr, cats, node))
        else:
            e_nodes.append(node)

    # Trailing children after the last list (or the whole section when it
    # contains no list at all).
    if e_nodes:
        e_text = clean_node(wxr, cats, e_nodes).lstrip(": ")
        if e_text != "":
            e_texts.append(e_text)

    if not page_data:
        return
    lang_code = page_data[-1].lang_code
    categories = cats.get("categories", [])
    for data in page_data:
        if data.lang_code == lang_code:
            data.etymology_texts.extend(e_texts)
            data.categories.extend(categories)
            data.attestations.extend(attestations)

49 

50 

def extract_defdate_template(
    wxr: WiktextractContext, cats: dict[str, list[str]], t_node: TemplateNode
) -> list[Attestation]:
    """Turn a ``datação`` template into a list of attestations.

    The template is cleaned with ``clean_node``; a leading ``(Datação:``
    and a trailing ``)`` are removed from the result and the remainder is
    stripped of surrounding whitespace.

    Args:
        wxr: Context object passed through to ``clean_node``.
        cats: Dictionary handed to ``clean_node`` together with the node.
        t_node: The template node to convert.

    Returns:
        A one-element list holding an ``Attestation`` with the extracted
        date, or an empty list when nothing is left after stripping.
    """
    expanded = clean_node(wxr, cats, t_node)
    without_label = expanded.removeprefix("(Datação:")
    date_text = without_label.removesuffix(")").strip()
    if date_text == "":
        return []
    return [Attestation(date=date_text)]