Coverage for src/wiktextract/extractor/pt/pronunciation.py: 97%

38 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from wikitextprocessor.parser import ( 

2 LEVEL_KIND_FLAGS, 

3 LevelNode, 

4 NodeKind, 

5 WikiNode, 

6) 

7 

8from ...page import clean_node 

9from ...wxr_context import WiktextractContext 

10from .models import Sound, WordEntry 

11from .tags import translate_raw_tags 

12 

13 

def extract_pronunciation_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
) -> None:
    """Gather the sounds of a pronunciation section and attach them to entries.

    The cleaned section title is used as a raw tag for the sounds found in
    this section's lists, unless it is empty or the generic "Pronúncia".
    Child level nodes are handled by recursive calls, each of which derives
    its own tag from its own title. The sounds found directly in this section
    are finally appended to every entry of ``page_data`` whose ``lang_code``
    equals that of the last entry.
    """
    heading = clean_node(wxr, None, level_node.largs)
    section_tags = [] if heading in ("", "Pronúncia") else [heading]

    found = []
    for bullet_list in level_node.find_child(NodeKind.LIST):
        for item in bullet_list.find_child(NodeKind.LIST_ITEM):
            # The same tag list object is handed to every list item.
            found += extract_pronunciation_list_item(wxr, item, section_tags)

    # Subsections attach their own sounds before this section's are added.
    for subsection in level_node.find_child(LEVEL_KIND_FLAGS):
        extract_pronunciation_section(wxr, page_data, subsection)

    for entry in page_data:
        if entry.lang_code != page_data[-1].lang_code:
            continue
        for sound in found:
            translate_raw_tags(sound)
            entry.sounds.append(sound)

39 

40 

def extract_pronunciation_list_item(
    wxr: WiktextractContext, list_item: WikiNode, raw_tags: list[str]
) -> list[Sound]:
    """Extract sounds from one list item and from any lists nested in it.

    A string child containing ":" splits the item in two: the cleaned text of
    the children before that string is the label, and the text after the
    colon plus the remaining non-list children is the value.

    * Non-empty value: a ``Sound`` is created with the value as ``ipa`` and
      the tags currently in effect as ``raw_tags``; the label "X-SAMPA"
      additionally adds the "X-SAMPA" tag.
    * Empty value, non-empty label: the label becomes a raw tag for whatever
      follows in this item, including its nested lists.

    Args:
        wxr: Context passed through to ``clean_node``.
        list_item: The list item node to process.
        raw_tags: Raw tags inherited from the enclosing section or list item.
            This list is not modified.

    Returns:
        The sounds found in this item and its nested lists, in document
        order.
    """
    # Work on a private copy: a label found in this item applies to this item
    # and its nested lists only. Appending to the caller's list would make
    # the label leak into every following sibling list item.
    raw_tags = list(raw_tags)
    sounds = []
    for index, node in enumerate(list_item.children):
        if isinstance(node, str) and ":" in node:
            raw_tag = clean_node(wxr, None, list_item.children[:index])
            # Nested lists are excluded from the value; they are handled by
            # the recursive branch below.
            trailing_nodes = [
                n
                for n in list_item.children[index + 1 :]
                if not (isinstance(n, WikiNode) and n.kind == NodeKind.LIST)
            ]
            sound_value = clean_node(
                wxr, None, [node.partition(":")[2], *trailing_nodes]
            )
            if sound_value != "":
                # Pass a snapshot so tags appended later in this item can
                # never alter a Sound that has already been built.
                sound = Sound(ipa=sound_value, raw_tags=list(raw_tags))
                if raw_tag == "X-SAMPA":
                    sound.tags.append("X-SAMPA")
                sounds.append(sound)
            elif raw_tag != "":
                raw_tags.append(raw_tag)
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for child_list_item in node.find_child(NodeKind.LIST_ITEM):
                sounds.extend(
                    extract_pronunciation_list_item(
                        wxr, child_list_item, raw_tags
                    )
                )

    return sounds