Coverage for src/wiktextract/extractor/pt/pronunciation.py: 94%

40 statements  

« prev     ^ index     » next       coverage.py v7.9.0, created at 2025-06-13 07:43 +0000

1from wikitextprocessor.parser import ( 

2 LEVEL_KIND_FLAGS, 

3 LevelNode, 

4 NodeKind, 

5 WikiNode, 

6) 

7 

8from ...page import clean_node 

9from ...wxr_context import WiktextractContext 

10from .models import Sound, WordEntry 

11from .tags import translate_raw_tags 

12 

13 

def extract_pronunciation_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
) -> None:
    """Collect the sounds of a pronunciation section into ``page_data``.

    The cleaned section title becomes a raw tag unless it is empty or the
    generic heading "Pronúncia".  Every list item found directly under
    ``level_node`` is parsed with ``extract_pronunciation_list_item``.
    Child sections are processed by calling this function again; each call
    starts with an empty tag list, so a sub-section does not inherit the
    title tag of its parent section.

    The sounds gathered at this level are appended to every entry of
    ``page_data`` whose ``lang_code`` equals the ``lang_code`` of the last
    entry.  The same ``Sound`` objects are added to each matching entry.

    Args:
        wxr: Context object forwarded to ``clean_node`` and the list-item
            parser.
        page_data: Word entries collected so far; mutated in place.
        level_node: The section node to read.

    Returns:
        None.  Does nothing when ``page_data`` is empty.
    """
    if not page_data:
        # No entry can receive sounds, and ``page_data[-1]`` would fail.
        return

    # The language of the most recent entry is invariant for this call:
    # nothing below adds or removes entries from ``page_data``.
    lang_code = page_data[-1].lang_code

    raw_tags = []
    title_text = clean_node(wxr, None, level_node.largs)
    if title_text not in ("", "Pronúncia"):
        raw_tags.append(title_text)

    sounds = []
    for list_node in level_node.find_child(NodeKind.LIST):
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            sounds.extend(
                extract_pronunciation_list_item(
                    wxr, list_item, lang_code, raw_tags
                )
            )

    # Sub-sections attach their own sounds to ``page_data`` themselves.
    for child_level_node in level_node.find_child(LEVEL_KIND_FLAGS):
        extract_pronunciation_section(wxr, page_data, child_level_node)

    for data in page_data:
        if data.lang_code == lang_code:
            data.sounds.extend(sounds)

40 

41 

def extract_pronunciation_list_item(
    wxr: WiktextractContext,
    list_item: WikiNode,
    lang_code: str,
    parent_raw_tags: list[str],
) -> list[Sound]:
    """Parse one pronunciation list item, including nested lists.

    Children of ``list_item`` are visited in order:

    * A string child containing ":" splits the item.  Everything that
      precedes that child is cleaned into a raw tag (kept only when
      non-empty).  The text after the first ":" plus all following siblings
      that are not LIST nodes is cleaned into the pronunciation value.  A
      non-empty value yields a ``Sound`` stored in ``zh_pron`` when
      ``lang_code`` is "zh" and in ``ipa`` otherwise.
    * A LIST child is parsed recursively, item by item, receiving the tags
      accumulated up to that point.

    ``parent_raw_tags`` is copied and never modified.

    Returns:
        The ``Sound`` objects found in this item and its nested lists, in
        document order.
    """
    found = []
    tags = list(parent_raw_tags)
    children = list_item.children

    for pos, child in enumerate(children):
        if isinstance(child, str):
            if ":" not in child:
                continue

            # NOTE(review): only the nodes *before* this string form the
            # tag; text in front of the colon inside this same string is
            # not used.  Confirm that this is intended.
            label = clean_node(wxr, None, children[:pos])
            if label != "":
                tags.append(label)

            after_colon = child.partition(":")[2]
            # Nested lists are handled by the recursive branch below, so
            # they are kept out of the value text.
            trailing = [
                sibling
                for sibling in children[pos + 1 :]
                if not isinstance(sibling, WikiNode)
                or sibling.kind != NodeKind.LIST
            ]
            value = clean_node(wxr, None, [after_colon, *trailing])
            if value == "":
                continue

            entry = Sound(raw_tags=tags)
            if lang_code == "zh":
                entry.zh_pron = value
            else:
                entry.ipa = value
            translate_raw_tags(entry)
            found.append(entry)
        elif isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for sub_item in child.find_child(NodeKind.LIST_ITEM):
                nested = extract_pronunciation_list_item(
                    wxr, sub_item, lang_code, tags
                )
                found.extend(nested)

    return found