Coverage for src/wiktextract/extractor/pt/pronunciation.py: 94%

46 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 06:55 +0000

1import re 

2 

3from wikitextprocessor.parser import ( 

4 LEVEL_KIND_FLAGS, 

5 LevelNode, 

6 NodeKind, 

7 WikiNode, 

8) 

9 

10from ...page import clean_node 

11from ...wxr_context import WiktextractContext 

12from .models import Sound, WordEntry 

13from .tags import translate_raw_tags 

14 

15 

def extract_pronunciation_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
) -> None:
    """Gather the pronunciations of a section and store them on the entries.

    Every list item found directly under ``level_node`` is parsed with
    ``extract_pronunciation_list_item``.  Child sections are handled by
    calling this function recursively.  The sounds gathered from this
    section's own lists are then appended to each entry of ``page_data``
    whose ``lang_code`` equals that of the last entry.

    Args:
        wxr: Extraction context handed through to ``clean_node``.
        page_data: Entries collected so far; their ``sounds`` lists are
            extended in place.
        level_node: The section node to process.
    """
    heading = clean_node(wxr, None, level_node.largs)
    # A heading other than the generic "Pronúncia" is passed down as a raw
    # tag for every list item of this section.
    section_tags = [heading] if heading not in ["", "Pronúncia"] else []

    collected = []
    for list_node in level_node.find_child(NodeKind.LIST):
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            collected.extend(
                extract_pronunciation_list_item(
                    wxr, item, page_data[-1].lang_code, section_tags
                )
            )

    # Child sections attach their own sounds before this section does.
    for subsection in level_node.find_child(LEVEL_KIND_FLAGS):
        extract_pronunciation_section(wxr, page_data, subsection)

    for entry in page_data:
        if entry.lang_code != page_data[-1].lang_code:
            continue
        entry.sounds.extend(collected)

42 

43 

def extract_pronunciation_list_item(
    wxr: WiktextractContext,
    list_item: WikiNode,
    lang_code: str,
    parent_raw_tags: list[str],
) -> list[Sound]:
    """Extract the pronunciations described by a single list item.

    Two layouts are recognised:

    * a bare transcription, ``/ipa/`` or ``[ipa]``, as the only child;
    * ``label: value``, where the nodes in front of the string holding the
      colon form a raw tag and the remainder of the item (nested lists
      excluded) is the pronunciation text.

    Nested lists are processed recursively and receive the raw tags
    gathered up to that point.

    Args:
        wxr: Extraction context handed through to ``clean_node``.
        list_item: The list item node to parse.
        lang_code: Language code of the entry; ``"zh"`` stores the value in
            ``zh_pron``, any other code stores it in ``ipa``.
        parent_raw_tags: Raw tags inherited from the enclosing section or
            list item.  The list is copied, never modified.

    Returns:
        The sounds found in this item and in its nested lists.
    """
    raw_tags = parent_raw_tags[:]
    sounds = []
    children = list_item.children

    if len(children) == 1 and isinstance(children[0], str):
        # Match minimal sections ` /ipa/ ` or ` [ipa] `
        if re.match(r"\s*(/[^/]+/|\[[^][]+\])\s*$", children[0]):
            sound_value = clean_node(wxr, None, children[0]).strip()
            if not raw_tags:
                return [Sound(ipa=sound_value)]
            # Keep tags inherited from the section title (or parent item)
            # instead of dropping them for bare transcriptions.
            sound = Sound(ipa=sound_value, raw_tags=raw_tags)
            translate_raw_tags(sound)
            return [sound]

    for index, node in enumerate(children):
        if isinstance(node, str) and ":" in node:
            # Everything in front of this string node is the label.
            raw_tag = clean_node(wxr, None, children[:index])
            if raw_tag != "":
                raw_tags.append(raw_tag)
            # The value is the text after the first colon plus all following
            # nodes; nested lists are left to the recursive branch below.
            sound_value = clean_node(
                wxr,
                None,
                [node.partition(":")[2]]
                + [
                    n
                    for n in children[index + 1 :]
                    if not (isinstance(n, WikiNode) and n.kind == NodeKind.LIST)
                ],
            )
            if sound_value != "":
                sound = Sound(raw_tags=raw_tags)
                if lang_code == "zh":
                    sound.zh_pron = sound_value
                else:
                    sound.ipa = sound_value
                translate_raw_tags(sound)
                sounds.append(sound)
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            for child_list_item in node.find_child(NodeKind.LIST_ITEM):
                sounds.extend(
                    extract_pronunciation_list_item(
                        wxr, child_list_item, lang_code, raw_tags
                    )
                )

    return sounds