Coverage for src/wiktextract/extractor/pt/pronunciation.py: 94%

1from wikitextprocessor.parser import (

2 LEVEL_KIND_FLAGS,

3 LevelNode,

4 NodeKind,

5 WikiNode,

8from ...page import clean_node

9from ...wxr_context import WiktextractContext

10from .models import Sound, WordEntry

11from .tags import translate_raw_tags

def extract_pronunciation_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    level_node: LevelNode,
) -> None:
    """Gather sounds from a pronunciation section and attach them to entries.

    The section heading is cleaned to text; unless it is empty or the generic
    "Pronúncia" title, it becomes the initial raw tag handed to every list
    item directly under this section.  Child sections are processed
    recursively, each one deriving its own raw tag from its own heading.

    Sounds found directly in this section are appended to every entry in
    ``page_data`` that has the same ``lang_code`` as the last entry.
    """
    heading = clean_node(wxr, None, level_node.largs)
    section_tags = [] if heading in ["", "Pronúncia"] else [heading]

    collected = []
    for list_node in level_node.find_child(NodeKind.LIST):
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            item_sounds = extract_pronunciation_list_item(
                wxr, item, page_data[-1].lang_code, section_tags
            )
            collected.extend(item_sounds)

    # Nested headings are handled as independent pronunciation sections.
    for subsection in level_node.find_child(LEVEL_KIND_FLAGS):
        extract_pronunciation_section(wxr, page_data, subsection)

    for entry in page_data:
        if entry.lang_code != page_data[-1].lang_code:
            continue
        entry.sounds.extend(collected)

def extract_pronunciation_list_item(
    wxr: WiktextractContext,
    list_item: WikiNode,
    lang_code: str,
    parent_raw_tags: list[str],
) -> list[Sound]:
    """Build ``Sound`` objects from one pronunciation list item.

    Children are scanned in order.  For a string child containing ":",
    the cleaned text of all preceding children is added as a raw tag (when
    non-empty), and the text after the first ":" together with the following
    non-list children is cleaned into the pronunciation value.  A nested
    list child is processed recursively with the raw tags gathered so far.

    ``parent_raw_tags`` is copied, never modified.  The value is stored in
    ``zh_pron`` when ``lang_code`` is "zh" and in ``ipa`` otherwise.
    """
    tags = parent_raw_tags.copy()
    found = []
    for position, child in enumerate(list_item.children):
        if isinstance(child, str) and ":" in child:
            label = clean_node(wxr, None, list_item.children[:position])
            if label != "":
                tags.append(label)

            # ":" is known to be present, so the tail is the text after the
            # first colon.
            _, _, after_colon = child.partition(":")
            trailing = [
                later
                for later in list_item.children[position + 1 :]
                if not isinstance(later, WikiNode)
                or later.kind != NodeKind.LIST
            ]
            value = clean_node(wxr, None, [after_colon] + trailing)
            if value == "":
                continue

            sound = Sound(raw_tags=tags)
            if lang_code == "zh":
                sound.zh_pron = value
            else:
                sound.ipa = value
            translate_raw_tags(sound)
            found.append(sound)
            continue

        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for nested_item in child.find_child(NodeKind.LIST_ITEM):
                nested_sounds = extract_pronunciation_list_item(
                    wxr, nested_item, lang_code, tags
                )
                found.extend(nested_sounds)

    return found