Coverage for src/wiktextract/extractor/pl/sound.py: 73%

48 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from wikitextprocessor.parser import NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Sound, WordEntry 

7from .tags import translate_raw_tags 

8 

# Names of templates that `process_sound_template()` does not turn into a
# sound entry itself; their cleaned text is appended to the raw-tag list
# instead.
SOUND_TAG_TEMPLATES = frozenset({"RP", "amer", "lp", "lm"})

10 

11 

def extract_sound_section(
    wxr: WiktextractContext,
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Feed the templates of a sound section to `process_sound_template()`.

    Every list item found anywhere below `level_node` is scanned for its
    direct template children; the templates of one list item share a single
    raw-tag list. When the section contains no list item at all, every
    template found anywhere below `level_node` is processed instead, each
    one with its own empty raw-tag list.

    Results are added to `base_data` by `process_sound_template()`; nothing
    is returned.
    """
    list_item_count = 0
    for item_node in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        list_item_count += 1
        item_raw_tags: list[str] = []
        for tpl_node in item_node.find_child(NodeKind.TEMPLATE):
            process_sound_template(wxr, base_data, tpl_node, item_raw_tags)

    if list_item_count > 0:
        return

    # could have preformatted node, can't use `find_child()`
    for tpl_node in level_node.find_child_recursively(NodeKind.TEMPLATE):
        process_sound_template(wxr, base_data, tpl_node, [])

29 

30 

def process_sound_template(
    wxr: WiktextractContext,
    base_data: WordEntry,
    template_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Handle one template of a sound section.

    The template name selects the action; the checks run in this order:

    * name starts with "IPA", "AS" or "SAMPA": the cleaned first parameter
      becomes the `ipa` value of a new `Sound`. An "AS"-prefixed name adds
      the tag "Slavic-alphabet", the exact name "SAMPA" adds "SAMPA".
    * name starts with "audio": the first parameter is passed to
      `set_sound_file_url_fields()` for a new `Sound`, and `raw_tags` is
      emptied once that sound has been stored.
    * name is in `SOUND_TAG_TEMPLATES`: the cleaned template text is
      appended to `raw_tags`; no sound is created.
    * name is "pinyin" or "zhuyin": the first parameter becomes the
      `zh_pron` value of a new `Sound`, tagged "Pinyin" or "Bopomofo".

    Each new `Sound` is created with the current `raw_tags`, run through
    `translate_raw_tags()` and appended to `base_data.sounds`. A missing,
    empty or non-string first parameter yields no sound. `raw_tags` is
    mutated in place so the caller can carry it over to the next template.
    """
    name = template_node.template_name

    if name.startswith(("IPA", "AS", "SAMPA")):
        ipa_text = clean_node(
            wxr, None, template_node.template_parameters.get(1, "")
        )
        if not isinstance(ipa_text, str) or len(ipa_text) == 0:
            return
        ipa_sound = Sound(ipa=ipa_text, raw_tags=raw_tags)
        if name.startswith("AS"):
            ipa_sound.tags.append("Slavic-alphabet")
        elif name == "SAMPA":
            ipa_sound.tags.append("SAMPA")
        translate_raw_tags(ipa_sound)
        base_data.sounds.append(ipa_sound)
        return

    if name.startswith("audio"):
        file_name = template_node.template_parameters.get(1, "")
        if not isinstance(file_name, str) or len(file_name) == 0:
            return
        audio_sound = Sound(raw_tags=raw_tags)
        set_sound_file_url_fields(wxr, file_name, audio_sound)
        translate_raw_tags(audio_sound)
        base_data.sounds.append(audio_sound)
        # Only emptied after an audio sound was actually stored.
        raw_tags.clear()
        return

    if name in SOUND_TAG_TEMPLATES:
        raw_tags.append(clean_node(wxr, None, template_node))
        return

    if name in ("pinyin", "zhuyin"):
        reading = template_node.template_parameters.get(1, "")
        if not isinstance(reading, str) or len(reading) == 0:
            return
        zh_sound = Sound(zh_pron=reading, raw_tags=raw_tags)
        zh_sound.tags.append(
            {"pinyin": "Pinyin", "zhuyin": "Bopomofo"}[name]
        )
        translate_raw_tags(zh_sound)
        base_data.sounds.append(zh_sound)