Coverage for src/wiktextract/extractor/pl/sound.py: 67%

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from ..share import set_sound_file_url_fields

6from .models import Sound, WordEntry

7from .tags import translate_raw_tags

# Names of templates that carry no pronunciation themselves:
# `process_sound_template()` appends their cleaned text to the pending
# raw-tag list instead of creating a sound entry.
SOUND_TAG_TEMPLATES = frozenset({"RP", "amer", "lp", "lm"})

def extract_sound_section(
    wxr: WiktextractContext,
    base_data: WordEntry,
    level_node: WikiNode,
) -> None:
    """Collect pronunciation data from a sound section into `base_data`.

    Every list item found anywhere below `level_node` is scanned for its
    direct template children, and each template is handed to
    `process_sound_template()`. Templates of the same list item share one
    raw-tag list; a new list is started for each item.

    If the section contains no list item at all, every template found
    anywhere below `level_node` is processed instead, each with its own
    empty raw-tag list.
    """
    found_list_item = False
    for item_node in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        found_list_item = True
        # One accumulator per list item, shared by all of its templates.
        item_raw_tags: list[str] = []
        for t_node in item_node.find_child(NodeKind.TEMPLATE):
            process_sound_template(wxr, base_data, t_node, item_raw_tags)

    if found_list_item:
        return

    # Without a list the templates may sit inside a preformatted node, so a
    # direct-children search (`find_child()`) would not reach them.
    for t_node in level_node.find_child_recursively(NodeKind.TEMPLATE):
        process_sound_template(wxr, base_data, t_node, [])

def process_sound_template(
    wxr: WiktextractContext,
    base_data: WordEntry,
    template_node: TemplateNode,
    raw_tags: list[str],
) -> None:
    """Handle one template of a sound section.

    The template name selects exactly one of four behaviours; a template
    matching none of them is ignored:

    * name starts with "IPA", "AS" or "SAMPA": the cleaned first parameter
      becomes `Sound.ipa`; "AS..." templates add the "Slavic-alphabet" tag
      and the exact name "SAMPA" adds the "SAMPA" tag.
    * name starts with "audio": the first parameter is passed to
      `set_sound_file_url_fields()` as the audio file, and `raw_tags` is
      emptied once the sound has been stored.
    * name in `SOUND_TAG_TEMPLATES`: the cleaned template text is appended
      to `raw_tags`; no sound is created.
    * name is "pinyin" or "zhuyin": the first parameter becomes
      `Sound.zh_pron`, tagged "Pinyin" or "Bopomofo" respectively.

    A sound is only created when the relevant value is a non-empty string.
    Created sounds receive `raw_tags`, are run through
    `translate_raw_tags()` and are appended to `base_data.sounds`.

    `raw_tags` is mutated in place (appended to or cleared) so that the
    caller can share it between consecutive templates.
    """
    name = template_node.template_name
    first_arg = template_node.template_parameters.get(1, "")

    if name.startswith(("IPA", "AS", "SAMPA")):
        ipa = clean_node(wxr, None, first_arg)
        if not isinstance(ipa, str) or len(ipa) == 0:
            return
        sound = Sound(ipa=ipa, raw_tags=raw_tags)
        if name.startswith("AS"):
            sound.tags.append("Slavic-alphabet")
        elif name == "SAMPA":
            sound.tags.append("SAMPA")
        translate_raw_tags(sound)
        base_data.sounds.append(sound)
        return

    if name.startswith("audio"):
        if not isinstance(first_arg, str) or len(first_arg) == 0:
            return
        sound = Sound(raw_tags=raw_tags)
        set_sound_file_url_fields(wxr, first_arg, sound)
        translate_raw_tags(sound)
        base_data.sounds.append(sound)
        # The pending tags are dropped only after an audio entry was stored.
        raw_tags.clear()
        return

    if name in SOUND_TAG_TEMPLATES:
        raw_tags.append(clean_node(wxr, None, template_node))
        return

    zh_tags = {"pinyin": "Pinyin", "zhuyin": "Bopomofo"}
    if name in zh_tags:
        if not isinstance(first_arg, str) or len(first_arg) == 0:
            return
        sound = Sound(zh_pron=first_arg, raw_tags=raw_tags)
        sound.tags.append(zh_tags[name])
        translate_raw_tags(sound)
        base_data.sounds.append(sound)

def extract_morphology_section(
    wxr: WiktextractContext, base_data: WordEntry, level_node: LevelNode
) -> None:
    """Store the cleaned text of the "morfeo" template as the hyphenation.

    All templates anywhere below `level_node` are inspected; templates with
    any other name are skipped. `base_data.hyphenation` is overwritten on
    each match, so the last "morfeo" template found is the one kept.
    """
    # Recursive search: the template may sit inside a "preformatted" node.
    for template in level_node.find_child_recursively(NodeKind.TEMPLATE):
        if template.template_name != "morfeo":
            continue
        base_data.hyphenation = clean_node(wxr, base_data, template)