Coverage for src/wiktextract/extractor/nl/sound.py: 93%

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from ..share import set_sound_file_url_fields

6from .models import Hyphenation, Sound, WordEntry

def extract_sound_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Collect pronunciation data from the list items under ``level_node``.

    Every list item found recursively below ``level_node`` gets a fresh
    ``Sound``. The templates yielded by ``find_child(NodeKind.TEMPLATE)`` on
    that item are dispatched by name to the matching helper, and the
    ``Sound`` is appended to ``word_entry.sounds`` only if it ends up with a
    non-empty ``ipa`` or ``audio`` value.
    """
    for item in level_node.find_child_recursively(NodeKind.LIST_ITEM):
        current = Sound()
        for template in item.find_child(NodeKind.TEMPLATE):
            name = template.template_name
            if name == "audio":
                extract_audio_template(wxr, word_entry, current, template)
            elif name.startswith("IPA"):
                extract_ipa_template(wxr, word_entry, current, template)
            elif name == "pron-reg":
                extract_pron_reg_template(wxr, current, template)

        # A sound carrying neither IPA nor audio is not recorded.
        if current.ipa == "" and current.audio == "":
            continue
        word_entry.sounds.append(current)

def extract_audio_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sound: Sound,
    t_node: TemplateNode,
) -> None:
    """Fill the audio fields of ``sound`` from an ``audio`` template.

    The first positional template argument is cleaned and treated as the
    audio file name. Nothing happens when it is empty or the literal
    ``"..."``; otherwise the name is handed to ``set_sound_file_url_fields``
    and the whole template is passed through ``clean_node`` together with
    ``word_entry``.
    """
    # https://nl.wiktionary.org/wiki/Sjabloon:audio
    file_arg = t_node.template_parameters.get(1, "")
    audio_file = clean_node(wxr, None, file_arg)
    if audio_file in ("", "..."):
        return

    set_sound_file_url_fields(wxr, audio_file, sound)
    # NOTE(review): presumably run for its side effects on word_entry
    # (e.g. categories) - confirm against clean_node.
    clean_node(wxr, word_entry, t_node)

def extract_ipa_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    sound: Sound,
    t_node: TemplateNode,
) -> None:
    """Store the IPA text of an ``IPA*`` template on ``sound``.

    ``sound.ipa`` is set to the cleaned first positional template argument
    (an empty string is used when the argument is missing). The template is
    then passed through ``clean_node`` together with ``word_entry``.
    """
    # https://nl.wiktionary.org/wiki/Sjabloon:IPA-nl-standaard
    # https://nl.wiktionary.org/wiki/Sjabloon:IPA
    ipa_arg = t_node.template_parameters.get(1, "")
    sound.ipa = clean_node(wxr, None, ipa_arg)
    # NOTE(review): presumably run for its side effects on word_entry
    # (e.g. categories) - confirm against clean_node.
    clean_node(wxr, word_entry, t_node)

def extract_pron_reg_template(
    wxr: WiktextractContext, sound: Sound, t_node: TemplateNode
) -> None:
    """Add the link texts of an expanded ``pron-reg`` template as raw tags.

    The template is converted back to wikitext, re-parsed with
    ``expand_all=True``, and the cleaned text of every link node found
    recursively in the result is appended to ``sound.raw_tags``.
    """
    # location tag
    # https://nl.wiktionary.org/wiki/Sjabloon:pron-reg
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_root = wxr.wtp.parse(template_wikitext, expand_all=True)
    sound.raw_tags.extend(
        clean_node(wxr, None, link)
        for link in expanded_root.find_child_recursively(NodeKind.LINK)
    )

def extract_hyphenation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Record the hyphenation given by the first list item of the section.

    Only the first list item found recursively below ``level_node`` is
    looked at. Its cleaned text, when non-empty, is split on ``"·"`` and
    appended to ``word_entry.hyphenations`` as a ``Hyphenation``.
    """
    list_items = level_node.find_child_recursively(NodeKind.LIST_ITEM)
    first_item = next(iter(list_items), None)
    if first_item is None:
        return

    hyphenation_text = clean_node(wxr, None, first_item.children)
    if hyphenation_text == "":
        return

    syllables = hyphenation_text.split("·")
    word_entry.hyphenations.append(Hyphenation(parts=syllables))