Coverage for src/wiktextract/extractor/de/pronunciation.py: 93%

1from wikitextprocessor.parser import LevelNode, NodeKind, TemplateNode, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from ..share import set_sound_file_url_fields

6from .models import Sound, WordEntry

7from .tags import translate_raw_tags

def extract_pronunciation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Add the sounds found in a pronunciation section to ``word_entry``.

    Every list item of every list that is a direct child of ``level_node``
    is passed to ``extract_pron_list_item``. Each returned sound is appended
    to ``word_entry.sounds`` and its categories are appended to
    ``word_entry.categories``.
    """
    # Lazily flatten "lists -> list items" so the traversal order is the
    # same as two nested loops.
    list_items = (
        item
        for pron_list in level_node.find_child(NodeKind.LIST)
        for item in pron_list.find_child(NodeKind.LIST_ITEM)
    )
    for item in list_items:
        for sound in extract_pron_list_item(wxr, item):
            word_entry.sounds.append(sound)
            word_entry.categories.extend(sound.categories)

def extract_pron_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Sound]:
    """Build ``Sound`` objects from one pronunciation list item.

    Only template, italic and nested-list children are inspected:

    * italic text ending in ``:`` is kept (without the colon) as a raw tag;
    * nested lists are processed recursively, one list item at a time;
    * ``Lautschrift`` templates give an IPA sound, ``Audio`` templates are
      delegated to ``extract_audio_template`` and ``Reim`` templates give a
      rhyme sound.

    The raw tags gathered from this item are added to every sound returned,
    including those coming from nested lists, and ``translate_raw_tags`` is
    then applied to each sound.
    """
    item_tags = []
    found = []
    wanted_kinds = NodeKind.TEMPLATE | NodeKind.ITALIC | NodeKind.LIST
    for child in list_item.find_child(wanted_kinds):
        if child.kind == NodeKind.ITALIC:
            label = clean_node(wxr, None, child)
            if label.endswith(":"):
                item_tags.append(label[:-1])
        elif child.kind == NodeKind.LIST:
            for sub_item in child.find_child(NodeKind.LIST_ITEM):
                found.extend(extract_pron_list_item(wxr, sub_item))
        elif child.kind == NodeKind.TEMPLATE:
            template = child.template_name
            if template == "Lautschrift":
                ipa = clean_node(
                    wxr, None, child.template_parameters.get(1, "")
                )
                if ipa == "":
                    continue
                ipa_sound = Sound(ipa=ipa)
                found.append(ipa_sound)
                # Second pass over the whole template with the sound as
                # target; presumably this records category data on it —
                # confirm against clean_node.
                clean_node(wxr, ipa_sound, child)
            elif template == "Audio":
                audio_sound = extract_audio_template(wxr, child)
                if audio_sound is not None:
                    found.append(audio_sound)
            elif template == "Reim":
                rhyme = clean_node(
                    wxr, None, child.template_parameters.get(1, "")
                )
                if rhyme == "":
                    continue
                rhyme_sound = Sound(rhymes=rhyme)
                found.append(rhyme_sound)
                clean_node(wxr, rhyme_sound, child)

    for sound in found:
        sound.raw_tags.extend(item_tags)
        translate_raw_tags(sound)
    return found

def extract_audio_template(
    wxr: WiktextractContext, t_node: TemplateNode
) -> Sound | None:
    """Create a ``Sound`` from an ``Audio`` template node.

    Returns ``None`` when the first template parameter (the file name) is
    empty or whitespace only. Otherwise the file fields are filled in by
    ``set_sound_file_url_fields``, the template is expanded and re-parsed,
    and for every link in the expansion whose text contains ``(`` the text
    after the first ``(`` (with trailing ``)`` removed) is stored as a raw
    tag.
    """
    # https://de.wiktionary.org/wiki/Vorlage:Audio
    audio_file = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    if not audio_file.strip():
        return None

    audio = Sound()
    set_sound_file_url_fields(wxr, audio_file, audio)

    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)
    for link in expanded.find_child(NodeKind.LINK):
        link_text = clean_node(wxr, None, link)
        # partition() splits at the first "("; `paren` is empty when the
        # link text has no parenthesis at all.
        _, paren, after_paren = link_text.partition("(")
        if paren:
            audio.raw_tags.append(after_paren.strip(")"))
    # Run clean_node over the expansion with the sound as target;
    # presumably this records category data on it — confirm against
    # clean_node.
    clean_node(wxr, audio, expanded)
    return audio

88 return sound