Coverage for src/wiktextract/extractor/de/pronunciation.py: 93%

57 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-27 08:07 +0000

1from wikitextprocessor.parser import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Sound, WordEntry 

7from .tags import translate_raw_tags 

8 

9 

def extract_pronunciation_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
) -> None:
    """Collect the sounds of a pronunciation section into ``word_entry``.

    Every list item of every list that is a direct child of ``level_node``
    is handed to ``extract_pron_list_item``. The sounds it returns are
    appended to ``word_entry.sounds`` and the categories of each sound are
    copied into ``word_entry.categories``.

    Args:
        wxr: Extraction context, passed through to the list-item extractor.
        word_entry: Entry that is mutated in place.
        level_node: Section node whose child lists are scanned.
    """
    for pron_list in level_node.find_child(NodeKind.LIST):
        for item in pron_list.find_child(NodeKind.LIST_ITEM):
            item_sounds = extract_pron_list_item(wxr, item)
            word_entry.sounds.extend(item_sounds)
            # Categories live on each sound; surface them on the entry too.
            for item_sound in item_sounds:
                word_entry.categories.extend(item_sound.categories)

20 

21 

def extract_pron_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Sound]:
    """Extract the sounds described by one pronunciation list item.

    Only template, italic and list children of ``list_item`` are examined:

    * Italic text ending in ``":"`` is recorded (without that colon) as a
      raw tag for this item.
    * A nested list is processed recursively, item by item.
    * ``Lautschrift`` yields a sound with ``ipa`` set from the first
      template parameter, ``Reim`` yields a sound with ``rhymes`` set from
      the first parameter, and ``Audio`` is delegated to
      ``extract_audio_template``. Empty values produce no sound.

    Before returning, the raw tags gathered on this item are added to every
    collected sound (including those from nested lists) and
    ``translate_raw_tags`` is run on each of them.

    NOTE(review): sounds coming from nested lists have already been through
    ``translate_raw_tags`` in the recursive call and are passed to it again
    here -- confirm that helper is safe to apply twice.

    Args:
        wxr: Extraction context used for cleaning nodes.
        list_item: The list item node to scan.

    Returns:
        The sounds found in this item and its nested lists, in the order
        they were encountered.
    """
    item_raw_tags = []
    found = []
    wanted_kinds = NodeKind.TEMPLATE | NodeKind.ITALIC | NodeKind.LIST
    for child in list_item.find_child(wanted_kinds):
        kind = child.kind
        if kind == NodeKind.ITALIC:
            label = clean_node(wxr, None, child)
            if label.endswith(":"):
                item_raw_tags.append(label.removesuffix(":"))
        elif kind == NodeKind.LIST:
            for sub_item in child.find_child(NodeKind.LIST_ITEM):
                found.extend(extract_pron_list_item(wxr, sub_item))
        elif kind == NodeKind.TEMPLATE:
            template_name = child.template_name
            if template_name == "Lautschrift":
                ipa_text = clean_node(
                    wxr, None, child.template_parameters.get(1, "")
                )
                if ipa_text != "":
                    ipa_sound = Sound(ipa=ipa_text)
                    found.append(ipa_sound)
                    # Second pass over the whole template with the sound as
                    # target -- presumably so clean_node records categories
                    # on it; confirm against clean_node.
                    clean_node(wxr, ipa_sound, child)
            elif template_name == "Audio":
                audio_sound = extract_audio_template(wxr, child)
                if audio_sound is not None:
                    found.append(audio_sound)
            elif template_name == "Reim":
                rhyme_text = clean_node(
                    wxr, None, child.template_parameters.get(1, "")
                )
                if rhyme_text != "":
                    rhyme_sound = Sound(rhymes=rhyme_text)
                    found.append(rhyme_sound)
                    clean_node(wxr, rhyme_sound, child)

    # Tags may appear after the templates they qualify, so they are applied
    # only once the whole item has been scanned.
    for collected in found:
        collected.raw_tags.extend(item_raw_tags)
        translate_raw_tags(collected)
    return found

67 

68 

def extract_audio_template(
    wxr: WiktextractContext, t_node: TemplateNode
) -> Sound | None:
    """Build a ``Sound`` from an ``Audio`` template node.

    The first template parameter is cleaned and used as the file name. If
    it is blank, nothing is produced. Otherwise the file URL fields of a
    new ``Sound`` are filled in through ``set_sound_file_url_fields``, the
    template is re-parsed with full expansion, and every direct link child
    of the expanded tree whose cleaned text contains ``"("`` contributes a
    raw tag: the text after the first ``"("`` with ``")"`` characters
    stripped from both ends.

    Args:
        wxr: Extraction context providing the parser and node cleaning.
        t_node: The ``Audio`` template node.

    Returns:
        The populated sound, or ``None`` when the file name is empty.
    """
    # https://de.wiktionary.org/wiki/Vorlage:Audio
    file_name = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
    if not file_name.strip():
        return None

    audio = Sound()
    set_sound_file_url_fields(wxr, file_name, audio)

    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)
    for link in expanded.find_child(NodeKind.LINK):
        link_text = clean_node(wxr, None, link)
        # Split on the first "(": what follows it is the parenthesised label.
        _, paren, label = link_text.partition("(")
        if paren:
            audio.raw_tags.append(label.strip(")"))

    # Final pass with the sound as target -- presumably lets clean_node
    # attach categories from the expanded template; confirm against
    # clean_node.
    clean_node(wxr, audio, expanded)
    return audio