Coverage for src/wiktextract/extractor/de/pronunciation.py: 92%

57 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-12 08:27 +0000

1from wikitextprocessor.parser import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from ..share import set_sound_file_url_fields 

6from .models import Sound, WordEntry 

7from .tags import translate_raw_tags 

8 

9 

def extract_pronunciation_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Add the sounds found in a pronunciation section to ``word_entry``.

    Every list item of every list directly under ``level_node`` is handed
    to ``extract_pron_list_item``. The returned sounds are stored in
    ``word_entry.sounds`` and their categories are copied into
    ``word_entry.categories``.
    """
    for pron_list in level_node.find_child(NodeKind.LIST):
        for item in pron_list.find_child(NodeKind.LIST_ITEM):
            item_sounds = extract_pron_list_item(wxr, item)
            word_entry.sounds.extend(item_sounds)
            for item_sound in item_sounds:
                word_entry.categories.extend(item_sound.categories)

18 

19 

def extract_pron_list_item(
    wxr: WiktextractContext, list_item: WikiNode
) -> list[Sound]:
    """Build ``Sound`` objects from one pronunciation list item.

    Only template, italic and nested-list children are inspected:

    * italic text ending in ":" (except "auth:") is buffered as a raw tag;
    * nested lists are processed recursively, item by item;
    * "Lautschrift" templates produce an IPA sound carrying the buffered
      raw tags;
    * "Audio" templates are delegated to ``extract_audio_template``;
    * "Reim" templates produce a rhyme sound from their first positional
      parameter.

    Returns the sounds in the order they were encountered.
    """
    pending_tags = []
    collected = []
    wanted_kinds = NodeKind.TEMPLATE | NodeKind.ITALIC | NodeKind.LIST
    for child in list_item.find_child(wanted_kinds):
        if child.kind == NodeKind.ITALIC:
            label = clean_node(wxr, None, child)
            if label.endswith(":") and label != "auth:":
                pending_tags.append(label.removesuffix(":"))
        elif child.kind == NodeKind.LIST:
            for sub_item in child.find_child(NodeKind.LIST_ITEM):
                collected.extend(extract_pron_list_item(wxr, sub_item))
        elif child.kind == NodeKind.TEMPLATE:
            template_name = child.template_name
            if template_name == "Lautschrift":
                # NOTE(review): the buffer is cleared below after the sound
                # is stored; this presumably relies on Sound keeping its own
                # copy of the list - confirm against the model definition.
                ipa_sound = Sound(ipa="", raw_tags=pending_tags)
                ipa_sound.ipa = clean_node(wxr, ipa_sound, child)
                if ipa_sound.ipa != "":
                    translate_raw_tags(ipa_sound)
                    collected.append(ipa_sound)
                    # Tags are consumed only by a sound that has IPA text.
                    pending_tags.clear()
            elif template_name == "Audio":
                audio_sound = extract_audio_template(wxr, child)
                if audio_sound is not None:
                    collected.append(audio_sound)
            elif template_name == "Reim":
                rhyme_arg = child.template_parameters.get(1, "")
                rhyme_text = clean_node(wxr, None, rhyme_arg)
                if rhyme_text != "":
                    rhyme_sound = Sound(rhymes=rhyme_text)
                    collected.append(rhyme_sound)
                    # Second pass over the whole template with the sound as
                    # target; presumably records the template's categories
                    # on it - verify against clean_node.
                    clean_node(wxr, rhyme_sound, child)

    return collected

60 

61 

def extract_audio_template(
    wxr: WiktextractContext, t_node: TemplateNode
) -> Sound | None:
    """Turn an "Audio" template into a ``Sound``.

    Template reference: https://de.wiktionary.org/wiki/Vorlage:Audio

    The first positional parameter is taken as the file name; ``None`` is
    returned when it is blank. Otherwise the file URL fields are filled in,
    the template is expanded, and for each link in the expansion whose text
    contains "(", the text after the first "(" (with ")" characters
    stripped from both ends) is stored as a raw tag before the tags are
    translated.
    """
    file_arg = t_node.template_parameters.get(1, "")
    filename = clean_node(wxr, None, file_arg)
    if not filename.strip():
        return None

    audio = Sound()
    set_sound_file_url_fields(wxr, filename, audio)

    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(template_wikitext, expand_all=True)
    for link in expanded.find_child(NodeKind.LINK):
        link_text = clean_node(wxr, None, link)
        _, paren, after_paren = link_text.partition("(")
        if paren:
            audio.raw_tags.append(after_paren.strip(")"))

    # Run clean_node over the expansion with the sound as target;
    # presumably this records categories on the sound - verify against
    # clean_node.
    clean_node(wxr, audio, expanded)
    translate_raw_tags(audio)
    return audio

82 return sound