Coverage for src/wiktextract/extractor/th/alt_form.py: 88%

62 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from .models import Form, WordEntry 

6from .tags import translate_raw_tags 

7 

8 

def extract_alt_form_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Gather forms from the templates found under ``level_node``.

    Templates inside list items are dispatched by name: ``alt`` goes to
    ``extract_alt_template`` and ``l``/``link`` go to
    ``extract_l_template``. Templates that are direct children of
    ``level_node`` and named ``lo-alt`` go to ``extract_lo_alt_template``.
    Each handler appends its results to ``word_entry.forms``.
    """
    for alt_list in level_node.find_child(NodeKind.LIST):
        for item in alt_list.find_child(NodeKind.LIST_ITEM):
            for child in item.children:
                # Only template nodes carry form data here; skip the rest.
                if not isinstance(child, TemplateNode):
                    continue
                name = child.template_name
                if name == "alt":
                    extract_alt_template(wxr, word_entry, child)
                elif name in ("l", "link"):
                    extract_l_template(wxr, word_entry, child)

    # "lo-alt" is looked up directly under the section node, not in a list.
    for template in level_node.find_child(NodeKind.TEMPLATE):
        if template.template_name != "lo-alt":
            continue
        extract_lo_alt_template(wxr, word_entry, template)

29 

30 

def extract_alt_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Expand an ``alt`` template and extract the forms it renders.

    The template is converted back to wikitext and re-parsed with
    ``expand_all=True``. The cleaned first positional argument is passed
    on as the language code that matching ``span`` tags must carry.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_root = wxr.wtp.parse(wikitext, expand_all=True)
    first_arg = t_node.template_parameters.get(1, "")
    extract_alt_expanded_nodes(
        wxr, word_entry, expanded_root, clean_node(wxr, None, first_arg)
    )

39 

40 

def extract_alt_expanded_nodes(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    root: WikiNode,
    lang_code: str,
) -> None:
    """Read forms, raw tags and romanizations from an expanded node tree.

    Only the first italic child of ``root`` is consulted for raw tags; its
    text is split on commas and blank pieces are dropped. Every ``span``
    whose ``lang`` attribute equals ``lang_code`` yields a form carrying
    those raw tags. A ``span`` whose ``lang`` ends with ``-Latn`` sets the
    ``roman`` field of the most recently appended form in
    ``word_entry.forms``.
    """
    raw_tags: list[str] = []
    # Take just the first italic node, leaving the iterator unexhausted.
    first_italic = next(iter(root.find_child(NodeKind.ITALIC)), None)
    if first_italic is not None:
        italic_text = clean_node(wxr, None, first_italic)
        stripped_pieces = (piece.strip() for piece in italic_text.split(","))
        raw_tags.extend(piece for piece in stripped_pieces if piece != "")

    for span in root.find_html("span"):
        lang_attr = span.attrs.get("lang", "")
        if lang_attr == lang_code:
            text = clean_node(wxr, None, span)
            form = Form(form=text, raw_tags=raw_tags)
            if form.form == "":
                continue
            translate_raw_tags(form)
            word_entry.forms.append(form)
        elif lang_attr.endswith("-Latn") and word_entry.forms:
            word_entry.forms[-1].roman = clean_node(wxr, None, span)

    # Final pass with word_entry as the target; the return value is unused.
    # NOTE(review): presumably this collects categories/links - confirm.
    clean_node(wxr, word_entry, root)

67 

68 

def extract_lo_alt_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Expand a ``lo-alt`` template and extract forms from its list items.

    The template is re-parsed with ``expand_all=True``. Each list item in
    the expansion is handed to ``extract_alt_expanded_nodes`` with the
    fixed language code ``"lo"``.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_root = wxr.wtp.parse(wikitext, expand_all=True)
    # Lazy flattening keeps the same traversal order as nested loops.
    expanded_items = (
        item
        for expanded_list in expanded_root.find_child(NodeKind.LIST)
        for item in expanded_list.find_child(NodeKind.LIST_ITEM)
    )
    for item in expanded_items:
        extract_alt_expanded_nodes(wxr, word_entry, item, "lo")

78 

79 

def extract_l_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Append the second positional argument of an ``l``/``link`` template.

    The argument is cleaned to plain text. Nothing is appended to
    ``word_entry.forms`` when the resulting form text is empty.
    """
    target = t_node.template_parameters.get(2, "")
    form = Form(form=clean_node(wxr, None, target))
    if form.form == "":
        return
    word_entry.forms.append(form)

88 

89 

def extract_romanization_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Collect romanization forms from the section under ``level_node``.

    ``RTGS`` templates inside list items contribute their first positional
    argument, tagged ``["romanization", "RTGS"]``. Link nodes that are
    direct children of ``level_node`` contribute their cleaned text,
    tagged ``["romanization"]``. Empty strings are skipped in both cases.
    """
    for roman_list in level_node.find_child(NodeKind.LIST):
        for item in roman_list.find_child(NodeKind.LIST_ITEM):
            for child in item.children:
                is_rtgs = (
                    isinstance(child, TemplateNode)
                    and child.template_name == "RTGS"
                )
                if not is_rtgs:
                    continue
                first_arg = child.template_parameters.get(1, "")
                text = clean_node(wxr, None, first_arg)
                if text != "":
                    word_entry.forms.append(
                        Form(form=text, tags=["romanization", "RTGS"])
                    )

    # Links sitting directly under the section node are handled separately.
    for link in level_node.find_child(NodeKind.LINK):
        text = clean_node(wxr, None, link)
        if text == "":
            continue
        word_entry.forms.append(Form(form=text, tags=["romanization"]))

110 word_entry.forms.append(form)