Coverage for src/wiktextract/extractor/th/alt_form.py

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from .models import Form, WordEntry

6from .tags import translate_raw_tags

def extract_alt_form_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Collect alternative forms found in a section into ``word_entry``.

    Two places are scanned:

    * templates sitting directly inside list items of the section's lists:
      ``alt`` is handed to ``extract_alt_template`` and ``l``/``link`` to
      ``extract_l_template``;
    * templates that are direct children of the section node itself: only
      ``lo-alt`` is handled, via ``extract_lo_alt_template``.

    Nothing is returned; the helpers append to ``word_entry.forms``.
    """
    for list_node in level_node.find_child(NodeKind.LIST):
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            for child in list_item.children:
                # Plain text and other node kinds carry no form data here.
                if not isinstance(child, TemplateNode):
                    continue
                name = child.template_name
                if name == "alt":
                    extract_alt_template(wxr, word_entry, child)
                elif name in ("l", "link"):
                    extract_l_template(wxr, word_entry, child)

    for template in level_node.find_child(NodeKind.TEMPLATE):
        if template.template_name == "lo-alt":
            extract_lo_alt_template(wxr, word_entry, template)

def extract_alt_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Expand an ``alt`` template and extract forms from the expansion.

    The template is converted back to wikitext and re-parsed with
    ``expand_all=True``. Its first positional argument, cleaned to plain
    text, is passed on as the language code that the expanded ``<span>``
    elements are matched against in ``extract_alt_expanded_nodes``.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_node = wxr.wtp.parse(wikitext, expand_all=True)
    lang_arg = t_node.template_parameters.get(1, "")
    lang_code = clean_node(wxr, None, lang_arg)
    extract_alt_expanded_nodes(wxr, word_entry, expanded_node, lang_code)

def extract_alt_expanded_nodes(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    root: WikiNode,
    lang_code: str,
) -> None:
    """Extract forms and romanizations from already-expanded template output.

    * Raw tags come from the first italic child of ``root`` only: its text
      is split on commas and empty pieces are dropped.
    * Every ``<span>`` whose ``lang`` attribute equals ``lang_code`` becomes
      a ``Form`` (skipped when its text is empty) carrying those raw tags,
      which are then passed through ``translate_raw_tags``.
    * A ``<span>`` whose ``lang`` ends in ``-Latn`` is stored as the
      ``roman`` value of the most recent entry in ``word_entry.forms``,
      provided that list is not empty.

    Finally ``clean_node`` is run over ``root`` with ``word_entry`` as its
    data argument and the result discarded.
    """
    raw_tags: list[str] = []
    first_italic = next(iter(root.find_child(NodeKind.ITALIC)), None)
    if first_italic is not None:
        italic_text = clean_node(wxr, None, first_italic)
        raw_tags = [
            tag
            for tag in (piece.strip() for piece in italic_text.split(","))
            if tag != ""
        ]

    for span in root.find_html("span"):
        lang_attr = span.attrs.get("lang", "")
        if lang_attr == lang_code:
            form = Form(form=clean_node(wxr, None, span), raw_tags=raw_tags)
            if form.form != "":
                translate_raw_tags(form)
                word_entry.forms.append(form)
            continue
        # Romanization spans annotate whichever form was added last.
        if lang_attr.endswith("-Latn") and word_entry.forms:
            word_entry.forms[-1].roman = clean_node(wxr, None, span)

    # Called only for its effect on word_entry; the returned text is unused.
    # NOTE(review): presumably this collects category links - confirm
    # against clean_node's implementation.
    clean_node(wxr, word_entry, root)

def extract_lo_alt_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Expand a ``lo-alt`` template and extract forms from each list item.

    The template is re-parsed with ``expand_all=True``; every list item of
    every list directly under the expansion is handed to
    ``extract_alt_expanded_nodes`` with the fixed language code ``"lo"``.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)
    for expanded_list in expanded.find_child(NodeKind.LIST):
        for item in expanded_list.find_child(NodeKind.LIST_ITEM):
            extract_alt_expanded_nodes(wxr, word_entry, item, "lo")

def extract_l_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Add the target of an ``l``/``link`` template as a form.

    The template's second positional argument is cleaned to plain text and
    wrapped in a ``Form``; it is appended to ``word_entry.forms`` unless the
    resulting form text is empty.
    """
    link_target = t_node.template_parameters.get(2, "")
    form = Form(form=clean_node(wxr, None, link_target))
    if form.form == "":
        return
    word_entry.forms.append(form)

def extract_romanization_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Collect romanizations from a section into ``word_entry.forms``.

    * ``RTGS`` templates found directly inside list items contribute their
      first positional argument, tagged ``["romanization", "RTGS"]``.
    * Link nodes that are direct children of the section contribute their
      cleaned text, tagged ``["romanization"]``.

    Empty strings are skipped in both cases.
    """
    for list_node in level_node.find_child(NodeKind.LIST):
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            for child in list_item.children:
                is_rtgs = (
                    isinstance(child, TemplateNode)
                    and child.template_name == "RTGS"
                )
                if not is_rtgs:
                    continue
                rtgs_text = clean_node(
                    wxr, None, child.template_parameters.get(1, "")
                )
                if rtgs_text != "":
                    word_entry.forms.append(
                        Form(form=rtgs_text, tags=["romanization", "RTGS"])
                    )
    for link in level_node.find_child(NodeKind.LINK):
        link_text = clean_node(wxr, None, link)
        if link_text != "":
            word_entry.forms.append(
                Form(form=link_text, tags=["romanization"])
            )

Coverage for src/wiktextract/extractor/th/alt_form.py: 88%

62 statements