Coverage for src/wiktextract/extractor/th/alt_form.py: 88%
62 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from .models import Form, WordEntry
6from .tags import translate_raw_tags
def extract_alt_form_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Dispatch the templates of an alternative-forms section to extractors.

    Two places are scanned:

    * the children of every list item in the section's lists, where ``alt``
      templates go to ``extract_alt_template`` and ``l``/``link`` templates
      go to ``extract_l_template``;
    * the direct template children of the section node itself, where
      ``lo-alt`` templates go to ``extract_lo_alt_template``.

    The helpers append whatever they find to ``word_entry.forms``; this
    function returns nothing.
    """
    for bullet_list in level_node.find_child(NodeKind.LIST):
        for item in bullet_list.find_child(NodeKind.LIST_ITEM):
            for child in item.children:
                # Only templates are of interest inside a list item.
                if not isinstance(child, TemplateNode):
                    continue
                name = child.template_name
                if name == "alt":
                    extract_alt_template(wxr, word_entry, child)
                elif name in ("l", "link"):
                    extract_l_template(wxr, word_entry, child)

    # "lo-alt" is looked up directly under the section node, not inside the
    # section's lists.
    for template in level_node.find_child(NodeKind.TEMPLATE):
        if template.template_name == "lo-alt":
            extract_lo_alt_template(wxr, word_entry, template)
def extract_alt_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Expand an ``alt`` template and extract forms from the expansion.

    The template node is converted back to wikitext and re-parsed with
    ``expand_all=True``. The cleaned first positional argument is passed on
    as the language code that ``extract_alt_expanded_nodes`` matches against
    the ``lang`` attribute of the expanded ``<span>`` tags.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)
    lang_arg = t_node.template_parameters.get(1, "")
    extract_alt_expanded_nodes(
        wxr, word_entry, expanded, clean_node(wxr, None, lang_arg)
    )
def extract_alt_expanded_nodes(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    root: WikiNode,
    lang_code: str,
) -> None:
    """Extract forms and romanizations from expanded template output.

    * Raw tags come from the first italic child of ``root`` only: its cleaned
      text is split on commas and empty pieces are dropped.
    * Every ``<span>`` whose ``lang`` attribute equals ``lang_code`` becomes a
      ``Form`` (skipped when its text is empty) carrying those raw tags; it is
      passed through ``translate_raw_tags`` and appended to
      ``word_entry.forms``.
    * A ``<span>`` whose ``lang`` ends with ``-Latn`` sets ``roman`` on the
      last entry of ``word_entry.forms``, if there is one.
    """
    raw_tags = []
    # Later italic children are ignored; only the first one supplies tags.
    first_italic = next(iter(root.find_child(NodeKind.ITALIC)), None)
    if first_italic is not None:
        tag_text = clean_node(wxr, None, first_italic)
        raw_tags = [
            tag for tag in map(str.strip, tag_text.split(",")) if tag != ""
        ]

    for span in root.find_html("span"):
        span_lang = span.attrs.get("lang", "")
        if span_lang == lang_code:
            text = clean_node(wxr, None, span)
            form = Form(form=text, raw_tags=raw_tags)
            if form.form == "":
                continue
            translate_raw_tags(form)
            word_entry.forms.append(form)
        elif span_lang.endswith("-Latn") and word_entry.forms:
            # NOTE(review): this targets the last form on the entry, which
            # is not necessarily one added by this call - confirm intended.
            word_entry.forms[-1].roman = clean_node(wxr, None, span)

    # Final pass over the whole expansion with the entry as the target;
    # presumably lets clean_node record entry-level data such as
    # categories - TODO confirm against clean_node.
    clean_node(wxr, word_entry, root)
def extract_lo_alt_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Expand a ``lo-alt`` template and extract forms from each list item.

    The template is converted back to wikitext and re-parsed with
    ``expand_all=True``. Every list item of every list directly under the
    expanded root is handed to ``extract_alt_expanded_nodes`` with the
    language code fixed to ``"lo"``.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)
    for bullet_list in expanded.find_child(NodeKind.LIST):
        for item in bullet_list.find_child(NodeKind.LIST_ITEM):
            extract_alt_expanded_nodes(wxr, word_entry, item, "lo")
def extract_l_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Append the second positional argument of an ``l``/``link`` template.

    The argument is cleaned with ``clean_node`` and wrapped in a ``Form``;
    nothing is appended to ``word_entry.forms`` when the resulting form text
    is empty.
    """
    text = clean_node(wxr, None, t_node.template_parameters.get(2, ""))
    form = Form(form=text)
    if form.form == "":
        return
    word_entry.forms.append(form)
def extract_romanization_section(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
) -> None:
    """Collect romanization forms from a romanization section.

    * For each ``RTGS`` template found among the children of the section's
      list items, the cleaned first positional argument is appended as a form
      tagged ``romanization`` and ``RTGS``.
    * For each link that is a direct child of the section node, the cleaned
      link text is appended as a form tagged ``romanization``.

    Empty strings are skipped in both cases.
    """
    for bullet_list in level_node.find_child(NodeKind.LIST):
        for item in bullet_list.find_child(NodeKind.LIST_ITEM):
            for child in item.children:
                is_rtgs = (
                    isinstance(child, TemplateNode)
                    and child.template_name == "RTGS"
                )
                if not is_rtgs:
                    continue
                roman = clean_node(
                    wxr, None, child.template_parameters.get(1, "")
                )
                if roman == "":
                    continue
                word_entry.forms.append(
                    Form(form=roman, tags=["romanization", "RTGS"])
                )
    # Links placed directly under the section node (outside any list).
    for link in level_node.find_child(NodeKind.LINK):
        roman = clean_node(wxr, None, link)
        if roman == "":
            continue
        word_entry.forms.append(Form(form=roman, tags=["romanization"]))