Coverage for src/wiktextract/extractor/pt/head_line.py: 90%
27 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from wikitextprocessor import NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from .models import Form, WordEntry
6from .tags import translate_raw_tags
def extract_head_line_nodes(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    nodes: list[WikiNode | str],
) -> None:
    """Dispatch each head-line node to the matching extractor.

    Templates whose name is in the set below are passed to
    ``extract_gramática_template``; a "datação" template contributes the
    result of ``extract_defdate_template`` to ``word_entry.attestations``;
    and only the first bold node encountered is passed to
    ``extract_head_line_bold_node``. Every other node (including plain
    strings) is ignored. ``word_entry`` is modified in place.
    """
    # Template names routed to extract_gramática_template.
    grammar_template_names = frozenset(
        {
            "g",
            "gramática",
            "gênero",
            "m",
            "f",
            "n",
            "c",
            "c2g",
            "pr",
            "c.",
            "fp",
            "mp",
        }
    )
    bold_seen = False
    for item in nodes:
        if isinstance(item, TemplateNode):
            name = item.template_name
            if name in grammar_template_names:
                extract_gramática_template(wxr, word_entry, item)
                continue
            if name == "datação":
                # NOTE(review): imported at call time rather than at module
                # top - presumably to avoid a circular import; confirm.
                from .etymology import extract_defdate_template

                attestations = extract_defdate_template(wxr, word_entry, item)
                word_entry.attestations.extend(attestations)
                continue
        # Any remaining node only matters if it is the first bold node.
        if bold_seen or not isinstance(item, WikiNode):
            continue
        if item.kind == NodeKind.BOLD:
            extract_head_line_bold_node(wxr, word_entry, item)
            bold_seen = True
def extract_gramática_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Collect raw tags from the italic nodes of an expanded template.

    The template is rendered back to wikitext and re-parsed with full
    expansion. The cleaned text of each direct italic child is appended to
    ``word_entry.raw_tags`` when non-empty, and ``translate_raw_tags`` is
    then run on the entry.
    """
    # https://pt.wiktionary.org/wiki/Predefinição:gramática
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_root = wxr.wtp.parse(template_wikitext, expand_all=True)
    cleaned_texts = (
        clean_node(wxr, None, italic)
        for italic in expanded_root.find_child(NodeKind.ITALIC)
    )
    # Skip italic nodes whose cleaned text is empty.
    word_entry.raw_tags.extend(text for text in cleaned_texts if text != "")
    translate_raw_tags(word_entry)
def extract_head_line_bold_node(
    wxr: WiktextractContext, word_entry: WordEntry, bold_node: WikiNode
) -> None:
    """Record the bold head-line text as a canonical form.

    The cleaned text of ``bold_node`` is appended to ``word_entry.forms``
    as a ``Form`` tagged "canonical", unless it is empty or identical to
    the current page title (``wxr.wtp.title``). ``word_entry`` is modified
    in place; nothing is returned.
    """
    word = clean_node(wxr, None, bold_node)
    # A form equal to the page title carries no extra information.
    if word not in ("", wxr.wtp.title):
        word_entry.forms.append(Form(form=word, tags=["canonical"]))