Coverage for src/wiktextract/extractor/tr/linkage.py: 87%
51 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 05:18 +0000
1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from .models import Form, Linkage, WordEntry
6from .tags import translate_raw_tags
def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    l_type: str,
    tags: list[str],
) -> None:
    """Extract the linkage entries of a section and store them on the entry.

    The direct children of ``level_node`` are visited in document order.
    An "üst" or "trans-top" template (name compared case-insensitively)
    sets the sense text passed along for the lists that follow it, and each
    list child contributes the entries returned by
    ``extract_linkage_list_item`` for its items. Links that are direct
    children of the section are added afterwards as entries without a sense.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        word_entry: Entry that receives the extracted data.
        level_node: Section node whose children are scanned.
        l_type: Name of the ``word_entry`` list attribute to extend; the
            value ``"forms"`` converts each entry to a ``Form`` instead.
        tags: Tags given to every extracted entry.
    """
    sense = ""
    l_list = []
    for node in level_node.children:
        if isinstance(node, TemplateNode) and node.template_name.lower() in [
            "üst",
            "trans-top",
        ]:
            sense = clean_node(wxr, None, node.template_parameters.get(1, ""))
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            # Process only the list being visited. Re-scanning every list
            # of the section here would emit each item once per list and
            # attach the wrong sense to items of other lists.
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                l_list.extend(
                    extract_linkage_list_item(
                        wxr, word_entry, list_item, tags, sense
                    )
                )
    for link_node in level_node.find_child(NodeKind.LINK):
        word = clean_node(wxr, None, link_node)
        if word != "":
            l_list.append(Linkage(word=word, tags=tags))

    if l_type == "forms":
        # Forms are stored as ``Form`` objects, so copy the shared fields.
        for l_data in l_list:
            word_entry.forms.append(
                Form(
                    form=l_data.word,
                    tags=l_data.tags,
                    raw_tags=l_data.raw_tags,
                    roman=l_data.roman,
                )
            )
    else:
        getattr(word_entry, l_type).extend(l_list)
def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    tags: list[str],
    sense: str,
) -> list[Linkage]:
    """Build the linkage entries found in one list item.

    Children are handled in order:

    * a link node, or a "bağlantı"/"l"/"b" template, yields a new entry
      carrying the current sense and ``tags`` (skipped if its text is empty);
    * an "anlam"/"mânâ"/"mana" template replaces the current sense for the
      entries that follow;
    * a "şerh" template adds a raw tag to the most recent entry, if any,
      and then runs ``translate_raw_tags`` on that entry.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        word_entry: Not used by this function.
        list_item: List item node whose children are scanned.
        tags: Tags given to every entry created here.
        sense: Initial sense text for created entries.

    Returns:
        The entries created from this list item, in document order.
    """
    link_templates = ("bağlantı", "l", "b")
    sense_templates = ("anlam", "mânâ", "mana")
    found: list[Linkage] = []

    for child in list_item.children:
        is_template = isinstance(child, TemplateNode)
        is_link = isinstance(child, WikiNode) and child.kind == NodeKind.LINK

        if is_link or (is_template and child.template_name in link_templates):
            entry = Linkage(
                word=clean_node(wxr, None, child), sense=sense, tags=tags
            )
            if entry.word != "":
                found.append(entry)
            continue

        if not is_template:
            continue

        name = child.template_name
        if name in sense_templates:
            # The template text is the sense label; drop its decoration.
            sense = clean_node(wxr, None, child).strip("(): ")
        elif name == "şerh" and len(found) > 0:
            note = clean_node(wxr, None, child).strip("() ")
            if note != "":
                latest = found[-1]
                latest.raw_tags.append(note)
                translate_raw_tags(latest)

    return found
# Linkage template name -> name of the WordEntry list attribute that
# extract_gloss_list_linkage_template extends with the extracted entries.
GLOSS_LIST_LINKAGE_TEMPLATES: dict[str, str] = {
    "eş anlamlılar": "synonyms",
    # Two template names map to the antonym list.
    "zıt anlamlılar": "antonyms",
    "zıt anlamlı": "antonyms",
    "alt kavramlar": "hyponyms",
}
def extract_gloss_list_linkage_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Extract linkage entries from a linkage template in a gloss list.

    The template is expanded and its ``span`` elements are scanned. A span
    whose ``lang`` attribute equals the entry's language code becomes a new
    entry; its sense is the glosses of the entry's last sense joined with
    spaces, or an empty string when the entry has no senses. Any other span
    whose ``class`` attribute contains "Latn" supplies the romanization of
    the most recent entry. The results are appended to the ``word_entry``
    attribute selected through ``GLOSS_LIST_LINKAGE_TEMPLATES``.

    Args:
        wxr: Extraction context used for expansion and text cleaning.
        word_entry: Entry that receives the extracted entries.
        t_node: The template node; its name must be a key of
            ``GLOSS_LIST_LINKAGE_TEMPLATES``.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    root = wxr.wtp.parse(wikitext, expand_all=True)

    collected: list[Linkage] = []
    for span in root.find_html("span"):
        if span.attrs.get("lang", "") == word_entry.lang_code:
            term = clean_node(wxr, None, span)
            if len(word_entry.senses) > 0:
                gloss_text = " ".join(word_entry.senses[-1].glosses)
            else:
                gloss_text = ""
            entry = Linkage(word=term, sense=gloss_text)
            if entry.word != "":
                collected.append(entry)
            continue

        # A span in another script class romanizes the preceding entry.
        if len(collected) > 0 and "Latn" in span.attrs.get("class", ""):
            collected[-1].roman = clean_node(wxr, None, span)

    target_attr = GLOSS_LIST_LINKAGE_TEMPLATES[t_node.template_name]
    getattr(word_entry, target_attr).extend(collected)