Coverage for src/wiktextract/extractor/tr/linkage.py: 87%

51 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 05:18 +0000

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode 

2 

3from ...page import clean_node 

4from ...wxr_context import WiktextractContext 

5from .models import Form, Linkage, WordEntry 

6from .tags import translate_raw_tags 

7 

8 

def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    l_type: str,
    tags: list[str],
) -> None:
    """Collect the linkages found in a section and store them on the entry.

    Walks the direct children of ``level_node``:

    * an "üst" / "trans-top" template (name compared case-insensitively)
      sets the sense applied to the list items that follow it, taken from
      the template's first positional parameter;
    * every list node contributes the linkages extracted from its own
      list items via ``extract_linkage_list_item``.

    Link nodes that are direct children of the section are added as plain
    linkages without a sense.

    Args:
        wxr: Extraction context handed to ``clean_node``.
        word_entry: Entry that receives the collected data.
        level_node: Section node whose children are scanned.
        l_type: Name of the ``word_entry`` list attribute to extend. The
            value ``"forms"`` is special-cased: linkages are converted to
            ``Form`` objects and appended to ``word_entry.forms``.
        tags: Tags passed to every ``Linkage`` created here.
    """
    sense = ""
    l_list = []
    for node in level_node.children:
        if isinstance(node, TemplateNode) and node.template_name.lower() in (
            "üst",
            "trans-top",
        ):
            sense = clean_node(wxr, None, node.template_parameters.get(1, ""))
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            # Only read the items of *this* list.  Re-scanning every list
            # under ``level_node`` for each list child would add each item
            # once per list and pair it with the wrong sense heading.
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                l_list.extend(
                    extract_linkage_list_item(
                        wxr, word_entry, list_item, tags, sense
                    )
                )

    # Bare links placed directly in the section (outside any list).
    for link_node in level_node.find_child(NodeKind.LINK):
        word = clean_node(wxr, None, link_node)
        if word != "":
            l_list.append(Linkage(word=word, tags=tags))

    if l_type == "forms":
        # Forms are stored as ``Form`` objects rather than ``Linkage``.
        for l_data in l_list:
            word_entry.forms.append(
                Form(
                    form=l_data.word,
                    tags=l_data.tags,
                    raw_tags=l_data.raw_tags,
                    roman=l_data.roman,
                )
            )
    else:
        getattr(word_entry, l_type).extend(l_list)

49 

50 

def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    tags: list[str],
    sense: str,
) -> list[Linkage]:
    """Build the linkages described by the children of one list item.

    Link nodes and the "bağlantı" / "l" / "b" templates each produce a
    ``Linkage`` (skipped when the cleaned word is empty).  An "anlam" /
    "mânâ" / "mana" template replaces the sense used for the linkages that
    follow it.  A "şerh" template adds a raw tag to the most recently
    collected linkage and then runs ``translate_raw_tags`` on it.

    Args:
        wxr: Extraction context handed to ``clean_node``.
        word_entry: Not read by this function.
        list_item: List item node whose children are scanned.
        tags: Tags passed to every ``Linkage`` created here.
        sense: Sense used until a sense template overrides it.

    Returns:
        The linkages found, in document order.
    """
    link_templates = ("bağlantı", "l", "b")
    sense_templates = ("anlam", "mânâ", "mana")

    found: list[Linkage] = []
    current_sense = sense
    for child in list_item.children:
        is_link = isinstance(child, WikiNode) and child.kind == NodeKind.LINK
        is_template = isinstance(child, TemplateNode)

        if is_link or (is_template and child.template_name in link_templates):
            linkage = Linkage(
                word=clean_node(wxr, None, child),
                sense=current_sense,
                tags=tags,
            )
            if linkage.word != "":
                found.append(linkage)
            continue

        if not is_template:
            continue

        if child.template_name in sense_templates:
            current_sense = clean_node(wxr, None, child).strip("(): ")
        elif child.template_name == "şerh" and found:
            # The qualifier applies to the linkage immediately before it.
            raw_tag = clean_node(wxr, None, child).strip("() ")
            if raw_tag == "":
                continue
            latest = found[-1]
            latest.raw_tags.append(raw_tag)
            translate_raw_tags(latest)
    return found

78 

79 

# Maps the name of a linkage template to the name of the ``WordEntry`` list
# attribute that receives the linkages extracted from it.
GLOSS_LIST_LINKAGE_TEMPLATES: dict[str, str] = {
    "eş anlamlılar": "synonyms",
    # Two template names feed the same "antonyms" list.
    "zıt anlamlılar": "antonyms",
    "zıt anlamlı": "antonyms",
    "alt kavramlar": "hyponyms",
}

86 

87 

def extract_gloss_list_linkage_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Extract linkages from an expanded linkage template.

    The template is converted back to wikitext, re-parsed with all
    templates expanded, and its ``<span>`` elements are scanned.  A span
    whose ``lang`` attribute equals the entry's language code yields a
    ``Linkage`` whose sense is the space-joined glosses of the entry's last
    sense (empty when the entry has no senses).  A span whose ``class``
    attribute contains "Latn" supplies the romanization of the linkage
    collected just before it.

    The result is appended to the ``word_entry`` attribute that
    ``GLOSS_LIST_LINKAGE_TEMPLATES`` maps ``t_node.template_name`` to.

    Args:
        wxr: Extraction context used for parsing and ``clean_node``.
        word_entry: Entry that is read for language/sense and extended.
        t_node: Linkage template node to expand.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_root = wxr.wtp.parse(wikitext, expand_all=True)

    collected = []
    for span in expanded_root.find_html("span"):
        if word_entry.lang_code == span.attrs.get("lang", ""):
            word = clean_node(wxr, None, span)
            if len(word_entry.senses) > 0:
                gloss_text = " ".join(word_entry.senses[-1].glosses)
            else:
                gloss_text = ""
            linkage = Linkage(word=word, sense=gloss_text)
            if linkage.word != "":
                collected.append(linkage)
        elif "Latn" in span.attrs.get("class", "") and len(collected) > 0:
            # Romanization belongs to the linkage that precedes this span.
            collected[-1].roman = clean_node(wxr, None, span)

    target_attr = GLOSS_LIST_LINKAGE_TEMPLATES[t_node.template_name]
    getattr(word_entry, target_attr).extend(collected)