Coverage for src/wiktextract/extractor/tr/linkage.py: 87%

1from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from .models import Form, Linkage, WordEntry

6from .tags import translate_raw_tags

def extract_linkage_section(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    level_node: LevelNode,
    l_type: str,
    tags: list[str],
) -> None:
    """Collect the linkages found in a section and store them on the entry.

    The direct children of ``level_node`` are scanned in order:

    * an "üst" / "trans-top" template (name compared case-insensitively)
      sets the sense text, taken from its first positional parameter, that
      is passed along for the lists following it;
    * every list item of a list child is handed to
      ``extract_linkage_list_item``.

    Link nodes that are direct children of the section are added afterwards
    as linkages without a sense.

    Args:
        wxr: Extraction context forwarded to ``clean_node``.
        word_entry: Entry that receives the results.
        level_node: Section node whose children are scanned.
        l_type: Name of the ``word_entry`` list attribute to extend. The
            value ``"forms"`` is special: the results are converted to
            ``Form`` objects and appended to ``word_entry.forms``.
        tags: Tags given to every linkage created here.
    """
    sense = ""
    l_list = []
    for node in level_node.children:
        if isinstance(node, TemplateNode) and node.template_name.lower() in (
            "üst",
            "trans-top",
        ):
            sense = clean_node(wxr, None, node.template_parameters.get(1, ""))
        elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
            # Only walk the list that was just reached. Iterating every list
            # of the section here would extract each item once per list and
            # pair items with the sense of an unrelated header template.
            for list_item in node.find_child(NodeKind.LIST_ITEM):
                l_list.extend(
                    extract_linkage_list_item(
                        wxr, word_entry, list_item, tags, sense
                    )
                )

    # Links placed directly in the section, outside of any list.
    for link_node in level_node.find_child(NodeKind.LINK):
        word = clean_node(wxr, None, link_node)
        if word != "":
            l_list.append(Linkage(word=word, tags=tags))

    if l_type == "forms":
        # Forms are stored as Form objects rather than Linkage objects.
        for l_data in l_list:
            word_entry.forms.append(
                Form(
                    form=l_data.word,
                    tags=l_data.tags,
                    raw_tags=l_data.raw_tags,
                    roman=l_data.roman,
                )
            )
    else:
        getattr(word_entry, l_type).extend(l_list)

def extract_linkage_list_item(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    list_item: WikiNode,
    tags: list[str],
    sense: str,
) -> list[Linkage]:
    """Build the linkages described by one list item.

    Children of ``list_item`` are handled in order:

    * a link node, or a "bağlantı" / "l" / "b" template, yields a new
      ``Linkage`` (dropped when its cleaned text is empty);
    * an "anlam" / "mânâ" / "mana" template replaces the sense used for the
      linkages created after it;
    * a "şerh" template adds its cleaned text as a raw tag to the most
      recently created linkage, which is then passed to
      ``translate_raw_tags``.

    Args:
        wxr: Extraction context forwarded to ``clean_node``.
        word_entry: Not used by this function.
        list_item: List item node to scan.
        tags: Tags given to every linkage created here.
        sense: Sense text in effect when the list item starts.

    Returns:
        The linkages found, in document order.
    """
    linkages = []
    for child in list_item.children:
        is_template = isinstance(child, TemplateNode)
        is_plain_link = (
            isinstance(child, WikiNode) and child.kind == NodeKind.LINK
        )

        if is_plain_link or (
            is_template and child.template_name in ["bağlantı", "l", "b"]
        ):
            entry = Linkage(
                word=clean_node(wxr, None, child), sense=sense, tags=tags
            )
            if entry.word != "":
                linkages.append(entry)
            continue

        if not is_template:
            # Plain text and other node kinds carry no linkage data.
            continue

        template_name = child.template_name
        if template_name in ["anlam", "mânâ", "mana"]:
            sense = clean_node(wxr, None, child).strip("(): ")
        elif template_name == "şerh" and linkages:
            # A note only makes sense once there is a linkage to attach to.
            note = clean_node(wxr, None, child).strip("() ")
            if note != "":
                latest = linkages[-1]
                latest.raw_tags.append(note)
                translate_raw_tags(latest)
    return linkages

# Maps the name of a linkage template to the name of the WordEntry list
# attribute that receives the linkages extracted from that template.
GLOSS_LIST_LINKAGE_TEMPLATES: dict[str, str] = {
    "eş anlamlılar": "synonyms",
    # Two template names map to the antonym list.
    "zıt anlamlılar": "antonyms",
    "zıt anlamlı": "antonyms",
    "alt kavramlar": "hyponyms",
}

def extract_gloss_list_linkage_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Extract linkages from a linkage template and add them to the entry.

    The template is converted back to wikitext, re-parsed with full
    expansion, and the ``<span>`` elements yielded by ``find_html`` are
    scanned in order:

    * a span whose ``lang`` attribute equals ``word_entry.lang_code``
      yields a new ``Linkage`` (dropped when its cleaned text is empty).
      Its sense is the space-joined glosses of the entry's last sense, or
      an empty string when the entry has no senses;
    * any other span whose ``class`` attribute contains "Latn" sets the
      ``roman`` field of the most recently created linkage.

    The results extend the ``word_entry`` attribute named by
    ``GLOSS_LIST_LINKAGE_TEMPLATES[t_node.template_name]``, so a template
    name missing from that mapping raises ``KeyError``.

    Args:
        wxr: Extraction context used for parsing and ``clean_node``.
        word_entry: Entry that receives the linkages.
        t_node: Template node to expand.
    """
    wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded = wxr.wtp.parse(wikitext, expand_all=True)

    linkages = []
    for span in expanded.find_html("span"):
        if word_entry.lang_code == span.attrs.get("lang", ""):
            word = clean_node(wxr, None, span)
            if len(word_entry.senses) > 0:
                gloss_parts = word_entry.senses[-1].glosses
            else:
                # Joining an empty string produces an empty sense.
                gloss_parts = ""
            entry = Linkage(word=word, sense=" ".join(gloss_parts))
            if entry.word != "":
                linkages.append(entry)
        elif "Latn" in span.attrs.get("class", "") and len(linkages) > 0:
            linkages[-1].roman = clean_node(wxr, None, span)

    target_field = GLOSS_LIST_LINKAGE_TEMPLATES[t_node.template_name]
    getattr(word_entry, target_field).extend(linkages)

Coverage for src/wiktextract/extractor/tr/linkage.py: 87%

51 statements