Coverage for src/wiktextract/extractor/ja/etymology.py: 93%
25 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-27 08:07 +0000
1from wikitextprocessor.parser import LEVEL_KIND_FLAGS, LevelNode, NodeKind
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from .models import WordEntry
def extract_etymology_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
) -> None:
    """Store the etymology texts and categories found in ``level_node``.

    Each list item that is a direct child list entry of the section yields
    one etymology text (nested lists inside the item are excluded from the
    cleaned text).  When no list item produces text, the section's
    non-heading children are cleaned as a single text instead.

    The result is written to ``base_data``.  If the section is not a
    ``LEVEL3`` node (i.e. it sits under a POS section), it is also written
    to every entry of ``page_data`` that has the same language code and no
    etymology text yet.

    Args:
        wxr: Context object passed through to ``clean_node``.
        page_data: Entries already extracted for the page; may be updated
            in place.
        base_data: Entry that receives the etymology texts and categories.
        level_node: The etymology section node.
    """
    # Filled in by `clean_node`; read back below via the "categories" key.
    category_data = {}
    etymology_texts = []

    # Only direct child lists are scanned (not `find_child_recursively`)
    # so that lists belonging to subsections are not picked up.
    for list_node in level_node.find_child(NodeKind.LIST):
        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
            item_nodes = list(list_item.invert_find_child(NodeKind.LIST))
            item_text = clean_node(wxr, category_data, item_nodes)
            if item_text:
                etymology_texts.append(item_text)

    # Fallback: no usable list items, so use the section body as one text.
    if not etymology_texts:
        section_nodes = list(level_node.invert_find_child(LEVEL_KIND_FLAGS))
        section_text = clean_node(wxr, category_data, section_nodes)
        if section_text:
            etymology_texts.append(section_text)

    # The cleaned text of direct child links is discarded; the call is made
    # only for what it records in `category_data`
    # (presumably category links -- confirm against `clean_node`).
    for link_node in level_node.find_child(NodeKind.LINK):
        clean_node(wxr, category_data, link_node)

    base_data.etymology_texts = etymology_texts
    base_data.categories.extend(category_data.get("categories", []))

    if level_node.kind == NodeKind.LEVEL3:
        return

    # Section is under a POS section: back-fill entries of the same
    # language that do not have etymology texts yet.
    for entry in page_data:
        if entry.lang_code != base_data.lang_code or entry.etymology_texts:
            continue
        entry.etymology_texts = etymology_texts
        entry.categories.extend(category_data.get("categories", []))