Coverage for src/wiktextract/extractor/ja/etymology.py: 93%
25 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-17 08:19 +0000
1from wikitextprocessor.parser import LEVEL_KIND_FLAGS, LevelNode, NodeKind
3from ...page import clean_node
4from ...wxr_context import WiktextractContext
5from .models import WordEntry
def extract_etymology_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
) -> None:
    """Extract etymology texts and categories from an etymology section.

    Text is taken from the list items of the lists that are direct children
    of ``level_node``, one entry per non-empty item, with nested lists
    excluded from each item. If that yields nothing, the whole section
    (minus its sub-level nodes) is cleaned and used as a single entry.

    The result is written to ``base_data.etymology_texts``, and the
    categories gathered while cleaning are appended to
    ``base_data.categories``. When the section is not a level-3 node, the
    same texts and categories are also applied to every entry of
    ``page_data`` that has the same ``lang_code`` as ``base_data`` and no
    etymology texts yet.

    Args:
        wxr: Context object handed through to ``clean_node``.
        page_data: Entries already collected for the page; may be
            back-filled in place.
        base_data: Entry that receives the extracted data in place.
        level_node: The etymology section node.
    """
    cats = {}

    def text_without(node, excluded_kinds):
        # Clean every child of `node` except those of `excluded_kinds`;
        # `clean_node` is also handed `cats`, which is read for
        # "categories" further down.
        kept_children = list(
            node.invert_find_child(excluded_kinds, include_empty_str=True)
        )
        return clean_node(wxr, cats, kept_children)

    # Only direct child lists are visited: a recursive search would also
    # pick up lists that belong to subsections.
    etymology_texts = [
        item_text
        for list_node in level_node.find_child(NodeKind.LIST)
        for list_item in list_node.find_child(NodeKind.LIST_ITEM)
        if (item_text := text_without(list_item, NodeKind.LIST))
    ]

    if not etymology_texts:
        # No usable list items: fall back to the section's own content,
        # leaving out nested level (subsection) nodes.
        section_text = text_without(level_node, LEVEL_KIND_FLAGS)
        if section_text:
            etymology_texts.append(section_text)

    # The cleaned text of direct link children is discarded; the call is
    # made only for what it records in `cats`.
    for link_node in level_node.find_child(NodeKind.LINK):
        clean_node(wxr, cats, link_node)

    categories = cats.get("categories", [])
    base_data.etymology_texts = etymology_texts
    base_data.categories.extend(categories)

    if level_node.kind == NodeKind.LEVEL3:
        return

    # Not a level-3 section, i.e. the etymology sits under a POS section:
    # back-fill same-language entries that have no etymology yet.
    for entry in page_data:
        if entry.lang_code != base_data.lang_code:
            continue
        if entry.etymology_texts:
            continue
        entry.etymology_texts = etymology_texts
        entry.categories.extend(categories)