Coverage for src/wiktextract/extractor/ja/etymology.py: 93%

1from wikitextprocessor.parser import LEVEL_KIND_FLAGS, LevelNode, NodeKind

3from ...page import clean_node

4from ...wxr_context import WiktextractContext

5from .models import WordEntry

def extract_etymology_section(
    wxr: WiktextractContext,
    page_data: list[WordEntry],
    base_data: WordEntry,
    level_node: LevelNode,
) -> None:
    """Fill in etymology texts and categories from an etymology section.

    One text is produced per list item found in the section's direct child
    lists (nested lists inside an item are left out of that item's text).
    When no list item yields any text, the section's own content, minus any
    child level nodes, is cleaned as a single text instead.

    The collected texts are assigned to ``base_data.etymology_texts`` and the
    gathered categories are appended to ``base_data.categories``.  If
    ``level_node`` is not a level-3 node, entries of ``page_data`` that have
    the same ``lang_code`` as ``base_data`` and no etymology texts yet receive
    the same texts and categories.

    Args:
        wxr: Context object handed through to ``clean_node``.
        page_data: Entries already extracted for the page; may be updated
            in place.
        base_data: Entry that always receives the result; updated in place.
        level_node: The etymology section node to read from.
    """
    collected: list[str] = []
    cat_data: dict = {}

    # Only direct child lists are visited: `find_child_recursively` would
    # also pick up lists that belong to subsections.
    for list_node in level_node.find_child(NodeKind.LIST):
        for item in list_node.find_child(NodeKind.LIST_ITEM):
            item_children = list(item.invert_find_child(NodeKind.LIST))
            item_text = clean_node(wxr, cat_data, item_children)
            if len(item_text) == 0:
                continue
            collected.append(item_text)

    # Fallback for sections written as plain text rather than as a list.
    if not collected:
        section_children = list(
            level_node.invert_find_child(LEVEL_KIND_FLAGS)
        )
        section_text = clean_node(wxr, cat_data, section_children)
        if len(section_text) > 0:
            collected.append(section_text)

    # The cleaned text of direct child links is discarded; the call is made
    # with `cat_data` so that, presumably, category links get recorded there.
    for link_node in level_node.find_child(NodeKind.LINK):
        clean_node(wxr, cat_data, link_node)

    base_data.etymology_texts = collected
    base_data.categories.extend(cat_data.get("categories", []))

    if level_node.kind == NodeKind.LEVEL3:
        return

    # under POS section
    for entry in page_data:
        if entry.lang_code != base_data.lang_code:
            continue
        if len(entry.etymology_texts) > 0:
            continue
        # NOTE: the very same list object is assigned to `base_data` and to
        # every matching entry here.
        entry.etymology_texts = collected
        entry.categories.extend(cat_data.get("categories", []))