Skip to content

Commit

Permalink
[WIP] Working on the navigation parser
Browse files Browse the repository at this point in the history
  • Loading branch information
PonteIneptique committed Aug 24, 2024
1 parent 2c618eb commit 08b9be7
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 4 deletions.
61 changes: 58 additions & 3 deletions dapitains/app/ingest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from typing import Dict, List, Optional, Any
from typing import Dict, List, Optional, Any, Tuple
from dapitains.app.database import Collection, Navigation, db
from dapitains.metadata.xml_parser import Catalog
from dapitains.tei.document import Document
import copy

def store_catalog(catalog: Catalog):
for identifier, collection in catalog.objects.items():
Expand All @@ -23,17 +24,23 @@ def get_member_by_path(data: List[Dict[str, Any]], path: List[int]) -> Optional[
"""
current_level = data

for index in path:
path_copy = [] + path
while path_copy:
index = path_copy.pop(0)
try:
current_level = current_level[index]
if 'members' in current_level:
if 'members' in current_level and path_copy:
current_level = current_level['members']
except (IndexError, KeyError):
return None

return current_level


def strip_members(obj: Dict[str, Any]) -> Dict[str, Any]:
    """Return a shallow copy of ``obj`` without its ``"members"`` entry.

    The input mapping is left untouched; every other key is kept, in its
    original order.
    """
    without_members = dict(obj)
    # A default is passed to ``pop`` so that objects without members are fine.
    without_members.pop("members", None)
    return without_members


def generate_paths(data: List[Dict[str, Any]], path: Optional[List[int]] = None) -> Dict[str, List[int]]:
"""
Generate a dictionary mapping each 'ref' in a nested data structure to its path.
Expand Down Expand Up @@ -69,6 +76,54 @@ def recurse(items, current_path):
recurse(data, [])
return paths


def get_nav(
    refs: List[Dict[str, Any]],
    paths: Dict[str, List[int]],
    start_or_ref: Optional[str],
    end: Optional[str],
    down: Optional[int] = 1
) -> Tuple[List[Dict[str, Any]], Optional[Dict[str, Any]], Optional[Dict[str, Any]]]:
    """Select the units of a reference tree lying in a range, at a given depth.

    :param refs: Nested reference tree; each path in ``paths`` is resolved
        against it with ``get_member_by_path``.
    :param paths: Mapping of each ref to its index path in ``refs``. Its
        iteration order is used as the order of the refs.
    :param start_or_ref: Ref at which the selection starts (inclusive). When
        falsy, the selection starts at the first ref.
    :param end: Ref at which the selection ends (inclusive). When falsy, the
        selection runs to the last ref.
    :param down: Depth selector, relative to the deepest of the start and end
        paths (0 when neither is given). ``0`` keeps the paths of exactly that
        depth, ``-1`` is replaced by the length of the longest selected path,
        and any other value ``n`` keeps the paths that are deeper than that
        depth by at most ``n`` levels.
    :returns: A tuple ``(units, start, end)`` where ``units`` are the selected
        objects without their ``"members"`` key, and ``start`` / ``end`` are
        the objects for ``start_or_ref`` / ``end`` (``None`` when not given).
    :raises ValueError: If ``start_or_ref`` or ``end`` is not a key of ``paths``.
    :raises KeyError: If ``end`` comes before ``start_or_ref`` in ``paths``,
        because the start ref is then outside the (empty) selection.
    """
    ordered_refs = list(paths)

    start_index = ordered_refs.index(start_or_ref) if start_or_ref else None
    # +1 because the end ref is part of the selection while slices are exclusive.
    end_index = ordered_refs.index(end) + 1 if end else None

    selected = {ref: paths[ref] for ref in ordered_refs[start_index:end_index]}

    start_path = selected[start_or_ref] if start_or_ref else None
    end_path = selected[end] if end else None

    # Depth of the deepest boundary; 0 when the selection is not bounded.
    current_level = max(
        [0] + [len(path) for path in (start_path, end_path) if path is not None]
    )

    if down == -1:
        # ``default`` keeps an empty selection from raising ValueError.
        down = max(map(len, selected.values()), default=0)

    if down == 0:
        selected = {
            ref: path for ref, path in selected.items()
            if len(path) == current_level
        }
    else:
        selected = {
            ref: path for ref, path in selected.items()
            if current_level < len(path) <= down + current_level
        }

    return (
        [
            strip_members(get_member_by_path(refs, path))
            for path in selected.values()
        ],
        strip_members(get_member_by_path(refs, start_path)) if start_path else None,
        strip_members(get_member_by_path(refs, end_path)) if end_path else None
    )


if __name__ == "__main__":
import flask
import os
Expand Down
27 changes: 26 additions & 1 deletion tests/test_db_create.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import flask
from dapitains.app.ingest import generate_paths, get_member_by_path
from dapitains.app.ingest import generate_paths, get_member_by_path, get_nav, strip_members
from dapitains.tei.document import Document
import os

Expand Down Expand Up @@ -48,5 +48,30 @@ def test_simple_path():
"Mark 1:3": [1, 0, 3]
}
}
assert strip_members(get_member_by_path(refs[None], paths[None]["Luke"])) == {'citeType': 'book', 'ref': 'Luke'}
assert get_member_by_path(refs[None], paths[None]["Mark 1:3"]) == {'citeType': 'verse', 'ref': 'Mark 1:3'}


def test_navigation():
    """Check ``get_nav`` on the base TEI fixture, unbounded and with ranges."""
    document = Document(f"{local_dir}/tei/base_tei.xml")

    # One reference tree (and its ref -> path index) per citation tree.
    refs = {}
    for tree, structure in document.citeStructure.items():
        refs[tree] = [
            unit.json()
            for unit in structure.find_refs(document.xml, structure=structure.units)
        ]
    paths = {tree: generate_paths(tree_refs) for tree, tree_refs in refs.items()}

    luke_1_1 = {'citeType': 'verse', 'ref': 'Luke 1:1'}
    luke_1_2 = {'citeType': 'verse', 'ref': 'Luke 1:2'}
    luke_1_bloup = {'citeType': 'bloup', 'ref': 'Luke 1#1'}
    mark_1_1 = {'citeType': 'verse', 'ref': 'Mark 1:1'}
    mark_1_2 = {'citeType': 'verse', 'ref': 'Mark 1:2'}

    # No boundaries, one level down: the top-level books only.
    top_level = get_nav(refs[None], paths[None], start_or_ref=None, end=None, down=1)
    assert top_level == (
        [
            {'citeType': 'book', 'ref': 'Luke'},
            {'citeType': 'book', 'ref': 'Mark'}
        ],
        None,
        None
    )

    # Range inside a single book, at the level of its boundaries.
    within_luke = get_nav(refs[None], paths[None], start_or_ref="Luke 1:1", end="Luke 1#1", down=0)
    assert within_luke == (
        [luke_1_1, luke_1_2, luke_1_bloup],
        luke_1_1,
        luke_1_bloup
    )

    # Range spanning two books, at the level of its boundaries.
    across_books = get_nav(refs[None], paths[None], start_or_ref="Luke 1:1", end="Mark 1:2", down=0)
    assert across_books == (
        [luke_1_1, luke_1_2, luke_1_bloup, mark_1_1, mark_1_2],
        luke_1_1,
        mark_1_2
    )

0 comments on commit 08b9be7

Please sign in to comment.