Skip to content

Commit

Permalink
Allow Dataset.load() of URLs with specified keys
Browse files Browse the repository at this point in the history
  • Loading branch information
sveinugu committed Nov 18, 2024
1 parent 6324625 commit 6cc3675
Showing 1 changed file with 21 additions and 2 deletions.
23 changes: 21 additions & 2 deletions src/omnipy/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,10 +563,18 @@ def save(self, path: str):
tar.close()

def load(self,
paths_or_urls: 'str | Iterable[str] | HttpUrlModel | HttpUrlDataset',
by_file_suffix: bool = False) -> list[asyncio.Task] | None:
paths_or_urls: 'str | Iterable[str] | HttpUrlModel | HttpUrlDataset '
'| Mapping[str, str | HttpUrlModel] | None' = None,
by_file_suffix: bool = False,
**kwargs: 'str | HttpUrlModel') -> list[asyncio.Task] | None:
from omnipy import HttpUrlDataset, HttpUrlModel

if paths_or_urls is None:
assert len(kwargs) > 0, 'No paths or urls specified'
paths_or_urls = kwargs
else:
assert len(kwargs) == 0, 'No keyword arguments allowed when paths_or_urls is specified'

match paths_or_urls:
case HttpUrlDataset():
return self._load_http_urls(paths_or_urls)
Expand All @@ -580,6 +588,17 @@ def load(self,
except ValidationError:
return self._load_paths([paths_or_urls], by_file_suffix)
return self._load_http_urls(http_url_dataset)

case Mapping():
try:
http_url_dataset = HttpUrlDataset(paths_or_urls)
except ValidationError as exp:
raise NotImplementedError(
'Loading files with specified keys is not yet '
'implemented, as only tar.gz file import is '
'supported until serializers have been refactored.') from exp
return self._load_http_urls(http_url_dataset)

case Iterable():
try:
path_or_url_iterable = cast(Iterable[str], paths_or_urls)
Expand Down

0 comments on commit 6cc3675

Please sign in to comment.