From 6cc3675561fc009025435f18a01c0ee16c33489a Mon Sep 17 00:00:00 2001 From: Sveinung Gundersen Date: Mon, 18 Nov 2024 09:46:10 -0600 Subject: [PATCH] Allow Dataset.load() of urls with specified keys --- src/omnipy/data/dataset.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/omnipy/data/dataset.py b/src/omnipy/data/dataset.py index 2857b3df..1f89e473 100644 --- a/src/omnipy/data/dataset.py +++ b/src/omnipy/data/dataset.py @@ -563,10 +563,18 @@ def save(self, path: str): tar.close() def load(self, - paths_or_urls: 'str | Iterable[str] | HttpUrlModel | HttpUrlDataset', - by_file_suffix: bool = False) -> list[asyncio.Task] | None: + paths_or_urls: 'str | Iterable[str] | HttpUrlModel | HttpUrlDataset ' + '| Mapping[str, str | HttpUrlModel] | None' = None, + by_file_suffix: bool = False, + **kwargs: 'str | HttpUrlModel') -> list[asyncio.Task] | None: from omnipy import HttpUrlDataset, HttpUrlModel + if paths_or_urls is None: + assert len(kwargs) > 0, 'No paths or urls specified' + paths_or_urls = kwargs + else: + assert len(kwargs) == 0, 'No keyword arguments allowed when paths_or_urls is specified' + match paths_or_urls: case HttpUrlDataset(): return self._load_http_urls(paths_or_urls) @@ -580,6 +588,17 @@ def load(self, except ValidationError: return self._load_paths([paths_or_urls], by_file_suffix) return self._load_http_urls(http_url_dataset) + + case Mapping(): + try: + http_url_dataset = HttpUrlDataset(paths_or_urls) + except ValidationError as exp: + raise NotImplementedError( + 'Loading files with specified keys is not yet ' + 'implemented, as only tar.gz file import is ' + 'supported until serializers have been refactored.') from exp + return self._load_http_urls(http_url_dataset) + case Iterable(): try: path_or_url_iterable = cast(Iterable[str], paths_or_urls)