From 884883c54ce9c56bf2867c8c05bc045645756bdf Mon Sep 17 00:00:00 2001 From: Sveinung Gundersen Date: Tue, 19 Dec 2023 10:36:46 +0100 Subject: [PATCH] Cleanup --- src/omnipy/__init__.py | 4 +--- src/omnipy/api/protocols/public/hub.py | 2 +- src/omnipy/data/dataset.py | 4 ++-- src/omnipy/data/model.py | 4 ++-- src/omnipy/data/serializer.py | 25 ++++++++++++------------- src/omnipy/modules/json/serializers.py | 1 - src/omnipy/modules/raw/tasks.py | 3 ++- tests/data/test_dataset.py | 2 +- tests/modules/raw/test_tasks.py | 4 ++-- tests/util/__init__.py | 2 +- tests/util/helpers/__init__.py | 2 +- 11 files changed, 25 insertions(+), 28 deletions(-) diff --git a/src/omnipy/__init__.py b/src/omnipy/__init__.py index 4a792b1a..38c78068 100644 --- a/src/omnipy/__init__.py +++ b/src/omnipy/__init__.py @@ -1,9 +1,6 @@ __version__ = '0.12.2' -import importlib import os -import sys -from typing import Optional from omnipy.data.dataset import Dataset from omnipy.data.model import Model @@ -137,6 +134,7 @@ 'convert_dataset_csv_to_pandas', 'convert_dataset_pandas_to_csv', 'convert_dataset_list_of_dicts_to_pandas', + 'extract_columns_as_files', 'decode_bytes', 'modify_all_lines', 'modify_datafile_contents', diff --git a/src/omnipy/api/protocols/public/hub.py b/src/omnipy/api/protocols/public/hub.py index c5d9c386..86f2b684 100644 --- a/src/omnipy/api/protocols/public/hub.py +++ b/src/omnipy/api/protocols/public/hub.py @@ -64,7 +64,7 @@ def __init__( local: IsEngine | None = None, # noqa prefect: IsEngine | None = None, # noqa registry: IsRunStateRegistry | None = None, # noqa - serializers: IsSerializerRegistry | None = None, + serializers: IsSerializerRegistry | None = None, # noqa root_log: IsRootLogObjects | None = None, # noqa *args: object, **kwargs: object) -> None: diff --git a/src/omnipy/data/dataset.py b/src/omnipy/data/dataset.py index 61c087e9..b0907dc6 100644 --- a/src/omnipy/data/dataset.py +++ b/src/omnipy/data/dataset.py @@ -428,7 +428,7 @@ def __repr__(self): print(get_calling_module_name()) if get_calling_module_name() in INTERACTIVE_MODULES: _waiting_for_terminal_repr(True) - return _table_repr(self) + return self._table_repr() return self._trad_repr() def _trad_repr(self) -> str: @@ -443,7 +443,7 @@ def _table_repr(self) -> str: humanize.naturalsize(objsize.get_deep_size(v))) for i, (k, v) in enumerate(self.items())), ('#', 'Data file name', 'Type', 'Length', 'Size (in memory)'), - tablefmt="rounded_outline", + tablefmt='rounded_outline', ) _waiting_for_terminal_repr(False) return ret diff --git a/src/omnipy/data/model.py b/src/omnipy/data/model.py index 85ec468c..9451b789 100644 --- a/src/omnipy/data/model.py +++ b/src/omnipy/data/model.py @@ -686,7 +686,7 @@ def _table_repr(self) -> str: bottom_structure), ), maxcolwidths=[header_column_width, data_column_width], - tablefmt="rounded_grid", + tablefmt='rounded_grid', ) else: out = tabulate( @@ -696,7 +696,7 @@ def _table_repr(self) -> str: os.linesep.join(new_structure_lines)), ), maxcolwidths=[header_column_width, data_column_width], - tablefmt="rounded_grid", + tablefmt='rounded_grid', ) _waiting_for_terminal_repr(False) diff --git a/src/omnipy/data/serializer.py b/src/omnipy/data/serializer.py index 71069f75..b07c0244 100644 --- a/src/omnipy/data/serializer.py +++ b/src/omnipy/data/serializer.py @@ -194,18 +194,17 @@ def load_from_tar_file_path_based_on_dataset_cls(self, else: log = print - with tarfile.open(tar_file_path, 'r:gz') as tarfile_obj: - serializers = self.detect_tar_file_serializers_from_dataset_cls(to_dataset) - if len(serializers) == 0: - log(f'No serializer for Dataset with type "{type(to_dataset)}" can be ' - f'determined. Aborting load.') - else: - for serializer in serializers: - log(f'Reading dataset from a gzipped tarpack at' - f' "{os.path.abspath(tar_file_path)}" with serializer type: ' - f'"{type(serializer)}"') + serializers = self.detect_tar_file_serializers_from_dataset_cls(to_dataset) + if len(serializers) == 0: + log(f'No serializer for Dataset with type "{type(to_dataset)}" can be ' + f'determined. Aborting load.') + else: + for serializer in serializers: + log(f'Reading dataset from a gzipped tarpack at' + f' "{os.path.abspath(tar_file_path)}" with serializer type: ' + f'"{type(serializer)}"') - with open(tar_file_path, 'rb') as tarfile_binary: - out_dataset = serializer.deserialize(tarfile_binary.read(), any) + with open(tar_file_path, 'rb') as tarfile_binary: + out_dataset = serializer.deserialize(tarfile_binary.read(), any) - return out_dataset + return out_dataset diff --git a/src/omnipy/modules/json/serializers.py b/src/omnipy/modules/json/serializers.py index e36a78c1..a4cab647 100644 --- a/src/omnipy/modules/json/serializers.py +++ b/src/omnipy/modules/json/serializers.py @@ -1,6 +1,5 @@ from typing import IO, Type -from omnipy.data.dataset import Dataset from omnipy.data.serializer import TarFileSerializer from ...api.protocols.public.data import IsDataset diff --git a/src/omnipy/modules/raw/tasks.py b/src/omnipy/modules/raw/tasks.py index 6aaa04de..15a528eb 100644 --- a/src/omnipy/modules/raw/tasks.py +++ b/src/omnipy/modules/raw/tasks.py @@ -16,7 +16,8 @@ def decode_bytes(data: bytes, encoding: str | None = None) -> str: detector = UniversalDetector() for line in data.splitlines(): detector.feed(line) - if detector.done: break + if detector.done: + break detector.close() result = detector.result diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py index 2a9f0b31..37fc4ded 100644 --- a/tests/data/test_dataset.py +++ b/tests/data/test_dataset.py @@ -217,7 +217,7 @@ def test_get_items_with_tuple_or_list() -> None: == dataset['data_file_1',] == dataset[('data_file_1',)] == dataset[['data_file_1']] \ == Dataset[Model[int]](data_file_1=123) assert dataset[0, 2] == dataset[(0, 2)] == dataset[[0, 2]] \ - == dataset['data_file_1','data_file_3'] == dataset[('data_file_1', 'data_file_3')] \ + == dataset['data_file_1', 'data_file_3'] == dataset[('data_file_1', 'data_file_3')] \ == dataset[['data_file_1', 'data_file_3']] == dataset[[0, 'data_file_3']] \ == Dataset[Model[int]](data_file_1=dataset['data_file_1'], data_file_3=dataset['data_file_3']) \ diff --git a/tests/modules/raw/test_tasks.py b/tests/modules/raw/test_tasks.py index 5c3a5bb2..e9e72f46 100644 --- a/tests/modules/raw/test_tasks.py +++ b/tests/modules/raw/test_tasks.py @@ -27,12 +27,12 @@ class DecodeCaseInfo(NamedTuple): for case in test_cases: assert decode_bytes.run( Dataset[Model[bytes]](a=case.bytes_data), encoding=case.encoding)['a'].contents == \ - case.target_str + case.target_str for case in test_cases: assert decode_bytes.run( Dataset[Model[bytes]](a=case.bytes_data), encoding=None)['a'].contents == \ - case.target_str + case.target_str assert decode_bytes.run( Dataset[Model[bytes]](dict([(case.encoding, case.bytes_data) for case in test_cases])), diff --git a/tests/util/__init__.py b/tests/util/__init__.py index b4bb73e2..1e67b9f0 100644 --- a/tests/util/__init__.py +++ b/tests/util/__init__.py @@ -61,4 +61,4 @@ def inherited_parent_staticmethod() -> bool: return True def inherited_parent_method(self) -> bool: - return True \ No newline at end of file + return True diff --git a/tests/util/helpers/__init__.py b/tests/util/helpers/__init__.py index 225b93d9..2eee8efb 100644 --- a/tests/util/helpers/__init__.py +++ b/tests/util/helpers/__init__.py @@ -3,4 +3,4 @@ # For test_helpers::test_get_calling_module_name def other_module_call_get_calling_module_name() -> str: - return get_calling_module_name() \ No newline at end of file + return get_calling_module_name()