Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prototype of table view of Dataset + Model for interactive use #157

Merged
merged 12 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ typing-inspect = "^0.8.0"
#orjson = "^3.8.0"
#python-slugify = "^7.0.0"
isort = "^5.12.0"
chardet = "^5.2.0"
pathspec = "0.12.1"
tabulate = "^0.9.0"
devtools = "^0.12.2"
objsize = "^0.7.0"
humanize = "^4.9.0"

[tool.poetry.group.dev.dependencies]
deepdiff = "^6.2.1"
Expand All @@ -48,6 +54,7 @@ pandas-stubs = "^2.1.1.230928"
pre-commit = "^2.15.0"
pytest-mypy-plugins = "^3.0.0"
devtools = "^0.12.2"
ipython = "^8.18.1"

[tool.poetry.group.docs.dependencies]
furo = "^2022.12.7"
Expand Down
157 changes: 140 additions & 17 deletions src/omnipy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,150 @@
__version__ = '0.12.3'

import os
import sys
from typing import Optional

from omnipy.data.dataset import Dataset
from omnipy.data.model import Model
from omnipy.hub.runtime import Runtime

ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

# TODO: The check disabling runtime for tests also triggers for tests that are run outside of Omnipy,
# breaking tests on the user side.
# Find a better way to disable the global runtime object for Omnipy tests.
from omnipy.hub.runtime import runtime
# from omnipy.util.helpers import recursive_module_import
from omnipy.modules.general.tasks import import_directory, split_dataset
from omnipy.modules.json.datasets import (JsonDataset,
JsonDictDataset,
JsonDictOfDictsDataset,
JsonDictOfDictsOfScalarsDataset,
JsonDictOfListsDataset,
JsonDictOfListsOfDictsDataset,
JsonDictOfListsOfScalarsDataset,
JsonDictOfNestedListsDataset,
JsonDictOfScalarsDataset,
JsonListDataset,
JsonListOfDictsDataset,
JsonListOfDictsOfScalarsDataset,
JsonListOfListsDataset,
JsonListOfListsOfScalarsDataset,
JsonListOfNestedDictsDataset,
JsonListOfScalarsDataset,
JsonNestedDictsDataset,
JsonNestedListsDataset,
JsonOnlyDictsDataset,
JsonOnlyListsDataset,
JsonScalarDataset)
from omnipy.modules.json.flows import flatten_nested_json
from omnipy.modules.json.models import (JsonDictModel,
JsonDictOfDictsModel,
JsonDictOfDictsOfScalarsModel,
JsonDictOfListsModel,
JsonDictOfListsOfDictsModel,
JsonDictOfListsOfScalarsModel,
JsonDictOfNestedListsModel,
JsonDictOfScalarsModel,
JsonListModel,
JsonListOfDictsModel,
JsonListOfDictsOfScalarsModel,
JsonListOfListsModel,
JsonListOfListsOfScalarsModel,
JsonListOfNestedDictsModel,
JsonListOfScalarsModel,
JsonModel,
JsonNestedDictsModel,
JsonNestedListsModel,
JsonOnlyDictsModel,
JsonOnlyListsModel,
JsonScalarModel)
from omnipy.modules.json.tasks import (transpose_dict_of_dicts_2_list_of_dicts,
transpose_dicts_2_lists,
transpose_dicts_of_lists_of_dicts_2_lists_of_dicts)
from omnipy.modules.pandas.models import (ListOfPandasDatasetsWithSameNumberOfFiles,
PandasDataset,
PandasModel)
from omnipy.modules.pandas.tasks import (concat_dataframes_across_datasets,
convert_dataset_csv_to_pandas,
convert_dataset_list_of_dicts_to_pandas,
convert_dataset_pandas_to_csv,
extract_columns_as_files)
from omnipy.modules.raw.models import JoinLinesModel, SplitAndStripLinesModel, SplitLinesModel
from omnipy.modules.raw.tasks import (decode_bytes,
modify_all_lines,
modify_datafile_contents,
modify_each_line)
from omnipy.modules.tables.tasks import remove_columns

# from omnipy.util.helpers import recursive_module_import

def _get_runtime() -> Optional['Runtime']:
if 'pytest' not in sys.modules:
return Runtime()
else:
return None

ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

runtime: Optional['Runtime'] = _get_runtime()
__all__ = [
'runtime',
'Dataset',
'Model',
'JsonDataset',
'JsonDictDataset',
'JsonDictOfDictsDataset',
'JsonDictOfDictsOfScalarsDataset',
'JsonDictOfListsDataset',
'JsonDictOfListsOfDictsDataset',
'JsonDictOfListsOfScalarsDataset',
'JsonDictOfNestedListsDataset',
'JsonDictOfScalarsDataset',
'JsonListDataset',
'JsonListOfDictsDataset',
'JsonListOfDictsOfScalarsDataset',
'JsonListOfListsDataset',
'JsonListOfListsOfScalarsDataset',
'JsonListOfNestedDictsDataset',
'JsonListOfScalarsDataset',
'JsonNestedDictsDataset',
'JsonNestedListsDataset',
'JsonOnlyDictsDataset',
'JsonOnlyListsDataset',
'JsonScalarDataset',
'JsonDictModel',
'JsonDictOfDictsModel',
'JsonDictOfDictsOfScalarsModel',
'JsonDictOfListsModel',
'JsonDictOfListsOfDictsModel',
'JsonDictOfListsOfScalarsModel',
'JsonDictOfNestedListsModel',
'JsonDictOfScalarsModel',
'JsonListModel',
'JsonListOfDictsModel',
'JsonListOfDictsOfScalarsModel',
'JsonListOfListsModel',
'JsonListOfListsOfScalarsModel',
'JsonListOfNestedDictsModel',
'JsonListOfScalarsModel',
'JsonModel',
'JsonNestedDictsModel',
'JsonNestedListsModel',
'JsonOnlyDictsModel',
'JsonOnlyListsModel',
'JsonScalarModel',
'ListOfPandasDatasetsWithSameNumberOfFiles',
'PandasModel',
'PandasDataset',
'SplitLinesModel',
'SplitAndStripLinesModel',
'JoinLinesModel',
'import_directory',
'split_dataset',
'flatten_nested_json',
'transpose_dicts_2_lists',
'transpose_dict_of_dicts_2_list_of_dicts',
'transpose_dicts_of_lists_of_dicts_2_lists_of_dicts',
'concat_dataframes_across_datasets',
'convert_dataset_csv_to_pandas',
'convert_dataset_pandas_to_csv',
'convert_dataset_list_of_dicts_to_pandas',
'extract_columns_as_files',
'decode_bytes',
'modify_all_lines',
'modify_datafile_contents',
'modify_each_line',
'remove_columns',
]

__all__ = [Model, Dataset]
#
# def __getattr__(attr_name: str) -> object:
# omnipy = importlib.import_module(__name__)
# all_modules = []
# recursive_module_import(omnipy, all_modules)
# print(all_modules)
19 changes: 16 additions & 3 deletions src/omnipy/api/protocols/public/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def serialize(cls, dataset: IsDataset) -> bytes | memoryview:
pass

@classmethod
def deserialize(cls, serialized: bytes) -> IsDataset:
def deserialize(cls, serialized: bytes, any_file_suffix=False) -> IsDataset:
pass


Expand All @@ -100,7 +100,8 @@ def create_dataset_from_tarfile(cls,
tarfile_bytes: bytes,
data_decode_func: Callable[[IO[bytes]], Any],
dictify_object_func: Callable[[str, Any], dict | str],
import_method='from_data'):
import_method='from_data',
any_file_suffix: bool = False):
...


Expand Down Expand Up @@ -130,8 +131,20 @@ def auto_detect_tar_file_serializer(self, dataset: IsDataset):
def _autodetect_serializer(cls, dataset, serializers):
...

def detect_tar_file_serializers_from_dataset_cls(self, dataset: IsDataset):
...

def detect_tar_file_serializers_from_file_suffix(self, file_suffix: str):
...

def load_from_tar_file_path(self, log_obj: CanLog, tar_file_path: str, to_dataset: IsDataset):
def load_from_tar_file_path_based_on_file_suffix(self,
log_obj: CanLog,
tar_file_path: str,
to_dataset: IsDataset):
...

def load_from_tar_file_path_based_on_dataset_cls(self,
log_obj: CanLog,
tar_file_path: str,
to_dataset: IsDataset):
...
3 changes: 2 additions & 1 deletion src/omnipy/api/protocols/public/hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,15 @@ class IsRuntimeObjects(Protocol):
registry: IsRunStateRegistry
serializers: IsSerializerRegistry
root_log: IsRootLogObjects
waiting_for_terminal_repr: bool

def __init__(
self,
job_creator: IsJobConfigHolder | None = None, # noqa
local: IsEngine | None = None, # noqa
prefect: IsEngine | None = None, # noqa
registry: IsRunStateRegistry | None = None, # noqa
serializers: IsSerializerRegistry | None = None,
serializers: IsSerializerRegistry | None = None, # noqa
root_log: IsRootLogObjects | None = None, # noqa
*args: object,
**kwargs: object) -> None:
Expand Down
4 changes: 2 additions & 2 deletions src/omnipy/compute/mixins/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@


def _setup_serializer_registry() -> IsSerializerRegistry:
from omnipy import runtime
from omnipy.hub.runtime import runtime
if runtime is not None:
return runtime.objects.serializers
else:
Expand Down Expand Up @@ -227,7 +227,7 @@ def _deserialize_and_restore_outputs(self) -> Dataset:
for tar_file_path in self._all_job_output_file_paths_in_reverse_order_for_last_run(
persist_data_dir_path, self._job_name()):
to_dataset = cast(Type[Dataset], self._return_type)
return self._serializer_registry.load_from_tar_file_path(
return self._serializer_registry.load_from_tar_file_path_based_on_file_suffix(
self, tar_file_path, to_dataset())

raise RuntimeError('No persisted output')
Expand Down
Loading