Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added validation layer to SyncedCollection #378

Merged
Merged
Show file tree
Hide file tree
Changes from 49 commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
009a1b6
Added JSONFornmatValidator
vishav1771 Aug 8, 2020
8fc224e
Added AttrDictFormatValidator
vishav1771 Aug 8, 2020
0117187
Added validation to synced_collection and Added doc for validators
vishav1771 Aug 20, 2020
c13222d
linting error
vishav1771 Aug 20, 2020
43f1072
test change
vishav1771 Aug 20, 2020
64df461
Removed extra space from synced_list
vishav1771 Aug 20, 2020
c546c8a
Merge feature/synced_collections into feature/synced_collection/valid…
vishav1771 Aug 22, 2020
4d3a198
Added validation to other backends
vishav1771 Aug 22, 2020
0e70dd0
Added blank line
vishav1771 Aug 22, 2020
4e87740
Update doc in signac/core/syncedattrdict.py
vishav1771 Aug 25, 2020
cf04fb1
Moved Validators to validators.py and removed validator from other co…
vishav1771 Aug 25, 2020
5943057
Added blankline
vishav1771 Aug 25, 2020
a157e4c
Added type annotation for _validators
vishav1771 Aug 25, 2020
0518d76
Added type annotation for _validators
vishav1771 Aug 25, 2020
2accf51
Added Numply conversion
vishav1771 Aug 25, 2020
fb05704
Changed validators to functions
vishav1771 Aug 25, 2020
0021865
Added init_subclass
vishav1771 Aug 26, 2020
3b4ae94
Removed trailing space
vishav1771 Aug 26, 2020
39d630d
Apply suggestions from code review
vishav1771 Aug 27, 2020
cfbb9e6
Removed `bytes` from JSONFromatValidator and documentation update.
vishav1771 Aug 27, 2020
a18d68e
added skip for zarr and redis non str keys test
vishav1771 Aug 27, 2020
51c0afc
removed extra space
vishav1771 Aug 27, 2020
5a90b03
Modified test_keys_non_str_valid_type
vishav1771 Aug 27, 2020
0988835
Apply suggestions from code review
vishav1771 Aug 27, 2020
f558e4a
Update signac/core/validators.py
vishav1771 Aug 27, 2020
4e86e31
linitng erro
vishav1771 Aug 27, 2020
c9d65ef
Added validators property and chenged backend to _backend
vishav1771 Aug 28, 2020
86843a2
Changed ImportError
vishav1771 Aug 28, 2020
09fe82f
Apply suggestions from code review
vishav1771 Aug 31, 2020
18d2c41
Changed Validator to either raise error or no operation
vishav1771 Aug 31, 2020
6a742bf
Added tests
vishav1771 Aug 31, 2020
2f87459
removed blank space
vishav1771 Aug 31, 2020
3e6f33b
Removed extra blank lines
vishav1771 Aug 31, 2020
6242219
Apply suggestions from code review
vishav1771 Sep 1, 2020
9e8453a
Applied chnages from suggestion
vishav1771 Sep 1, 2020
2620521
Updated update for SyncedAttrDict
vishav1771 Sep 1, 2020
6bda14e
Apply suggestions from code review
vishav1771 Sep 2, 2020
6af9361
Updated test_update and added comment in validator
vishav1771 Sep 2, 2020
e625c98
Rename JSON_format_validator to json_format_validator.
bdice Sep 7, 2020
ce11ad6
Correct docstrings for SyncedList.
bdice Sep 8, 2020
06c249e
Add test for generator exhaustion with SyncedList.
bdice Sep 8, 2020
1ca00fc
Convert iterable to list to prevent generator exhaustion during valid…
bdice Sep 8, 2020
2516cbe
Add generator exhaustion test for __iadd__.
bdice Sep 8, 2020
60f7d08
Correct module docstring of SyncedAttrDict and SyncedList
vishav1771 Sep 8, 2020
d771b3c
Updated setdefault
vishav1771 Sep 8, 2020
9e5d47e
Linting error
vishav1771 Sep 8, 2020
c3d5b37
Added test_setdefault and added comments
vishav1771 Sep 8, 2020
3e9ae47
Update signac/core/validators.py
vishav1771 Sep 8, 2020
c4a698d
Updated json_format_validator
vishav1771 Sep 8, 2020
48c44ed
Apply suggestions from code review
vyasr Sep 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ omit =

[tool:pytest]
filterwarnings =
ignore: .*[The indexing module | get_statepoint] is deprecated.*: DeprecationWarning
ignore: .*[The indexing module | get_statepoint | Use of.+as key] is deprecated.*: DeprecationWarning

[bumpversion:file:setup.py]

Expand Down
40 changes: 34 additions & 6 deletions signac/core/collection_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,39 @@
import json
import errno
import uuid
import warnings

from .synced_collection import SyncedCollection
from .syncedattrdict import SyncedAttrDict
from .synced_list import SyncedList
from .validators import json_format_validator


def _convert_key_to_str(data):
"""Recursively convert non-string keys to strings for (potentially nested) input collections.

This retains compatibility with auto-casting keys to strings, and will be
removed in signac 2.0.
Input collections must be of "base type" (dict or list).
"""
if isinstance(data, dict):
def _str_key(key):
if not isinstance(key, str):
warnings.warn(f"Use of {type(key).__name__} as key is deprecated "
"and will be removed in version 2.0",
DeprecationWarning)
key = str(key)
return key
return {_str_key(key): _convert_key_to_str(value) for key, value in data.items()}
elif isinstance(data, list):
return [_convert_key_to_str(value) for value in data]
return data


class JSONCollection(SyncedCollection):
"""Implement sync and load using a JSON back end."""

backend = __name__ # type: ignore
_backend = __name__ # type: ignore

def __init__(self, filename=None, write_concern=False, **kwargs):
self._filename = None if filename is None else os.path.realpath(filename)
Expand All @@ -29,7 +52,7 @@ def __init__(self, filename=None, write_concern=False, **kwargs):
super().__init__(**kwargs)

def _load(self):
"""Load the data from a JSON-file."""
"""Load the data from a JSON file."""
try:
with open(self._filename, 'rb') as file:
blob = file.read()
Expand All @@ -39,9 +62,11 @@ def _load(self):
return None

def _sync(self):
"""Write the data to json file."""
"""Write the data to JSON file."""
data = self.to_base()
# Serialize data:
# Converting non-string keys to string
data = _convert_key_to_str(data)
# Serialize data
blob = json.dumps(data).encode()
# When write_concern flag is set, we write the data into dummy file and then
# replace that file with original file.
Expand All @@ -57,10 +82,13 @@ def _sync(self):
file.write(blob)


JSONCollection.add_validator(json_format_validator)


class JSONDict(JSONCollection, SyncedAttrDict):
"""A dict-like mapping interface to a persistent JSON file.

The JSONDict inherits from :class:`~core.collection_api.SyncedCollection`
The JSONDict inherits from :class:`~core.synced_collection.SyncedCollection`
and :class:`~core.syncedattrdict.SyncedAttrDict`.

.. code-block:: python
Expand Down Expand Up @@ -106,7 +134,7 @@ class JSONDict(JSONCollection, SyncedAttrDict):
class JSONList(JSONCollection, SyncedList):
"""A non-string sequence interface to a persistent JSON file.

The JSONList inherits from :class:`~core.collection_api.SyncedCollection`
The JSONList inherits from :class:`~core.synced_collection.SyncedCollection`
and :class:`~core.syncedlist.SyncedList`.

.. code-block:: python
Expand Down
6 changes: 3 additions & 3 deletions signac/core/collection_mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
class MongoDBCollection(SyncedCollection):
"""Implement sync and load using a MongoDB backend."""

backend = __name__ # type: ignore
_backend = __name__ # type: ignore

def __init__(self, collection=None, **kwargs):
import bson # for InvalidDocument
import bson # for InvalidDocument error

self._collection = collection
self._errors = bson.errors
Expand Down Expand Up @@ -53,7 +53,7 @@ def _pseudo_deepcopy(self):
class MongoDBDict(MongoDBCollection, SyncedAttrDict):
"""A dict-like mapping interface to a persistent Mongo-database.

The MongoDBDict inherits from :class:`~core.collection_api.MongoCollection`
The MongoDBDict inherits from :class:`~core.synced_collection.MongoCollection`
and :class:`~core.syncedattrdict.SyncedAttrDict`.

.. code-block:: python
Expand Down
4 changes: 2 additions & 2 deletions signac/core/collection_redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
class RedisCollection(SyncedCollection):
"""Implement sync and load using a Redis backend."""

backend = __name__ # type: ignore
_backend = __name__ # type: ignore

def __init__(self, client=None, **kwargs):
self._client = client
Expand Down Expand Up @@ -90,7 +90,7 @@ class RedisDict(RedisCollection, SyncedAttrDict):
class RedisList(RedisCollection, SyncedList):
"""A non-string sequence interface to a persistent Redis file.

The RedisList inherits from :class:`~core.collection_api.SyncedCollection`
The RedisList inherits from :class:`~core.synced_collection.SyncedCollection`
and :class:`~core.syncedlist.SyncedList`.

.. code-block:: python
Expand Down
6 changes: 3 additions & 3 deletions signac/core/collection_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
class ZarrCollection(SyncedCollection):
"""Implement sync and load using a Zarr backend."""

backend = __name__ # type: ignore
_backend = __name__ # type: ignore

def __init__(self, group=None, **kwargs):
import numcodecs # zarr depends on numcodecs
Expand Down Expand Up @@ -47,7 +47,7 @@ def __deepcopy__(self, memo):
class ZarrDict(ZarrCollection, SyncedAttrDict):
"""A dict-like mapping interface to a persistent Zarr-database.

The ZarrDict inherits from :class:`~core.collection_api.ZarrCollection`
The ZarrDict inherits from :class:`~core.synced_collection.ZarrCollection`
and :class:`~core.syncedattrdict.SyncedAttrDict`.

.. code-block:: python
Expand Down Expand Up @@ -92,7 +92,7 @@ class ZarrDict(ZarrCollection, SyncedAttrDict):
class ZarrList(ZarrCollection, SyncedList):
"""A non-string sequence interface to a persistent Zarr file.

The ZarrList inherits from :class:`~core.collection_api.ZarrCollection`
The ZarrList inherits from :class:`~core.synced_collection.ZarrCollection`
and :class:`~core.syncedlist.SyncedList`.

.. code-block:: python
Expand Down
68 changes: 55 additions & 13 deletions signac/core/synced_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
methods the methods that must be implemented by any subclass to match the API.
"""

from typing import List, Callable
from contextlib import contextmanager
from abc import abstractmethod
from collections import defaultdict
Expand All @@ -29,7 +30,8 @@ class SyncedCollection(Collection):
in the underlying backend. The backend name wil be same as the module name.
"""

backend = None
_backend = None
_validators: List[Callable] = []

def __init__(self, name=None, parent=None):
self._data = None
Expand All @@ -41,47 +43,82 @@ def __init__(self, name=None, parent=None):
"Illegal argument combination, one of the two arguments, "
"parent or name must be None, but not both.")

@classmethod
def __init_subclass__(cls):
"""Add ``_validator`` attribute to every subclass.

Subclasses contain a list of validators that are applied to collection input data.
vishav1771 marked this conversation as resolved.
Show resolved Hide resolved
Every subclass must have a separate list so that distinct sets of validators can
be registered to each of them.
"""
cls._validators = []

@classmethod
def register(cls, *args):
"""Register the synced data structures.
r"""Register the synced data structures.

Registry is used when recursively converting synced data structures to determine
what to convert their children into.
The registry is used by :meth:`from_base` to determine the appropriate subclass
of :class:`SyncedCollection` that should be constructed from a particular object.
This functionality is necessary for converting nested data structures because
given, for instance, a dict of lists, it must be possible to map the nested lists to
the appropriate subclass of :class:`SyncedList` corresponding to the desired
backend.

Parameters
----------
*args
\*args
Classes to register
"""
if not hasattr(cls, 'registry'):
cls.registry = defaultdict(list)
for _cls in args:
cls.registry[_cls.backend].append(_cls)
for base_cls in args:
cls.registry[base_cls._backend].append(base_cls)

@property
def validators(self):
vyasr marked this conversation as resolved.
Show resolved Hide resolved
"""Return the list of validators applied to the instance."""
validators = []
# Classes inherit the validators of their parent classes.
for base_cls in type(self).__mro__:
if hasattr(base_cls, '_validators'):
validators.extend([v for v in base_cls._validators if v not in validators])
return validators

@classmethod
def add_validator(cls, *args):
vyasr marked this conversation as resolved.
Show resolved Hide resolved
r"""Register validator.

Parameters
----------
\*args
Validator(s) to register.
"""
cls._validators.extend([v for v in args if v not in cls._validators])

@classmethod
def from_base(cls, data, backend=None, **kwargs):
"""Dynamically resolve the type of object to the corresponding synced collection.
r"""Dynamically resolve the type of object to the corresponding synced collection.

Parameters
----------
data : any
Data to be converted from base class.
backend: str
Name of backend for synchronization. Default to backend of class.
**kwargs:
\*\*kwargs:
Kwargs passed to instance of synced collection.

Returns
-------
data : object
Synced object of corresponding base type.
"""
backend = cls.backend if backend is None else backend
backend = cls._backend if backend is None else backend
if backend is None:
raise ValueError("No backend found.")
for _cls in cls.registry[backend]:
if _cls.is_base_type(data):
return _cls(data=data, **kwargs)
for base_cls in cls.registry[backend]:
if base_cls.is_base_type(data):
return base_cls(data=data, **kwargs)
if NUMPY:
if isinstance(data, numpy.number):
return data.item()
Expand Down Expand Up @@ -133,6 +170,11 @@ def load(self):
else:
self._parent.load()

def _validate(self, data):
"""Validate the input data."""
for validator in self.validators:
validator(data)

# The following methods share a common implementation for
# all data structures and regardless of backend.
def __getitem__(self, key):
Expand Down
27 changes: 20 additions & 7 deletions signac/core/synced_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""Implements the SyncedList class.

This implements the list data-structure for SyncedCollection API by
vyasr marked this conversation as resolved.
Show resolved Hide resolved
implementing the convert methods (`to_base`, `from_base`) for lists.
implementing the convert method (`to_base`) for lists.
vyasr marked this conversation as resolved.
Show resolved Hide resolved
"""

from collections.abc import Sequence
Expand All @@ -20,25 +20,27 @@
class SyncedList(SyncedCollection, MutableSequence):
"""Implementation of list data structure.

The SyncedList inherits from :class:`~core.collection_api.SyncedCollection`
The SyncedList inherits from :class:`~core.synced_collection.SyncedCollection`
and :class:`~collections.abc.MutableSequence`. Therefore, it behaves similar
to a :class:`list`.

.. warning::

While the SyncedList object behaves like a dictionary, there are
While the SyncedList object behaves like a :class:`list`, there are
important distinctions to remember. In particular, because operations
are reflected as changes to an underlying backend, copying (even deep
copying) a SyncedList instance may exhibit unexpected behavior. If a
true copy is required, you should use the `to_base()` method to get a
dictionary representation, and if necessary construct a new SyncedList.
:class:`list` representation, and if necessary construct a new
SyncedList.
"""

def __init__(self, data=None, **kwargs):
super().__init__(**kwargs)
if data is None:
vyasr marked this conversation as resolved.
Show resolved Hide resolved
self._data = []
else:
self._validate(data)
if NUMPY and isinstance(data, numpy.ndarray):
data = data.tolist()
with self._suspend_sync():
Expand Down Expand Up @@ -85,6 +87,7 @@ def _update(self, data=None):
"""Update the instance of SyncedList with data using depth-first traversal."""
if data is None:
data = []
self._validate(data)
if isinstance(data, Sequence) and not isinstance(data, str):
with self._suspend_sync():
# This loop avoids rebuilding existing synced collections for performance.
Expand Down Expand Up @@ -123,8 +126,9 @@ def reset(self, data=None):
"""
if data is None:
data = []
if NUMPY and isinstance(data, numpy.ndarray):
elif NUMPY and isinstance(data, numpy.ndarray):
data = data.tolist()
self._validate(data)
if isinstance(data, Sequence) and not isinstance(data, str):
with self._suspend_sync():
self._data = [self.from_base(data=value, parent=self) for value in data]
Expand All @@ -135,6 +139,7 @@ def reset(self, data=None):
.format(type(data)))

def __setitem__(self, key, value):
self._validate(value)
self.load()
with self._suspend_sync():
self._data[key] = self.from_base(data=value, parent=self)
Expand All @@ -145,28 +150,36 @@ def __reversed__(self):
return reversed(self._data)

def __iadd__(self, iterable):
# Convert iterable to a list to ensure generators are exhausted only once
vyasr marked this conversation as resolved.
Show resolved Hide resolved
iterable_data = list(iterable)
self._validate(iterable_data)
self.load()
with self._suspend_sync():
self._data += [self.from_base(data=value, parent=self) for value in iterable]
self._data += [self.from_base(data=value, parent=self) for value in iterable_data]
self.sync()
return self

def insert(self, index, item):
self._validate(item)
self.load()
with self._suspend_sync():
self._data.insert(index, self.from_base(data=item, parent=self))
self.sync()

def append(self, item):
self._validate(item)
self.load()
with self._suspend_sync():
self._data.append(self.from_base(data=item, parent=self))
self.sync()

def extend(self, iterable):
# Convert iterable to a list to ensure generators are exhausted only once
iterable_data = list(iterable)
self._validate(iterable_data)
self.load()
with self._suspend_sync():
self._data.extend([self.from_base(data=value, parent=self) for value in iterable])
self._data.extend([self.from_base(data=value, parent=self) for value in iterable_data])
self.sync()

def remove(self, item):
Expand Down
Loading