From 14c70f0f12ec8c23ce133a4a4b91a9213f377e20 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 5 Feb 2021 23:01:23 -0600 Subject: [PATCH 1/6] Defer state point initialization when lazy loading. --- signac/contrib/job.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/signac/contrib/job.py b/signac/contrib/job.py index 703b30ca4..486cf621f 100644 --- a/signac/contrib/job.py +++ b/signac/contrib/job.py @@ -275,9 +275,6 @@ def __init__(self, project, statepoint=None, _id=None): else: # Only an id was provided. State point will be loaded lazily. self._id = _id - self._statepoint = _StatePointDict( - jobs=[self], filename=self._statepoint_filename - ) self._statepoint_requires_init = True def _initialize_lazy_properties(self): @@ -383,7 +380,7 @@ def reset_statepoint(self, new_statepoint): The job's new state point. """ - self._statepoint.reset(new_statepoint) + self.statepoint.reset(new_statepoint) def update_statepoint(self, update, overwrite=False): """Change the state point of this job while preserving job data. @@ -429,7 +426,7 @@ def update_statepoint(self, update, overwrite=False): if statepoint.get(key, value) != value: raise KeyError(key) statepoint.update(update) - self._statepoint.reset(statepoint) + self.statepoint.reset(statepoint) @property def statepoint(self): @@ -456,6 +453,9 @@ def statepoint(self): """ if self._statepoint_requires_init: # Load state point data lazily (on access). + self._statepoint = _StatePointDict( + jobs=[self], filename=self._statepoint_filename + ) statepoint = self._statepoint.load(self.id) # Update the project's state point cache when loaded lazily @@ -474,7 +474,7 @@ def statepoint(self, new_statepoint): The new state point to be assigned. """ - self._statepoint.reset(new_statepoint) + self.statepoint.reset(new_statepoint) @property def sp(self): @@ -657,7 +657,7 @@ def init(self, force=False): try: # Attempt early exit if the state point file exists and is valid. try: - statepoint = self._statepoint.load(self.id) + statepoint = self.statepoint.load(self.id) except Exception: # Any exception means this method cannot exit early. @@ -674,8 +674,8 @@ def init(self, force=False): # The state point save will not overwrite an existing file on # disk unless force is True, so the subsequent load will catch # when a preexisting invalid file was present. - self._statepoint.save(force=force) - statepoint = self._statepoint.load(self.id) + self.statepoint.save(force=force) + statepoint = self.statepoint.load(self.id) # Update the project's state point cache if the saved file is valid. self._project._register(self.id, statepoint) From 6f3280cef3cf5875a86e2a2d4d51612826a3e9ab Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 5 Feb 2021 23:14:37 -0600 Subject: [PATCH 2/6] Allow validation to be disabled in SyncedCollection._update. --- signac/contrib/job.py | 2 +- signac/synced_collections/data_types/synced_dict.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/signac/contrib/job.py b/signac/contrib/job.py index 486cf621f..f95718483 100644 --- a/signac/contrib/job.py +++ b/signac/contrib/job.py @@ -215,7 +215,7 @@ def load(self, job_id): raise JobsCorruptedError([job_id]) with self._suspend_sync: - self._update(data) + self._update(data, validate=False) return data diff --git a/signac/synced_collections/data_types/synced_dict.py b/signac/synced_collections/data_types/synced_dict.py index c3aa4e655..4ff41528a 100644 --- a/signac/synced_collections/data_types/synced_dict.py +++ b/signac/synced_collections/data_types/synced_dict.py @@ -99,7 +99,7 @@ def is_base_type(cls, data): """ return _mapping_resolver.get_type(data) == "MAPPING" - def _update(self, data=None): + def _update(self, data=None, validate=True): """Update the in-memory representation to match the provided data. The purpose of this method is to update the SyncedCollection to match @@ -135,7 +135,8 @@ def _update(self, data=None): except KeyError: # If the item wasn't present at all, we can simply # assign it. - self._validate({key: new_value}) + if validate: + self._validate({key: new_value}) self._data[key] = self._from_base(new_value, parent=self) else: if new_value == existing: @@ -153,7 +154,8 @@ def _update(self, data=None): # (in which case we would have tried to update it), OR # 2) The existing value is a SyncedCollection, but # the new value is not a compatible type for _update. - self._validate({key: new_value}) + if validate: + self._validate({key: new_value}) self._data[key] = self._from_base(new_value, parent=self) to_remove = [key for key in self._data if key not in data] From 60534de4cc770074ad8986ed4833850059192d7c Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Feb 2021 15:10:41 -0600 Subject: [PATCH 3/6] Unify reset_statepoint logic across methods. --- signac/contrib/job.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/signac/contrib/job.py b/signac/contrib/job.py index f95718483..7c7263c9b 100644 --- a/signac/contrib/job.py +++ b/signac/contrib/job.py @@ -380,7 +380,18 @@ def reset_statepoint(self, new_statepoint): The job's new state point. """ - self.statepoint.reset(new_statepoint) + if self._statepoint_requires_init: + # Instantiate state point data lazily - no load is required, since + # we are provided with the new state point data. + self._statepoint = _StatePointDict( + jobs=[self], filename=self._statepoint_filename, data=new_statepoint + ) + + # Update the project's state point cache when loaded lazily + self._project._register(self.id, new_statepoint) + self._statepoint_requires_init = False + else: + self.statepoint.reset(new_statepoint) def update_statepoint(self, update, overwrite=False): """Change the state point of this job while preserving job data. @@ -424,9 +435,12 @@ def update_statepoint(self, update, overwrite=False): if not overwrite: for key, value in update.items(): if statepoint.get(key, value) != value: - raise KeyError(key) + raise KeyError( + f"Key {key} was provided but already exists in the " + "mapping with another value." + ) statepoint.update(update) - self.statepoint.reset(statepoint) + self.reset_statepoint(statepoint) @property def statepoint(self): @@ -474,7 +488,7 @@ def statepoint(self, new_statepoint): The new state point to be assigned. """ - self.statepoint.reset(new_statepoint) + self.reset_statepoint(new_statepoint) @property def sp(self): From e013e8f7a0a00a58f34aec1e3d223fe9750ac5d5 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Feb 2021 15:16:47 -0600 Subject: [PATCH 4/6] Revert validation-related changes. --- signac/contrib/job.py | 2 +- signac/synced_collections/data_types/synced_dict.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/signac/contrib/job.py b/signac/contrib/job.py index 7c7263c9b..0c70b9394 100644 --- a/signac/contrib/job.py +++ b/signac/contrib/job.py @@ -215,7 +215,7 @@ def load(self, job_id): raise JobsCorruptedError([job_id]) with self._suspend_sync: - self._update(data, validate=False) + self._update(data) return data diff --git a/signac/synced_collections/data_types/synced_dict.py b/signac/synced_collections/data_types/synced_dict.py index 4ff41528a..c3aa4e655 100644 --- a/signac/synced_collections/data_types/synced_dict.py +++ b/signac/synced_collections/data_types/synced_dict.py @@ -99,7 +99,7 @@ def is_base_type(cls, data): """ return _mapping_resolver.get_type(data) == "MAPPING" - def _update(self, data=None, validate=True): + def _update(self, data=None): """Update the in-memory representation to match the provided data. The purpose of this method is to update the SyncedCollection to match @@ -135,8 +135,7 @@ def _update(self, data=None, validate=True): except KeyError: # If the item wasn't present at all, we can simply # assign it. - if validate: - self._validate({key: new_value}) + self._validate({key: new_value}) self._data[key] = self._from_base(new_value, parent=self) else: if new_value == existing: @@ -154,8 +153,7 @@ def _update(self, data=None, validate=True): # (in which case we would have tried to update it), OR # 2) The existing value is a SyncedCollection, but # the new value is not a compatible type for _update. - if validate: - self._validate({key: new_value}) + self._validate({key: new_value}) self._data[key] = self._from_base(new_value, parent=self) to_remove = [key for key in self._data if key not in data] From 656598efdb61af909cce20fee371654369f4130b Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Feb 2021 19:04:57 -0600 Subject: [PATCH 5/6] Add test, fix bug. --- signac/contrib/job.py | 11 +++++------ tests/test_job.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/signac/contrib/job.py b/signac/contrib/job.py index 0c70b9394..e1c81fe72 100644 --- a/signac/contrib/job.py +++ b/signac/contrib/job.py @@ -384,14 +384,13 @@ def reset_statepoint(self, new_statepoint): # Instantiate state point data lazily - no load is required, since # we are provided with the new state point data. self._statepoint = _StatePointDict( - jobs=[self], filename=self._statepoint_filename, data=new_statepoint + jobs=[self], filename=self._statepoint_filename ) - - # Update the project's state point cache when loaded lazily - self._project._register(self.id, new_statepoint) self._statepoint_requires_init = False - else: - self.statepoint.reset(new_statepoint) + self.statepoint.reset(new_statepoint) + + # Update the project's state point cache when loaded lazily + self._project._register(self.id, new_statepoint) def update_statepoint(self, update, overwrite=False): """Change the state point of this job while preserving job data. diff --git a/tests/test_job.py b/tests/test_job.py index e49a2ae4d..544d777af 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -1016,6 +1016,39 @@ def test_reset_statepoint_job(self): with pytest.raises(DestinationExistsError): src_job.reset_statepoint(dst) + def test_reset_statepoint_job_lazy_access(self): + key = "move_job" + d = testdata() + src = test_token + dst = dict(test_token) + dst["dst"] = True + src_job = self.open_job(src) + src_job.document[key] = d + assert key in src_job.document + assert len(src_job.document) == 1 + src_job.data[key] = d + assert key in src_job.data + assert len(src_job.data) == 1 + # Clear the project's state point cache to force lazy load + self.project._sp_cache.clear() + src_job_by_id = self.open_job(id=src_job.id) + # Check that the state point will be instantiated lazily during the + # call to reset_statepoint + assert src_job_by_id._statepoint_requires_init + src_job_by_id.reset_statepoint(dst) + src_job = self.open_job(src) + dst_job = self.open_job(dst) + assert key in dst_job.document + assert len(dst_job.document) == 1 + assert key not in src_job.document + assert key in dst_job.data + assert len(dst_job.data) == 1 + assert key not in src_job.data + with pytest.raises(RuntimeError): + src_job.reset_statepoint(dst) + with pytest.raises(DestinationExistsError): + src_job.reset_statepoint(dst) + @pytest.mark.skipif(not H5PY, reason="test requires the h5py package") def test_reset_statepoint_project(self): key = "move_job" From 291107104a2246ff99d1cea53611b3edb196c092 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Feb 2021 19:16:33 -0600 Subject: [PATCH 6/6] Update tests/test_job.py --- tests/test_job.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_job.py b/tests/test_job.py index 544d777af..d1d6a383c 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -1016,6 +1016,7 @@ def test_reset_statepoint_job(self): with pytest.raises(DestinationExistsError): src_job.reset_statepoint(dst) + @pytest.mark.skipif(not H5PY, reason="test requires the h5py package") def test_reset_statepoint_job_lazy_access(self): key = "move_job" d = testdata()