Skip to content

Commit

Permalink
Pass information about the Job directory's existence from Project to …
Browse files Browse the repository at this point in the history
…Job.

This allows Job to avoid some `stat` calls, which greatly improves performance on
networked file systems.
  • Loading branch information
joaander committed Feb 8, 2024
1 parent 99aaf28 commit 57ef8a9
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 7 deletions.
21 changes: 17 additions & 4 deletions signac/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,8 @@ class Job:
Jobs can be opened by ``statepoint`` or ``id_``. If both values are
provided, it is the user's responsibility to ensure that the values
correspond.
correspond. Set ``directory_known`` to ``True`` when the job directory
is known to exist - this skips some expensive isdir checks.
Parameters
----------
Expand All @@ -259,6 +260,8 @@ class Job:
State point for the job. (Default value = None)
id_ : str, optional
The job identifier. (Default value = None)
directory_known : bool, optional
Set to true when the job directory is known to exist. (Default value = False)
"""

Expand All @@ -275,10 +278,11 @@ class Job:
KEY_DATA = "signac_data"
"The job's datastore key."

def __init__(self, project, statepoint=None, id_=None):
def __init__(self, project, statepoint=None, id_=None, directory_known=False):
self._project = project
self._lock = RLock()
self._initialize_lazy_properties()
self._directory_known = directory_known

if statepoint is None and id_ is None:
raise ValueError("Either statepoint or id_ must be provided.")
Expand Down Expand Up @@ -714,8 +718,13 @@ def init(self, force=False, validate_statepoint=True):
with self._lock:
try:
# Fast early exit when not validating.
if not validate_statepoint and os.path.isdir(self.path):
return self
if not validate_statepoint:
if self._directory_known:
return self

if os.path.isdir(self.path):
self._directory_known = True
return self

# Attempt early exit if the state point file exists and is valid.
try:
Expand All @@ -733,6 +742,8 @@ def init(self, force=False, validate_statepoint=True):
)
raise

self._directory_known = True

# The state point save will not overwrite an existing file on
# disk unless force is True, so the subsequent load will catch
# when a preexisting invalid file was present.
Expand Down Expand Up @@ -806,6 +817,8 @@ def remove(self):
self._document = None
self._stores = None

self._directory_known = False

def move(self, project):
"""Move this job to project.
Expand Down
8 changes: 5 additions & 3 deletions signac/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ def open_job(self, statepoint=None, id=None):
elif not self._contains_job_id(id):
# id does not exist in the project data space
raise KeyError(id)
return Job(project=self, id_=id)
return Job(project=self, id_=id, directory_known=True)

def _job_dirs(self):
"""Generate ids of jobs in the workspace.
Expand Down Expand Up @@ -1631,7 +1631,7 @@ def get_job(cls, path=None):
project = cls.get_project(os.path.join(job_path, os.pardir))

# Return the matched job id from the found project
return Job(project=project, id_=job_id)
return Job(project=project, id_=job_id, directory_known=True)

def __getstate__(self):
state = dict(self.__dict__)
Expand Down Expand Up @@ -1688,7 +1688,9 @@ def __init__(self, project, ids):
self._ids_iterator = iter(ids)

def __next__(self):
return Job(project=self._project, id_=next(self._ids_iterator))
return Job(
project=self._project, id_=next(self._ids_iterator), directory_known=True
)

def __iter__(self):
return type(self)(self._project, self._ids)
Expand Down

0 comments on commit 57ef8a9

Please sign in to comment.