Skip to content

Commit

Permalink
Move Spec to spec module
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwhite committed Oct 4, 2023
1 parent ee9c5ef commit 10a62f7
Show file tree
Hide file tree
Showing 11 changed files with 124 additions and 119 deletions.
2 changes: 1 addition & 1 deletion cubed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

from .array_api import Array
from .core.array import (
Spec,
compute,
measure_reserved_mem,
measure_reserved_memory,
Expand All @@ -24,6 +23,7 @@
from .core.ops import from_array, from_zarr, map_blocks, store, to_zarr
from .nan_functions import nanmean, nansum
from .runtime.types import Callback, TaskEndEvent
from .spec import Spec

__all__ = [
"__version__",
Expand Down
1 change: 0 additions & 1 deletion cubed/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# flake8: noqa
from .array import (
CoreArray,
Spec,
compute,
gensym,
measure_reserved_mem,
Expand Down
113 changes: 3 additions & 110 deletions cubed/core/array.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from operator import mul
from typing import Optional, TypeVar, Union
from typing import Optional, TypeVar
from warnings import warn

import numpy as np
from toolz import map, reduce

from cubed.runtime.types import Callback, Executor
from cubed.spec import Spec
from cubed.storage.zarr import open_if_lazy_zarr_array
from cubed.utils import chunk_memory, convert_to_bytes
from cubed.utils import chunk_memory
from cubed.vendor.dask.array.core import normalize_chunks

from .plan import arrays_to_plan
Expand Down Expand Up @@ -193,114 +194,6 @@ def __repr__(self):
return f"cubed.core.CoreArray<{self.name}, shape={self.shape}, dtype={self.dtype}, chunks={self.chunks}>"


class Spec:
"""Specification of resources available to run a computation."""

def __init__(
self,
work_dir: Union[str, None] = None,
max_mem: Union[int, None] = None,
allowed_mem: Union[int, str, None] = None,
reserved_mem: Union[int, str, None] = 0,
executor: Union[Executor, None] = None,
storage_options: Union[dict, None] = None,
):
"""
Specify resources available to run a computation.
Parameters
----------
work_dir : str or None
The directory path (specified as an fsspec URL) used for storing intermediate data.
max_mem : int, optional
**Deprecated**. The maximum memory available to a worker for data use for the computation, in bytes.
allowed_mem : int or str, optional
The total memory available to a worker for running a task, in bytes.
If int it should be >=0. If str it should be of form <value><unit> where unit can be kB, MB, GB, TB etc.
This includes any ``reserved_mem`` that has been set.
reserved_mem : int or str, optional
The memory reserved on a worker for non-data use when running a task, in bytes.
If int it should be >=0. If str it should be of form <value><unit> where unit can be kB, MB, GB, TB etc.
executor : Executor, optional
The default executor for running computations.
storage_options : dict, optional
Storage options to be passed to fsspec.
"""

if max_mem is not None:
warn(
"`max_mem` is deprecated, please use `allowed_mem` instead",
DeprecationWarning,
stacklevel=2,
)

self._work_dir = work_dir

self._reserved_mem = convert_to_bytes(reserved_mem or 0)
if allowed_mem is None:
self._allowed_mem = (max_mem or 0) + self.reserved_mem
else:
self._allowed_mem = convert_to_bytes(allowed_mem)

self._executor = executor
self._storage_options = storage_options

@property
def work_dir(self) -> Optional[str]:
"""The directory path (specified as an fsspec URL) used for storing intermediate data."""
return self._work_dir

@property
def allowed_mem(self) -> int:
"""
The total memory available to a worker for running a task, in bytes.
This includes any ``reserved_mem`` that has been set.
"""
return self._allowed_mem

@property
def reserved_mem(self) -> int:
"""
The memory reserved on a worker for non-data use when running a task, in bytes.
See Also
--------
cubed.measure_reserved_mem
"""
return self._reserved_mem

@property
def executor(self) -> Optional[Executor]:
"""The default executor for running computations."""
return self._executor

@property
def storage_options(self) -> Optional[dict]:
"""Storage options to be passed to fsspec."""
return self._storage_options

def __repr__(self) -> str:
return (
f"cubed.Spec(work_dir={self._work_dir}, allowed_mem={self._allowed_mem}, "
f"reserved_mem={self._reserved_mem}, executor={self._executor}, storage_options={self._storage_options})"
)

def __eq__(self, other):
if isinstance(other, Spec):
return (
self.work_dir == other.work_dir
and self.allowed_mem == other.allowed_mem
and self.reserved_mem == other.reserved_mem
and self.executor == other.executor
and self.storage_options == other.storage_options
)
else:
return False


def check_array_specs(arrays):
specs = [a.spec for a in arrays if hasattr(a, "spec")]
if not all(s == specs[0] for s in specs):
Expand Down
2 changes: 1 addition & 1 deletion cubed/runtime/executors/coiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import coiled
from networkx import MultiDiGraph

from cubed.core.array import Spec
from cubed.runtime.pipeline import visit_nodes
from cubed.runtime.types import Callback, DagExecutor
from cubed.runtime.utils import execution_stats, handle_callbacks
from cubed.spec import Spec


def make_coiled_function(func, coiled_kwargs):
Expand Down
2 changes: 1 addition & 1 deletion cubed/runtime/executors/dask_distributed_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
from dask.distributed import Client
from networkx import MultiDiGraph

from cubed.core.array import Spec
from cubed.runtime.executors.asyncio import async_map_unordered
from cubed.runtime.pipeline import visit_node_generations, visit_nodes
from cubed.runtime.types import Callback, CubedPipeline, DagExecutor
from cubed.runtime.utils import execution_stats, gensym, handle_callbacks
from cubed.spec import Spec


# note we can't call `pipeline_func` just `func` here as it clashes with `dask.distributed.Client.map``
Expand Down
2 changes: 1 addition & 1 deletion cubed/runtime/executors/lithops.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from lithops.wait import ALWAYS, ANY_COMPLETED
from networkx import MultiDiGraph

from cubed.core.array import Spec
from cubed.runtime.backup import should_launch_backup
from cubed.runtime.executors.lithops_retries import (
RetryingFunctionExecutor,
Expand All @@ -29,6 +28,7 @@
from cubed.runtime.pipeline import visit_node_generations, visit_nodes
from cubed.runtime.types import Callback, DagExecutor
from cubed.runtime.utils import handle_callbacks
from cubed.spec import Spec

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion cubed/runtime/executors/modal.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
from networkx import MultiDiGraph
from tenacity import retry, retry_if_exception_type, stop_after_attempt

from cubed.core.array import Spec
from cubed.runtime.pipeline import visit_nodes
from cubed.runtime.types import Callback, DagExecutor
from cubed.runtime.utils import execute_with_stats, handle_callbacks
from cubed.spec import Spec

RUNTIME_MEMORY_MIB = 2000

Expand Down
2 changes: 1 addition & 1 deletion cubed/runtime/executors/modal_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from networkx import MultiDiGraph
from tenacity import retry, retry_if_exception_type, stop_after_attempt

from cubed.core.array import Spec
from cubed.runtime.executors.asyncio import async_map_unordered
from cubed.runtime.executors.modal import (
Container,
Expand All @@ -20,6 +19,7 @@
from cubed.runtime.pipeline import visit_node_generations, visit_nodes
from cubed.runtime.types import Callback, DagExecutor
from cubed.runtime.utils import handle_callbacks
from cubed.spec import Spec


# We need map_unordered for the use_backups implementation
Expand Down
2 changes: 1 addition & 1 deletion cubed/runtime/executors/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

from networkx import MultiDiGraph

from cubed.core.array import Spec
from cubed.runtime.pipeline import visit_nodes
from cubed.runtime.types import Callback, CubedPipeline, DagExecutor, TaskEndEvent
from cubed.spec import Spec


def exec_stage_func(func: Callable[..., Any], *args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion cubed/runtime/executors/python_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
from networkx import MultiDiGraph
from tenacity import Retrying, stop_after_attempt

from cubed.core.array import Spec
from cubed.runtime.executors.asyncio import async_map_unordered
from cubed.runtime.pipeline import visit_node_generations, visit_nodes
from cubed.runtime.types import Callback, CubedPipeline, DagExecutor
from cubed.runtime.utils import execution_stats, handle_callbacks
from cubed.spec import Spec


@execution_stats
Expand Down
113 changes: 113 additions & 0 deletions cubed/spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from typing import Optional, Union
from warnings import warn

from cubed.runtime.types import Executor
from cubed.utils import convert_to_bytes


class Spec:
"""Specification of resources available to run a computation."""

def __init__(
self,
work_dir: Union[str, None] = None,
max_mem: Union[int, None] = None,
allowed_mem: Union[int, str, None] = None,
reserved_mem: Union[int, str, None] = 0,
executor: Union[Executor, None] = None,
storage_options: Union[dict, None] = None,
):
"""
Specify resources available to run a computation.
Parameters
----------
work_dir : str or None
The directory path (specified as an fsspec URL) used for storing intermediate data.
max_mem : int, optional
**Deprecated**. The maximum memory available to a worker for data use for the computation, in bytes.
allowed_mem : int or str, optional
The total memory available to a worker for running a task, in bytes.
If int it should be >=0. If str it should be of form <value><unit> where unit can be kB, MB, GB, TB etc.
This includes any ``reserved_mem`` that has been set.
reserved_mem : int or str, optional
The memory reserved on a worker for non-data use when running a task, in bytes.
If int it should be >=0. If str it should be of form <value><unit> where unit can be kB, MB, GB, TB etc.
executor : Executor, optional
The default executor for running computations.
storage_options : dict, optional
Storage options to be passed to fsspec.
"""

if max_mem is not None:
warn(
"`max_mem` is deprecated, please use `allowed_mem` instead",
DeprecationWarning,
stacklevel=2,
)

self._work_dir = work_dir

self._reserved_mem = convert_to_bytes(reserved_mem or 0)
if allowed_mem is None:
self._allowed_mem = (max_mem or 0) + self.reserved_mem
else:
self._allowed_mem = convert_to_bytes(allowed_mem)

self._executor = executor
self._storage_options = storage_options

@property
def work_dir(self) -> Optional[str]:
"""The directory path (specified as an fsspec URL) used for storing intermediate data."""
return self._work_dir

@property
def allowed_mem(self) -> int:
"""
The total memory available to a worker for running a task, in bytes.
This includes any ``reserved_mem`` that has been set.
"""
return self._allowed_mem

@property
def reserved_mem(self) -> int:
"""
The memory reserved on a worker for non-data use when running a task, in bytes.
See Also
--------
cubed.measure_reserved_mem
"""
return self._reserved_mem

@property
def executor(self) -> Optional[Executor]:
"""The default executor for running computations."""
return self._executor

@property
def storage_options(self) -> Optional[dict]:
"""Storage options to be passed to fsspec."""
return self._storage_options

def __repr__(self) -> str:
return (
f"cubed.Spec(work_dir={self._work_dir}, allowed_mem={self._allowed_mem}, "
f"reserved_mem={self._reserved_mem}, executor={self._executor}, storage_options={self._storage_options})"
)

def __eq__(self, other):
if isinstance(other, Spec):
return (
self.work_dir == other.work_dir
and self.allowed_mem == other.allowed_mem
and self.reserved_mem == other.reserved_mem
and self.executor == other.executor
and self.storage_options == other.storage_options
)
else:
return False

0 comments on commit 10a62f7

Please sign in to comment.