Skip to content

Commit

Permalink
Merge pull request #103 from melonora/add_operations
Browse files Browse the repository at this point in the history
Add operations to multiscale spatial image
  • Loading branch information
melonora authored Nov 20, 2024
2 parents 1c275cb + 7883e79 commit 7254a37
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 21 deletions.
26 changes: 20 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ during construction.

```
<xarray.DataArray 'image' (y: 128, x: 128)> Size: 16kB
array([[166, 225, 182, ..., 118, 153, 82],
[194, 64, 15, ..., 50, 45, 146],
[232, 41, 44, ..., 148, 245, 170],
array([[170, 79, 215, ..., 31, 151, 150],
[ 77, 181, 1, ..., 217, 176, 228],
[193, 91, 240, ..., 132, 152, 41],
...,
[ 99, 94, 7, ..., 136, 205, 30],
[ 52, 244, 229, ..., 108, 45, 58],
[ 72, 122, 212, ..., 183, 73, 188]], dtype=uint8)
[ 50, 140, 231, ..., 80, 236, 28],
[ 89, 46, 180, ..., 84, 42, 140],
[ 96, 148, 240, ..., 61, 43, 255]], dtype=uint8)
Coordinates:
* y (y) float64 1kB 0.0 1.0 2.0 3.0 4.0 ... 124.0 125.0 126.0 127.0
* x (x) float64 1kB 0.0 1.0 2.0 3.0 4.0 ... 124.0 125.0 126.0 127.0
Expand Down Expand Up @@ -136,6 +136,20 @@ Group: /
image (y, x, c) float64 40kB dask.array<chunksize=(50, 50, 2), meta=np.ndarray>
```

While the decorator allows you to define your own methods to map over datasets
in the `DataTree` while ignoring those datasets not having dimensions, this
library also provides a few convenience methods. For example, the transpose
method we saw earlier can also be applied as follows:

```python
multiscale = multiscale.msi.transpose("y", "x", "c")
```

Other methods implemented this way are `reindex_data_arrays`, equivalent to the
`xr.DataArray`
[reindex](https://docs.xarray.dev/en/stable/generated/xarray.DataArray.reindex.html)
method, and `assign_coords`, equivalent to the `xr.Dataset` `assign_coords` method.

Store as an Open Microscopy Environment-Next Generation File Format ([OME-NGFF])
/ [netCDF] [Zarr] store.

Expand Down
100 changes: 98 additions & 2 deletions multiscale_spatial_image/multiscale_spatial_image.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
from typing import Union
from typing import Union, Iterable, Any

from xarray import DataTree, register_datatree_accessor
import numpy as np
from collections.abc import MutableMapping
from collections.abc import MutableMapping, Hashable
from pathlib import Path
from zarr.storage import BaseStore
from multiscale_spatial_image.operations import (
transpose,
reindex_data_arrays,
assign_coords,
)


@register_datatree_accessor("msi")
Expand Down Expand Up @@ -121,3 +126,94 @@ def to_zarr(
self._dt.ds = self._dt.ds.assign_attrs(**ngff_metadata)

self._dt.to_zarr(store, mode=mode, **kwargs)

def transpose(self, *dims: Hashable) -> DataTree:
    """Transpose the arrays in every dataset of the `DataTree`.

    Nodes of the `DataTree` that do not contain dimensions are skipped
    automatically. Note that for `Dataset`s themselves, the order of
    dimensions stays the same. If a `DataTree` node is missing one of the
    specified dimensions, an error is raised.

    Parameters
    ----------
    *dims : Hashable
        If not specified, reverse the dimensions on each array. Otherwise,
        reorder the dimensions to the order in which `dims` are specified.

    Returns
    -------
    DataTree
        The result of mapping the module-level `transpose` operation over
        the datasets of the wrapped tree.
    """
    requested_order = dims
    transposed_tree = self._dt.map_over_datasets(transpose, *requested_order)
    return transposed_tree

def reindex_data_arrays(
    self,
    indexers: dict[str, Any] | None = None,
    method: str | None = None,
    tolerance: float | Iterable[float] | str | None = None,
    copy: bool = False,
    fill_value: int | dict[str, int] | None = None,
    **indexer_kwargs: Any,
):
    """
    Reindex the `DataArray`s present in the datasets at each scale level of the MultiscaleSpatialImage.

    From the original xarray docstring: Conform this object onto the indexes of another object, filling in
    missing values with fill_value.

    Parameters
    ----------
    indexers : dict | None
        Dictionary with keys given by dimension names and values given by arrays of coordinates tick labels.
        Any mis-matched coordinate values will be filled in with the fill value, and any mis-matched dimension
        names will simply be ignored. One of indexers or indexer_kwargs must be provided.
    method : str | None
        Method to use for filling index values in indexers not found on this data array:
        - None (default): don't fill gaps
        - pad / ffill: propagate last valid index value forward
        - backfill / bfill: propagate next valid index value backward
        - nearest: use nearest valid index value
    tolerance : float | Iterable[float] | str | None
        Maximum distance between original and new labels for inexact matches. The values of the index at the
        matching locations must satisfy the equation abs(index[indexer] - target) <= tolerance. Tolerance may
        be a scalar value, which applies the same tolerance to all values, or list-like, which applies variable
        tolerance per element. List-like must be the same size as the index and its dtype must exactly match the
        index's type.
    copy : bool
        If copy=True, data in the return value is always copied. If copy=False and reindexing is unnecessary, or
        can be performed with only slice operations, then the output may share memory with the input. In either
        case, a new xarray object is always returned.
    fill_value : int | dict[str, int] | None
        Value to use for newly missing values. If a dict-like, maps variable names (including coordinates) to fill
        values. Use this data array's name to refer to the data array's values.
        NOTE(review): this wrapper always forwards `fill_value` explicitly (default `None`) instead of relying
        on xarray's own default -- confirm that `None` yields the intended fill.
    **indexer_kwargs
        The keyword arguments form of indexers. One of indexers or indexer_kwargs must be provided.

    Returns
    -------
    The result of mapping the reindex operation over the datasets of the wrapped `DataTree`.
    """

    # Bind every argument in a closure so that the keyword arguments reach
    # xarray as keywords. The previous implementation unpacked
    # ``indexer_kwargs`` with a single ``*``, which forwarded only the dict
    # keys as stray positional arguments and dropped the values.
    def _reindex(ds):
        return reindex_data_arrays(
            ds,
            indexers,
            method=method,
            tolerance=tolerance,
            copy=copy,
            fill_value=fill_value,
            **indexer_kwargs,
        )

    return self._dt.map_over_datasets(_reindex)

def assign_coords(self, coords=None, **coords_kwargs):
    """
    Assign new coordinates to all `Dataset`s in the `DataTree` having dimensions.

    Returns a new `Dataset` at each scale level of the `MultiscaleSpatialImage` with all the original data in
    addition to the new coordinates.

    Parameters
    ----------
    coords
        A mapping whose keys are the names of the coordinates and values are the coordinates to assign.
        The mapping will generally be a dict or Coordinates.
        - If a value is a standard data value -- for example, a DataArray, scalar, or array -- the data is simply
          assigned as a coordinate.
        - If a value is callable, it is called with this object as the only parameter, and the return value is
          used as new coordinate variables.
        - A coordinate can also be defined and attached to an existing dimension using a tuple with the first
          element the dimension name and the second element the values for this new coordinate.
    **coords_kwargs
        The keyword arguments form of coords. One of coords or coords_kwargs must be provided.

    Returns
    -------
    The result of mapping the assign-coords operation over the datasets of the wrapped `DataTree`.
    """

    # Bind the arguments in a closure so that ``coords_kwargs`` is forwarded
    # as keyword arguments. The previous implementation unpacked it with a
    # single ``*``, which passed only the coordinate names as extra
    # positional arguments and lost the coordinate values.
    def _assign(ds):
        return assign_coords(ds, coords, **coords_kwargs)

    return self._dt.map_over_datasets(_assign)
3 changes: 3 additions & 0 deletions multiscale_spatial_image/operations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .operations import assign_coords, transpose, reindex_data_arrays

__all__ = ["assign_coords", "transpose", "reindex_data_arrays"]
18 changes: 18 additions & 0 deletions multiscale_spatial_image/operations/operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from multiscale_spatial_image.utils import skip_non_dimension_nodes
from xarray import Dataset
from typing import Any


@skip_non_dimension_nodes
def assign_coords(ds: Dataset, *args: Any, **kwargs: Any) -> Dataset:
    """Call `Dataset.assign_coords` on `ds`, forwarding all arguments unchanged.

    Wrapped in `skip_non_dimension_nodes` so it can be mapped over a tree
    whose root dataset has no dimensions.
    """
    with_coords = ds.assign_coords(*args, **kwargs)
    return with_coords


@skip_non_dimension_nodes
def transpose(ds: Dataset, *args: Any, **kwargs: Any) -> Dataset:
    """Call `Dataset.transpose` on `ds`, forwarding all arguments unchanged.

    Wrapped in `skip_non_dimension_nodes` so it can be mapped over a tree
    whose root dataset has no dimensions.
    """
    reordered = ds.transpose(*args, **kwargs)
    return reordered


@skip_non_dimension_nodes
def reindex_data_arrays(ds: Dataset, *args: Any, **kwargs: Any) -> Dataset:
    """Reindex the ``"image"`` array of `ds` and return it as a `Dataset`.

    All arguments are forwarded unchanged to `DataArray.reindex`. Only the
    ``"image"`` variable is reindexed; the returned dataset is built from
    that single array.
    """
    image = ds["image"]
    reindexed_image = image.reindex(*args, **kwargs)
    return reindexed_image.to_dataset()
13 changes: 13 additions & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pytest
import numpy as np
from spatial_image import to_spatial_image
from multiscale_spatial_image import to_multiscale


@pytest.fixture()
def multiscale_data():
    """Provide a multiscale image built from an all-zero ``(c, y, x)`` array.

    The base array has shape ``(3, 200, 200)`` and the multiscale
    representation is created with ``scale_factors=[2, 2]``.
    """
    base_image = to_spatial_image(
        array_like=np.zeros((3, 200, 200)),
        dims=("c", "y", "x"),
    )
    return to_multiscale(base_image, scale_factors=[2, 2])
17 changes: 17 additions & 0 deletions test/test_operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
def test_transpose(multiscale_data):
    """The `msi.transpose` accessor reorders the image dims at every scale."""
    transposed = multiscale_data.msi.transpose("y", "x", "c")

    for scale in transposed.keys():
        image_dims = transposed[scale]["image"].dims
        assert image_dims == ("y", "x", "c")


def test_reindex_arrays(multiscale_data):
    """Reindexing the ``c`` dimension yields the new labels at every scale."""
    new_index = {"c": ["r", "g", "b"]}
    reindexed = multiscale_data.msi.reindex_data_arrays(new_index)

    for scale in reindexed.keys():
        channel_labels = reindexed[scale].c.data.tolist()
        assert channel_labels == ["r", "g", "b"]


def test_assign_coords(multiscale_data):
    """Assigning ``c`` coordinates yields the new labels at every scale."""
    new_coords = {"c": ["r", "g", "b"]}
    with_coords = multiscale_data.msi.assign_coords(new_coords)

    for scale in with_coords.keys():
        channel_labels = with_coords[scale].c.data.tolist()
        assert channel_labels == ["r", "g", "b"]
18 changes: 5 additions & 13 deletions test/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,15 @@
import numpy as np
from spatial_image import to_spatial_image
from multiscale_spatial_image import skip_non_dimension_nodes, to_multiscale
from multiscale_spatial_image import skip_non_dimension_nodes


def test_skip_nodes():
data = np.zeros((2, 200, 200))
dims = ("c", "y", "x")
scale_factors = [2, 2]
image = to_spatial_image(array_like=data, dims=dims)
multiscale_img = to_multiscale(image, scale_factors=scale_factors)

def test_skip_nodes(multiscale_data):
@skip_non_dimension_nodes
def transpose(ds, *args, **kwargs):
return ds.transpose(*args, **kwargs)

for scale in list(multiscale_img.keys()):
assert multiscale_img[scale]["image"].dims == ("c", "y", "x")
for scale in list(multiscale_data.keys()):
assert multiscale_data[scale]["image"].dims == ("c", "y", "x")

# applying this function without skipping the root node would fail as the root node does not have dimensions.
result = multiscale_img.map_over_datasets(transpose, "y", "x", "c")
result = multiscale_data.map_over_datasets(transpose, "y", "x", "c")
for scale in list(result.keys()):
assert result[scale]["image"].dims == ("y", "x", "c")

0 comments on commit 7254a37

Please sign in to comment.