Skip to content

Commit

Permalink
Merge pull request #64 from BioImageTools/image2
Browse files Browse the repository at this point in the history
Add a simpler image class
  • Loading branch information
dstansby authored Dec 9, 2024
2 parents 01f4dc1 + 3ad3c6e commit 83f2c49
Show file tree
Hide file tree
Showing 7 changed files with 215 additions and 151 deletions.
58 changes: 20 additions & 38 deletions docs/tutorial.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,41 @@
# # Tutorial

import matplotlib.pyplot as plt
import zarr
import zarr.storage
from rich.pretty import pprint

from ome_zarr_models.v04 import Image
from ome_zarr_models.v04.coordinate_transformations import (
VectorTranslation,
)

# ## Creating models
# ## Loading datasets
#
# We can create an Image model from a zarr group, that points to an
# OME-zarr dataset:
# OME-zarr datasets are just zarr groups with special metadata.
# To open an OME-zarr dataset, we first open the zarr group, and
# then create an image object from it. This will validate the
# metadata.

group = zarr.open("https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr")
ome_zarr_image = Image.from_zarr(group)
group = zarr.open(
"https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr", mode="r"
)
ome_zarr_image = Image(group=group)
pprint(ome_zarr_image)

# This image contains both the zarr group, and a model of the multiscales metadata

multiscales_meta = ome_zarr_image.attributes.multiscales
pprint(multiscales_meta)

# ## Updating models
#
# All the fields in the models can be updated in place. When you do this, any
# validation on the individual field you are updating will take place.
# No errors, which means the metadata is valid 🎉
#
# For example, there is no name for the first multiscales entry, so let's add it

multiscales_meta[0].name = "The first multiscales entry"
pprint(multiscales_meta)
# ## Accessing metadata
# To access the OME-zarr metadata, use the `.attributes` property:

# One constraint in the OME-zarr spec is that the coordinate transforms have to be a
# scale, or a scale then translation (strictly in that order). So if we try and make a
# transformation just a translation, it will raise an error.

multiscales_meta[0].datasets[0].coordinateTransformations = VectorTranslation(
type="translation", translation=[1, 2, 3]
)


# This means validation happens early, allowing you to catch errors
# before getting too far.
metadata = ome_zarr_image.attributes
pprint(metadata)
pprint(metadata.multiscales[0].datasets)

# ## Accessing data
#
# Although these models do not handle reading or writing data, they do expose the zarr
# arrays.
# arrays. For example, to get the highest resolution image:

zarr_arr = ome_zarr_image.group[multiscales_meta[0].datasets[0].path]
zarr_arr = ome_zarr_image.group[metadata.multiscales[0].datasets[0].path]
pprint(zarr_arr)

# ## Not using validation
#
# If you want to create models that are not validated against the OME-zarr
# specification, you can use the ``model_construct`` method on the models.
# To finish off, let's plot the first z-slice of the first channel of this data:
plt.imshow(zarr_arr[0, 0, :, :], cmap="gray")
4 changes: 2 additions & 2 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ markdown_extensions:

plugins:
- mkdocs-jupyter:
execute: false
allow_errors: true
execute: true
allow_errors: false
- mkdocstrings:
handlers:
python:
Expand Down
10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@ dynamic = ["version"]
description = "Data models for OME-Zarr"
readme = "README.md"
requires-python = ">=3.11"
dependencies = ["zarr", "typing_extensions"]
dependencies = ["zarr<3", "typing_extensions", "pydantic"]

[project.optional-dependencies]
docs = [
"mkdocs>=1.6.1",
"mkdocstrings-python>=1.12.2",
"mkdocs-material",
"mkdocs-jupyter",
"gcsfs",
"matplotlib",
"rich",
"griffe-pydantic",
"zarr<3",
"fsspec[http]",
]
pydantic = ["pydantic"]

Expand All @@ -32,10 +32,10 @@ docs = [
"mkdocstrings-python>=1.12.2",
"mkdocs-material",
"mkdocs-jupyter",
"gcsfs",
"matplotlib",
"rich",
"griffe-pydantic",
"zarr<3",
"fsspec[http]",
]
dev = [
"jupyter[notebook]>=1.1.1",
Expand Down
128 changes: 128 additions & 0 deletions src/ome_zarr_models/v04/_image_old.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from __future__ import annotations

from typing import Self

import zarr.errors
from pydantic import model_validator
from pydantic_zarr.v2 import ArraySpec, GroupSpec

from ome_zarr_models.v04.image import ImageAttrs
from ome_zarr_models.v04.multiscales import Multiscales
from ome_zarr_models.v04.omero import Omero
from ome_zarr_models.zarr_utils import get_path


def _check_arrays_compatible(data: Image) -> Image:
    """
    Check that all the arrays referenced by the `multiscales` metadata meet the
    following criteria:

    - they exist
    - they are not groups
    - they have dimensionality consistent with the number of axes defined in the
      metadata.

    Parameters
    ----------
    data : Image
        The image model whose member arrays are validated.

    Returns
    -------
    Image
        The unchanged input, so this function can be used as a pydantic
        ``model_validator(mode="after")``.

    Raises
    ------
    ValueError
        If a referenced node does not exist, is a group rather than an array,
        or has a dimensionality that does not match the number of axes in the
        multiscale metadata.
    """
    multimeta = data.attributes.multiscales
    flat_self = data.to_flat()

    for multiscale in multimeta:
        multiscale_ndim = len(multiscale.axes)
        for dataset in multiscale.datasets:
            # Keep the try body minimal: only this lookup is expected to raise
            # KeyError. (Previously the isinstance/ndim checks were inside the
            # try as well, so any stray KeyError from them would have been
            # misreported as a missing array.)
            try:
                maybe_arr: ArraySpec | GroupSpec = flat_self[
                    "/" + dataset.path.lstrip("/")
                ]
            except KeyError as e:
                msg = (
                    f"The multiscale metadata references an array that does not "
                    f"exist in this group: {dataset.path}"
                )
                raise ValueError(msg) from e

            if isinstance(maybe_arr, GroupSpec):
                msg = f"The node at {dataset.path} is a group, not an array."
                raise ValueError(msg)

            arr_ndim = len(maybe_arr.shape)
            if arr_ndim != multiscale_ndim:
                msg = (
                    f"The multiscale metadata has {multiscale_ndim} axes "
                    "which does not match the dimensionality of the array "
                    f"found in this group at {dataset.path} ({arr_ndim}). "
                    "The number of axes must match the array dimensionality."
                )
                raise ValueError(msg)
    return data


class Image(GroupSpec[ImageAttrs, ArraySpec | GroupSpec]):
    """
    An OME-zarr multiscale dataset.

    A pydantic-zarr ``GroupSpec`` whose attributes are validated against the
    OME-zarr v0.4 image metadata model (``ImageAttrs``).
    """

    # Re-use the module-level validation function as a pydantic "after"
    # validator, so every constructed Image has arrays consistent with its
    # multiscales metadata.
    _check_arrays_compatible = model_validator(mode="after")(_check_arrays_compatible)

    @classmethod
    def from_zarr(cls, node: zarr.Group) -> Self:
        """
        Create an instance of an OME-zarr image from a `zarr.Group`.

        Parameters
        ----------
        node : zarr.Group
            A Zarr group that has valid OME-NGFF image metadata.

        Returns
        -------
        Image
            A validated image model built from the group's metadata and the
            arrays referenced by its ``multiscales`` entries.

        Raises
        ------
        KeyError
            If the group's attributes have no ``multiscales`` key.
        ValueError
            If a referenced array is missing, or a group is found where an
            array was expected.
        """
        # on unlistable storage backends, the members of this group will be {}
        guess = GroupSpec.from_zarr(node, depth=0)

        try:
            multi_meta_maybe = guess.attributes["multiscales"]
        except KeyError as e:
            store_path = get_path(node.store)
            msg = (
                "Failed to find mandatory `multiscales` key in the attributes of the "
                "Zarr group at "
                # NOTE(review): the "://" between store_path and node.path looks
                # like a formatting mistake (it yields e.g. "store://path://sub")
                # — confirm the intended URI format before relying on it.
                f"{node.store}://{store_path}://{node.path}."
            )
            raise KeyError(msg) from e

        multi_meta = ImageAttrs(multiscales=multi_meta_maybe)
        # Open each referenced array explicitly (rather than relying on group
        # listing, which can be empty on unlistable stores) and record its spec
        # keyed by its flattened path.
        members_tree_flat = {}
        for multiscale in multi_meta.multiscales:
            for dataset in multiscale.datasets:
                array_path = f"{node.path}/{dataset.path}"
                try:
                    array = zarr.open_array(store=node.store, path=array_path, mode="r")
                    array_spec = ArraySpec.from_zarr(array)
                except zarr.errors.ArrayNotFoundError as e:
                    msg = (
                        f"Expected to find an array at {array_path}, "
                        "but no array was found there."
                    )
                    raise ValueError(msg) from e
                except zarr.errors.ContainsGroupError as e:
                    msg = (
                        f"Expected to find an array at {array_path}, "
                        "but a group was found there instead."
                    )
                    raise ValueError(msg) from e
                members_tree_flat["/" + dataset.path] = array_spec
        members_normalized = GroupSpec.from_flat(members_tree_flat)

        # Merge the discovered members into the depth-0 guess, then rebuild via
        # cls(...) so the model validators (array/metadata consistency) run.
        guess_inferred_members = guess.model_copy(
            update={"members": members_normalized.members}
        )
        return cls(**guess_inferred_members.model_dump())

    @property
    def multiscales(self) -> Multiscales:
        """
        Multiscales metadata model.
        """
        return self.attributes.multiscales

    @property
    def omero(self) -> Omero | None:
        """
        omero metadata model (if present).
        """
        return self.attributes.omero
Loading

0 comments on commit 83f2c49

Please sign in to comment.