From 64ad5252ae3a0484628b0a743b8701cba2dbe0a2 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 19 Oct 2024 13:44:25 -0700 Subject: [PATCH 01/14] fix(array): thread order parameter through to array __init__ --- src/zarr/api/asynchronous.py | 10 +++------- src/zarr/core/array.py | 12 +++++------- tests/test_api.py | 16 ++++++++++++++++ tests/test_array.py | 17 +++++++++++++++++ 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 2c423ff59..680433565 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -712,7 +712,7 @@ async def create( dtype: npt.DTypeLike | None = None, compressor: dict[str, JSON] | None = None, # TODO: default and type change fill_value: Any | None = 0, # TODO: need type - order: MemoryOrder | None = None, # TODO: default change + order: MemoryOrder | None = None, store: str | StoreLike | None = None, synchronizer: Any | None = None, overwrite: bool = False, @@ -761,6 +761,7 @@ async def create( Default value to use for uninitialized portions of the array. order : {'C', 'F'}, optional Memory layout to be used within each chunk. + Default is set in Zarr's config (`array.order`). store : Store or str Store or path to directory in file system or name of zip file. synchronizer : object, optional @@ -834,12 +835,6 @@ async def create( else: chunk_shape = shape - if order is not None: - warnings.warn( - "order is deprecated, use config `array.order` instead", - DeprecationWarning, - stacklevel=2, - ) if synchronizer is not None: warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) if chunk_store is not None: @@ -889,6 +884,7 @@ async def create( codecs=codecs, dimension_names=dimension_names, attributes=attributes, + order=order, **kwargs, ) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 6e3430c41..304cef016 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -471,10 +471,6 @@ async def create( raise ValueError( "dimension_separator cannot be used for arrays with version 3. Use chunk_key_encoding instead." ) - if order is not None: - raise ValueError( - "order cannot be used for arrays with version 3. Use a transpose codec instead." - ) if filters is not None: raise ValueError( "filters cannot be used for arrays with version 3. Use array-to-array codecs instead." @@ -494,6 +490,7 @@ async def create( dimension_names=dimension_names, attributes=attributes, exists_ok=exists_ok, + order=order, ) elif zarr_format == 2: if dtype is str or dtype == "str": @@ -545,6 +542,7 @@ async def _create_v3( dtype: npt.DTypeLike, chunk_shape: ChunkCoords, fill_value: Any | None = None, + order: Literal["C", "F"] | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -588,7 +586,7 @@ async def _create_v3( attributes=attributes or {}, ) - array = cls(metadata=metadata, store_path=store_path) + array = cls(metadata=metadata, store_path=store_path, order=order) await array._save_metadata(metadata, ensure_parents=True) return array @@ -611,7 +609,7 @@ async def _create_v2( if not exists_ok: await ensure_no_existing_node(store_path, zarr_format=2) if order is None: - order = "C" + order = config.get("array.order", "C") if dimension_separator is None: dimension_separator = "." @@ -627,7 +625,7 @@ async def _create_v2( filters=filters, attributes=attributes, ) - array = cls(metadata=metadata, store_path=store_path) + array = cls(metadata=metadata, store_path=store_path, order=order) await array._save_metadata(metadata, ensure_parents=True) return array diff --git a/tests/test_api.py b/tests/test_api.py index 9b7b4f8b9..ddd7051b1 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -206,6 +206,22 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None: zarr.open(store=tmp_path, mode="w-") +@pytest.mark.parametrize("order", ["C", "F", None]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_array_order(order: str | None, zarr_format: int) -> None: + arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format) + expected = order or zarr.config.get("array.order") + assert arr.order == expected + + vals = np.asarray(arr) + if expected == "C": + assert vals.flags.c_contiguous + elif expected == "F": + assert vals.flags.f_contiguous + else: + raise AssertionError + + # def test_lazy_loader(): # foo = np.arange(100) # bar = np.arange(100, 0, -1) diff --git a/tests/test_array.py b/tests/test_array.py index 829a04d30..1f79b8879 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -417,3 +417,20 @@ def test_update_attrs(zarr_format: int) -> None: arr2 = zarr.open_array(store=store, zarr_format=zarr_format) assert arr2.attrs["foo"] == "bar" + + +@pytest.mark.parametrize("order", ["C", "F", None]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.parametrize("store", ["memory"], indirect=True) +def test_array_create_order(order: str | None, zarr_format: int, store: MemoryStore) -> None: + arr = Array.create(store=store, shape=(2, 2), order=order, zarr_format=zarr_format, dtype="i4") + expected = order or zarr.config.get("array.order") + assert arr.order == expected + + vals = np.asarray(arr) + if expected == "C": + assert vals.flags.c_contiguous + elif expected == "F": + assert vals.flags.f_contiguous + else: + raise AssertionError From 8b706f2863aa427d1b3cf3ec8392c4759a0d2d4e Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 21 Oct 2024 10:08:29 -0700 Subject: [PATCH 02/14] type fixes --- tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_api.py b/tests/test_api.py index ddd7051b1..7917c1ab5 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -208,7 +208,7 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None: @pytest.mark.parametrize("order", ["C", "F", None]) @pytest.mark.parametrize("zarr_format", [2, 3]) -def test_array_order(order: str | None, zarr_format: int) -> None: +def test_array_order(order: Literal["C", "F"] | None, zarr_format: ZarrFormat) -> None: arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format) expected = order or zarr.config.get("array.order") assert arr.order == expected From 59d7f7910765a14cdbd0bc56e310312069e0bdda Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 21 Oct 2024 10:46:32 -0700 Subject: [PATCH 03/14] move defaults --- src/zarr/core/array.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 304cef016..bdafa33f6 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -33,6 +33,7 @@ ZARRAY_JSON, ZATTRS_JSON, ChunkCoords, + MemoryOrder, ShapeLike, ZarrFormat, concurrent_map, @@ -203,14 +204,14 @@ class AsyncArray(Generic[T_ArrayMetadata]): metadata: T_ArrayMetadata store_path: StorePath codec_pipeline: CodecPipeline = field(init=False) - order: Literal["C", "F"] + order: MemoryOrder @overload def __init__( self: AsyncArray[ArrayV2Metadata], metadata: ArrayV2Metadata | ArrayV2MetadataDict, store_path: StorePath, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, ) -> None: ... @overload @@ -218,14 +219,14 @@ def __init__( self: AsyncArray[ArrayV3Metadata], metadata: ArrayV3Metadata | ArrayV3MetadataDict, store_path: StorePath, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, ) -> None: ... def __init__( self, metadata: ArrayMetadata | ArrayMetadataDict, store_path: StorePath, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, ) -> None: if isinstance(metadata, dict): zarr_format = metadata["zarr_format"] @@ -261,7 +262,7 @@ async def create( attributes: dict[str, JSON] | None = None, chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -350,7 +351,7 @@ async def create( # v2 only chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -382,7 +383,7 @@ async def create( # v2 only chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -422,7 +423,6 @@ async def create( V2 only. V3 arrays cannot have a dimension separator. order : Literal["C", "F"], optional The order of the array (default is None). - V2 only. V3 arrays should not have 'order' parameter. filters : list[dict[str, JSON]], optional The filters used to compress the data (default is None). V2 only. V3 arrays should not have 'filters' parameter. @@ -542,7 +542,7 @@ async def _create_v3( dtype: npt.DTypeLike, chunk_shape: ChunkCoords, fill_value: Any | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -600,7 +600,7 @@ async def _create_v2( chunks: ChunkCoords, dimension_separator: Literal[".", "/"] | None = None, fill_value: None | float = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, attributes: dict[str, JSON] | None = None, @@ -608,8 +608,9 @@ async def _create_v2( ) -> AsyncArray[ArrayV2Metadata]: if not exists_ok: await ensure_no_existing_node(store_path, zarr_format=2) + if order is None: - order = config.get("array.order", "C") + order = parse_indexing_order(config.get("array.order")) if dimension_separator is None: dimension_separator = "." @@ -1177,7 +1178,7 @@ def create( # v2 only chunks: ChunkCoords | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -1368,7 +1369,7 @@ def store_path(self) -> StorePath: return self._async_array.store_path @property - def order(self) -> Literal["C", "F"]: + def order(self) -> MemoryOrder: return self._async_array.order @property From 075b92987d07cc399b2023ea61d327572edc3d85 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 21 Oct 2024 10:49:16 -0700 Subject: [PATCH 04/14] apply MemoryOrder type to ArrayV2Metadata --- src/zarr/core/metadata/v2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 2e1833605..f18f2e4e8 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -25,7 +25,7 @@ from zarr.core.array_spec import ArraySpec from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.chunk_key_encodings import parse_separator -from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, parse_shapelike +from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, MemoryOrder, parse_shapelike from zarr.core.config import config, parse_indexing_order from zarr.core.metadata.common import parse_attributes @@ -45,7 +45,7 @@ class ArrayV2Metadata(Metadata): chunks: tuple[int, ...] dtype: np.dtype[Any] fill_value: None | int | float | str | bytes = 0 - order: Literal["C", "F"] = "C" + order: MemoryOrder = "C" filters: tuple[numcodecs.abc.Codec, ...] | None = None dimension_separator: Literal[".", "/"] = "." compressor: numcodecs.abc.Codec | None = None @@ -59,7 +59,7 @@ def __init__( dtype: npt.DTypeLike, chunks: ChunkCoords, fill_value: Any, - order: Literal["C", "F"], + order: MemoryOrder, dimension_separator: Literal[".", "/"] = ".", compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None, filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None, @@ -185,7 +185,7 @@ def to_dict(self) -> dict[str, JSON]: return zarray_dict def get_chunk_spec( - self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype + self, _chunk_coords: ChunkCoords, order: MemoryOrder, prototype: BufferPrototype ) -> ArraySpec: return ArraySpec( shape=self.chunks, From b072545ef06ef27c060b204e94be6aac44451d93 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 21 Oct 2024 10:51:12 -0700 Subject: [PATCH 05/14] more more --- src/zarr/core/array_spec.py | 8 ++++---- src/zarr/core/metadata/v3.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index e84a81cb0..c4d9c363f 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -1,11 +1,11 @@ from __future__ import annotations from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any import numpy as np -from zarr.core.common import parse_fill_value, parse_order, parse_shapelike +from zarr.core.common import MemoryOrder, parse_fill_value, parse_order, parse_shapelike if TYPE_CHECKING: from zarr.core.buffer import BufferPrototype @@ -17,7 +17,7 @@ class ArraySpec: shape: ChunkCoords dtype: np.dtype[Any] fill_value: Any - order: Literal["C", "F"] + order: MemoryOrder prototype: BufferPrototype def __init__( @@ -25,7 +25,7 @@ def __init__( shape: ChunkCoords, dtype: np.dtype[Any], fill_value: Any, - order: Literal["C", "F"], + order: MemoryOrder, prototype: BufferPrototype, ) -> None: shape_parsed = parse_shapelike(shape) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index e9d2f92d8..6b6f28dd9 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -31,6 +31,7 @@ JSON, ZARR_JSON, ChunkCoords, + MemoryOrder, parse_named_configuration, parse_shapelike, ) @@ -289,7 +290,7 @@ def ndim(self) -> int: return len(self.shape) def get_chunk_spec( - self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype + self, _chunk_coords: ChunkCoords, order: MemoryOrder, prototype: BufferPrototype ) -> ArraySpec: assert isinstance( self.chunk_grid, RegularChunkGrid From 87267342f0bae40dbfd8ad30d237e7c3d6e92a10 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 21 Oct 2024 10:58:17 -0700 Subject: [PATCH 06/14] more more more --- tests/test_api.py | 4 ++-- tests/test_array.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 7917c1ab5..5b62e3a2f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -21,7 +21,7 @@ save_array, save_group, ) -from zarr.core.common import ZarrFormat +from zarr.core.common import MemoryOrder, ZarrFormat from zarr.errors import MetadataValidationError from zarr.storage._utils import normalize_path from zarr.storage.memory import MemoryStore @@ -208,7 +208,7 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None: @pytest.mark.parametrize("order", ["C", "F", None]) @pytest.mark.parametrize("zarr_format", [2, 3]) -def test_array_order(order: Literal["C", "F"] | None, zarr_format: ZarrFormat) -> None: +def test_array_order(order: MemoryOrder | None, zarr_format: ZarrFormat) -> None: arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format) expected = order or zarr.config.get("array.order") assert arr.order == expected diff --git a/tests/test_array.py b/tests/test_array.py index 1f79b8879..f182cb1a1 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -10,7 +10,7 @@ from zarr.codecs import BytesCodec, VLenBytesCodec from zarr.core.array import chunks_initialized from zarr.core.buffer.cpu import NDBuffer -from zarr.core.common import JSON, ZarrFormat +from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.group import AsyncGroup from zarr.core.indexing import ceildiv from zarr.core.sync import sync @@ -422,7 +422,9 @@ def test_update_attrs(zarr_format: int) -> None: @pytest.mark.parametrize("order", ["C", "F", None]) @pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("store", ["memory"], indirect=True) -def test_array_create_order(order: str | None, zarr_format: int, store: MemoryStore) -> None: +def test_array_create_order( + order: MemoryOrder | None, zarr_format: int, store: MemoryStore +) -> None: arr = Array.create(store=store, shape=(2, 2), order=order, zarr_format=zarr_format, dtype="i4") expected = order or zarr.config.get("array.order") assert arr.order == expected From 858d4fbde29b2e4697a49864187c47c8ec19bdd6 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Tue, 22 Oct 2024 06:34:21 -0700 Subject: [PATCH 07/14] feature(store,group,array): stores learn to delete prefixes when overwriting nodes - add Store.delete_dir and Store.delete_prefix - update array and group creation methods to call delete_dir - change list_prefix to return absolue keys --- src/zarr/abc/store.py | 30 ++++++++++++++++++++++++++++++ src/zarr/core/array.py | 14 ++++++++++++-- src/zarr/core/group.py | 21 ++++++++------------- src/zarr/storage/common.py | 12 ++++++++++++ src/zarr/storage/local.py | 4 +++- src/zarr/storage/logging.py | 3 +++ src/zarr/storage/memory.py | 11 ++++++++--- src/zarr/storage/remote.py | 7 ++++--- src/zarr/storage/zip.py | 2 +- src/zarr/testing/store.py | 5 ++--- tests/test_array.py | 3 +++ tests/test_group.py | 23 ++++++++++++++++++++++- tests/test_store/test_logging.py | 8 ++++++-- tests/test_store/test_zip.py | 2 +- 14 files changed, 115 insertions(+), 30 deletions(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index a995a6bf3..3927bfb9d 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -371,6 +371,36 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: """ ... + async def delete_dir(self, prefix: str, recursive: bool = True) -> None: + """ + Remove all keys and prefixes in the store that begin with a given prefix. + """ + if not self.supports_deletes: + raise NotImplementedError + if not self.supports_listing: + raise NotImplementedError + self._check_writable() + if recursive: + if not prefix.endswith("/"): + prefix += "/" + async for key in self.list_prefix(prefix): + await self.delete(f"{key}") + else: + async for key in self.list_dir(prefix): + await self.delete(f"{prefix}/{key}") + + async def delete_prefix(self, prefix: str) -> None: + """ + Remove all keys in the store that begin with a given prefix. + """ + if not self.supports_deletes: + raise NotImplementedError + if not self.supports_listing: + raise NotImplementedError + self._check_writable() + async for key in self.list_prefix(prefix): + await self.delete(f"{key}") + def close(self) -> None: """Close the store.""" self._is_open = False diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index bdafa33f6..4549cc3e7 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -554,7 +554,12 @@ async def _create_v3( attributes: dict[str, JSON] | None = None, exists_ok: bool = False, ) -> AsyncArray[ArrayV3Metadata]: - if not exists_ok: + if exists_ok: + if store_path.store.supports_deletes: + await store_path.delete_dir(recursive=True) + else: + await ensure_no_existing_node(store_path, zarr_format=3) + else: await ensure_no_existing_node(store_path, zarr_format=3) shape = parse_shapelike(shape) @@ -606,7 +611,12 @@ async def _create_v2( attributes: dict[str, JSON] | None = None, exists_ok: bool = False, ) -> AsyncArray[ArrayV2Metadata]: - if not exists_ok: + if exists_ok: + if store_path.store.supports_deletes: + await store_path.delete_dir(recursive=True) + else: + await ensure_no_existing_node(store_path, zarr_format=2) + else: await ensure_no_existing_node(store_path, zarr_format=2) if order is None: diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 6e54b7ec9..914f001cc 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -404,7 +404,13 @@ async def from_store( zarr_format: ZarrFormat = 3, ) -> AsyncGroup: store_path = await make_store_path(store) - if not exists_ok: + + if exists_ok: + if store_path.store.supports_deletes: + await store_path.delete_dir(recursive=True) + else: + await ensure_no_existing_node(store_path, zarr_format=zarr_format) + else: await ensure_no_existing_node(store_path, zarr_format=zarr_format) attributes = attributes or {} group = cls( @@ -710,19 +716,8 @@ def _getitem_consolidated( async def delitem(self, key: str) -> None: store_path = self.store_path / key - if self.metadata.zarr_format == 3: - await (store_path / ZARR_JSON).delete() - - elif self.metadata.zarr_format == 2: - await asyncio.gather( - (store_path / ZGROUP_JSON).delete(), # TODO: missing_ok=False - (store_path / ZARRAY_JSON).delete(), # TODO: missing_ok=False - (store_path / ZATTRS_JSON).delete(), # TODO: missing_ok=True - ) - - else: - raise ValueError(f"unexpected zarr_format: {self.metadata.zarr_format}") + await store_path.delete_dir(recursive=True) if self.metadata.consolidated_metadata: self.metadata.consolidated_metadata.metadata.pop(key, None) await self._save_metadata() diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index 9ed8c274d..af434ecd3 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -101,6 +101,18 @@ async def delete(self) -> None: """ await self.store.delete(self.path) + async def delete_dir(self, recursive: bool = False) -> None: + """ + Delete all keys with the given prefix from the store. + """ + await self.store.delete_dir(self.path, recursive=recursive) + + async def delete_prefix(self) -> None: + """ + Delete all keys with the given prefix from the store. + """ + await self.store.delete_prefix(self.path) + async def set_if_not_exists(self, default: Buffer) -> None: """ Store a key to ``value`` if the key is not already present. diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index 5c03009a9..0ab2cc0be 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -226,7 +226,9 @@ async def list(self) -> AsyncGenerator[str, None]: async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - to_strip = os.path.join(str(self.root / prefix)) + to_strip = str(self.root) + if prefix.endswith("/"): + prefix = prefix[:-1] for p in (self.root / prefix).rglob("*"): if p.is_file(): yield str(p.relative_to(to_strip)) diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index 66fd1687e..453a793cb 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -230,3 +230,6 @@ def with_mode(self, mode: AccessModeLiteral) -> Self: log_level=self.log_level, log_handler=self.log_handler, ) + + +# TODO: wrap delete methods here diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index f942d57b9..fa4ede2a8 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -1,5 +1,6 @@ from __future__ import annotations +from logging import getLogger from typing import TYPE_CHECKING, Self from zarr.abc.store import ByteRangeRequest, Store @@ -14,6 +15,9 @@ from zarr.core.common import AccessModeLiteral +logger = getLogger(__name__) + + class MemoryStore(Store): """ In-memory store for testing purposes. @@ -137,7 +141,7 @@ async def delete(self, key: str) -> None: try: del self._store_dict[key] except KeyError: - pass + logger.debug("Key %s does not exist.", key) async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: # docstring inherited @@ -150,9 +154,10 @@ async def list(self) -> AsyncGenerator[str, None]: async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - for key in self._store_dict: + # note: we materialize all dict keys into a list here so we can mutate the dict in-place (e.g. in delete_prefix) + for key in list(self._store_dict): if key.startswith(prefix): - yield key.removeprefix(prefix) + yield key async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 0a0ec7f7c..0eb72eb34 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -298,6 +298,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - find_str = f"{self.path}/{prefix}" - for onefile in await self.fs._find(find_str, detail=False, maxdepth=None, withdirs=False): - yield onefile.removeprefix(find_str) + for onefile in await self.fs._find( + f"{self.path}/{prefix}", detail=False, maxdepth=None, withdirs=False + ): + yield onefile.removeprefix(f"{self.path}/") diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 204a381bd..a7b8e1558 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -241,7 +241,7 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited async for key in self.list(): if key.startswith(prefix): - yield key.removeprefix(prefix) + yield key async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index b4da75b06..fd9d6e691 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -249,8 +249,7 @@ async def test_list(self, store: S) -> None: async def test_list_prefix(self, store: S) -> None: """ Test that the `list_prefix` method works as intended. Given a prefix, it should return - all the keys in storage that start with this prefix. Keys should be returned with the shared - prefix removed. + all the keys in storage that start with this prefix. """ prefixes = ("", "a/", "a/b/", "a/b/c/") data = self.buffer_cls.from_bytes(b"") @@ -264,7 +263,7 @@ async def test_list_prefix(self, store: S) -> None: expected: tuple[str, ...] = () for key in store_dict: if key.startswith(prefix): - expected += (key.removeprefix(prefix),) + expected += (key,) expected = tuple(sorted(expected)) assert observed == expected diff --git a/tests/test_array.py b/tests/test_array.py index f182cb1a1..dac8ce859 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -48,6 +48,9 @@ def test_array_creation_existing_node( new_dtype = "float32" if exists_ok: + if not store.supports_deletes: + # TODO: confirm you get the expected error here + pytest.skip("store does not support deletes") arr_new = Array.create( spath / "extant", shape=new_shape, diff --git a/tests/test_group.py b/tests/test_group.py index 21e4ef4e5..13f412afa 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -290,6 +290,10 @@ def test_group_open(store: Store, zarr_format: ZarrFormat, exists_ok: bool) -> N with pytest.raises(ContainsGroupError): Group.from_store(store, attributes=attrs, zarr_format=zarr_format, exists_ok=exists_ok) else: + if not store.supports_deletes: + pytest.skip( + "Store does not support deletes but `exists_ok` is True, requiring deletes to override a group" + ) group_created_again = Group.from_store( store, attributes=new_attrs, zarr_format=zarr_format, exists_ok=exists_ok ) @@ -703,6 +707,8 @@ def test_group_creation_existing_node( new_attributes = {"new": True} if exists_ok: + if not store.supports_deletes: + pytest.skip("store does not support deletes but exists_ok is True") node_new = Group.from_store( spath / "extant", attributes=new_attributes, @@ -1075,7 +1081,9 @@ async def test_require_group(store: LocalStore | MemoryStore, zarr_format: ZarrF assert foo_group.attrs == {"foo": 100} # test that we can get the group using require_group and overwrite=True - foo_group = await root.require_group("foo", overwrite=True) + if store.supports_deletes: + foo_group = await root.require_group("foo", overwrite=True) + assert foo_group.attrs == {} _ = await foo_group.create_array( "bar", shape=(10,), dtype="uint8", chunk_shape=(2,), attributes={"foo": 100} @@ -1354,3 +1362,16 @@ def test_group_deprecated_positional_args(method: str) -> None: with pytest.warns(FutureWarning, match=r"Pass name=.*, data=.* as keyword args."): arr = getattr(root, method)("foo_like", data, **kwargs) assert arr.shape == data.shape + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +def test_delitem_removes_children(store: Store, zarr_format: ZarrFormat) -> None: + # https://github.com/zarr-developers/zarr-python/issues/2191 + g1 = zarr.group(store=store) + g1.create_group("0") + g1.create_group("0/0") + arr = g1.create_array("0/0/0", shape=(1,)) + arr[:] = 1 + del g1["0"] + with pytest.raises(KeyError): + g1["0/0"] diff --git a/tests/test_store/test_logging.py b/tests/test_store/test_logging.py index 0258244c5..976da6818 100644 --- a/tests/test_store/test_logging.py +++ b/tests/test_store/test_logging.py @@ -45,10 +45,14 @@ async def test_logging_store_counter(store: Store) -> None: arr[:] = 1 assert wrapped.counter["set"] == 2 - assert wrapped.counter["get"] == 0 # 1 if overwrite=False assert wrapped.counter["list"] == 0 assert wrapped.counter["list_dir"] == 0 - assert wrapped.counter["list_prefix"] == 0 + if store.supports_deletes: + assert wrapped.counter["get"] == 0 # 1 if overwrite=False + assert wrapped.counter["list_prefix"] == 1 + else: + assert wrapped.counter["get"] == 1 + assert wrapped.counter["list_prefix"] == 0 async def test_with_mode(): diff --git a/tests/test_store/test_zip.py b/tests/test_store/test_zip.py index d05422ecd..eb129a1b2 100644 --- a/tests/test_store/test_zip.py +++ b/tests/test_store/test_zip.py @@ -103,4 +103,4 @@ async def test_with_mode(self, store: ZipStore) -> None: @pytest.mark.parametrize("mode", ["a", "w"]) async def test_store_open_mode(self, store_kwargs: dict[str, Any], mode: str) -> None: - super().test_store_open_mode(store_kwargs, mode) + await super().test_store_open_mode(store_kwargs, mode) From 9c173c1220866d4ecdb221cb22daee5d8e28ec8c Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 23 Oct 2024 06:14:42 -0700 Subject: [PATCH 08/14] fixup --- src/zarr/abc/store.py | 2 +- src/zarr/storage/local.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 1ed098910..76a71c5a9 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -399,7 +399,7 @@ async def delete_prefix(self, prefix: str) -> None: raise NotImplementedError self._check_writable() async for key in self.list_prefix(prefix): - await self.delete(f"{key}") + await self.delete(key) def close(self) -> None: """Close the store.""" diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index 0ab2cc0be..c08b351b4 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -227,8 +227,7 @@ async def list(self) -> AsyncGenerator[str, None]: async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited to_strip = str(self.root) - if prefix.endswith("/"): - prefix = prefix[:-1] + prefix = prefix.rstrip("/") for p in (self.root / prefix).rglob("*"): if p.is_file(): yield str(p.relative_to(to_strip)) From d7a698284671df7c4de62e0ea3010328557cf86e Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 23 Oct 2024 16:13:24 -0700 Subject: [PATCH 09/14] fixup --- src/zarr/abc/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 76a71c5a9..03f42584f 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -384,7 +384,7 @@ async def delete_dir(self, prefix: str, recursive: bool = True) -> None: if not prefix.endswith("/"): prefix += "/" async for key in self.list_prefix(prefix): - await self.delete(f"{key}") + await self.delete(key) else: async for key in self.list_dir(prefix): await self.delete(f"{prefix}/{key}") From c38c2f7cd52dfa884e4f542de6aaed73a57835c7 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 24 Oct 2024 16:29:35 -0700 Subject: [PATCH 10/14] respond to review --- src/zarr/abc/store.py | 26 +++++--------------------- src/zarr/core/array.py | 4 ++-- src/zarr/core/group.py | 4 ++-- src/zarr/storage/common.py | 13 +++++-------- src/zarr/storage/logging.py | 5 +++++ src/zarr/testing/store.py | 13 +++++++++++++ tests/test_store/test_logging.py | 5 +++-- tests/test_store/test_zip.py | 4 ---- 8 files changed, 35 insertions(+), 39 deletions(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 03f42584f..c7f21df50 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -342,8 +342,8 @@ def list(self) -> AsyncGenerator[str, None]: @abstractmethod def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned with the - common leading prefix removed. + Retrieve all keys in the store that begin with a given prefix. Keys are returned as + absolute paths (i.e. including the prefix). Parameters ---------- @@ -371,7 +371,7 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: """ ... - async def delete_dir(self, prefix: str, recursive: bool = True) -> None: + async def delete_dir(self, prefix: str) -> None: """ Remove all keys and prefixes in the store that begin with a given prefix. """ @@ -380,24 +380,8 @@ async def delete_dir(self, prefix: str, recursive: bool = True) -> None: if not self.supports_listing: raise NotImplementedError self._check_writable() - if recursive: - if not prefix.endswith("/"): - prefix += "/" - async for key in self.list_prefix(prefix): - await self.delete(key) - else: - async for key in self.list_dir(prefix): - await self.delete(f"{prefix}/{key}") - - async def delete_prefix(self, prefix: str) -> None: - """ - Remove all keys in the store that begin with a given prefix. - """ - if not self.supports_deletes: - raise NotImplementedError - if not self.supports_listing: - raise NotImplementedError - self._check_writable() + if not prefix.endswith("/"): + prefix += "/" async for key in self.list_prefix(prefix): await self.delete(key) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4549cc3e7..7b5733116 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -556,7 +556,7 @@ async def _create_v3( ) -> AsyncArray[ArrayV3Metadata]: if exists_ok: if store_path.store.supports_deletes: - await store_path.delete_dir(recursive=True) + await store_path.delete_dir() else: await ensure_no_existing_node(store_path, zarr_format=3) else: @@ -613,7 +613,7 @@ async def _create_v2( ) -> AsyncArray[ArrayV2Metadata]: if exists_ok: if store_path.store.supports_deletes: - await store_path.delete_dir(recursive=True) + await store_path.delete_dir() else: await ensure_no_existing_node(store_path, zarr_format=2) else: diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 914f001cc..5e6803930 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -407,7 +407,7 @@ async def from_store( if exists_ok: if store_path.store.supports_deletes: - await store_path.delete_dir(recursive=True) + await store_path.delete_dir() else: await ensure_no_existing_node(store_path, zarr_format=zarr_format) else: @@ -717,7 +717,7 @@ def _getitem_consolidated( async def delitem(self, key: str) -> None: store_path = self.store_path / key - await store_path.delete_dir(recursive=True) + await store_path.delete_dir() if self.metadata.consolidated_metadata: self.metadata.consolidated_metadata.metadata.pop(key, None) await self._save_metadata() diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index af434ecd3..337fbc59a 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -101,17 +101,14 @@ async def delete(self) -> None: """ await self.store.delete(self.path) - async def delete_dir(self, recursive: bool = False) -> None: + async def delete_dir(self) -> None: """ Delete all keys with the given prefix from the store. """ - await self.store.delete_dir(self.path, recursive=recursive) - - async def delete_prefix(self) -> None: - """ - Delete all keys with the given prefix from the store. - """ - await self.store.delete_prefix(self.path) + path = self.path + if not path.endswith("/"): + path += "/" + await self.store.delete_dir(path) async def set_if_not_exists(self, default: Buffer) -> None: """ diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index 453a793cb..755d7a58e 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -222,6 +222,11 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: async for key in self._store.list_dir(prefix=prefix): yield key + async def delete_dir(self, prefix: str) -> None: + # docstring inherited + with self.log(prefix): + await self._store.delete_dir(prefix=prefix) + def with_mode(self, mode: AccessModeLiteral) -> Self: # docstring inherited with self.log(mode): diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index fd9d6e691..b39c548fb 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -213,11 +213,24 @@ async def test_exists(self, store: S) -> None: assert await store.exists("foo/zarr.json") async def test_delete(self, store: S) -> None: + if not store.supports_deletes: + pytest.skip("store does not support deletes") await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) assert await store.exists("foo/zarr.json") await store.delete("foo/zarr.json") assert not await store.exists("foo/zarr.json") + async def test_delete_dir(self, store: S) -> None: + if not store.supports_deletes: + pytest.skip("store does not support deletes") + await store.set("zarr.json", self.buffer_cls.from_bytes(b"root")) + await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) + await store.set("foo/c/0", self.buffer_cls.from_bytes(b"chunk")) + await store.delete_dir("foo") + assert await store.exists("zarr.json") + assert not await store.exists("foo/zarr.json") + assert not await store.exists("foo/c/0") + async def test_empty(self, store: S) -> None: assert await store.empty() await self.set( diff --git a/tests/test_store/test_logging.py b/tests/test_store/test_logging.py index 976da6818..50db5c1c5 100644 --- a/tests/test_store/test_logging.py +++ b/tests/test_store/test_logging.py @@ -47,12 +47,13 @@ async def test_logging_store_counter(store: Store) -> None: assert wrapped.counter["set"] == 2 assert wrapped.counter["list"] == 0 assert wrapped.counter["list_dir"] == 0 + assert wrapped.counter["list_prefix"] == 0 if store.supports_deletes: assert wrapped.counter["get"] == 0 # 1 if overwrite=False - assert wrapped.counter["list_prefix"] == 1 + assert wrapped.counter["delete_dir"] == 1 else: assert wrapped.counter["get"] == 1 - assert wrapped.counter["list_prefix"] == 0 + assert wrapped.counter["delete_dir"] == 0 async def test_with_mode(): diff --git a/tests/test_store/test_zip.py b/tests/test_store/test_zip.py index eb129a1b2..8dee47449 100644 --- a/tests/test_store/test_zip.py +++ b/tests/test_store/test_zip.py @@ -14,7 +14,6 @@ from zarr.testing.store import StoreTests if TYPE_CHECKING: - from collections.abc import Coroutine from typing import Any @@ -65,9 +64,6 @@ def test_store_supports_partial_writes(self, store: ZipStore) -> None: def test_store_supports_listing(self, store: ZipStore) -> None: assert store.supports_listing - def test_delete(self, store: ZipStore) -> Coroutine[Any, Any, None]: - pass - def test_api_integration(self, store: ZipStore) -> None: root = zarr.open_group(store=store) From cafb46ad88335001e8180c39bb776c4f9310e8f5 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 24 Oct 2024 16:56:48 -0700 Subject: [PATCH 11/14] fixup --- src/zarr/storage/logging.py | 3 --- src/zarr/testing/store.py | 2 ++ tests/test_array.py | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index 755d7a58e..be259579e 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -235,6 +235,3 @@ def with_mode(self, mode: AccessModeLiteral) -> Self: log_level=self.log_level, log_handler=self.log_handler, ) - - -# TODO: wrap delete methods here diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index b39c548fb..3aece0f4a 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -224,10 +224,12 @@ async def test_delete_dir(self, store: S) -> None: if not store.supports_deletes: pytest.skip("store does not support deletes") await store.set("zarr.json", self.buffer_cls.from_bytes(b"root")) + await store.set("foo-bar/zarr.json", self.buffer_cls.from_bytes(b"root")) await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) await store.set("foo/c/0", self.buffer_cls.from_bytes(b"chunk")) await store.delete_dir("foo") assert await store.exists("zarr.json") + assert await store.exists("foo-bar/zarr.json") assert not await store.exists("foo/zarr.json") assert not await store.exists("foo/c/0") diff --git a/tests/test_array.py b/tests/test_array.py index e827b07b0..96475d43e 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -52,7 +52,6 @@ def test_array_creation_existing_node( if exists_ok: if not store.supports_deletes: - # TODO: confirm you get the expected error here pytest.skip("store does not support deletes") arr_new = Array.create( spath / "extant", From a1e71f1ef723ceeb8640396f366c7888e2df8329 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 24 Oct 2024 17:03:23 -0700 Subject: [PATCH 12/14] fixup --- tests/test_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_group.py b/tests/test_group.py index 7de5b629f..6bacca488 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -1384,7 +1384,7 @@ def test_group_deprecated_positional_args(method: str) -> None: @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_delitem_removes_children(store: Store, zarr_format: ZarrFormat) -> None: # https://github.com/zarr-developers/zarr-python/issues/2191 - g1 = zarr.group(store=store) + g1 = zarr.group(store=store, zarr_format=zarr_format) g1.create_group("0") g1.create_group("0/0") arr = g1.create_array("0/0/0", shape=(1,)) From 5677683736363a0fe8013e604d9ead22a49d2d17 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 4 Nov 2024 20:52:17 -0800 Subject: [PATCH 13/14] Update src/zarr/abc/store.py --- src/zarr/abc/store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index c7f21df50..2d26d7dcb 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -342,8 +342,8 @@ def list(self) -> AsyncGenerator[str, None]: @abstractmethod def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned as - absolute paths (i.e. including the prefix). + Retrieve all keys in the store that begin with a given prefix. Keys are returned relative + to the root of the store. Parameters ---------- From 7e9ec76ea9095bf854a8e74fcc0393396531bd67 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Nov 2024 04:53:53 +0000 Subject: [PATCH 14/14] style: pre-commit fixes --- src/zarr/abc/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 2d26d7dcb..eefe04d50 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -342,7 +342,7 @@ def list(self) -> AsyncGenerator[str, None]: @abstractmethod def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned relative + Retrieve all keys in the store that begin with a given prefix. Keys are returned relative to the root of the store. Parameters