Skip to content

Commit

Permalink
Added Array.info_complete (#2514)
Browse files Browse the repository at this point in the history
Now that Store.getsize is a thing, we can do info_complete which
includes the number of chunks written and the size of those bytes.

Co-authored-by: Davis Bennett <[email protected]>
Co-authored-by: Norman Rzepka <[email protected]>
  • Loading branch information
3 people authored Nov 29, 2024
1 parent 2961246 commit 206d145
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 8 deletions.
53 changes: 45 additions & 8 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1346,18 +1346,53 @@ def info(self) -> Any:
AsyncArray.info_complete
All information about an array, including dynamic information
like the number of bytes and chunks written.
Examples
--------
>>> arr = await zarr.api.asynchronous.create(
... path="array", shape=(3, 4, 5), chunks=(2, 2, 2)
... )
>>> arr.info
Type : Array
Zarr format : 3
Data type : DataType.float64
Shape : (3, 4, 5)
Chunk shape : (2, 2, 2)
Order : C
Read-only : False
Store type : MemoryStore
Codecs : [{'endian': <Endian.little: 'little'>}]
No. bytes : 480
"""
return self._info()

async def info_complete(self) -> Any:
# TODO: get the size of the object from the store.
extra = {
"count_chunks_initialized": await self.nchunks_initialized(),
# count_bytes_stored isn't yet implemented.
}
return self._info(extra=extra)

def _info(self, extra: dict[str, int] | None = None) -> Any:
"""
Return all the information for an array, including dynamic information like a storage size.
In addition to the static information, this provides
- The count of chunks initialized
- The sum of the bytes written
Returns
-------
ArrayInfo
See Also
--------
AsyncArray.info
A property giving just the statically known information about an array.
"""
return self._info(
await self.nchunks_initialized(),
await self.store_path.store.getsize_prefix(self.store_path.path),
)

def _info(
self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None
) -> Any:
kwargs: dict[str, Any] = {}
if self.metadata.zarr_format == 2:
assert isinstance(self.metadata, ArrayV2Metadata)
Expand Down Expand Up @@ -1386,6 +1421,8 @@ def _info(self, extra: dict[str, int] | None = None) -> Any:
_read_only=self.read_only,
_store_type=type(self.store_path.store).__name__,
_count_bytes=self.dtype.itemsize * self.size,
_count_bytes_stored=count_bytes_stored,
_count_chunks_initialized=count_chunks_initialized,
**kwargs,
)

Expand Down
82 changes: 82 additions & 0 deletions tests/test_array.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import dataclasses
import json
import math
import pickle
Expand Down Expand Up @@ -474,6 +475,87 @@ def test_info_v3(self) -> None:
)
assert result == expected

def test_info_complete(self) -> None:
    """Check ``info_complete`` on a fresh v3 array and again after one chunk is written."""
    array = zarr.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)

    # Nothing has been written yet, so no chunk is initialized.
    untouched = ArrayInfo(
        _zarr_format=3,
        _data_type=DataType.parse("float64"),
        _shape=(4, 4),
        _chunk_shape=(2, 2),
        _order="C",
        _read_only=False,
        _store_type="MemoryStore",
        _codecs=[BytesCodec()],
        # 4 * 4 elements of 8 bytes each.
        _count_bytes=128,
        _count_chunks_initialized=0,
        # NOTE(review): presumably the size of the stored metadata document,
        # since no chunk data exists at this point -- confirm.
        _count_bytes_stored=373,
    )
    assert array.info_complete() == untouched

    # Fill exactly one (2, 2) chunk; the stored size grows by 32 bytes (373 -> 405).
    array[:2, :2] = 10
    one_chunk_written = dataclasses.replace(
        untouched, _count_chunks_initialized=1, _count_bytes_stored=405
    )
    assert array.info_complete() == one_chunk_written

async def test_info_v2_async(self) -> None:
    """Check the static ``info`` of an async array created with Zarr format 2."""
    array = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=2)

    # Only statically known fields are listed; the dynamic counters are left
    # at whatever defaults ArrayInfo defines.
    static_fields = {
        "_zarr_format": 2,
        "_data_type": np.dtype("float64"),
        "_shape": (4, 4),
        "_chunk_shape": (2, 2),
        "_order": "C",
        "_read_only": False,
        "_store_type": "MemoryStore",
        # 4 * 4 elements of 8 bytes each.
        "_count_bytes": 128,
    }
    assert array.info == ArrayInfo(**static_fields)

async def test_info_v3_async(self) -> None:
    """Check the static ``info`` of an async array created with Zarr format 3."""
    array = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)

    # Only statically known fields are listed; the dynamic counters are left
    # at whatever defaults ArrayInfo defines.
    static_fields = {
        "_zarr_format": 3,
        "_data_type": DataType.parse("float64"),
        "_shape": (4, 4),
        "_chunk_shape": (2, 2),
        "_order": "C",
        "_read_only": False,
        "_store_type": "MemoryStore",
        "_codecs": [BytesCodec()],
        # 4 * 4 elements of 8 bytes each.
        "_count_bytes": 128,
    }
    assert array.info == ArrayInfo(**static_fields)

async def test_info_complete_async(self) -> None:
    """Check async ``info_complete`` on a fresh v3 array and again after one chunk is written."""
    array = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)

    # Nothing has been written yet, so no chunk is initialized.
    untouched = ArrayInfo(
        _zarr_format=3,
        _data_type=DataType.parse("float64"),
        _shape=(4, 4),
        _chunk_shape=(2, 2),
        _order="C",
        _read_only=False,
        _store_type="MemoryStore",
        _codecs=[BytesCodec()],
        # 4 * 4 elements of 8 bytes each.
        _count_bytes=128,
        _count_chunks_initialized=0,
        # NOTE(review): presumably the size of the stored metadata document,
        # since no chunk data exists at this point -- confirm.
        _count_bytes_stored=373,
    )
    assert await array.info_complete() == untouched

    # Fill exactly one (2, 2) chunk; the stored size grows by 32 bytes (373 -> 405).
    first_chunk = (slice(2), slice(2))
    await array.setitem(first_chunk, 10)
    one_chunk_written = dataclasses.replace(
        untouched, _count_chunks_initialized=1, _count_bytes_stored=405
    )
    assert await array.info_complete() == one_chunk_written


@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("zarr_format", [2, 3])
Expand Down

0 comments on commit 206d145

Please sign in to comment.