Add decompressed OME-Zarr dataset size to iohub info (#248)
* adding datastore size to info

* adding uncompressed string

* adding changes for readability

* typo

* Only show decompressed size due to zarr-python bug

* add test for size formatting

* add test for CLI size info

---------

Co-authored-by: Ziwen Liu <[email protected]>
edyoshikun and ziw-liu authored Nov 6, 2024
1 parent 0b301a4 commit 16b5571
Showing 3 changed files with 38 additions and 1 deletion.
27 changes: 27 additions & 0 deletions iohub/reader.py
@@ -262,11 +262,23 @@ def print_info(path: StrOrBytesPath, verbose=False):
             print("Zarr hierarchy:")
             reader.print_tree()
             positions = list(reader.positions())
+            total_bytes_uncompressed = sum(
+                p["0"].nbytes for _, p in positions
+            )
             msgs.append(f"Positions:\t\t {len(positions)}")
             msgs.append(f"Chunk size:\t\t {positions[0][1][0].chunks}")
+            msgs.append(
+                f"No. bytes decompressed:\t\t {total_bytes_uncompressed} "
+                f"[{sizeof_fmt(total_bytes_uncompressed)}]"
+            )
     else:
+        total_bytes_uncompressed = reader["0"].nbytes
         msgs.append(f"(Z, Y, X) scale (um):\t {tuple(reader.scale[2:])}")
         msgs.append(f"Chunk size:\t\t {reader['0'].chunks}")
+        msgs.append(
+            f"No. bytes decompressed:\t\t {total_bytes_uncompressed} "
+            f"[{sizeof_fmt(total_bytes_uncompressed)}]"
+        )
         if verbose:
             msgs.extend(
                 [
@@ -280,3 +292,18 @@ def print_info(path: StrOrBytesPath, verbose=False):
             reader.print_tree()
     print("\n".join(msgs))
     reader.close()
+
+
+def sizeof_fmt(num: int) -> str:
+    """
+    Human readable file size
+    Adapted from:
+    https://web.archive.org/web/20111010015624/
+    http://blogmag.net/blog/read/38/Print_human_readable_file_size
+    """
+    if num < 1024:
+        return f"{num} B"
+    for x in ["KiB", "MiB", "GiB", "TiB"]:
+        num /= 1024
+        if num < 1024:
+            return f"{num:.1f} {x}"
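
The total reported above is just the sum of each position's in-memory array size (zarr's nbytes), i.e. the size before compression. A minimal sketch of the same computation through iohub's public API, assuming a hypothetical HCS plate at plate.zarr; open_ome_zarr, positions(), and the "0" array's nbytes are the same calls the diff relies on:

# Sketch only: sum the decompressed size of every position's "0" array,
# mirroring the logic added to print_info above.
# "plate.zarr" is a hypothetical path.
from iohub.ngff import open_ome_zarr

with open_ome_zarr("plate.zarr", mode="r") as plate:
    total_bytes_uncompressed = sum(
        pos["0"].nbytes for _, pos in plate.positions()
    )
print(f"No. bytes decompressed: {total_bytes_uncompressed}")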
2 changes: 2 additions & 0 deletions tests/cli/test_cli.py
@@ -90,11 +90,13 @@ def test_cli_info_ome_zarr(verbose):
     assert result.exit_code == 0
     assert re.search(r"Wells:\s+1", result.output)
     assert ("Chunk size" in result.output) == bool(verbose)
+    assert ("No. bytes decompressed" in result.output) == bool(verbose)
     # Test on single position
     result_pos = runner.invoke(cli, ["info", str(hcs_ref / "B" / "03" / "0")])
     assert "Channel names" in result_pos.output
     assert "scale (um)" in result_pos.output
     assert "Chunk size" in result_pos.output
+    assert "84.4 MiB" in result_pos.output


 @pytest.mark.parametrize("grid_layout", ["-g", None])
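
Outside pytest, the same output line can be spot-checked by driving the info command with Click's test runner. A hedged sketch, assuming the Click entry point is importable as iohub.cli.cli (as in this test module) and a hypothetical single-position store at dataset.zarr; note that for HCS plates the line only appears in verbose mode, which is exactly what the first new assertion above encodes:

# Sketch only: invoke the info command programmatically and look for the new line.
# "dataset.zarr" is a hypothetical path to a single-position OME-Zarr store.
from click.testing import CliRunner
from iohub.cli.cli import cli

result = CliRunner().invoke(cli, ["info", "dataset.zarr"])
assert "No. bytes decompressed" in result.output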
10 changes: 9 additions & 1 deletion tests/test_reader.py
@@ -3,7 +3,7 @@
 from iohub._deprecated.singlepagetiff import MicromanagerSequenceReader
 from iohub.mmstack import MMStack
 from iohub.ndtiff import NDTiffDataset
-from iohub.reader import read_images
+from iohub.reader import read_images, sizeof_fmt
 from tests.conftest import (
     mm2gamma_ome_tiffs,
     mm2gamma_singlepage_tiffs,
@@ -36,3 +36,11 @@ def test_detect_ndtiff(data_path):
 def test_detect_single_page_tiff(data_path):
     reader = read_images(data_path)
     assert isinstance(reader, MicromanagerSequenceReader)
+
+
+@pytest.mark.parametrize(
+    "num_bytes,expected",
+    [(3, "3 B"), (2.234 * 2**20, "2.2 MiB"), (3.456 * 2**40, "3.5 TiB")],
+)
+def test_sizeof_fmt(num_bytes, expected):
+    assert sizeof_fmt(num_bytes) == expected
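
The expected strings follow directly from the helper's divide-by-1024 loop: every division steps up one binary unit, so 2.234 * 2**20 bytes comes back to 2.234 after two divisions (2.2 MiB) and 3.456 * 2**40 to 3.456 after four (3.5 TiB). A quick illustration of the largest case:

# Walk 3.456 * 2**40 bytes up the binary units by repeated division by 1024.
num = 3.456 * 2**40
for unit in ["KiB", "MiB", "GiB", "TiB"]:
    num /= 1024
print(f"{num:.1f} {unit}")  # prints "3.5 TiB"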
