Add decompressed OME-Zarr dataset size to iohub info (#248)
* adding datastore size to info

* adding uncompressed string

* adding changes for readability

* typo

* Only show decompressed size due to zarr-python bug

* add test for size formatting

* add test for CLI size info

---------

Co-authored-by: Ziwen Liu <[email protected]>
edyoshikun and ziw-liu authored Nov 6, 2024
1 parent 0b301a4 commit 16b5571
Showing 3 changed files with 38 additions and 1 deletion.
27 changes: 27 additions & 0 deletions iohub/reader.py
@@ -262,11 +262,23 @@ def print_info(path: StrOrBytesPath, verbose=False):
             print("Zarr hierarchy:")
             reader.print_tree()
             positions = list(reader.positions())
+            total_bytes_uncompressed = sum(
+                p["0"].nbytes for _, p in positions
+            )
             msgs.append(f"Positions:\t\t {len(positions)}")
             msgs.append(f"Chunk size:\t\t {positions[0][1][0].chunks}")
+            msgs.append(
+                f"No. bytes decompressed:\t\t {total_bytes_uncompressed} "
+                f"[{sizeof_fmt(total_bytes_uncompressed)}]"
+            )
     else:
+        total_bytes_uncompressed = reader["0"].nbytes
         msgs.append(f"(Z, Y, X) scale (um):\t {tuple(reader.scale[2:])}")
         msgs.append(f"Chunk size:\t\t {reader['0'].chunks}")
+        msgs.append(
+            f"No. bytes decompressed:\t\t {total_bytes_uncompressed} "
+            f"[{sizeof_fmt(total_bytes_uncompressed)}]"
+        )
         if verbose:
             msgs.extend(
                 [
@@ -280,3 +292,18 @@ def print_info(path: StrOrBytesPath, verbose=False):
             reader.print_tree()
     print("\n".join(msgs))
     reader.close()
+
+
+def sizeof_fmt(num: int) -> str:
+    """
+    Human readable file size
+    Adapted from:
+    https://web.archive.org/web/20111010015624/
+    http://blogmag.net/blog/read/38/Print_human_readable_file_size
+    """
+    if num < 1024:
+        return f"{num} B"
+    for x in ["KiB", "MiB", "GiB", "TiB"]:
+        num /= 1024
+        if num < 1024:
+            return f"{num:.1f} {x}"
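
The total reported above is just the sum of each position's in-memory array size (zarr's nbytes), i.e. the size before compression. A minimal sketch of the same computation through iohub's public API, assuming a hypothetical HCS plate at plate.zarr; open_ome_zarr, positions(), and the "0" array's nbytes are the same calls the diff relies on:

# Sketch only: sum the decompressed size of every position's "0" array,
# mirroring the logic added to print_info above.
# "plate.zarr" is a hypothetical path.
from iohub.ngff import open_ome_zarr

with open_ome_zarr("plate.zarr", mode="r") as plate:
    total_bytes_uncompressed = sum(
        pos["0"].nbytes for _, pos in plate.positions()
    )
print(f"No. bytes decompressed: {total_bytes_uncompressed}")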
2 changes: 2 additions & 0 deletions tests/cli/test_cli.py
@@ -90,11 +90,13 @@ def test_cli_info_ome_zarr(verbose):
     assert result.exit_code == 0
     assert re.search(r"Wells:\s+1", result.output)
     assert ("Chunk size" in result.output) == bool(verbose)
+    assert ("No. bytes decompressed" in result.output) == bool(verbose)
     # Test on single position
     result_pos = runner.invoke(cli, ["info", str(hcs_ref / "B" / "03" / "0")])
     assert "Channel names" in result_pos.output
     assert "scale (um)" in result_pos.output
     assert "Chunk size" in result_pos.output
+    assert "84.4 MiB" in result_pos.output


 @pytest.mark.parametrize("grid_layout", ["-g", None])
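
Outside pytest, the same output line can be spot-checked by driving the info command with Click's test runner. A hedged sketch, assuming the Click entry point is importable as iohub.cli.cli (as in this test module) and a hypothetical single-position store at dataset.zarr; note that for HCS plates the line only appears in verbose mode, which is exactly what the first new assertion above encodes:

# Sketch only: invoke the info command programmatically and look for the new line.
# "dataset.zarr" is a hypothetical path to a single-position OME-Zarr store.
from click.testing import CliRunner
from iohub.cli.cli import cli

result = CliRunner().invoke(cli, ["info", "dataset.zarr"])
assert "No. bytes decompressed" in result.output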
10 changes: 9 additions & 1 deletion tests/test_reader.py
@@ -3,7 +3,7 @@
 from iohub._deprecated.singlepagetiff import MicromanagerSequenceReader
 from iohub.mmstack import MMStack
 from iohub.ndtiff import NDTiffDataset
-from iohub.reader import read_images
+from iohub.reader import read_images, sizeof_fmt
 from tests.conftest import (
     mm2gamma_ome_tiffs,
     mm2gamma_singlepage_tiffs,
@@ -36,3 +36,11 @@ def test_detect_ndtiff(data_path):
 def test_detect_single_page_tiff(data_path):
     reader = read_images(data_path)
     assert isinstance(reader, MicromanagerSequenceReader)
+
+
+@pytest.mark.parametrize(
+    "num_bytes,expected",
+    [(3, "3 B"), (2.234 * 2**20, "2.2 MiB"), (3.456 * 2**40, "3.5 TiB")],
+)
+def test_sizeof_fmt(num_bytes, expected):
+    assert sizeof_fmt(num_bytes) == expected
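
The expected strings follow directly from the helper's divide-by-1024 loop: every division steps up one binary unit, so 2.234 * 2**20 bytes comes back to 2.234 after two divisions (2.2 MiB) and 3.456 * 2**40 to 3.456 after four (3.5 TiB). A quick illustration of the largest case:

# Walk 3.456 * 2**40 bytes up the binary units by repeated division by 1024.
num = 3.456 * 2**40
for unit in ["KiB", "MiB", "GiB", "TiB"]:
    num /= 1024
print(f"{num:.1f} {unit}")  # prints "3.5 TiB"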
