Dataloading Revamp #3216

Open

AntonioMacaronio wants to merge 87 commits into main from dataloading-revamp

Changes from 27 commits

Commits (87)
0471543
initial debugging and testing works
AntonioMacaronio Jun 11, 2024
c6dde7d
pwais changes with RayBatchStream to alleviate training
AntonioMacaronio Jun 12, 2024
a09ea0c
Merge branch 'main' into dataloading-revamp
AntonioMacaronio Jun 12, 2024
78453cd
few bugs to iron out with multiprocessing, specifically pickled colla…
AntonioMacaronio Jun 12, 2024
f2bd96f
working version of RayBatchStream
AntonioMacaronio Jun 13, 2024
d8b7430
additional docstrings
AntonioMacaronio Jun 13, 2024
a5425d4
cleanup
AntonioMacaronio Jun 13, 2024
604f734
much more documentation
AntonioMacaronio Jun 13, 2024
0143803
successfully trained AEA-script2_seq2 closed_loop without OOM
AntonioMacaronio Jun 13, 2024
d3527e2
porting over aria dataset-size feature
AntonioMacaronio Jun 13, 2024
25f5f27
added logic to handle eviction of a worker's cached_collated_batch
AntonioMacaronio Jun 14, 2024
3a8b63b
antonio's implementation of stream batches
AntonioMacaronio Jun 15, 2024
536c6ca
training on a dataset with 4000 images works!
AntonioMacaronio Jun 15, 2024
43a0061
some configuration speedups, loops aren't actually needed!
AntonioMacaronio Jun 15, 2024
fa7cf30
quick fix adjustment to aria
AntonioMacaronio Jun 15, 2024
927cb6a
removed unnecessary looping
AntonioMacaronio Jun 16, 2024
814f2c2
much faster training when adding i variable to collate every 5 ray bu…
AntonioMacaronio Jun 25, 2024
247ac3e
cleanup unnecessary variables in Dataloader
AntonioMacaronio Jul 7, 2024
55d0803
further cleanup
AntonioMacaronio Jul 11, 2024
b6979a4
adding caching of compressed images to RAM to reduce disk bottleneck
AntonioMacaronio Jul 20, 2024
81dbf7c
added caching to RAM for masks
AntonioMacaronio Jul 22, 2024
55ca71d
found fast way to collate - many tricks applied
AntonioMacaronio Jul 26, 2024
3b4f091
quick update to aria to test on different datasets
AntonioMacaronio Jul 26, 2024
7de1922
cleaned up the accelerated pil_to_numpy function
AntonioMacaronio Jul 26, 2024
9ceaad1
cleaning up PR
AntonioMacaronio Jul 26, 2024
4147a6a
this commit was used to generate the time metrics and profiling metrics
AntonioMacaronio Jul 26, 2024
5a55b7a
REAL commit used to run tests
AntonioMacaronio Jul 26, 2024
78f02e6
testing with nerfacto-big
AntonioMacaronio Aug 15, 2024
19bc4b5
generated RayBundle collate and converting images from uint8s to floa…
AntonioMacaronio Aug 15, 2024
9245d05
updating nerfacto to support uint8 easily, will need to figure out a …
AntonioMacaronio Aug 20, 2024
3124c14
datamanager updates, both splat and nerf
AntonioMacaronio Aug 20, 2024
afb0612
must use writeable arrays because torch requires them
AntonioMacaronio Aug 20, 2024
288a740
cleaned up base_dataset, added pickle to utils, more code in full_ima…
AntonioMacaronio Aug 22, 2024
2fd0862
lots of progress on a parallel FullImageDatamanager
AntonioMacaronio Aug 23, 2024
846e2f3
can train big splats with pre-assertion hack or ROI hack and 0 workers
AntonioMacaronio Aug 24, 2024
8fb0b4d
fixed all undistortion issues with ParallelImageDatamanager
AntonioMacaronio Aug 27, 2024
ce3f83f
adding some downsampling and parallel tests with splatfacto!
AntonioMacaronio Aug 31, 2024
8ab9963
deleted commented code in dataloaders.py and added bugfix to shuffling
AntonioMacaronio Aug 31, 2024
c9e16bf
testing splatfacto-big
AntonioMacaronio Sep 1, 2024
ddac38d
cleaned up base_pipeline.py
AntonioMacaronio Sep 1, 2024
443719a
cleaned up base_pipeline.py ACTUALLY THIS TIME, forgot to save last time
AntonioMacaronio Sep 1, 2024
d16e519
cleaned up a lot of code
AntonioMacaronio Sep 1, 2024
367d512
process_project_aria back to main branch and some cleanup in full_ima…
AntonioMacaronio Sep 1, 2024
d3d99b4
clarifying docstrings
AntonioMacaronio Sep 1, 2024
6f763dc
further PR cleanup
AntonioMacaronio Sep 3, 2024
a5191bd
updating models
AntonioMacaronio Sep 9, 2024
7db70dc
further cleanup
AntonioMacaronio Sep 9, 2024
5c3262b
removed caching of images into bytestrings
AntonioMacaronio Sep 9, 2024
ff2bda1
adding caching of compressed images to RAM, forgot that hardware matters
AntonioMacaronio Sep 9, 2024
f6dd7dd
removing oom methods, adding the ability to add a flag to dataloading
AntonioMacaronio Sep 15, 2024
a6602c7
removed CacheDataloader, moved RayBatchStream to dataloaders.py, new …
AntonioMacaronio Sep 15, 2024
3dc2031
fixing base_piplines, deleting a weird datamanager_configs file that …
AntonioMacaronio Sep 15, 2024
89f3d98
cleaning up next_train
AntonioMacaronio Sep 15, 2024
14e60e5
replaced parallel datamanager with new datamanager
AntonioMacaronio Sep 19, 2024
204dfb2
reverted the original base_datamanager.py, new datamanager replaced p…
AntonioMacaronio Sep 19, 2024
5864bc9
modified VanillaConfig, but VanillaDataManager is the same as before
AntonioMacaronio Sep 19, 2024
6d97de3
cleaning up, 2 datamanagers now - original and new parallel one
AntonioMacaronio Sep 19, 2024
1f34017
able to train with new nerfstudio dataloader now
AntonioMacaronio Sep 19, 2024
99cf86a
side by side datamanagers, moved tons of logic into dataloaders.py an…
AntonioMacaronio Sep 23, 2024
4ebad85
added custom ray processing API to support implementations like LERF,…
AntonioMacaronio Sep 23, 2024
87921be
adding functionality for ns-eval by adding FixedIndicesEvalDataloader…
AntonioMacaronio Sep 24, 2024
b628c7c
adding both ray API and image-view API to datamanagers for custom par…
AntonioMacaronio Sep 27, 2024
d2785d1
updating splatfacto config for 4k tests
AntonioMacaronio Sep 30, 2024
436af9d
updating docstrings to be more descriptive
AntonioMacaronio Sep 30, 2024
dd4daaa
new datamanager API breaks when setup_eval() has multiple workers, no…
AntonioMacaronio Sep 30, 2024
43c66ae
adding custom_view_processor to ImageBatchStream
AntonioMacaronio Sep 30, 2024
ba81e11
merging with main!
AntonioMacaronio Sep 30, 2024
1922566
reverting full_images_datamanager to main branch
AntonioMacaronio Oct 1, 2024
beb74be
removing nn.Module inheritance from Datamanager class
AntonioMacaronio Oct 1, 2024
087cff0
don't need to move datamanager to device anymore since Datamanager is …
AntonioMacaronio Oct 1, 2024
48e6d15
finished integration test with nerfacto
AntonioMacaronio Oct 4, 2024
3f1799b
simplified config variables, integrated the parallelism/disk-data-loa…
AntonioMacaronio Oct 25, 2024
f46aa42
updated the splatfacto config to be simpler with the dataloading and …
AntonioMacaronio Oct 25, 2024
5aa51fb
style checks and some cleanup
AntonioMacaronio Oct 25, 2024
ec3c12a
new splatfacto test, cleaning up nerfacto integration test
AntonioMacaronio Oct 25, 2024
82bc5b2
removing redundant parallel_full_images_datamanager, as the OG full_i…
AntonioMacaronio Oct 26, 2024
377a56a
Merge branch 'main' into dataloading-revamp
AntonioMacaronio Oct 28, 2024
bbb5473
ruff linting and pyright fixing
AntonioMacaronio Oct 28, 2024
2e64120
further pyright fixing
AntonioMacaronio Oct 28, 2024
e9c2fd6
another pyright fixing
AntonioMacaronio Oct 28, 2024
e4dc9f9
fixing pyright error, camera optimization no longer part of datamanager
AntonioMacaronio Nov 1, 2024
8b0ec8e
fixing one pyright
AntonioMacaronio Nov 22, 2024
6349852
fixing dataloading error when camera is not undistorted with dataloader
AntonioMacaronio Dec 13, 2024
ad6b090
fixing comments and updating style
AntonioMacaronio Dec 21, 2024
8c678ee
undoing a style change i made
AntonioMacaronio Dec 21, 2024
64edabb
undoing another style change i made by accident
AntonioMacaronio Dec 21, 2024
cc63585
Merge branch 'main' into dataloading-revamp
AntonioMacaronio Dec 22, 2024
2 changes: 1 addition & 1 deletion nerfstudio/configs/method_configs.py
@@ -90,7 +90,7 @@
max_num_iterations=30000,
mixed_precision=True,
pipeline=VanillaPipelineConfig(
datamanager=ParallelDataManagerConfig(
datamanager=VanillaDataManagerConfig(
dataparser=NerfstudioDataParserConfig(),
train_num_rays_per_batch=4096,
eval_num_rays_per_batch=4096,
290 changes: 267 additions & 23 deletions nerfstudio/data/datamanagers/base_datamanager.py

Large diffs are not rendered by default.
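
(The base_datamanager.py diff is collapsed here, but the commit history — RayBatchStream, removal of CacheDataloader, "collate every 5 ray bundles" — points at the core idea: stream ray batches from an IterableDataset instead of keeping one huge cached collated batch resident per worker. The sketch below is illustrative only; the class name, tensor layout, and sampling logic are assumptions, not the PR's actual code.)

```python
import torch
from torch.utils.data import IterableDataset


class RayBatchStreamSketch(IterableDataset):
    """Illustrative stand-in for a ray-batch stream: re-collate a small group of
    images every few steps and sample ray batches from it, instead of caching one
    giant collated batch in every dataloader worker."""

    def __init__(self, images: torch.Tensor, rays_per_batch: int = 4096,
                 images_per_group: int = 4, collate_every: int = 5):
        self.images = images                    # assumed (N, H, W, 3) image tensor
        self.rays_per_batch = rays_per_batch
        self.images_per_group = images_per_group
        self.collate_every = collate_every      # loosely mirrors "collate every 5 ray bundles"

    def __iter__(self):
        n, h, w, _ = self.images.shape
        step, group = 0, None
        while True:
            if step % self.collate_every == 0:
                # Re-selecting (and, in the real thing, re-loading/collating) images is
                # the expensive part, so it only happens every few ray batches.
                group = torch.randperm(n)[: self.images_per_group]
            cam = group[torch.randint(len(group), (self.rays_per_batch,))]
            ys = torch.randint(h, (self.rays_per_batch,))
            xs = torch.randint(w, (self.rays_per_batch,))
            # "indices" says which rays to generate; "pixels" is the supervision signal.
            yield {"indices": torch.stack([cam, ys, xs], dim=-1),
                   "pixels": self.images[cam, ys, xs]}
            step += 1
```

Wrapping such a stream in `torch.utils.data.DataLoader(stream, batch_size=None, num_workers=k)` moves sampling and collation off the main training process, which is presumably what lets the PR drop CacheDataloader.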

10 changes: 9 additions & 1 deletion nerfstudio/data/datamanagers/full_images_datamanager.py
@@ -47,6 +47,14 @@
from nerfstudio.utils.misc import get_orig_class
from nerfstudio.utils.rich_utils import CONSOLE

class ImageBatchStream(torch.utils.data.IterableDataset):
def __init__(
self,

):
return

# def

@dataclass
class FullImageDatamanagerConfig(DataManagerConfig):
@@ -79,7 +87,7 @@ class FullImageDatamanagerConfig(DataManagerConfig):
fps_reset_every: int = 100
"""The number of iterations before one resets fps sampler repeatly, which is essentially drawing fps_reset_every
samples from the pool of all training cameras without replacement before a new round of sampling starts."""


class FullImageDatamanager(DataManager, Generic[TDataset]):
"""
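
(At this point in the commit range, ImageBatchStream is still an empty stub. For context, here is a minimal sketch of the usual IterableDataset pattern it would need to follow — in particular, sharding indices across DataLoader workers so each image is decoded by exactly one worker. Everything below is illustrative, not the PR's implementation.)

```python
from torch.utils.data import IterableDataset, get_worker_info


class ImageStreamSketch(IterableDataset):
    """Illustrative only: stream items from a map-style dataset one at a time,
    sharded across DataLoader workers so no image is loaded twice per pass."""

    def __init__(self, dataset):
        self.dataset = dataset  # anything with __len__ / __getitem__

    def __iter__(self):
        worker = get_worker_info()
        indices = range(len(self.dataset))
        if worker is not None:
            # Stride the indices by worker id so worker shards don't overlap.
            indices = indices[worker.id :: worker.num_workers]
        while True:  # iterate forever; the training loop decides when to stop
            # (a real version would also shuffle each shard on every pass)
            for i in indices:
                yield self.dataset[i]


# Hypothetical usage: batch_size=None yields single items rather than stacked batches.
# loader = DataLoader(ImageStreamSketch(train_dataset), batch_size=None, num_workers=4)
```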
40 changes: 32 additions & 8 deletions nerfstudio/data/datasets/base_dataset.py
@@ -18,6 +18,7 @@
from __future__ import annotations

from copy import deepcopy
import io
from pathlib import Path
from typing import Dict, List, Literal

@@ -31,8 +32,8 @@

from nerfstudio.cameras.cameras import Cameras
from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs
from nerfstudio.data.utils.data_utils import get_image_mask_tensor_from_path

from nerfstudio.data.utils.data_utils import get_image_mask_tensor_from_path, pil_to_numpy
from torch.profiler import record_function

class InputDataset(Dataset):
"""Dataset that returns images.
@@ -45,7 +46,7 @@ class InputDataset(Dataset):
exclude_batch_keys_from_device: List[str] = ["image", "mask"]
cameras: Cameras

def __init__(self, dataparser_outputs: DataparserOutputs, scale_factor: float = 1.0):
def __init__(self, dataparser_outputs: DataparserOutputs, scale_factor: float = 1.0, cache_images: bool = True):
super().__init__()
self._dataparser_outputs = dataparser_outputs
self.scale_factor = scale_factor
@@ -54,6 +55,18 @@ def __init__(self, dataparser_outputs: DataparserOutputs, scale_factor: float =
self.cameras = deepcopy(dataparser_outputs.cameras)
self.cameras.rescale_output_resolution(scaling_factor=scale_factor)
self.mask_color = dataparser_outputs.metadata.get("mask_color", None)
self.cache_images = cache_images
"""If cache_images == True, cache all the image files into RAM in their compressed form (not as tensors yet)"""
if cache_images:
self.binary_images = []
self.binary_masks = []
for image_filename in self._dataparser_outputs.image_filenames:
with open(image_filename, 'rb') as f:
self.binary_images.append(io.BytesIO(f.read()))
if self._dataparser_outputs.mask_filenames is not None:
for mask_filename in self._dataparser_outputs.mask_filenames:
with open(mask_filename, 'rb') as f:
self.binary_masks.append(io.BytesIO(f.read()))

def __len__(self):
return len(self._dataparser_outputs.image_filenames)
@@ -65,12 +78,15 @@ def get_numpy_image(self, image_idx: int) -> npt.NDArray[np.uint8]:
image_idx: The image index in the dataset.
"""
image_filename = self._dataparser_outputs.image_filenames[image_idx]
pil_image = Image.open(image_filename)
if self.cache_images:
pil_image = Image.open(self.binary_images[image_idx])
else:
pil_image = Image.open(image_filename)
if self.scale_factor != 1.0:
width, height = pil_image.size
newsize = (int(width * self.scale_factor), int(height * self.scale_factor))
pil_image = pil_image.resize(newsize, resample=Image.Resampling.BILINEAR)
image = np.array(pil_image, dtype="uint8") # shape is (h, w) or (h, w, 3 or 4)
image = pil_to_numpy(pil_image)  # shape is (h, w) or (h, w, 3 or 4) and dtype == "uint8"
if len(image.shape) == 2:
image = image[:, :, None].repeat(3, axis=2)
assert len(image.shape) == 3
@@ -84,7 +100,12 @@ def get_image_float32(self, image_idx: int) -> Float[Tensor, "image_height image
Args:
image_idx: The image index in the dataset.
"""
image = torch.from_numpy(self.get_numpy_image(image_idx).astype("float32") / 255.0)
with record_function("pil_to_numpy()"):
image = self.get_numpy_image(image_idx)
with record_function("divide by 255.0 + convert to float32"):
image = image / np.float32(255)
with record_function("torch.from_numpy()"):
image = torch.from_numpy(image)
if self._dataparser_outputs.alpha_color is not None and image.shape[-1] == 4:
assert (self._dataparser_outputs.alpha_color >= 0).all() and (
self._dataparser_outputs.alpha_color <= 1
@@ -98,7 +119,7 @@ def get_image_uint8(self, image_idx: int) -> UInt8[Tensor, "image_height image_w
Args:
image_idx: The image index in the dataset.
"""
image = torch.from_numpy(self.get_numpy_image(image_idx))
image = torch.from_numpy(self.get_numpy_image(image_idx).astype(np.uint8))
if self._dataparser_outputs.alpha_color is not None and image.shape[-1] == 4:
assert (self._dataparser_outputs.alpha_color >= 0).all() and (
self._dataparser_outputs.alpha_color <= 1
@@ -125,7 +146,10 @@ def get_data(self, image_idx: int, image_type: Literal["uint8", "float32"] = "fl

data = {"image_idx": image_idx, "image": image}
if self._dataparser_outputs.mask_filenames is not None:
mask_filepath = self._dataparser_outputs.mask_filenames[image_idx]
if self.cache_images:
mask_filepath = self.binary_masks[image_idx]
else:
mask_filepath = self._dataparser_outputs.mask_filenames[image_idx]
data["mask"] = get_image_mask_tensor_from_path(filepath=mask_filepath, scale_factor=self.scale_factor)
assert (
data["mask"].shape[:2] == data["image"].shape[:2]
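
(The cache_images path added above keeps the encoded PNG/JPEG bytes in RAM as BytesIO objects and decodes them on access, trading a little CPU per lookup for far less memory than caching decoded tensors — a compressed frame is typically a few hundred KB versus tens of MB as float32. Below is a standalone sketch of the same idea; the class name and API are made up for illustration.)

```python
import io

import numpy as np
from PIL import Image


class CompressedImageCache:
    """Illustrative: hold images in RAM in their compressed form, decode lazily."""

    def __init__(self, image_filenames):
        self._buffers = []
        for filename in image_filenames:
            with open(filename, "rb") as f:        # one disk read per image, ever
                self._buffers.append(io.BytesIO(f.read()))

    def get(self, idx: int) -> np.ndarray:
        buf = self._buffers[idx]
        buf.seek(0)                                # rewind so the buffer can be re-decoded
        return np.array(Image.open(buf), dtype=np.uint8)
```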
38 changes: 35 additions & 3 deletions nerfstudio/data/utils/data_utils.py
@@ -14,15 +14,47 @@

"""Utility functions to allow easy re-use of common operations across dataloaders"""
from pathlib import Path
from typing import List, Tuple, Union
from typing import List, Tuple, Union, IO

import cv2
import numpy as np
import torch
from PIL import Image


def get_image_mask_tensor_from_path(filepath: Path, scale_factor: float = 1.0) -> torch.Tensor:
def pil_to_numpy(im: Image) -> np.ndarray:
"""Converts a PIL Image object to a NumPy array.

Args:
im (PIL.Image.Image): The input PIL Image object.

Returns:
numpy.ndarray representing the image data.
"""
# Load in image completely (PIL defaults to lazy loading)
im.load()

# Unpack data
e = Image._getencoder(im.mode, "raw", im.mode)
e.setimage(im.im)

# NumPy buffer for the result
shape, typestr = Image._conv_type_shape(im)
data = np.empty(shape, dtype=np.dtype(typestr))
mem = data.data.cast("B", (data.data.nbytes,))

bufsize, s, offset = 65536, 0, 0
while not s:
l, s, d = e.encode(bufsize)
mem[offset:offset + len(d)] = d
offset += len(d)
if s < 0:
raise RuntimeError("encoder error %d in tobytes" % s)

return data


def get_image_mask_tensor_from_path(filepath: Union[Path, IO[bytes]], scale_factor: float = 1.0) -> torch.Tensor:
"""
Utility function to read a mask image from the given path and return a boolean tensor
"""
@@ -31,7 +63,7 @@ def get_image_mask_tensor_from_path(filepath: Path, scale_factor: float = 1.0) -
width, height = pil_mask.size
newsize = (int(width * scale_factor), int(height * scale_factor))
pil_mask = pil_mask.resize(newsize, resample=Image.Resampling.NEAREST)
mask_tensor = torch.from_numpy(np.array(pil_mask)).unsqueeze(-1).bool()
mask_tensor = torch.from_numpy(pil_to_numpy(pil_mask)).unsqueeze(-1).bool()
if len(mask_tensor.shape) != 3:
raise ValueError("The mask image should have 1 channel")
return mask_tensor
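
(pil_to_numpy copies PIL's already-decoded pixel data straight into a preallocated NumPy array via the raw encoder, skipping the intermediate bytes object that np.array(pil_image) goes through, and the result is writable, which matters for torch.from_numpy per the "must use writeable arrays" commit. A rough way to sanity-check both claims — "frame.jpg" is a placeholder path, and timings will vary with hardware and image size.)

```python
import time

import numpy as np
import torch
from PIL import Image

from nerfstudio.data.utils.data_utils import pil_to_numpy  # added in the diff above

pil_image = Image.open("frame.jpg")  # placeholder path
pil_image.load()                     # decode up front so both paths time only the copy

t0 = time.perf_counter()
baseline = np.array(pil_image, dtype="uint8")   # goes through an intermediate bytes copy
t1 = time.perf_counter()
fast = pil_to_numpy(pil_image)                  # writes straight into a preallocated array
t2 = time.perf_counter()

assert np.array_equal(baseline, fast)
assert fast.flags.writeable                      # safe to hand to torch.from_numpy()
tensor = torch.from_numpy(fast)
print(f"np.array: {t1 - t0:.4f}s  pil_to_numpy: {t2 - t1:.4f}s")
```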