Merge branch 'main' into jkulhanek/docker

nerfstudio-project · Sep 5, 2024 · 9d9d598 · 9d9d598
2 parents eefe098 + 96b7fe2
commit 9d9d598
Show file tree

Hide file tree

Showing 13 changed files with 106 additions and 34 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -17,7 +17,7 @@ repos:
     -   id: trailing-whitespace
     -   id: end-of-file-fixer
 -   repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.1.13
+    rev: v0.6.2
     hooks:
     -   id: ruff
         types_or: [ python, pyi, jupyter ]

diff --git a/docs/reference/cli/ns_train.md b/docs/reference/cli/ns_train.md
@@ -29,3 +29,11 @@ By default the nerfstudio dataparser is used. If you would like to use a differe
 ```bash
 ns-train {method} {dataparser} --help
 ```
+
+For example, to set the `eval_mode` of the nerfstudio dataparser to `filename` when training a `splatfacto` model via the `ns-train` CLI, you can run:
+
+```
+ns-train splatfacto [method args] nerfstudio-data --eval-mode filename
+```
+
+Notice that the custom dataparser and its arguments are passed after specifying the training method and its arguments.
diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py
@@ -315,7 +315,7 @@ def fixed_indices_eval_dataloader(self) -> List[Tuple[Cameras, Dict]]:
         Pretends to be the dataloader for evaluation, it returns a list of (camera, data) tuples
         """
         image_indices = [i for i in range(len(self.eval_dataset))]
-        data = deepcopy(self.cached_eval)
+        data = [d.copy() for d in self.cached_eval]
         _cameras = deepcopy(self.eval_dataset.cameras).to(self.device)
         cameras = []
         for i in image_indices:
@@ -332,8 +332,13 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]:
         return {}
 
     def get_train_rays_per_batch(self):
-        # TODO: fix this to be the resolution of the last image rendered
-        return 800 * 800
+        """Return height * width of the first cached training image, or 800 * 800 if the cache is empty."""
+        if len(self.cached_train) != 0:
+            h = self.cached_train[0]["image"].shape[0]  # first axis is treated as the image height
+            w = self.cached_train[0]["image"].shape[1]  # second axis is treated as the image width
+            return h * w
+        else:
+            return 800 * 800  # same constant the removed implementation always returned
 
     def next_train(self, step: int) -> Tuple[Cameras, Dict]:
         """Returns the next training batch
@@ -345,6 +350,9 @@ def next_train(self, step: int) -> Tuple[Cameras, Dict]:
             self.train_unseen_cameras = self.sample_train_cameras()
 
         data = self.cached_train[image_idx]
+        # Shallow-copy the dict so that moving the image to the device below does not mutate the cached entry.
+        # Mutating the cache can cause a memory leak: https://github.com/nerfstudio-project/nerfstudio/issues/3335
+        data = data.copy()
         data["image"] = data["image"].to(self.device)
 
         assert len(self.train_cameras.shape) == 1, "Assumes single batch dimension"
@@ -370,7 +378,8 @@ def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]:
         # Make sure to re-populate the unseen cameras list if we have exhausted it
         if len(self.eval_unseen_cameras) == 0:
             self.eval_unseen_cameras = [i for i in range(len(self.eval_dataset))]
-        data = deepcopy(self.cached_eval[image_idx])
+        data = self.cached_eval[image_idx]
+        data = data.copy()
         data["image"] = data["image"].to(self.device)
         assert len(self.eval_dataset.cameras.shape) == 1, "Assumes single batch dimension"
         camera = self.eval_dataset.cameras[image_idx : image_idx + 1].to(self.device)
@@ -386,6 +395,8 @@ def _undistort_image(
             "We doesn't support the 4th Brown parameter for image undistortion, "
             "Only k1, k2, k3, p1, p2 can be non-zero."
         )
+        # OpenCV expects the distortion parameters in the order (k1, k2, p1, p2, k3), so we need to reorder them.
+        # See https://docs.opencv.org/4.x/dc/dbb/tutorial_py_calibration.html
         distortion_params = np.array(
             [
                 distortion_params[0],
@@ -411,6 +422,9 @@ def _undistort_image(
         # crop the image and update the intrinsics accordingly
         x, y, w, h = roi
         image = image[y : y + h, x : x + w]
+        # update the principal point based on our cropped region of interest (ROI)
+        newK[0, 2] -= x
+        newK[1, 2] -= y
         if "depth_image" in data:
             data["depth_image"] = data["depth_image"][y : y + h, x : x + w]
         if "mask" in data:

diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py
@@ -276,6 +276,12 @@ def _generate_dataparser_outputs(self, split: str = "train", **kwargs):
             cy.append(float(frame["cy"]))
             height.append(int(frame["h"]))
             width.append(int(frame["w"]))
+            if any([k in frame and float(frame[k]) != 0.0 for k in ["k4", "k5", "k6"]]):
+                raise ValueError(
+                    "K4/K5/K6 is non-zero! Note that Nerfstudio camera model's K4 has different meaning than colmap "
+                    "OPENCV camera model K4. Nerfstudio's K4 is the 4-th order of radial distortion coefficient, while "
+                    "colmap/OPENCV's K4 is 4-th coefficient in fractional radial distortion model."
+                )
             distort.append(
                 camera_utils.get_distortion_params(
                     k1=float(frame["k1"]) if "k1" in frame else 0.0,

diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
@@ -229,10 +229,10 @@ def setup_optimizers(self) -> Optimizers:
     def train(self) -> None:
         """Train the model."""
         assert self.pipeline.datamanager.train_dataset is not None, "Missing DatsetInputs"
-
-        self.pipeline.datamanager.train_dataparser_outputs.save_dataparser_transform(
-            self.base_dir / "dataparser_transforms.json"
-        )
+        if hasattr(self.pipeline.datamanager, "train_dataparser_outputs"):
+            self.pipeline.datamanager.train_dataparser_outputs.save_dataparser_transform(
+                self.base_dir / "dataparser_transforms.json"
+            )
 
         self._init_viewer_state()
         with TimeWriter(writer, EventName.TOTAL_TRAIN_TIME):

diff --git a/nerfstudio/fields/nerfacto_field.py b/nerfstudio/fields/nerfacto_field.py
@@ -211,10 +211,15 @@ def get_density(self, ray_samples: RaySamples) -> Tuple[Tensor, Tensor]:
         # Make sure the tcnn gets inputs between 0 and 1.
         selector = ((positions > 0.0) & (positions < 1.0)).all(dim=-1)
         positions = positions * selector[..., None]
+
+        assert positions.numel() > 0, "positions is empty."
+
         self._sample_locations = positions
         if not self._sample_locations.requires_grad:
             self._sample_locations.requires_grad = True
         positions_flat = positions.view(-1, 3)
+
+        assert positions_flat.numel() > 0, "positions_flat is empty."
         h = self.mlp_base(positions_flat).view(*ray_samples.frustums.shape, -1)
         density_before_activation, base_mlp_out = torch.split(h, [1, self.geo_feat_dim], dim=-1)
         self._density_before_activation = density_before_activation

diff --git a/nerfstudio/models/instant_ngp.py b/nerfstudio/models/instant_ngp.py
@@ -19,7 +19,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, field
-from typing import Dict, List, Literal, Optional, Tuple, Type
+from typing import Dict, List, Literal, Optional, Tuple, Type, Union
 
 import nerfacc
 import torch
@@ -49,7 +49,7 @@ class InstantNGPModelConfig(ModelConfig):
     """Whether to create a scene collider to filter rays."""
     collider_params: Optional[Dict[str, float]] = None
     """Instant NGP doesn't use a collider."""
-    grid_resolution: int = 128
+    grid_resolution: Union[int, List[int]] = 128
     """Resolution of the grid used for the field."""
     grid_levels: int = 4
     """Levels of the grid used for the field."""
@@ -72,7 +72,10 @@ class InstantNGPModelConfig(ModelConfig):
     use_appearance_embedding: bool = False
     """Whether to use an appearance embedding."""
     background_color: Literal["random", "black", "white"] = "random"
-    """The color that is given to untrained areas."""
+    """
+    The color that is given to masked areas.
+    These areas are used to force the density in those regions to be zero.
+    """
     disable_scene_contraction: bool = False
     """Whether to disable scene contraction or not."""
 

diff --git a/nerfstudio/models/splatfacto.py b/nerfstudio/models/splatfacto.py
@@ -25,7 +25,6 @@
 
 import numpy as np
 import torch
-from gsplat.cuda_legacy._torch_impl import quat_to_rotmat
 
 try:
     from gsplat.rendering import rasterization
@@ -47,6 +46,26 @@
 from nerfstudio.utils.rich_utils import CONSOLE
 
 
+def quat_to_rotmat(quat):  # converts (..., 4) quaternions into (..., 3, 3) matrices
+    assert quat.shape[-1] == 4, quat.shape  # last axis must hold the four quaternion components
+    w, x, y, z = torch.unbind(quat, dim=-1)  # components are read in scalar-first (w, x, y, z) order
+    mat = torch.stack(  # NOTE(review): no normalization happens here; formula presumably expects unit quaternions
+        [
+            1 - 2 * (y**2 + z**2),  # row 0 starts here
+            2 * (x * y - w * z),
+            2 * (x * z + w * y),
+            2 * (x * y + w * z),  # row 1 starts here
+            1 - 2 * (x**2 + z**2),
+            2 * (y * z - w * x),
+            2 * (x * z - w * y),  # row 2 starts here
+            2 * (y * z + w * x),
+            1 - 2 * (x**2 + y**2),
+        ],
+        dim=-1,  # the nine entries are stacked along a new trailing axis
+    )
+    return mat.reshape(quat.shape[:-1] + (3, 3))  # fold the nine entries into 3x3, keeping the leading dims
+
+
 def random_quat_tensor(N):
     """
     Defines a random quaternion tensor of shape (N, 4)
@@ -295,7 +314,10 @@ def colors(self):
 
     @property
     def shs_0(self):
-        return self.features_dc
+        if self.config.sh_degree > 0:
+            return self.features_dc  # returned unchanged when higher SH degrees are enabled
+        else:  # sh_degree <= 0 — presumably features_dc holds pre-sigmoid RGB here; TODO confirm
+            return RGB2SH(torch.sigmoid(self.features_dc))  # sigmoid first, then convert with RGB2SH
 
     @property
     def shs_rest(self):

diff --git a/nerfstudio/process_data/images_to_nerfstudio_dataset.py b/nerfstudio/process_data/images_to_nerfstudio_dataset.py
@@ -74,6 +74,9 @@ def main(self) -> None:
                 same_dimensions=self.same_dimensions,
                 keep_image_dir=False,
             )
+            image_rename_map = dict(
+                (a.relative_to(self.data).as_posix(), b.name) for a, b in image_rename_map_paths.items()
+            )
             if self.eval_data is not None:
                 eval_image_rename_map_paths = process_data_utils.copy_images(
                     self.eval_data,
@@ -85,11 +88,11 @@ def main(self) -> None:
                     same_dimensions=self.same_dimensions,
                     keep_image_dir=True,
                 )
-                image_rename_map_paths.update(eval_image_rename_map_paths)
+                eval_image_rename_map = dict(
+                    (a.relative_to(self.eval_data).as_posix(), b.name) for a, b in eval_image_rename_map_paths.items()
+                )
+                image_rename_map.update(eval_image_rename_map)
 
-            image_rename_map = dict(
-                (a.relative_to(self.data).as_posix(), b.name) for a, b in image_rename_map_paths.items()
-            )
             num_frames = len(image_rename_map)
             summary_log.append(f"Starting with {num_frames} images")
 

diff --git a/nerfstudio/scripts/exporter.py b/nerfstudio/scripts/exporter.py
@@ -485,6 +485,9 @@ class ExportGaussianSplat(Exporter):
     """Rotation of the oriented bounding box. Expressed as RPY Euler angles in radians"""
     obb_scale: Optional[Tuple[float, float, float]] = None
     """Scale of the oriented bounding box along each axis."""
+    ply_color_mode: Literal["sh_coeffs", "rgb"] = "sh_coeffs"
+    """If "rgb", export colors as red/green/blue fields. Otherwise, export colors as
+    spherical harmonics coefficients."""
 
     @staticmethod
     def write_ply(
@@ -504,7 +507,7 @@ def write_ply(
         """
 
         # Ensure count matches the length of all tensors
-        if not all(len(tensor) == count for tensor in map_to_tensors.values()):
+        if not all(tensor.size == count for tensor in map_to_tensors.values()):
             raise ValueError("Count does not match the length of all tensors")
 
         # Type check for numpy arrays of type float or uint8 and non-empty
@@ -552,7 +555,6 @@ def main(self) -> None:
 
         filename = self.output_dir / "splat.ply"
 
-        count = 0
         map_to_tensors = OrderedDict()
 
         with torch.no_grad():
@@ -566,19 +568,28 @@ def main(self) -> None:
             map_to_tensors["ny"] = np.zeros(n, dtype=np.float32)
             map_to_tensors["nz"] = np.zeros(n, dtype=np.float32)
 
-            if model.config.sh_degree > 0:
+            if self.ply_color_mode == "rgb":
+                colors = torch.clamp(model.colors.clone(), 0.0, 1.0).data.cpu().numpy()
+                colors = (colors * 255).astype(np.uint8)
+                map_to_tensors["red"] = colors[:, 0]
+                map_to_tensors["green"] = colors[:, 1]
+                map_to_tensors["blue"] = colors[:, 2]
+            elif self.ply_color_mode == "sh_coeffs":
                 shs_0 = model.shs_0.contiguous().cpu().numpy()
                 for i in range(shs_0.shape[1]):
                     map_to_tensors[f"f_dc_{i}"] = shs_0[:, i, None]
 
-                # transpose(1, 2) was needed to match the sh order in Inria version
-                shs_rest = model.shs_rest.transpose(1, 2).contiguous().cpu().numpy()
-                shs_rest = shs_rest.reshape((n, -1))
-                for i in range(shs_rest.shape[-1]):
-                    map_to_tensors[f"f_rest_{i}"] = shs_rest[:, i, None]
-            else:
-                colors = torch.clamp(model.colors.clone(), 0.0, 1.0).data.cpu().numpy()
-                map_to_tensors["colors"] = (colors * 255).astype(np.uint8)
+            if model.config.sh_degree > 0:
+                if self.ply_color_mode == "rgb":
+                    CONSOLE.print(
+                        "Warning: model has higher level of spherical harmonics, ignoring them and only export rgb."
+                    )
+                elif self.ply_color_mode == "sh_coeffs":
+                    # transpose(1, 2) was needed to match the sh order in Inria version
+                    shs_rest = model.shs_rest.transpose(1, 2).contiguous().cpu().numpy()
+                    shs_rest = shs_rest.reshape((n, -1))
+                    for i in range(shs_rest.shape[-1]):
+                        map_to_tensors[f"f_rest_{i}"] = shs_rest[:, i, None]
 
             map_to_tensors["opacity"] = model.opacities.data.cpu().numpy()
 

diff --git a/nerfstudio/utils/writer.py b/nerfstudio/utils/writer.py
@@ -334,7 +334,7 @@ def write_config(self, name: str, config_dict: Dict[str, Any], step: int):
         """
         import wandb  # wandb is slow to import, so we only import it if we need it.
 
-        wandb.config.update(config_dict, allow_val_change=True)
+        wandb.config.update(config_dict, allow_val_change=True)  # type: ignore
 
 
 @decorate_all([check_main_thread])

diff --git a/nerfstudio/viewer/viewer.py b/nerfstudio/viewer/viewer.py
@@ -458,9 +458,9 @@ def init_scene(
             R = R @ vtf.SO3.from_x_radians(np.pi)
             camera_handle = self.viser_server.scene.add_camera_frustum(
                 name=f"/cameras/camera_{idx:05d}",
-                fov=float(2 * np.arctan(camera.cx / camera.fx[0])),
+                fov=float(2 * np.arctan((camera.cx / camera.fx[0]).cpu())),
                 scale=self.config.camera_frustum_scale,
-                aspect=float(camera.cx[0] / camera.cy[0]),
+                aspect=float((camera.cx[0] / camera.cy[0]).cpu()),
                 image=image_uint8,
                 wxyz=R.wxyz,
                 position=c2w[:3, 3] * VISER_NERFSTUDIO_SCALE_RATIO,

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "nerfstudio"
-version = "1.1.3"
+version = "1.1.4"
 description = "All-in-one repository for state-of-the-art NeRFs"
 readme = "README.md"
 license = { text="Apache 2.0"}
@@ -56,7 +56,7 @@ dependencies = [
     "torchvision>=0.14.1",
     "torchmetrics[image]>=1.0.1",
     "typing_extensions>=4.4.0",
-    "viser==0.2.3",
+    "viser==0.2.7",
     "nuscenes-devkit>=1.1.1",
     "wandb>=0.13.3",
     "xatlas",