Skip to content

Commit

Permalink
Merge branch 'main' into jkulhanek/docker
Browse files Browse the repository at this point in the history
  • Loading branch information
brentyi authored Sep 5, 2024
2 parents eefe098 + 96b7fe2 commit 9d9d598
Show file tree
Hide file tree
Showing 13 changed files with 106 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.1.13
rev: v0.6.2
hooks:
- id: ruff
types_or: [ python, pyi, jupyter ]
Expand Down
8 changes: 8 additions & 0 deletions docs/reference/cli/ns_train.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ By default the nerfstudio dataparser is used. If you would like to use a differe
```bash
ns-train {method} {dataparser} --help
```

For example, to set the `eval_mode` of the nerfstudio dataparser to `filename` when training a `splatfacto` model with the `ns-train` CLI, run:

```bash
ns-train splatfacto [method args] nerfstudio-data --eval-mode filename
```

Note that the dataparser and its arguments must be passed after the training method and its arguments.
22 changes: 18 additions & 4 deletions nerfstudio/data/datamanagers/full_images_datamanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def fixed_indices_eval_dataloader(self) -> List[Tuple[Cameras, Dict]]:
Pretends to be the dataloader for evaluation, it returns a list of (camera, data) tuples
"""
image_indices = [i for i in range(len(self.eval_dataset))]
data = deepcopy(self.cached_eval)
data = [d.copy() for d in self.cached_eval]
_cameras = deepcopy(self.eval_dataset.cameras).to(self.device)
cameras = []
for i in image_indices:
Expand All @@ -332,8 +332,13 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]:
return {}

def get_train_rays_per_batch(self):
# TODO: fix this to be the resolution of the last image rendered
return 800 * 800
"""Returns resolution of the image returned from datamanager."""
if len(self.cached_train) != 0:
h = self.cached_train[0]["image"].shape[0]
w = self.cached_train[0]["image"].shape[1]
return h * w
else:
return 800 * 800

def next_train(self, step: int) -> Tuple[Cameras, Dict]:
"""Returns the next training batch
Expand All @@ -345,6 +350,9 @@ def next_train(self, step: int) -> Tuple[Cameras, Dict]:
self.train_unseen_cameras = self.sample_train_cameras()

data = self.cached_train[image_idx]
# Work on a shallow copy so that the cached dictionary itself is never mutated.
# Mutating the cached entry can cause a memory leak: https://github.com/nerfstudio-project/nerfstudio/issues/3335
data = data.copy()
data["image"] = data["image"].to(self.device)

assert len(self.train_cameras.shape) == 1, "Assumes single batch dimension"
Expand All @@ -370,7 +378,8 @@ def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]:
# Make sure to re-populate the unseen cameras list if we have exhausted it
if len(self.eval_unseen_cameras) == 0:
self.eval_unseen_cameras = [i for i in range(len(self.eval_dataset))]
data = deepcopy(self.cached_eval[image_idx])
data = self.cached_eval[image_idx]
data = data.copy()
data["image"] = data["image"].to(self.device)
assert len(self.eval_dataset.cameras.shape) == 1, "Assumes single batch dimension"
camera = self.eval_dataset.cameras[image_idx : image_idx + 1].to(self.device)
Expand All @@ -386,6 +395,8 @@ def _undistort_image(
"We doesn't support the 4th Brown parameter for image undistortion, "
"Only k1, k2, k3, p1, p2 can be non-zero."
)
# because OpenCV expects the order of distortion parameters to be (k1, k2, p1, p2, k3), we need to reorder them
# see https://docs.opencv.org/4.x/dc/dbb/tutorial_py_calibration.html
distortion_params = np.array(
[
distortion_params[0],
Expand All @@ -411,6 +422,9 @@ def _undistort_image(
# crop the image and update the intrinsics accordingly
x, y, w, h = roi
image = image[y : y + h, x : x + w]
# update the principal point based on our cropped region of interest (ROI)
newK[0, 2] -= x
newK[1, 2] -= y
if "depth_image" in data:
data["depth_image"] = data["depth_image"][y : y + h, x : x + w]
if "mask" in data:
Expand Down
6 changes: 6 additions & 0 deletions nerfstudio/data/dataparsers/colmap_dataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ def _generate_dataparser_outputs(self, split: str = "train", **kwargs):
cy.append(float(frame["cy"]))
height.append(int(frame["h"]))
width.append(int(frame["w"]))
if any([k in frame and float(frame[k]) != 0.0 for k in ["k4", "k5", "k6"]]):
raise ValueError(
"K4/K5/K6 is non-zero! Note that Nerfstudio camera model's K4 has different meaning than colmap "
"OPENCV camera model K4. Nerfstudio's K4 is the 4-th order of radial distortion coefficient, while "
"colmap/OPENCV's K4 is 4-th coefficient in fractional radial distortion model."
)
distort.append(
camera_utils.get_distortion_params(
k1=float(frame["k1"]) if "k1" in frame else 0.0,
Expand Down
8 changes: 4 additions & 4 deletions nerfstudio/engine/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,10 @@ def setup_optimizers(self) -> Optimizers:
def train(self) -> None:
"""Train the model."""
assert self.pipeline.datamanager.train_dataset is not None, "Missing DatsetInputs"

self.pipeline.datamanager.train_dataparser_outputs.save_dataparser_transform(
self.base_dir / "dataparser_transforms.json"
)
if hasattr(self.pipeline.datamanager, "train_dataparser_outputs"):
self.pipeline.datamanager.train_dataparser_outputs.save_dataparser_transform(
self.base_dir / "dataparser_transforms.json"
)

self._init_viewer_state()
with TimeWriter(writer, EventName.TOTAL_TRAIN_TIME):
Expand Down
5 changes: 5 additions & 0 deletions nerfstudio/fields/nerfacto_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,15 @@ def get_density(self, ray_samples: RaySamples) -> Tuple[Tensor, Tensor]:
# Make sure the tcnn gets inputs between 0 and 1.
selector = ((positions > 0.0) & (positions < 1.0)).all(dim=-1)
positions = positions * selector[..., None]

assert positions.numel() > 0, "positions is empty."

self._sample_locations = positions
if not self._sample_locations.requires_grad:
self._sample_locations.requires_grad = True
positions_flat = positions.view(-1, 3)

assert positions_flat.numel() > 0, "positions_flat is empty."
h = self.mlp_base(positions_flat).view(*ray_samples.frustums.shape, -1)
density_before_activation, base_mlp_out = torch.split(h, [1, self.geo_feat_dim], dim=-1)
self._density_before_activation = density_before_activation
Expand Down
9 changes: 6 additions & 3 deletions nerfstudio/models/instant_ngp.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Dict, List, Literal, Optional, Tuple, Type
from typing import Dict, List, Literal, Optional, Tuple, Type, Union

import nerfacc
import torch
Expand Down Expand Up @@ -49,7 +49,7 @@ class InstantNGPModelConfig(ModelConfig):
"""Whether to create a scene collider to filter rays."""
collider_params: Optional[Dict[str, float]] = None
"""Instant NGP doesn't use a collider."""
grid_resolution: int = 128
grid_resolution: Union[int, List[int]] = 128
"""Resolution of the grid used for the field."""
grid_levels: int = 4
"""Levels of the grid used for the field."""
Expand All @@ -72,7 +72,10 @@ class InstantNGPModelConfig(ModelConfig):
use_appearance_embedding: bool = False
"""Whether to use an appearance embedding."""
background_color: Literal["random", "black", "white"] = "random"
"""The color that is given to untrained areas."""
"""
The color that is given to masked areas.
These areas are used to force the density in those regions to be zero.
"""
disable_scene_contraction: bool = False
"""Whether to disable scene contraction or not."""

Expand Down
26 changes: 24 additions & 2 deletions nerfstudio/models/splatfacto.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

import numpy as np
import torch
from gsplat.cuda_legacy._torch_impl import quat_to_rotmat

try:
from gsplat.rendering import rasterization
Expand All @@ -47,6 +46,26 @@
from nerfstudio.utils.rich_utils import CONSOLE


def quat_to_rotmat(quat):
    """Convert quaternions in (w, x, y, z) order into 3x3 matrices.

    The input is used as given: no normalization is performed here, so the
    result is a proper rotation matrix only when the quaternions already
    have unit norm.

    Args:
        quat: Tensor whose last dimension has size 4, holding (w, x, y, z).

    Returns:
        Tensor of shape ``quat.shape[:-1] + (3, 3)``.
    """
    assert quat.shape[-1] == 4, quat.shape
    w, x, y, z = torch.unbind(quat, dim=-1)

    # Build the matrix one row at a time, then stack the rows along a new
    # second-to-last axis to obtain the trailing (3, 3) layout.
    row_0 = torch.stack(
        [1 - 2 * (y**2 + z**2), 2 * (x * y - w * z), 2 * (x * z + w * y)],
        dim=-1,
    )
    row_1 = torch.stack(
        [2 * (x * y + w * z), 1 - 2 * (x**2 + z**2), 2 * (y * z - w * x)],
        dim=-1,
    )
    row_2 = torch.stack(
        [2 * (x * z - w * y), 2 * (y * z + w * x), 1 - 2 * (x**2 + y**2)],
        dim=-1,
    )
    return torch.stack([row_0, row_1, row_2], dim=-2)


def random_quat_tensor(N):
"""
Defines a random quaternion tensor of shape (N, 4)
Expand Down Expand Up @@ -295,7 +314,10 @@ def colors(self):

@property
def shs_0(self):
return self.features_dc
if self.config.sh_degree > 0:
return self.features_dc
else:
return RGB2SH(torch.sigmoid(self.features_dc))

@property
def shs_rest(self):
Expand Down
11 changes: 7 additions & 4 deletions nerfstudio/process_data/images_to_nerfstudio_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ def main(self) -> None:
same_dimensions=self.same_dimensions,
keep_image_dir=False,
)
image_rename_map = dict(
(a.relative_to(self.data).as_posix(), b.name) for a, b in image_rename_map_paths.items()
)
if self.eval_data is not None:
eval_image_rename_map_paths = process_data_utils.copy_images(
self.eval_data,
Expand All @@ -85,11 +88,11 @@ def main(self) -> None:
same_dimensions=self.same_dimensions,
keep_image_dir=True,
)
image_rename_map_paths.update(eval_image_rename_map_paths)
eval_image_rename_map = dict(
(a.relative_to(self.eval_data).as_posix(), b.name) for a, b in eval_image_rename_map_paths.items()
)
image_rename_map.update(eval_image_rename_map)

image_rename_map = dict(
(a.relative_to(self.data).as_posix(), b.name) for a, b in image_rename_map_paths.items()
)
num_frames = len(image_rename_map)
summary_log.append(f"Starting with {num_frames} images")

Expand Down
33 changes: 22 additions & 11 deletions nerfstudio/scripts/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,9 @@ class ExportGaussianSplat(Exporter):
"""Rotation of the oriented bounding box. Expressed as RPY Euler angles in radians"""
obb_scale: Optional[Tuple[float, float, float]] = None
"""Scale of the oriented bounding box along each axis."""
ply_color_mode: Literal["sh_coeffs", "rgb"] = "sh_coeffs"
"""If "rgb", export colors as red/green/blue fields. Otherwise, export colors as
spherical harmonics coefficients."""

@staticmethod
def write_ply(
Expand All @@ -504,7 +507,7 @@ def write_ply(
"""

# Ensure count matches the length of all tensors
if not all(len(tensor) == count for tensor in map_to_tensors.values()):
if not all(tensor.size == count for tensor in map_to_tensors.values()):
raise ValueError("Count does not match the length of all tensors")

# Type check for numpy arrays of type float or uint8 and non-empty
Expand Down Expand Up @@ -552,7 +555,6 @@ def main(self) -> None:

filename = self.output_dir / "splat.ply"

count = 0
map_to_tensors = OrderedDict()

with torch.no_grad():
Expand All @@ -566,19 +568,28 @@ def main(self) -> None:
map_to_tensors["ny"] = np.zeros(n, dtype=np.float32)
map_to_tensors["nz"] = np.zeros(n, dtype=np.float32)

if model.config.sh_degree > 0:
if self.ply_color_mode == "rgb":
colors = torch.clamp(model.colors.clone(), 0.0, 1.0).data.cpu().numpy()
colors = (colors * 255).astype(np.uint8)
map_to_tensors["red"] = colors[:, 0]
map_to_tensors["green"] = colors[:, 1]
map_to_tensors["blue"] = colors[:, 2]
elif self.ply_color_mode == "sh_coeffs":
shs_0 = model.shs_0.contiguous().cpu().numpy()
for i in range(shs_0.shape[1]):
map_to_tensors[f"f_dc_{i}"] = shs_0[:, i, None]

# transpose(1, 2) was needed to match the sh order in Inria version
shs_rest = model.shs_rest.transpose(1, 2).contiguous().cpu().numpy()
shs_rest = shs_rest.reshape((n, -1))
for i in range(shs_rest.shape[-1]):
map_to_tensors[f"f_rest_{i}"] = shs_rest[:, i, None]
else:
colors = torch.clamp(model.colors.clone(), 0.0, 1.0).data.cpu().numpy()
map_to_tensors["colors"] = (colors * 255).astype(np.uint8)
if model.config.sh_degree > 0:
if self.ply_color_mode == "rgb":
CONSOLE.print(
"Warning: model has higher level of spherical harmonics, ignoring them and only export rgb."
)
elif self.ply_color_mode == "sh_coeffs":
# transpose(1, 2) was needed to match the sh order in Inria version
shs_rest = model.shs_rest.transpose(1, 2).contiguous().cpu().numpy()
shs_rest = shs_rest.reshape((n, -1))
for i in range(shs_rest.shape[-1]):
map_to_tensors[f"f_rest_{i}"] = shs_rest[:, i, None]

map_to_tensors["opacity"] = model.opacities.data.cpu().numpy()

Expand Down
2 changes: 1 addition & 1 deletion nerfstudio/utils/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def write_config(self, name: str, config_dict: Dict[str, Any], step: int):
"""
import wandb # wandb is slow to import, so we only import it if we need it.

wandb.config.update(config_dict, allow_val_change=True)
wandb.config.update(config_dict, allow_val_change=True) # type: ignore


@decorate_all([check_main_thread])
Expand Down
4 changes: 2 additions & 2 deletions nerfstudio/viewer/viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,9 +458,9 @@ def init_scene(
R = R @ vtf.SO3.from_x_radians(np.pi)
camera_handle = self.viser_server.scene.add_camera_frustum(
name=f"/cameras/camera_{idx:05d}",
fov=float(2 * np.arctan(camera.cx / camera.fx[0])),
fov=float(2 * np.arctan((camera.cx / camera.fx[0]).cpu())),
scale=self.config.camera_frustum_scale,
aspect=float(camera.cx[0] / camera.cy[0]),
aspect=float((camera.cx[0] / camera.cy[0]).cpu()),
image=image_uint8,
wxyz=R.wxyz,
position=c2w[:3, 3] * VISER_NERFSTUDIO_SCALE_RATIO,
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "nerfstudio"
version = "1.1.3"
version = "1.1.4"
description = "All-in-one repository for state-of-the-art NeRFs"
readme = "README.md"
license = { text="Apache 2.0"}
Expand Down Expand Up @@ -56,7 +56,7 @@ dependencies = [
"torchvision>=0.14.1",
"torchmetrics[image]>=1.0.1",
"typing_extensions>=4.4.0",
"viser==0.2.3",
"viser==0.2.7",
"nuscenes-devkit>=1.1.1",
"wandb>=0.13.3",
"xatlas",
Expand Down

0 comments on commit 9d9d598

Please sign in to comment.