From 194b5d4e46a5c026359d9207f6e88988ef2e92ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?= <jonas.kulhanek@live.com>
Date: Thu, 12 Sep 2024 15:43:36 +0200
Subject: [PATCH 1/6] Fix docker attestation fail on forked repo PRs (#3423)

---
 .github/workflows/build_docker_image.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build_docker_image.yml b/.github/workflows/build_docker_image.yml
index 97cb772820f..3dc60a32217 100644
--- a/.github/workflows/build_docker_image.yml
+++ b/.github/workflows/build_docker_image.yml
@@ -47,6 +47,7 @@ jobs:
       - name: Build and push Docker image
         id: push
         uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
+        if: ${{ success() }}  # explicit default: run only when the earlier steps succeeded
         with:
           context: .
           file: ./Dockerfile
@@ -55,8 +56,9 @@ jobs:
           labels: ${{ steps.meta.outputs.labels }}
       - name: Generate artifact attestation
         uses: actions/attest-build-provenance@v1
+        if: ${{ github.event_name != 'pull_request' }}
         with:
           subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
           subject-digest: ${{ steps.push.outputs.digest }}
-          push-to-registry: ${{ github.event_name != 'pull_request' }}
+          push-to-registry: true
 

From 7a0b8cbc89c0500984b016e3d73201f372ba8ef3 Mon Sep 17 00:00:00 2001
From: "J.Y." <132313008+jb-ye@users.noreply.github.com>
Date: Fri, 13 Sep 2024 07:10:37 -0400
Subject: [PATCH 2/6] use gsplat strategy interface to simplify splatfacto
 (#3376)

* use gsplat strategy interface to simplify splatfacto

* change splatfacto-big densify_grad_thresh to 0.0005

* upgrade gsplat version

* filter low opacities when exporting

* remove num_sh_bases

---------

Co-authored-by: Jianbo Ye <jianboye@amazon.com>
Co-authored-by: Justin Kerr <justin.g.kerr@gmail.com>
---
 nerfstudio/configs/method_configs.py |   3 +-
 nerfstudio/models/splatfacto.py      | 321 +++++----------------------
 nerfstudio/scripts/exporter.py       |  10 +-
 pyproject.toml                       |   2 +-
 4 files changed, 63 insertions(+), 273 deletions(-)

diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py
index e77ab130c46..c9bc9f77035 100644
--- a/nerfstudio/configs/method_configs.py
+++ b/nerfstudio/configs/method_configs.py
@@ -663,8 +663,7 @@
         ),
         model=SplatfactoModelConfig(
             cull_alpha_thresh=0.005,
-            continue_cull_post_densification=False,
-            densify_grad_thresh=0.0006,
+            densify_grad_thresh=0.0005,
         ),
     ),
     optimizers={
diff --git a/nerfstudio/models/splatfacto.py b/nerfstudio/models/splatfacto.py
index 61d9eda19f9..28b8f0a1de3 100644
--- a/nerfstudio/models/splatfacto.py
+++ b/nerfstudio/models/splatfacto.py
@@ -25,12 +25,12 @@
 
 import numpy as np
 import torch
+from gsplat.strategy import DefaultStrategy
 
 try:
     from gsplat.rendering import rasterization
 except ImportError:
     print("Please install gsplat>=1.0.0")
-from gsplat.cuda_legacy._wrapper import num_sh_bases
 from pytorch_msssim import SSIM
 from torch.nn import Parameter
 
@@ -46,6 +46,14 @@
 from nerfstudio.utils.rich_utils import CONSOLE
 
 
+def num_sh_bases(degree: int) -> int:
+    """
+    Return the number of spherical harmonic bases, (degree + 1) ** 2, for a degree of at most 4.
+    """
+    assert degree <= 4, "We don't support degree greater than 4."
+    return (degree + 1) ** 2
+
+
 def quat_to_rotmat(quat):
     assert quat.shape[-1] == 4, quat.shape
     w, x, y, z = torch.unbind(quat, dim=-1)
@@ -154,12 +162,12 @@ class SplatfactoModelConfig(ModelConfig):
     """threshold of opacity for culling gaussians. One can set it to a lower value (e.g. 0.005) for higher quality."""
     cull_scale_thresh: float = 0.5
     """threshold of scale for culling huge gaussians"""
-    continue_cull_post_densification: bool = True
-    """If True, continue to cull gaussians post refinement"""
     reset_alpha_every: int = 30
     """Every this many refinement steps, reset the alpha"""
     densify_grad_thresh: float = 0.0008
     """threshold of positional gradient norm for densifying gaussians"""
+    use_absgrad: bool = True
+    """Whether to use absgrad to densify gaussians. If False, grad is used instead of absgrad."""
     densify_size_thresh: float = 0.01
     """below this size, gaussians are *duplicated*, otherwise split"""
     n_split_samples: int = 2
@@ -235,8 +243,6 @@ def populate_modules(self):
             means = torch.nn.Parameter(self.seed_points[0])  # (Location, Color)
         else:
             means = torch.nn.Parameter((torch.rand((self.config.num_random, 3)) - 0.5) * self.config.random_scale)
-        self.xys_grad_norm = None
-        self.max_2Dsize = None
         distances, _ = self.k_nearest_sklearn(means.data, 3)
         distances = torch.from_numpy(distances)
         # find the average of the three nearest neighbors for each point and use that as the scale
@@ -305,6 +311,26 @@ def populate_modules(self):
                 grid_W=self.config.grid_shape[2],
             )
 
+        # Strategy for GS densification
+        self.strategy = DefaultStrategy(
+            prune_opa=self.config.cull_alpha_thresh,
+            grow_grad2d=self.config.densify_grad_thresh,
+            grow_scale3d=self.config.densify_size_thresh,
+            grow_scale2d=self.config.split_screen_size,
+            prune_scale3d=self.config.cull_scale_thresh,
+            prune_scale2d=self.config.cull_screen_size,
+            refine_scale2d_stop_iter=self.config.stop_screen_size_at,
+            refine_start_iter=self.config.warmup_length,
+            refine_stop_iter=self.config.stop_split_at,
+            reset_every=self.config.reset_alpha_every * self.config.refine_every,
+            refine_every=self.config.refine_every,
+            pause_refine_after_reset=self.num_train_data + self.config.refine_every,
+            absgrad=self.config.use_absgrad,
+            revised_opacity=False,
+            verbose=True,
+        )
+        self.strategy_state = self.strategy.initialize_state(scene_scale=1.0)
+
     @property
     def colors(self):
         if self.config.sh_degree > 0:
@@ -386,87 +412,6 @@ def k_nearest_sklearn(self, x: torch.Tensor, k: int):
         # Exclude the point itself from the result and return
         return distances[:, 1:].astype(np.float32), indices[:, 1:].astype(np.float32)
 
-    def remove_from_optim(self, optimizer, deleted_mask, new_params):
-        """removes the deleted_mask from the optimizer provided"""
-        assert len(new_params) == 1
-        # assert isinstance(optimizer, torch.optim.Adam), "Only works with Adam"
-
-        param = optimizer.param_groups[0]["params"][0]
-        param_state = optimizer.state[param]
-        del optimizer.state[param]
-
-        # Modify the state directly without deleting and reassigning.
-        if "exp_avg" in param_state:
-            param_state["exp_avg"] = param_state["exp_avg"][~deleted_mask]
-            param_state["exp_avg_sq"] = param_state["exp_avg_sq"][~deleted_mask]
-
-        # Update the parameter in the optimizer's param group.
-        del optimizer.param_groups[0]["params"][0]
-        del optimizer.param_groups[0]["params"]
-        optimizer.param_groups[0]["params"] = new_params
-        optimizer.state[new_params[0]] = param_state
-
-    def remove_from_all_optim(self, optimizers, deleted_mask):
-        param_groups = self.get_gaussian_param_groups()
-        for group, param in param_groups.items():
-            self.remove_from_optim(optimizers.optimizers[group], deleted_mask, param)
-        torch.cuda.empty_cache()
-
-    def dup_in_optim(self, optimizer, dup_mask, new_params, n=2):
-        """adds the parameters to the optimizer"""
-        param = optimizer.param_groups[0]["params"][0]
-        param_state = optimizer.state[param]
-        if "exp_avg" in param_state:
-            repeat_dims = (n,) + tuple(1 for _ in range(param_state["exp_avg"].dim() - 1))
-            param_state["exp_avg"] = torch.cat(
-                [
-                    param_state["exp_avg"],
-                    torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat(*repeat_dims),
-                ],
-                dim=0,
-            )
-            param_state["exp_avg_sq"] = torch.cat(
-                [
-                    param_state["exp_avg_sq"],
-                    torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat(*repeat_dims),
-                ],
-                dim=0,
-            )
-        del optimizer.state[param]
-        optimizer.state[new_params[0]] = param_state
-        optimizer.param_groups[0]["params"] = new_params
-        del param
-
-    def dup_in_all_optim(self, optimizers, dup_mask, n):
-        param_groups = self.get_gaussian_param_groups()
-        for group, param in param_groups.items():
-            self.dup_in_optim(optimizers.optimizers[group], dup_mask, param, n)
-
-    def after_train(self, step: int):
-        assert step == self.step
-        # to save some training time, we no longer need to update those stats post refinement
-        if self.step >= self.config.stop_split_at:
-            return
-        with torch.no_grad():
-            # keep track of a moving average of grad norms
-            visible_mask = (self.radii > 0).flatten()
-            grads = self.xys.absgrad[0][visible_mask].norm(dim=-1)  # type: ignore
-            # print(f"grad norm min {grads.min().item()} max {grads.max().item()} mean {grads.mean().item()} size {grads.shape}")
-            if self.xys_grad_norm is None:
-                self.xys_grad_norm = torch.zeros(self.num_points, device=self.device, dtype=torch.float32)
-                self.vis_counts = torch.ones(self.num_points, device=self.device, dtype=torch.float32)
-            assert self.vis_counts is not None
-            self.vis_counts[visible_mask] += 1
-            self.xys_grad_norm[visible_mask] += grads
-            # update the max screen size, as a ratio of number of pixels
-            if self.max_2Dsize is None:
-                self.max_2Dsize = torch.zeros_like(self.radii, dtype=torch.float32)
-            newradii = self.radii.detach()[visible_mask]
-            self.max_2Dsize[visible_mask] = torch.maximum(
-                self.max_2Dsize[visible_mask],
-                newradii / float(max(self.last_size[0], self.last_size[1])),
-            )
-
     def set_crop(self, crop_box: Optional[OrientedBox]):
         self.crop_box = crop_box
 
@@ -474,199 +419,39 @@ def set_background(self, background_color: torch.Tensor):
         assert background_color.shape == (3,)
         self.background_color = background_color
 
-    def refinement_after(self, optimizers: Optimizers, step):
+    def step_post_backward(self, step):
         assert step == self.step
-        if self.step <= self.config.warmup_length:
-            return
-        with torch.no_grad():
-            # Offset all the opacity reset logic by refine_every so that we don't
-            # save checkpoints right when the opacity is reset (saves every 2k)
-            # then cull
-            # only split/cull if we've seen every image since opacity reset
-            reset_interval = self.config.reset_alpha_every * self.config.refine_every
-            do_densification = (
-                self.step < self.config.stop_split_at
-                and self.step % reset_interval > self.num_train_data + self.config.refine_every
-            )
-            if do_densification:
-                # then we densify
-                assert self.xys_grad_norm is not None and self.vis_counts is not None and self.max_2Dsize is not None
-                avg_grad_norm = (self.xys_grad_norm / self.vis_counts) * 0.5 * max(self.last_size[0], self.last_size[1])
-                high_grads = (avg_grad_norm > self.config.densify_grad_thresh).squeeze()
-                splits = (self.scales.exp().max(dim=-1).values > self.config.densify_size_thresh).squeeze()
-                splits &= high_grads
-                if self.step < self.config.stop_screen_size_at:
-                    splits |= (self.max_2Dsize > self.config.split_screen_size).squeeze()
-                nsamps = self.config.n_split_samples
-                split_params = self.split_gaussians(splits, nsamps)
-
-                dups = (self.scales.exp().max(dim=-1).values <= self.config.densify_size_thresh).squeeze()
-                dups &= high_grads
-                dup_params = self.dup_gaussians(dups)
-                for name, param in self.gauss_params.items():
-                    self.gauss_params[name] = torch.nn.Parameter(
-                        torch.cat([param.detach(), split_params[name], dup_params[name]], dim=0)
-                    )
-                # append zeros to the max_2Dsize tensor
-                self.max_2Dsize = torch.cat(
-                    [
-                        self.max_2Dsize,
-                        torch.zeros_like(split_params["scales"][:, 0]),
-                        torch.zeros_like(dup_params["scales"][:, 0]),
-                    ],
-                    dim=0,
-                )
-
-                split_idcs = torch.where(splits)[0]
-                self.dup_in_all_optim(optimizers, split_idcs, nsamps)
-
-                dup_idcs = torch.where(dups)[0]
-                self.dup_in_all_optim(optimizers, dup_idcs, 1)
-
-                # After a guassian is split into two new gaussians, the original one should also be pruned.
-                splits_mask = torch.cat(
-                    (
-                        splits,
-                        torch.zeros(
-                            nsamps * splits.sum() + dups.sum(),
-                            device=self.device,
-                            dtype=torch.bool,
-                        ),
-                    )
-                )
-
-                deleted_mask = self.cull_gaussians(splits_mask)
-            elif self.step >= self.config.stop_split_at and self.config.continue_cull_post_densification:
-                deleted_mask = self.cull_gaussians()
-            else:
-                # if we donot allow culling post refinement, no more gaussians will be pruned.
-                deleted_mask = None
-
-            if deleted_mask is not None:
-                self.remove_from_all_optim(optimizers, deleted_mask)
-
-            if self.step < self.config.stop_split_at and self.step % reset_interval == self.config.refine_every:
-                # Reset value is set to be twice of the cull_alpha_thresh
-                reset_value = self.config.cull_alpha_thresh * 2.0
-                self.opacities.data = torch.clamp(
-                    self.opacities.data,
-                    max=torch.logit(torch.tensor(reset_value, device=self.device)).item(),
-                )
-                # reset the exp of optimizer
-                optim = optimizers.optimizers["opacities"]
-                param = optim.param_groups[0]["params"][0]
-                param_state = optim.state[param]
-                param_state["exp_avg"] = torch.zeros_like(param_state["exp_avg"])
-                param_state["exp_avg_sq"] = torch.zeros_like(param_state["exp_avg_sq"])
-
-            self.xys_grad_norm = None
-            self.vis_counts = None
-            self.max_2Dsize = None
-
-    def cull_gaussians(self, extra_cull_mask: Optional[torch.Tensor] = None):
-        """
-        This function deletes gaussians with under a certain opacity threshold
-        extra_cull_mask: a mask indicates extra gaussians to cull besides existing culling criterion
-        """
-        n_bef = self.num_points
-        # cull transparent ones
-        culls = (torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh).squeeze()
-        below_alpha_count = torch.sum(culls).item()
-        toobigs_count = 0
-        if extra_cull_mask is not None:
-            culls = culls | extra_cull_mask
-        if self.step > self.config.refine_every * self.config.reset_alpha_every:
-            # cull huge ones
-            toobigs = (torch.exp(self.scales).max(dim=-1).values > self.config.cull_scale_thresh).squeeze()
-            if self.step < self.config.stop_screen_size_at:
-                # cull big screen space
-                if self.max_2Dsize is not None:
-                    toobigs = toobigs | (self.max_2Dsize > self.config.cull_screen_size).squeeze()
-            culls = culls | toobigs
-            toobigs_count = torch.sum(toobigs).item()
-        for name, param in self.gauss_params.items():
-            self.gauss_params[name] = torch.nn.Parameter(param[~culls])
-
-        CONSOLE.log(
-            f"Culled {n_bef - self.num_points} gaussians "
-            f"({below_alpha_count} below alpha thresh, {toobigs_count} too bigs, {self.num_points} remaining)"
+        self.strategy.step_post_backward(
+            params=self.gauss_params,
+            optimizers=self.optimizers,
+            state=self.strategy_state,
+            step=self.step,
+            info=self.info,
+            packed=False,
         )
 
-        return culls
-
-    def split_gaussians(self, split_mask, samps):
-        """
-        This function splits gaussians that are too large
-        """
-        n_splits = split_mask.sum().item()
-        CONSOLE.log(f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}")
-        centered_samples = torch.randn((samps * n_splits, 3), device=self.device)  # Nx3 of axis-aligned scales
-        scaled_samples = (
-            torch.exp(self.scales[split_mask].repeat(samps, 1)) * centered_samples
-        )  # how these scales are rotated
-        quats = self.quats[split_mask] / self.quats[split_mask].norm(dim=-1, keepdim=True)  # normalize them first
-        rots = quat_to_rotmat(quats.repeat(samps, 1))  # how these scales are rotated
-        rotated_samples = torch.bmm(rots, scaled_samples[..., None]).squeeze()
-        new_means = rotated_samples + self.means[split_mask].repeat(samps, 1)
-        # step 2, sample new colors
-        new_features_dc = self.features_dc[split_mask].repeat(samps, 1)
-        new_features_rest = self.features_rest[split_mask].repeat(samps, 1, 1)
-        # step 3, sample new opacities
-        new_opacities = self.opacities[split_mask].repeat(samps, 1)
-        # step 4, sample new scales
-        size_fac = 1.6
-        new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat(samps, 1)
-        self.scales[split_mask] = torch.log(torch.exp(self.scales[split_mask]) / size_fac)
-        # step 5, sample new quats
-        new_quats = self.quats[split_mask].repeat(samps, 1)
-        out = {
-            "means": new_means,
-            "features_dc": new_features_dc,
-            "features_rest": new_features_rest,
-            "opacities": new_opacities,
-            "scales": new_scales,
-            "quats": new_quats,
-        }
-        for name, param in self.gauss_params.items():
-            if name not in out:
-                out[name] = param[split_mask].repeat(samps, 1)
-        return out
-
-    def dup_gaussians(self, dup_mask):
-        """
-        This function duplicates gaussians that are too small
-        """
-        n_dups = dup_mask.sum().item()
-        CONSOLE.log(f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}")
-        new_dups = {}
-        for name, param in self.gauss_params.items():
-            new_dups[name] = param[dup_mask]
-        return new_dups
-
     def get_training_callbacks(
         self, training_callback_attributes: TrainingCallbackAttributes
     ) -> List[TrainingCallback]:
         cbs = []
-        cbs.append(TrainingCallback([TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb))
-        # The order of these matters
         cbs.append(
             TrainingCallback(
-                [TrainingCallbackLocation.AFTER_TRAIN_ITERATION],
-                self.after_train,
+                [TrainingCallbackLocation.BEFORE_TRAIN_ITERATION],
+                self.step_cb,
+                args=[training_callback_attributes.optimizers],
             )
         )
         cbs.append(
             TrainingCallback(
                 [TrainingCallbackLocation.AFTER_TRAIN_ITERATION],
-                self.refinement_after,
-                update_every_num_iters=self.config.refine_every,
-                args=[training_callback_attributes.optimizers],
+                self.step_post_backward,
             )
         )
         return cbs
 
-    def step_cb(self, step):
+    def step_cb(self, optimizers: Optimizers, step):
         self.step = step
+        self.optimizers = optimizers.optimizers
 
     def get_gaussian_param_groups(self) -> Dict[str, List[Parameter]]:
         # Here we explicitly use the means, scales as parameters so that the user can override this function and
@@ -788,7 +573,6 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]:
 
         colors_crop = torch.cat((features_dc_crop[:, None, :], features_rest_crop), dim=1)
 
-        BLOCK_WIDTH = 16  # this controls the tile size of rasterization, 16 is a good default
         camera_scale_fac = self._get_downscale_factor()
         camera.rescale_output_resolution(1 / camera_scale_fac)
         viewmat = get_viewmat(optimized_camera_to_world)
@@ -812,9 +596,9 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]:
             colors_crop = torch.sigmoid(colors_crop).squeeze(1)  # [N, 1, 3] -> [N, 3]
             sh_degree_to_use = None
 
-        render, alpha, info = rasterization(
+        render, alpha, self.info = rasterization(
             means=means_crop,
-            quats=quats_crop / quats_crop.norm(dim=-1, keepdim=True),
+            quats=quats_crop,  # rasterization does normalization internally
             scales=torch.exp(scales_crop),
             opacities=torch.sigmoid(opacities_crop).squeeze(-1),
             colors=colors_crop,
@@ -822,22 +606,21 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]:
             Ks=K,  # [1, 3, 3]
             width=W,
             height=H,
-            tile_size=BLOCK_WIDTH,
             packed=False,
             near_plane=0.01,
             far_plane=1e10,
             render_mode=render_mode,
             sh_degree=sh_degree_to_use,
             sparse_grad=False,
-            absgrad=True,
+            absgrad=self.strategy.absgrad,
             rasterize_mode=self.config.rasterize_mode,
             # set some threshold to disregrad small gaussians for faster rendering.
             # radius_clip=3.0,
         )
-        if self.training and info["means2d"].requires_grad:
-            info["means2d"].retain_grad()
-        self.xys = info["means2d"]  # [1, N, 2]
-        self.radii = info["radii"][0]  # [N]
+        if self.training:
+            self.strategy.step_pre_backward(
+                self.gauss_params, self.optimizers, self.strategy_state, self.step, self.info
+            )
         alpha = alpha[:, ...]
 
         background = self._get_background_color()
diff --git a/nerfstudio/scripts/exporter.py b/nerfstudio/scripts/exporter.py
index 970b5a9c7a3..a76f6629f87 100644
--- a/nerfstudio/scripts/exporter.py
+++ b/nerfstudio/scripts/exporter.py
@@ -620,9 +620,17 @@ def main(self) -> None:
             n_after = np.sum(select)
             if n_after < n_before:
                 CONSOLE.print(f"{n_before - n_after} NaN/Inf elements in {k}")
+        nan_count = n - np.sum(select)  # rows already deselected by the NaN/Inf checks above
+
+        # filter gaussians that have opacities < 1/255, because they are skipped in cuda rasterization
+        low_opacity_gaussians = (map_to_tensors["opacity"]).squeeze(axis=-1) < -5.5373  # logit(1/255)
+        lowopa_count = np.sum(low_opacity_gaussians)
+        select[low_opacity_gaussians] = 0
 
         if np.sum(select) < n:
-            CONSOLE.print(f"values have NaN/Inf in map_to_tensors, only export {np.sum(select)}/{n}")
+            CONSOLE.print(
+                f"{nan_count} Gaussians have NaN/Inf and {lowopa_count} have low opacity, only export {np.sum(select)}/{n}"
+            )
             for k, t in map_to_tensors.items():
                 map_to_tensors[k] = map_to_tensors[k][select]
             count = np.sum(select)
diff --git a/pyproject.toml b/pyproject.toml
index 027ef13744f..8614d92b4b4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,7 +62,7 @@ dependencies = [
     "xatlas",
     "trimesh>=3.20.2",
     "timm==0.6.7",
-    "gsplat==1.0.0",
+    "gsplat==1.3.0",
     "pytorch-msssim",
     "pathos",
     "packaging",

From 106d427e804d65066d516dda9c2699f9bd9a9a8a Mon Sep 17 00:00:00 2001
From: Maximum Wilder-Smith <maxwildersmith@gmail.com>
Date: Mon, 16 Sep 2024 19:57:21 +0200
Subject: [PATCH 3/6] Minor update to generate splat files in inference mode
 (#3432)

changed gaussian splat export to inference mode
---
 nerfstudio/scripts/exporter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nerfstudio/scripts/exporter.py b/nerfstudio/scripts/exporter.py
index a76f6629f87..ca019850f9c 100644
--- a/nerfstudio/scripts/exporter.py
+++ b/nerfstudio/scripts/exporter.py
@@ -547,7 +547,7 @@ def main(self) -> None:
         if not self.output_dir.exists():
             self.output_dir.mkdir(parents=True)
 
-        _, pipeline, _, _ = eval_setup(self.load_config)
+        _, pipeline, _, _ = eval_setup(self.load_config, test_mode="inference")
 
         assert isinstance(pipeline.model, SplatfactoModel)
 

From 27b8e14798a49ab763d978bcb732916896618fc9 Mon Sep 17 00:00:00 2001
From: Nicholas Pfaff <53228351+nepfaff@users.noreply.github.com>
Date: Mon, 16 Sep 2024 16:44:31 -0400
Subject: [PATCH 4/6] Fix crop obb masking in pointcloud generation (#3433)

---
 nerfstudio/exporter/exporter_utils.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/nerfstudio/exporter/exporter_utils.py b/nerfstudio/exporter/exporter_utils.py
index b87078bc9c1..9435a3b919e 100644
--- a/nerfstudio/exporter/exporter_utils.py
+++ b/nerfstudio/exporter/exporter_utils.py
@@ -165,11 +165,11 @@ def generate_point_cloud(
 
             if crop_obb is not None:
                 mask = crop_obb.within(point)
-            point = point[mask]
-            rgb = rgb[mask]
-            view_direction = view_direction[mask]
-            if normal is not None:
-                normal = normal[mask]
+                point = point[mask]
+                rgb = rgb[mask]
+                view_direction = view_direction[mask]
+                if normal is not None:
+                    normal = normal[mask]
 
             points.append(point)
             rgbs.append(rgb)

From 3e638da63571d391ef36c5906b1532555c1ca7db Mon Sep 17 00:00:00 2001
From: Anthony Tafoya <87080582+Anthony-Tafoya@users.noreply.github.com>
Date: Tue, 17 Sep 2024 08:32:09 -0700
Subject: [PATCH 5/6] Adding Random Seed for Frame Processing (#3416)

* Adding Random Seed for Frame Processing

* Added Unit Tests

* Updating Unit Tests for Ffmpeg

* Make Logs More Detailed

---------

Co-authored-by: Anthony-Tafoya <anthonytafoya@berkeley.edu>
Co-authored-by: J.Y. <132313008+jb-ye@users.noreply.github.com>
---
 nerfstudio/process_data/process_data_utils.py |  16 ++-
 .../video_to_nerfstudio_dataset.py            |   9 +-
 tests/process_data/test_misc.py               | 114 +++++++++++++++++-
 3 files changed, 133 insertions(+), 6 deletions(-)

diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py
index b5b2391a090..3c9013abe32 100644
--- a/nerfstudio/process_data/process_data_utils.py
+++ b/nerfstudio/process_data/process_data_utils.py
@@ -15,6 +15,7 @@
 """Helper utils for processing data into the nerfstudio format."""
 
 import math
+import random
 import re
 import shutil
 import sys
@@ -126,6 +127,7 @@ def convert_video_to_images(
     verbose: bool = False,
     image_prefix: str = "frame_",
     keep_image_dir: bool = False,
+    random_seed: Optional[int] = None,
 ) -> Tuple[List[str], int]:
     """Converts a video into a sequence of images.
 
@@ -138,6 +140,7 @@ def convert_video_to_images(
         verbose: If True, logs the output of the command.
         image_prefix: Prefix to use for the image filenames.
         keep_image_dir: If True, don't delete the output directory if it already exists.
+        random_seed: If set, the seed used to choose the frames of the video
     Returns:
         A tuple containing summary of the conversion and the number of extracted frames.
     """
@@ -178,8 +181,6 @@ def convert_video_to_images(
             start_y = crop_factor[0]
             crop_cmd = f"crop=w=iw*{width}:h=ih*{height}:x=iw*{start_x}:y=ih*{start_y},"
 
-        spacing = num_frames // num_frames_target
-
         downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}[out{i}]" for i in range(num_downscales + 1)]
         downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
         downscale_paths = [downscale_dirs[i] / f"{image_prefix}%05d.png" for i in range(num_downscales + 1)]
@@ -196,8 +197,15 @@ def convert_video_to_images(
 
         ffmpeg_cmd += " -vsync vfr"
 
-        if spacing > 1:
-            CONSOLE.print("Number of frames to extract:", math.ceil(num_frames / spacing))
+        # Evenly distribute frame selection if random seed does not exist
+        spacing = num_frames // num_frames_target
+        if random_seed is not None:  # compare against None so that a seed of 0 is honoured
+            random.seed(random_seed)
+            frame_indices = sorted(random.sample(range(num_frames), min(num_frames_target, num_frames)))
+            select_cmd = "select='" + "+".join([f"eq(n\\,{idx})" for idx in frame_indices]) + "',setpts=N/TB,"
+            CONSOLE.print(f"Extracting {len(frame_indices)} frames using seed {random_seed} random selection.")
+        elif spacing > 1:
+            CONSOLE.print(f"Extracting {math.ceil(num_frames / spacing)} frames in evenly spaced intervals")
             select_cmd = f"thumbnail={spacing},setpts=N/TB,"
         else:
             CONSOLE.print("[bold red]Can't satisfy requested number of frames. Extracting all frames.")
diff --git a/nerfstudio/process_data/video_to_nerfstudio_dataset.py b/nerfstudio/process_data/video_to_nerfstudio_dataset.py
index af17e7d6b6b..51a8a0b761f 100644
--- a/nerfstudio/process_data/video_to_nerfstudio_dataset.py
+++ b/nerfstudio/process_data/video_to_nerfstudio_dataset.py
@@ -16,7 +16,7 @@
 
 import shutil
 from dataclasses import dataclass
-from typing import Literal
+from typing import Literal, Optional
 
 from nerfstudio.process_data import equirect_utils, process_data_utils
 from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ColmapConverterToNerfstudioDataset
@@ -41,6 +41,10 @@ class VideoToNerfstudioDataset(ColmapConverterToNerfstudioDataset):
     """Feature matching method to use. Vocab tree is recommended for a balance of speed
     and accuracy. Exhaustive is slower but more accurate. Sequential is faster but
     should only be used for videos."""
+    random_seed: Optional[int] = None
+    """Random seed to select video frames for training set"""
+    eval_random_seed: Optional[int] = None
+    """Random seed to select video frames for eval set"""
 
     def main(self) -> None:
         """Process video into a nerfstudio dataset."""
@@ -59,6 +63,7 @@ def main(self) -> None:
                 num_downscales=0,
                 crop_factor=(0.0, 0.0, 0.0, 0.0),
                 verbose=self.verbose,
+                random_seed=self.random_seed,
             )
         else:
             # If we're not dealing with equirects we can downscale in one step.
@@ -71,6 +76,7 @@ def main(self) -> None:
                 verbose=self.verbose,
                 image_prefix="frame_train_" if self.eval_data is not None else "frame_",
                 keep_image_dir=False,
+                random_seed=self.random_seed,
             )
             if self.eval_data is not None:
                 summary_log_eval, num_extracted_frames_eval = process_data_utils.convert_video_to_images(
@@ -82,6 +88,7 @@ def main(self) -> None:
                     verbose=self.verbose,
                     image_prefix="frame_eval_",
                     keep_image_dir=True,
+                    random_seed=self.eval_random_seed,
                 )
                 summary_log += summary_log_eval
                 num_extracted_frames += num_extracted_frames_eval
diff --git a/tests/process_data/test_misc.py b/tests/process_data/test_misc.py
index 1b2404b517d..23fc3453ca7 100644
--- a/tests/process_data/test_misc.py
+++ b/tests/process_data/test_misc.py
@@ -2,13 +2,21 @@
 Test misc data utils
 """
 
+import os
+import re
+from pathlib import Path
+from unittest import mock
+
+import cv2
 import numpy as np
+from PIL import Image
 from pyquaternion import Quaternion
 from scipy.spatial.transform import Rotation
 
 # TODO(1480) use pycolmap instead of colmap_parsing_utils
 # import pycolmap
 from nerfstudio.data.utils.colmap_parsing_utils import qvec2rotmat
+from nerfstudio.process_data.process_data_utils import convert_video_to_images
 
 
 def test_scalar_first_scalar_last_quaternions():
@@ -39,7 +47,7 @@ def test_scalar_first_scalar_last_quaternions():
 
     # Expected Rotation matrix
     # fmt: off
-    R_expected = np.array( 
+    R_expected = np.array(
         [
             [ 0.81379768, -0.44096961,  0.37852231],
             [ 0.46984631,  0.88256412,  0.01802831],
@@ -61,3 +69,107 @@ def test_scalar_first_scalar_last_quaternions():
     # R = pycolmap.qvec_to_rotmat(wxyz)
     R = qvec2rotmat(wxyz)
     assert np.allclose(R, R_expected)
+
+
+def test_process_video_conversion_with_seed(tmp_path: Path):
+    """
+    Test convert_video_to_images on a mock video: equal seeds must select equal frames, different seeds must not.
+    """
+
+    # Inner functions needed for the unit tests
+    def create_mock_video(video_path: Path, frame_dir: Path, num_frames=10, frame_rate=1):
+        """Creates a mock video from a series of frames using OpenCV."""
+
+        first_frame = cv2.imread(str(frame_dir / "frame_0.png"))
+        height, width, _ = first_frame.shape
+        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+        out = cv2.VideoWriter(str(video_path), fourcc, frame_rate, (width, height))
+
+        for i in range(num_frames):
+            frame_path = frame_dir / f"frame_{i}.png"
+            frame = cv2.imread(str(frame_path))
+            out.write(frame)
+        out.release()
+
+    def extract_frame_numbers(ffmpeg_command: str):
+        """Extracts the frame numbers from the ffmpeg command"""
+
+        pattern = r"eq\(n\\,(\d+)\)"
+        matches = re.findall(pattern, ffmpeg_command)
+        frame_numbers = [int(match) for match in matches]
+        return frame_numbers
+
+    # Create a video directory with path video
+    video_dir = tmp_path / "video"
+    video_dir.mkdir(exist_ok=True)
+
+    # Set parameters for mock video
+    video_path = video_dir / "mock_video.mp4"
+    num_frames = 10
+    frame_height = 150
+    frame_width = 100
+    frame_rate = 1
+
+    # Create the mock video
+    for i in range(num_frames):
+        img = Image.new("RGB", (frame_width, frame_height), (0, 0, 0))
+        img.save(video_dir / f"frame_{i}.png")
+    create_mock_video(video_path, video_dir, num_frames=num_frames, frame_rate=frame_rate)
+
+    # Call convert_video_to_images
+    image_output_dir = tmp_path / "extracted_images"
+    num_frames_target = 5
+    num_downscales = 1
+    crop_factor = (0.0, 0.0, 0.0, 0.0)
+
+    # Mock missing COLMAP and ffmpeg in the dev env; patch.dict restores the real PATH on exit
+    mocked_bin = tmp_path / "mocked_bin"
+    mocked_bin.mkdir()
+    (mocked_bin / "colmap").touch(mode=0o777)
+    (mocked_bin / "ffmpeg").touch(mode=0o777)
+    env_patch = mock.patch.dict(os.environ, {"PATH": f"{mocked_bin}:" + os.environ.get("PATH", "")})
+    # Return value of 10 for the get_num_frames_in_video run_command call
+    run_patch = mock.patch("nerfstudio.process_data.process_data_utils.run_command", return_value="10")
+    with env_patch, run_patch as mock_run_func:
+        summary_log, extracted_frame_count = convert_video_to_images(
+            video_path=video_path,
+            image_dir=image_output_dir,
+            num_frames_target=num_frames_target,
+            num_downscales=num_downscales,
+            crop_factor=crop_factor,
+            verbose=False,
+            random_seed=42,
+        )
+        assert mock_run_func.call_count == 2, f"Expected 2 calls, but got {mock_run_func.call_count}"
+        first_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
+        assert len(first_frames) == 5, f"Expected 5 frames, but got {len(first_frames)}"
+
+        summary_log, extracted_frame_count = convert_video_to_images(
+            video_path=video_path,
+            image_dir=image_output_dir,
+            num_frames_target=num_frames_target,
+            num_downscales=num_downscales,
+            crop_factor=crop_factor,
+            verbose=False,
+            random_seed=42,
+        )
+
+        assert mock_run_func.call_count == 4, f"Expected 4 total calls, but got {mock_run_func.call_count}"
+        second_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
+        assert len(second_frames) == 5, f"Expected 5 frames, but got {len(second_frames)}"
+        assert first_frames == second_frames
+
+        summary_log, extracted_frame_count = convert_video_to_images(
+            video_path=video_path,
+            image_dir=image_output_dir,
+            num_frames_target=num_frames_target,
+            num_downscales=num_downscales,
+            crop_factor=crop_factor,
+            verbose=False,
+            random_seed=52,
+        )
+
+        assert mock_run_func.call_count == 6, f"Expected 6 total calls, but got {mock_run_func.call_count}"
+        third_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
+        assert len(third_frames) == 5, f"Expected 5 frames, but got {len(third_frames)}"
+        assert first_frames != third_frames

From 516fd7c9ac73e28db6a522df57d7b22e7b0d6756 Mon Sep 17 00:00:00 2001
From: Aayush Gupta <19579293+aayushg55@users.noreply.github.com>
Date: Tue, 17 Sep 2024 12:31:42 -0700
Subject: [PATCH 6/6] Fix splatfacto crash in eval when using viewer (#3430)

---
 nerfstudio/engine/trainer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
index 644d62a5b55..cac31cf504b 100644
--- a/nerfstudio/engine/trainer.py
+++ b/nerfstudio/engine/trainer.py
@@ -300,7 +300,8 @@ def train(self) -> None:
 
                 # Do not perform evaluation if there are no validation images
                 if self.pipeline.datamanager.eval_dataset:
-                    self.eval_iteration(step)
+                    with self.train_lock:
+                        self.eval_iteration(step)
 
                 if step_check(step, self.config.steps_per_save):
                     self.save_checkpoint(step)