diff --git a/.github/workflows/build_docker_image.yml b/.github/workflows/build_docker_image.yml
index 97cb772820f..3dc60a32217 100644
--- a/.github/workflows/build_docker_image.yml
+++ b/.github/workflows/build_docker_image.yml
@@ -47,6 +47,7 @@ jobs:
       - name: Build and push Docker image
         id: push
         uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
+        if:
         with:
           context: .
           file: ./Dockerfile
@@ -55,8 +56,9 @@ jobs:
           labels: ${{ steps.meta.outputs.labels }}
       - name: Generate artifact attestation
         uses: actions/attest-build-provenance@v1
+        if: ${{ github.event_name != 'pull_request' }}
         with:
           subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
           subject-digest: ${{ steps.push.outputs.digest }}
-          push-to-registry: ${{ github.event_name != 'pull_request' }}
+          push-to-registry: true
diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py
index e77ab130c46..c9bc9f77035 100644
--- a/nerfstudio/configs/method_configs.py
+++ b/nerfstudio/configs/method_configs.py
@@ -663,8 +663,7 @@
         ),
         model=SplatfactoModelConfig(
             cull_alpha_thresh=0.005,
-            continue_cull_post_densification=False,
-            densify_grad_thresh=0.0006,
+            densify_grad_thresh=0.0005,
         ),
     ),
     optimizers={
diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
index 644d62a5b55..cac31cf504b 100644
--- a/nerfstudio/engine/trainer.py
+++ b/nerfstudio/engine/trainer.py
@@ -300,7 +300,8 @@ def train(self) -> None:
 
                 # Do not perform evaluation if there are no validation images
                 if self.pipeline.datamanager.eval_dataset:
-                    self.eval_iteration(step)
+                    with self.train_lock:
+                        self.eval_iteration(step)
 
                 if step_check(step, self.config.steps_per_save):
                     self.save_checkpoint(step)
diff --git a/nerfstudio/exporter/exporter_utils.py b/nerfstudio/exporter/exporter_utils.py
index b87078bc9c1..9435a3b919e 100644
--- a/nerfstudio/exporter/exporter_utils.py
+++ b/nerfstudio/exporter/exporter_utils.py
@@ -165,11 +165,11 @@ def generate_point_cloud(
 
             if crop_obb is not None:
                 mask = crop_obb.within(point)
-            point = point[mask]
-            rgb = rgb[mask]
-            view_direction = view_direction[mask]
-            if normal is not None:
-                normal = normal[mask]
+                point = point[mask]
+                rgb = rgb[mask]
+                view_direction = view_direction[mask]
+                if normal is not None:
+                    normal = normal[mask]
 
             points.append(point)
             rgbs.append(rgb)
diff --git a/nerfstudio/models/splatfacto.py b/nerfstudio/models/splatfacto.py
index 61d9eda19f9..28b8f0a1de3 100644
--- a/nerfstudio/models/splatfacto.py
+++ b/nerfstudio/models/splatfacto.py
@@ -25,12 +25,12 @@
 
 import numpy as np
 import torch
+from gsplat.strategy import DefaultStrategy
 
 try:
     from gsplat.rendering import rasterization
 except ImportError:
     print("Please install gsplat>=1.0.0")
-from gsplat.cuda_legacy._wrapper import num_sh_bases
 from pytorch_msssim import SSIM
 from torch.nn import Parameter
 
@@ -46,6 +46,14 @@
 from nerfstudio.utils.rich_utils import CONSOLE
 
 
+def num_sh_bases(degree: int) -> int:
+    """
+    Returns the number of spherical harmonic bases for a given degree.
+    """
+    assert degree <= 4, "We don't support degree greater than 4."
+    return (degree + 1) ** 2
+
+
 def quat_to_rotmat(quat):
     assert quat.shape[-1] == 4, quat.shape
     w, x, y, z = torch.unbind(quat, dim=-1)
@@ -154,12 +162,12 @@ class SplatfactoModelConfig(ModelConfig):
     """threshold of opacity for culling gaussians. One can set it to a lower value (e.g. 0.005) for higher quality."""
     cull_scale_thresh: float = 0.5
     """threshold of scale for culling huge gaussians"""
-    continue_cull_post_densification: bool = True
-    """If True, continue to cull gaussians post refinement"""
     reset_alpha_every: int = 30
     """Every this many refinement steps, reset the alpha"""
     densify_grad_thresh: float = 0.0008
     """threshold of positional gradient norm for densifying gaussians"""
+    use_absgrad: bool = True
+    """Whether to use absgrad to densify gaussians, if False, will use grad rather than absgrad"""
     densify_size_thresh: float = 0.01
     """below this size, gaussians are *duplicated*, otherwise split"""
     n_split_samples: int = 2
@@ -235,8 +243,6 @@ def populate_modules(self):
             means = torch.nn.Parameter(self.seed_points[0])  # (Location, Color)
         else:
             means = torch.nn.Parameter((torch.rand((self.config.num_random, 3)) - 0.5) * self.config.random_scale)
-        self.xys_grad_norm = None
-        self.max_2Dsize = None
         distances, _ = self.k_nearest_sklearn(means.data, 3)
         distances = torch.from_numpy(distances)
         # find the average of the three nearest neighbors for each point and use that as the scale
@@ -305,6 +311,26 @@
                 grid_W=self.config.grid_shape[2],
             )
 
+        # Strategy for GS densification
+        self.strategy = DefaultStrategy(
+            prune_opa=self.config.cull_alpha_thresh,
+            grow_grad2d=self.config.densify_grad_thresh,
+            grow_scale3d=self.config.densify_size_thresh,
+            grow_scale2d=self.config.split_screen_size,
+            prune_scale3d=self.config.cull_scale_thresh,
+            prune_scale2d=self.config.cull_screen_size,
+            refine_scale2d_stop_iter=self.config.stop_screen_size_at,
+            refine_start_iter=self.config.warmup_length,
+            refine_stop_iter=self.config.stop_split_at,
+            reset_every=self.config.reset_alpha_every * self.config.refine_every,
+            refine_every=self.config.refine_every,
+            pause_refine_after_reset=self.num_train_data + self.config.refine_every,
+            absgrad=self.config.use_absgrad,
+            revised_opacity=False,
+            verbose=True,
+        )
+        self.strategy_state = self.strategy.initialize_state(scene_scale=1.0)
+
     @property
     def colors(self):
         if self.config.sh_degree > 0:
@@ -386,87 +412,6 @@ def k_nearest_sklearn(self, x: torch.Tensor, k: int):
         # Exclude the point itself from the result and return
         return distances[:, 1:].astype(np.float32), indices[:, 1:].astype(np.float32)
 
-    def remove_from_optim(self, optimizer, deleted_mask, new_params):
-        """removes the deleted_mask from the optimizer provided"""
-        assert len(new_params) == 1
-        # assert isinstance(optimizer, torch.optim.Adam), "Only works with Adam"
-
-        param = optimizer.param_groups[0]["params"][0]
-        param_state = optimizer.state[param]
-        del optimizer.state[param]
-
-        # Modify the state directly without deleting and reassigning.
-        if "exp_avg" in param_state:
-            param_state["exp_avg"] = param_state["exp_avg"][~deleted_mask]
-            param_state["exp_avg_sq"] = param_state["exp_avg_sq"][~deleted_mask]
-
-        # Update the parameter in the optimizer's param group.
-        del optimizer.param_groups[0]["params"][0]
-        del optimizer.param_groups[0]["params"]
-        optimizer.param_groups[0]["params"] = new_params
-        optimizer.state[new_params[0]] = param_state
-
-    def remove_from_all_optim(self, optimizers, deleted_mask):
-        param_groups = self.get_gaussian_param_groups()
-        for group, param in param_groups.items():
-            self.remove_from_optim(optimizers.optimizers[group], deleted_mask, param)
-        torch.cuda.empty_cache()
-
-    def dup_in_optim(self, optimizer, dup_mask, new_params, n=2):
-        """adds the parameters to the optimizer"""
-        param = optimizer.param_groups[0]["params"][0]
-        param_state = optimizer.state[param]
-        if "exp_avg" in param_state:
-            repeat_dims = (n,) + tuple(1 for _ in range(param_state["exp_avg"].dim() - 1))
-            param_state["exp_avg"] = torch.cat(
-                [
-                    param_state["exp_avg"],
-                    torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat(*repeat_dims),
-                ],
-                dim=0,
-            )
-            param_state["exp_avg_sq"] = torch.cat(
-                [
-                    param_state["exp_avg_sq"],
-                    torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat(*repeat_dims),
-                ],
-                dim=0,
-            )
-        del optimizer.state[param]
-        optimizer.state[new_params[0]] = param_state
-        optimizer.param_groups[0]["params"] = new_params
-        del param
-
-    def dup_in_all_optim(self, optimizers, dup_mask, n):
-        param_groups = self.get_gaussian_param_groups()
-        for group, param in param_groups.items():
-            self.dup_in_optim(optimizers.optimizers[group], dup_mask, param, n)
-
-    def after_train(self, step: int):
-        assert step == self.step
-        # to save some training time, we no longer need to update those stats post refinement
-        if self.step >= self.config.stop_split_at:
-            return
-        with torch.no_grad():
-            # keep track of a moving average of grad norms
-            visible_mask = (self.radii > 0).flatten()
-            grads = self.xys.absgrad[0][visible_mask].norm(dim=-1)  # type: ignore
-            # print(f"grad norm min {grads.min().item()} max {grads.max().item()} mean {grads.mean().item()} size {grads.shape}")
-            if self.xys_grad_norm is None:
-                self.xys_grad_norm = torch.zeros(self.num_points, device=self.device, dtype=torch.float32)
-                self.vis_counts = torch.ones(self.num_points, device=self.device, dtype=torch.float32)
-            assert self.vis_counts is not None
-            self.vis_counts[visible_mask] += 1
-            self.xys_grad_norm[visible_mask] += grads
-            # update the max screen size, as a ratio of number of pixels
-            if self.max_2Dsize is None:
-                self.max_2Dsize = torch.zeros_like(self.radii, dtype=torch.float32)
-            newradii = self.radii.detach()[visible_mask]
-            self.max_2Dsize[visible_mask] = torch.maximum(
-                self.max_2Dsize[visible_mask],
-                newradii / float(max(self.last_size[0], self.last_size[1])),
-            )
-
     def set_crop(self, crop_box: Optional[OrientedBox]):
         self.crop_box = crop_box
@@ -474,199 +419,39 @@ def set_background(self, background_color: torch.Tensor):
         assert background_color.shape == (3,)
         self.background_color = background_color
 
-    def refinement_after(self, optimizers: Optimizers, step):
+    def step_post_backward(self, step):
         assert step == self.step
-        if self.step <= self.config.warmup_length:
-            return
-        with torch.no_grad():
-            # Offset all the opacity reset logic by refine_every so that we don't
-            # save checkpoints right when the opacity is reset (saves every 2k)
-            # then cull
-            # only split/cull if we've seen every image since opacity reset
-            reset_interval = self.config.reset_alpha_every * self.config.refine_every
-            do_densification = (
-                self.step < self.config.stop_split_at
-                and self.step % reset_interval > self.num_train_data + self.config.refine_every
-            )
-            if do_densification:
-                # then we densify
-                assert self.xys_grad_norm is not None and self.vis_counts is not None and self.max_2Dsize is not None
-                avg_grad_norm = (self.xys_grad_norm / self.vis_counts) * 0.5 * max(self.last_size[0], self.last_size[1])
-                high_grads = (avg_grad_norm > self.config.densify_grad_thresh).squeeze()
-                splits = (self.scales.exp().max(dim=-1).values > self.config.densify_size_thresh).squeeze()
-                splits &= high_grads
-                if self.step < self.config.stop_screen_size_at:
-                    splits |= (self.max_2Dsize > self.config.split_screen_size).squeeze()
-                nsamps = self.config.n_split_samples
-                split_params = self.split_gaussians(splits, nsamps)
-
-                dups = (self.scales.exp().max(dim=-1).values <= self.config.densify_size_thresh).squeeze()
-                dups &= high_grads
-                dup_params = self.dup_gaussians(dups)
-                for name, param in self.gauss_params.items():
-                    self.gauss_params[name] = torch.nn.Parameter(
-                        torch.cat([param.detach(), split_params[name], dup_params[name]], dim=0)
-                    )
-                # append zeros to the max_2Dsize tensor
-                self.max_2Dsize = torch.cat(
-                    [
-                        self.max_2Dsize,
-                        torch.zeros_like(split_params["scales"][:, 0]),
-                        torch.zeros_like(dup_params["scales"][:, 0]),
-                    ],
-                    dim=0,
-                )
-
-                split_idcs = torch.where(splits)[0]
-                self.dup_in_all_optim(optimizers, split_idcs, nsamps)
-
-                dup_idcs = torch.where(dups)[0]
-                self.dup_in_all_optim(optimizers, dup_idcs, 1)
-
-                # After a guassian is split into two new gaussians, the original one should also be pruned.
-                splits_mask = torch.cat(
-                    (
-                        splits,
-                        torch.zeros(
-                            nsamps * splits.sum() + dups.sum(),
-                            device=self.device,
-                            dtype=torch.bool,
-                        ),
-                    )
-                )
-
-                deleted_mask = self.cull_gaussians(splits_mask)
-            elif self.step >= self.config.stop_split_at and self.config.continue_cull_post_densification:
-                deleted_mask = self.cull_gaussians()
-            else:
-                # if we donot allow culling post refinement, no more gaussians will be pruned.
-                deleted_mask = None
-
-            if deleted_mask is not None:
-                self.remove_from_all_optim(optimizers, deleted_mask)
-
-            if self.step < self.config.stop_split_at and self.step % reset_interval == self.config.refine_every:
-                # Reset value is set to be twice of the cull_alpha_thresh
-                reset_value = self.config.cull_alpha_thresh * 2.0
-                self.opacities.data = torch.clamp(
-                    self.opacities.data,
-                    max=torch.logit(torch.tensor(reset_value, device=self.device)).item(),
-                )
-                # reset the exp of optimizer
-                optim = optimizers.optimizers["opacities"]
-                param = optim.param_groups[0]["params"][0]
-                param_state = optim.state[param]
-                param_state["exp_avg"] = torch.zeros_like(param_state["exp_avg"])
-                param_state["exp_avg_sq"] = torch.zeros_like(param_state["exp_avg_sq"])
-
-            self.xys_grad_norm = None
-            self.vis_counts = None
-            self.max_2Dsize = None
-
-    def cull_gaussians(self, extra_cull_mask: Optional[torch.Tensor] = None):
-        """
-        This function deletes gaussians with under a certain opacity threshold
-        extra_cull_mask: a mask indicates extra gaussians to cull besides existing culling criterion
-        """
-        n_bef = self.num_points
-        # cull transparent ones
-        culls = (torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh).squeeze()
-        below_alpha_count = torch.sum(culls).item()
-        toobigs_count = 0
-        if extra_cull_mask is not None:
-            culls = culls | extra_cull_mask
-        if self.step > self.config.refine_every * self.config.reset_alpha_every:
-            # cull huge ones
-            toobigs = (torch.exp(self.scales).max(dim=-1).values > self.config.cull_scale_thresh).squeeze()
-            if self.step < self.config.stop_screen_size_at:
-                # cull big screen space
-                if self.max_2Dsize is not None:
-                    toobigs = toobigs | (self.max_2Dsize > self.config.cull_screen_size).squeeze()
-            culls = culls | toobigs
-            toobigs_count = torch.sum(toobigs).item()
-        for name, param in self.gauss_params.items():
-            self.gauss_params[name] = torch.nn.Parameter(param[~culls])
-
-        CONSOLE.log(
-            f"Culled {n_bef - self.num_points} gaussians "
-            f"({below_alpha_count} below alpha thresh, {toobigs_count} too bigs, {self.num_points} remaining)"
+        self.strategy.step_post_backward(
+            params=self.gauss_params,
+            optimizers=self.optimizers,
+            state=self.strategy_state,
+            step=self.step,
+            info=self.info,
+            packed=False,
         )
-        return culls
-
-    def split_gaussians(self, split_mask, samps):
-        """
-        This function splits gaussians that are too large
-        """
-        n_splits = split_mask.sum().item()
-        CONSOLE.log(f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}")
-        centered_samples = torch.randn((samps * n_splits, 3), device=self.device)  # Nx3 of axis-aligned scales
-        scaled_samples = (
-            torch.exp(self.scales[split_mask].repeat(samps, 1)) * centered_samples
-        )  # how these scales are rotated
-        quats = self.quats[split_mask] / self.quats[split_mask].norm(dim=-1, keepdim=True)  # normalize them first
-        rots = quat_to_rotmat(quats.repeat(samps, 1))  # how these scales are rotated
-        rotated_samples = torch.bmm(rots, scaled_samples[..., None]).squeeze()
-        new_means = rotated_samples + self.means[split_mask].repeat(samps, 1)
-        # step 2, sample new colors
-        new_features_dc = self.features_dc[split_mask].repeat(samps, 1)
-        new_features_rest = self.features_rest[split_mask].repeat(samps, 1, 1)
-        # step 3, sample new opacities
-        new_opacities = self.opacities[split_mask].repeat(samps, 1)
-        # step 4, sample new scales
-        size_fac = 1.6
-        new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat(samps, 1)
-        self.scales[split_mask] = torch.log(torch.exp(self.scales[split_mask]) / size_fac)
-        # step 5, sample new quats
-        new_quats = self.quats[split_mask].repeat(samps, 1)
-        out = {
-            "means": new_means,
-            "features_dc": new_features_dc,
-            "features_rest": new_features_rest,
-            "opacities": new_opacities,
-            "scales": new_scales,
-            "quats": new_quats,
-        }
-        for name, param in self.gauss_params.items():
-            if name not in out:
-                out[name] = param[split_mask].repeat(samps, 1)
-        return out
-
-    def dup_gaussians(self, dup_mask):
-        """
-        This function duplicates gaussians that are too small
-        """
-        n_dups = dup_mask.sum().item()
-        CONSOLE.log(f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}")
-        new_dups = {}
-        for name, param in self.gauss_params.items():
-            new_dups[name] = param[dup_mask]
-        return new_dups
-
     def get_training_callbacks(
         self, training_callback_attributes: TrainingCallbackAttributes
     ) -> List[TrainingCallback]:
         cbs = []
-        cbs.append(TrainingCallback([TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb))
-        # The order of these matters
         cbs.append(
             TrainingCallback(
-                [TrainingCallbackLocation.AFTER_TRAIN_ITERATION],
-                self.after_train,
+                [TrainingCallbackLocation.BEFORE_TRAIN_ITERATION],
+                self.step_cb,
+                args=[training_callback_attributes.optimizers],
             )
         )
         cbs.append(
             TrainingCallback(
                 [TrainingCallbackLocation.AFTER_TRAIN_ITERATION],
-                self.refinement_after,
-                update_every_num_iters=self.config.refine_every,
-                args=[training_callback_attributes.optimizers],
+                self.step_post_backward,
             )
         )
         return cbs
 
-    def step_cb(self, step):
+    def step_cb(self, optimizers: Optimizers, step):
         self.step = step
+        self.optimizers = optimizers.optimizers
 
     def get_gaussian_param_groups(self) -> Dict[str, List[Parameter]]:
         # Here we explicitly use the means, scales as parameters so that the user can override this function and
@@ -788,7 +573,6 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]:
 
             colors_crop = torch.cat((features_dc_crop[:, None, :], features_rest_crop), dim=1)
 
-        BLOCK_WIDTH = 16  # this controls the tile size of rasterization, 16 is a good default
         camera_scale_fac = self._get_downscale_factor()
         camera.rescale_output_resolution(1 / camera_scale_fac)
         viewmat = get_viewmat(optimized_camera_to_world)
@@ -812,9 +596,9 @@
             colors_crop = torch.sigmoid(colors_crop).squeeze(1)  # [N, 1, 3] -> [N, 3]
             sh_degree_to_use = None
 
-        render, alpha, info = rasterization(
+        render, alpha, self.info = rasterization(
            means=means_crop,
-            quats=quats_crop / quats_crop.norm(dim=-1, keepdim=True),
+            quats=quats_crop,  # rasterization does normalization internally
             scales=torch.exp(scales_crop),
             opacities=torch.sigmoid(opacities_crop).squeeze(-1),
             colors=colors_crop,
@@ -822,22 +606,21 @@
             Ks=K,  # [1, 3, 3]
             width=W,
             height=H,
-            tile_size=BLOCK_WIDTH,
             packed=False,
             near_plane=0.01,
             far_plane=1e10,
             render_mode=render_mode,
             sh_degree=sh_degree_to_use,
             sparse_grad=False,
-            absgrad=True,
+            absgrad=self.strategy.absgrad,
             rasterize_mode=self.config.rasterize_mode,
             # set some threshold to disregrad small gaussians for faster rendering.
             # radius_clip=3.0,
         )
-        if self.training and info["means2d"].requires_grad:
-            info["means2d"].retain_grad()
-        self.xys = info["means2d"]  # [1, N, 2]
-        self.radii = info["radii"][0]  # [N]
+        if self.training:
+            self.strategy.step_pre_backward(
+                self.gauss_params, self.optimizers, self.strategy_state, self.step, self.info
+            )
         alpha = alpha[:, ...]
 
         background = self._get_background_color()
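Note (not part of the patch): the splatfacto changes above replace the hand-rolled densification/culling and optimizer bookkeeping with gsplat's `DefaultStrategy`, driven from three places — `get_outputs` calls `step_pre_backward` before the loss is backpropagated, the `AFTER_TRAIN_ITERATION` callback calls `step_post_backward`, and `step_cb` captures the optimizers. The following is a minimal, self-contained sketch of that loop against gsplat>=1.3 on a CUDA device; the tensor shapes, random target image, and camera are illustrative assumptions, not nerfstudio code.

```python
import torch
from gsplat.rendering import rasterization
from gsplat.strategy import DefaultStrategy

device, N, W, H = "cuda", 1000, 64, 48
# One parameter group per tensor, mirroring get_gaussian_param_groups().
params = torch.nn.ParameterDict({
    "means": torch.nn.Parameter(torch.rand(N, 3, device=device) * 2 - 1),
    "quats": torch.nn.Parameter(torch.rand(N, 4, device=device)),
    "scales": torch.nn.Parameter(torch.log(torch.rand(N, 3, device=device) * 0.02 + 1e-3)),
    "opacities": torch.nn.Parameter(torch.logit(torch.rand(N, device=device) * 0.9 + 0.05)),
    "colors": torch.nn.Parameter(torch.rand(N, 3, device=device)),
})
optimizers = {k: torch.optim.Adam([v], lr=1e-3) for k, v in params.items()}

strategy = DefaultStrategy(absgrad=True)  # thresholds left at gsplat defaults in this sketch
state = strategy.initialize_state(scene_scale=1.0)

viewmat = torch.eye(4, device=device)[None]
viewmat[0, 2, 3] = 3.0  # push the splats in front of the camera (OpenCV convention)
K = torch.tensor([[[50.0, 0.0, W / 2], [0.0, 50.0, H / 2], [0.0, 0.0, 1.0]]], device=device)
target = torch.rand(1, H, W, 3, device=device)  # stand-in for a training image

for step in range(200):
    render, alpha, info = rasterization(
        means=params["means"], quats=params["quats"], scales=torch.exp(params["scales"]),
        opacities=torch.sigmoid(params["opacities"]), colors=params["colors"],
        viewmats=viewmat, Ks=K, width=W, height=H, packed=False, absgrad=strategy.absgrad,
    )
    # Retains gradients on info["means2d"] so the strategy can read (abs)grad statistics.
    strategy.step_pre_backward(params, optimizers, state, step, info)
    loss = torch.nn.functional.l1_loss(render, target)
    loss.backward()
    # Grows/prunes Gaussians and keeps the per-parameter Adam state consistent.
    strategy.step_post_backward(params, optimizers, state, step, info, packed=False)
    for opt in optimizers.values():
        opt.step()
        opt.zero_grad(set_to_none=True)
```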
diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py
index 730b46475da..b3154f09f73 100644
--- a/nerfstudio/process_data/process_data_utils.py
+++ b/nerfstudio/process_data/process_data_utils.py
@@ -15,6 +15,7 @@
 """Helper utils for processing data into the nerfstudio format."""
 
 import math
+import random
 import re
 import shutil
 import sys
@@ -126,7 +127,7 @@ def convert_video_to_images(
     verbose: bool = False,
     image_prefix: str = "frame_",
     keep_image_dir: bool = False,
-    random_seed: Optional[int] = None
+    random_seed: Optional[int] = None,
 ) -> Tuple[List[str], int]:
     """Converts a video into a sequence of images.
 
@@ -139,6 +140,7 @@ def convert_video_to_images(
        verbose: If True, logs the output of the command.
        image_prefix: Prefix to use for the image filenames.
        keep_image_dir: If True, don't delete the output directory if it already exists.
+        random_seed: If set, the seed used to choose the frames of the video.
    Returns:
        A tuple containing summary of the conversion and the number of extracted frames.
    """
@@ -178,7 +180,7 @@ def convert_video_to_images(
            start_x = crop_factor[2]
            start_y = crop_factor[0]
            crop_cmd = f"crop=w=iw*{width}:h=ih*{height}:x=iw*{start_x}:y=ih*{start_y},"
-
+
    downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}[out{i}]" for i in range(num_downscales + 1)]
    downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
    downscale_paths = [downscale_dirs[i] / f"{image_prefix}%05d.png" for i in range(num_downscales + 1)]
@@ -200,10 +202,10 @@ def convert_video_to_images(
        if random_seed:
            random.seed(random_seed)
            frame_indices = sorted(random.sample(range(num_frames), num_frames_target))
-            select_cmd = f"select=\'" + "+".join([f"eq(n\,{idx})" for idx in frame_indices]) + "\',setpts=N/TB,"
-            CONSOLE.print(f"Extracting {num_frames_target} frames using seed-based random selection.")
+            select_cmd = "select='" + "+".join([f"eq(n\,{idx})" for idx in frame_indices]) + "',setpts=N/TB,"
+            CONSOLE.print(f"Extracting {num_frames_target} frames using seed {random_seed} random selection.")
        elif spacing > 1:
-            CONSOLE.print("Number of frames to extract:", math.ceil(num_frames / spacing))
+            CONSOLE.print(f"Extracting {math.ceil(num_frames / spacing)} frames in evenly spaced intervals")
            select_cmd = f"thumbnail={spacing},setpts=N/TB,"
        else:
            CONSOLE.print("[bold red]Can't satisfy requested number of frames. Extracting all frames.")
diff --git a/nerfstudio/process_data/video_to_nerfstudio_dataset.py b/nerfstudio/process_data/video_to_nerfstudio_dataset.py
index afc5d841a7a..51a8a0b761f 100644
--- a/nerfstudio/process_data/video_to_nerfstudio_dataset.py
+++ b/nerfstudio/process_data/video_to_nerfstudio_dataset.py
@@ -16,7 +16,7 @@
 
 import shutil
 from dataclasses import dataclass
-from typing import Literal
+from typing import Literal, Optional
 
 from nerfstudio.process_data import equirect_utils, process_data_utils
 from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ColmapConverterToNerfstudioDataset
@@ -41,9 +41,9 @@ class VideoToNerfstudioDataset(ColmapConverterToNerfstudioDataset):
     """Feature matching method to use. Vocab tree is recommended for a balance of speed
     and accuracy. Exhaustive is slower but more accurate. Sequential is faster but
     should only be used for videos."""
-    random_seed: int = None
-    """Random seed to select video frames"""
-    eval_random_seed: int = None
+    random_seed: Optional[int] = None
+    """Random seed to select video frames for training set"""
+    eval_random_seed: Optional[int] = None
     """Random seed to select video frames for eval set"""
 
     def main(self) -> None:
@@ -63,7 +63,7 @@ def main(self) -> None:
                 num_downscales=0,
                 crop_factor=(0.0, 0.0, 0.0, 0.0),
                 verbose=self.verbose,
-                random_seed = self.random_seed
+                random_seed=self.random_seed,
             )
         else:
             # If we're not dealing with equirects we can downscale in one step.
@@ -76,7 +76,7 @@ def main(self) -> None:
                 verbose=self.verbose,
                 image_prefix="frame_train_" if self.eval_data is not None else "frame_",
                 keep_image_dir=False,
-                random_seed = self.random_seed
+                random_seed=self.random_seed,
             )
             if self.eval_data is not None:
                 summary_log_eval, num_extracted_frames_eval = process_data_utils.convert_video_to_images(
@@ -88,7 +88,7 @@ def main(self) -> None:
                     verbose=self.verbose,
                     image_prefix="frame_eval_",
                     keep_image_dir=True,
-                    random_seed = self.eval_random_seed
+                    random_seed=self.eval_random_seed,
                 )
                 summary_log += summary_log_eval
                 num_extracted_frames += num_extracted_frames_eval
diff --git a/nerfstudio/scripts/exporter.py b/nerfstudio/scripts/exporter.py
index 970b5a9c7a3..ca019850f9c 100644
--- a/nerfstudio/scripts/exporter.py
+++ b/nerfstudio/scripts/exporter.py
@@ -547,7 +547,7 @@ def main(self) -> None:
         if not self.output_dir.exists():
             self.output_dir.mkdir(parents=True)
 
-        _, pipeline, _, _ = eval_setup(self.load_config)
+        _, pipeline, _, _ = eval_setup(self.load_config, test_mode="inference")
 
         assert isinstance(pipeline.model, SplatfactoModel)
 
@@ -620,9 +620,17 @@ def main(self) -> None:
             n_after = np.sum(select)
             if n_after < n_before:
                 CONSOLE.print(f"{n_before - n_after} NaN/Inf elements in {k}")
+        nan_count = n - np.sum(select)
+
+        # filter gaussians that have opacities < 1/255, because they are skipped in cuda rasterization
+        low_opacity_gaussians = (map_to_tensors["opacity"]).squeeze(axis=-1) < -5.5373  # logit(1/255)
+        lowopa_count = np.sum(low_opacity_gaussians)
+        select[low_opacity_gaussians] = 0
 
         if np.sum(select) < n:
-            CONSOLE.print(f"values have NaN/Inf in map_to_tensors, only export {np.sum(select)}/{n}")
+            CONSOLE.print(
+                f"{nan_count} Gaussians have NaN/Inf and {lowopa_count} have low opacity, only export {np.sum(select)}/{n}"
+            )
             for k, t in map_to_tensors.items():
                 map_to_tensors[k] = map_to_tensors[k][select]
             count = np.sum(select)
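Note (not part of the patch): the −5.5373 threshold added to the exporter above is just logit(1/255). Opacities are stored as logits, and a Gaussian whose sigmoid opacity falls below one 8-bit step contributes nothing in the CUDA rasterizer, so it is dropped from the export. A quick numerical check of that constant:

```python
import numpy as np

p = 1 / 255
threshold = np.log(p / (1 - p))                  # logit(1/255) == -ln(254)
print(round(float(threshold), 4))                # -5.5373
print(1 / (1 + np.exp(-threshold)) * 255)        # 1.0, i.e. exactly one 8-bit opacity step
```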
"xatlas", "trimesh>=3.20.2", "timm==0.6.7", - "gsplat==1.0.0", + "gsplat==1.3.0", "pytorch-msssim", "pathos", "packaging", diff --git a/tests/process_data/test_misc.py b/tests/process_data/test_misc.py index 1b2404b517d..23fc3453ca7 100644 --- a/tests/process_data/test_misc.py +++ b/tests/process_data/test_misc.py @@ -2,13 +2,21 @@ Test misc data utils """ +import os +import re +from pathlib import Path +from unittest import mock + +import cv2 import numpy as np +from PIL import Image from pyquaternion import Quaternion from scipy.spatial.transform import Rotation # TODO(1480) use pycolmap instead of colmap_parsing_utils # import pycolmap from nerfstudio.data.utils.colmap_parsing_utils import qvec2rotmat +from nerfstudio.process_data.process_data_utils import convert_video_to_images def test_scalar_first_scalar_last_quaternions(): @@ -39,7 +47,7 @@ def test_scalar_first_scalar_last_quaternions(): # Expected Rotation matrix # fmt: off - R_expected = np.array( + R_expected = np.array( [ [ 0.81379768, -0.44096961, 0.37852231], [ 0.46984631, 0.88256412, 0.01802831], @@ -61,3 +69,107 @@ def test_scalar_first_scalar_last_quaternions(): # R = pycolmap.qvec_to_rotmat(wxyz) R = qvec2rotmat(wxyz) assert np.allclose(R, R_expected) + + +def test_process_video_conversion_with_seed(tmp_path: Path): + """ + Test convert_video_to_images by creating a mock video and ensuring correct frame extraction with seed. + """ + + # Inner functions needed for the unit tests + def create_mock_video(video_path: Path, frame_dir: Path, num_frames=10, frame_rate=1): + """Creates a mock video from a series of frames using OpenCV.""" + + first_frame = cv2.imread(str(frame_dir / "frame_0.png")) + height, width, _ = first_frame.shape + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + out = cv2.VideoWriter(str(video_path), fourcc, frame_rate, (width, height)) + + for i in range(num_frames): + frame_path = frame_dir / f"frame_{i}.png" + frame = cv2.imread(str(frame_path)) + out.write(frame) + out.release() + + def extract_frame_numbers(ffmpeg_command: str): + """Extracts the frame numbers from the ffmpeg command""" + + pattern = r"eq\(n\\,(\d+)\)" + matches = re.findall(pattern, ffmpeg_command) + frame_numbers = [int(match) for match in matches] + return frame_numbers + + # Create a video directory with path video + video_dir = tmp_path / "video" + video_dir.mkdir(exist_ok=True) + + # Set parameters for mock video + video_path = video_dir / "mock_video.mp4" + num_frames = 10 + frame_height = 150 + frame_width = 100 + frame_rate = 1 + + # Create the mock video + for i in range(num_frames): + img = Image.new("RGB", (frame_width, frame_height), (0, 0, 0)) + img.save(video_dir / f"frame_{i}.png") + create_mock_video(video_path, video_dir, num_frames=num_frames, frame_rate=frame_rate) + + # Call convert_video_to_images + image_output_dir = tmp_path / "extracted_images" + num_frames_target = 5 + num_downscales = 1 + crop_factor = (0.0, 0.0, 0.0, 0.0) + + # Mock missing COLMAP and ffmpeg in the dev env + old_path = os.environ.get("PATH", "") + os.environ["PATH"] = str(tmp_path / "mocked_bin") + f":{old_path}" + (tmp_path / "mocked_bin").mkdir() + (tmp_path / "mocked_bin" / "colmap").touch(mode=0o777) + (tmp_path / "mocked_bin" / "ffmpeg").touch(mode=0o777) + + # Return value of 10 for the get_num_frames_in_video run_command call + with mock.patch("nerfstudio.process_data.process_data_utils.run_command", return_value="10") as mock_run_func: + summary_log, extracted_frame_count = convert_video_to_images( + video_path=video_path, + 
image_dir=image_output_dir, + num_frames_target=num_frames_target, + num_downscales=num_downscales, + crop_factor=crop_factor, + verbose=False, + random_seed=42, + ) + assert mock_run_func.call_count == 2, f"Expected 2 calls, but got {mock_run_func.call_count}" + first_frames = extract_frame_numbers(mock_run_func.call_args[0][0]) + assert len(first_frames) == 5, f"Expected 5 frames, but got {len(first_frames)}" + + summary_log, extracted_frame_count = convert_video_to_images( + video_path=video_path, + image_dir=image_output_dir, + num_frames_target=num_frames_target, + num_downscales=num_downscales, + crop_factor=crop_factor, + verbose=False, + random_seed=42, + ) + + assert mock_run_func.call_count == 4, f"Expected 4 total calls, but got {mock_run_func.call_count}" + second_frames = extract_frame_numbers(mock_run_func.call_args[0][0]) + assert len(second_frames) == 5, f"Expected 5 frames, but got {len(first_frames)}" + assert first_frames == second_frames + + summary_log, extracted_frame_count = convert_video_to_images( + video_path=video_path, + image_dir=image_output_dir, + num_frames_target=num_frames_target, + num_downscales=num_downscales, + crop_factor=crop_factor, + verbose=False, + random_seed=52, + ) + + assert mock_run_func.call_count == 6, f"Expected 6 total calls, but got {mock_run_func.call_count}" + third_frames = extract_frame_numbers(mock_run_func.call_args[0][0]) + assert len(third_frames) == 5, f"Expected 5 frames, but got {len(first_frames)}" + assert first_frames != third_frames
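Note (not part of the patch): the behavior the test above locks in is that a fixed `random_seed` yields a deterministic ffmpeg `select` expression, which `extract_frame_numbers` then parses back out of the mocked command. The sketch below re-implements just that seeded branch with a hypothetical helper (`build_select_cmd` is illustrative, not a nerfstudio function) to show the round trip:

```python
import random
import re


def build_select_cmd(num_frames: int, num_frames_target: int, seed: int) -> str:
    """Mirrors the seeded branch of convert_video_to_images (illustrative only)."""
    random.seed(seed)
    frame_indices = sorted(random.sample(range(num_frames), num_frames_target))
    return "select='" + "+".join([f"eq(n\\,{idx})" for idx in frame_indices]) + "',setpts=N/TB,"


cmd = build_select_cmd(num_frames=10, num_frames_target=5, seed=42)
print(cmd)  # the same seed always produces the same select='eq(n\,..)+...' expression
# Same parsing the test uses to recover the chosen frame numbers from the ffmpeg command:
print([int(m) for m in re.findall(r"eq\(n\\,(\d+)\)", cmd)])
```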