Merge with Main

nerfstudio-project · Sep 19, 2024 · 374e1f9 · 374e1f9
2 parents e16135e + 516fd7c
commit 374e1f9
Show file tree

Hide file tree

Showing 10 changed files with 201 additions and 294 deletions.
diff --git a/.github/workflows/build_docker_image.yml b/.github/workflows/build_docker_image.yml
@@ -47,6 +47,7 @@ jobs:
       - name: Build and push Docker image
         id: push
         uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
+        if: 
         with:
           context: .
           file: ./Dockerfile
@@ -55,8 +56,9 @@ jobs:
           labels: ${{ steps.meta.outputs.labels }}
       - name: Generate artifact attestation
         uses: actions/attest-build-provenance@v1
+        if: ${{ github.event_name != 'pull_request' }}
         with:
           subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
           subject-digest: ${{ steps.push.outputs.digest }}
-          push-to-registry: ${{ github.event_name != 'pull_request' }}
+          push-to-registry: true
 
diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py
@@ -663,8 +663,7 @@
         ),
         model=SplatfactoModelConfig(
             cull_alpha_thresh=0.005,
-            continue_cull_post_densification=False,
-            densify_grad_thresh=0.0006,
+            densify_grad_thresh=0.0005,
         ),
     ),
     optimizers={

diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
@@ -300,7 +300,8 @@ def train(self) -> None:
 
                 # Do not perform evaluation if there are no validation images
                 if self.pipeline.datamanager.eval_dataset:
-                    self.eval_iteration(step)
+                    with self.train_lock:
+                        self.eval_iteration(step)
 
                 if step_check(step, self.config.steps_per_save):
                     self.save_checkpoint(step)

diff --git a/nerfstudio/exporter/exporter_utils.py b/nerfstudio/exporter/exporter_utils.py
@@ -165,11 +165,11 @@ def generate_point_cloud(
 
             if crop_obb is not None:
                 mask = crop_obb.within(point)
-            point = point[mask]
-            rgb = rgb[mask]
-            view_direction = view_direction[mask]
-            if normal is not None:
-                normal = normal[mask]
+                point = point[mask]
+                rgb = rgb[mask]
+                view_direction = view_direction[mask]
+                if normal is not None:
+                    normal = normal[mask]
 
             points.append(point)
             rgbs.append(rgb)

diff --git a/nerfstudio/models/splatfacto.py b/nerfstudio/models/splatfacto.py
diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py
@@ -15,6 +15,7 @@
 """Helper utils for processing data into the nerfstudio format."""
 
 import math
+import random
 import re
 import shutil
 import sys
@@ -126,7 +127,7 @@ def convert_video_to_images(
     verbose: bool = False,
     image_prefix: str = "frame_",
     keep_image_dir: bool = False,
-    random_seed: Optional[int] = None
+    random_seed: Optional[int] = None,
 ) -> Tuple[List[str], int]:
     """Converts a video into a sequence of images.
 
@@ -139,6 +140,7 @@ def convert_video_to_images(
         verbose: If True, logs the output of the command.
         image_prefix: Prefix to use for the image filenames.
         keep_image_dir: If True, don't delete the output directory if it already exists.
+        random_seed: If set, the seed used to choose the frames of the video
     Returns:
         A tuple containing summary of the conversion and the number of extracted frames.
     """
@@ -178,7 +180,7 @@ def convert_video_to_images(
             start_x = crop_factor[2]
             start_y = crop_factor[0]
             crop_cmd = f"crop=w=iw*{width}:h=ih*{height}:x=iw*{start_x}:y=ih*{start_y},"
-        
+
         downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}[out{i}]" for i in range(num_downscales + 1)]
         downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
         downscale_paths = [downscale_dirs[i] / f"{image_prefix}%05d.png" for i in range(num_downscales + 1)]
@@ -200,10 +202,10 @@ def convert_video_to_images(
         if random_seed:
             random.seed(random_seed)
             frame_indices = sorted(random.sample(range(num_frames), num_frames_target))
-            select_cmd = f"select=\'" + "+".join([f"eq(n\,{idx})" for idx in frame_indices]) + "\',setpts=N/TB,"
-            CONSOLE.print(f"Extracting {num_frames_target} frames using seed-based random selection.")
+            select_cmd = "select='" + "+".join([f"eq(n\,{idx})" for idx in frame_indices]) + "',setpts=N/TB,"
+            CONSOLE.print(f"Extracting {num_frames_target} frames using seed {random_seed} random selection.")
         elif spacing > 1:
-            CONSOLE.print("Number of frames to extract:", math.ceil(num_frames / spacing))
+            CONSOLE.print(f"Extracting {math.ceil(num_frames / spacing)} frames in evenly spaced intervals")
             select_cmd = f"thumbnail={spacing},setpts=N/TB,"
         else:
             CONSOLE.print("[bold red]Can't satisfy requested number of frames. Extracting all frames.")

diff --git a/nerfstudio/process_data/video_to_nerfstudio_dataset.py b/nerfstudio/process_data/video_to_nerfstudio_dataset.py
@@ -16,7 +16,7 @@
 
 import shutil
 from dataclasses import dataclass
-from typing import Literal
+from typing import Literal, Optional
 
 from nerfstudio.process_data import equirect_utils, process_data_utils
 from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ColmapConverterToNerfstudioDataset
@@ -41,9 +41,9 @@ class VideoToNerfstudioDataset(ColmapConverterToNerfstudioDataset):
     """Feature matching method to use. Vocab tree is recommended for a balance of speed
     and accuracy. Exhaustive is slower but more accurate. Sequential is faster but
     should only be used for videos."""
-    random_seed: int = None
-    """Random seed to select video frames"""
-    eval_random_seed: int = None
+    random_seed: Optional[int] = None
+    """Random seed to select video frames for training set"""
+    eval_random_seed: Optional[int] = None
     """Random seed to select video frames for eval set"""
 
     def main(self) -> None:
@@ -63,7 +63,7 @@ def main(self) -> None:
                 num_downscales=0,
                 crop_factor=(0.0, 0.0, 0.0, 0.0),
                 verbose=self.verbose,
-                random_seed = self.random_seed
+                random_seed=self.random_seed,
             )
         else:
             # If we're not dealing with equirects we can downscale in one step.
@@ -76,7 +76,7 @@ def main(self) -> None:
                 verbose=self.verbose,
                 image_prefix="frame_train_" if self.eval_data is not None else "frame_",
                 keep_image_dir=False,
-                random_seed = self.random_seed
+                random_seed=self.random_seed,
             )
             if self.eval_data is not None:
                 summary_log_eval, num_extracted_frames_eval = process_data_utils.convert_video_to_images(
@@ -88,7 +88,7 @@ def main(self) -> None:
                     verbose=self.verbose,
                     image_prefix="frame_eval_",
                     keep_image_dir=True,
-                    random_seed = self.eval_random_seed
+                    random_seed=self.eval_random_seed,
                 )
                 summary_log += summary_log_eval
                 num_extracted_frames += num_extracted_frames_eval

diff --git a/nerfstudio/scripts/exporter.py b/nerfstudio/scripts/exporter.py
@@ -547,7 +547,7 @@ def main(self) -> None:
         if not self.output_dir.exists():
             self.output_dir.mkdir(parents=True)
 
-        _, pipeline, _, _ = eval_setup(self.load_config)
+        _, pipeline, _, _ = eval_setup(self.load_config, test_mode="inference")
 
         assert isinstance(pipeline.model, SplatfactoModel)
 
@@ -620,9 +620,17 @@ def main(self) -> None:
             n_after = np.sum(select)
             if n_after < n_before:
                 CONSOLE.print(f"{n_before - n_after} NaN/Inf elements in {k}")
+        nan_count = np.sum(select) - n
+
+        # filter gaussians that have opacities < 1/255, because they are skipped in cuda rasterization
+        low_opacity_gaussians = (map_to_tensors["opacity"]).squeeze(axis=-1) < -5.5373  # logit(1/255)
+        lowopa_count = np.sum(low_opacity_gaussians)
+        select[low_opacity_gaussians] = 0
 
         if np.sum(select) < n:
-            CONSOLE.print(f"values have NaN/Inf in map_to_tensors, only export {np.sum(select)}/{n}")
+            CONSOLE.print(
+                f"{nan_count} Gaussians have NaN/Inf and {lowopa_count} have low opacity, only export {np.sum(select)}/{n}"
+            )
             for k, t in map_to_tensors.items():
                 map_to_tensors[k] = map_to_tensors[k][select]
             count = np.sum(select)

diff --git a/pyproject.toml b/pyproject.toml
@@ -62,7 +62,7 @@ dependencies = [
     "xatlas",
     "trimesh>=3.20.2",
     "timm==0.6.7",
-    "gsplat==1.0.0",
+    "gsplat==1.3.0",
     "pytorch-msssim",
     "pathos",
     "packaging",

diff --git a/tests/process_data/test_misc.py b/tests/process_data/test_misc.py
@@ -2,13 +2,21 @@
 Test misc data utils
 """
 
+import os
+import re
+from pathlib import Path
+from unittest import mock
+
+import cv2
 import numpy as np
+from PIL import Image
 from pyquaternion import Quaternion
 from scipy.spatial.transform import Rotation
 
 # TODO(1480) use pycolmap instead of colmap_parsing_utils
 # import pycolmap
 from nerfstudio.data.utils.colmap_parsing_utils import qvec2rotmat
+from nerfstudio.process_data.process_data_utils import convert_video_to_images
 
 
 def test_scalar_first_scalar_last_quaternions():
@@ -39,7 +47,7 @@ def test_scalar_first_scalar_last_quaternions():
 
     # Expected Rotation matrix
     # fmt: off
-    R_expected = np.array( 
+    R_expected = np.array(
         [
             [ 0.81379768, -0.44096961,  0.37852231],
             [ 0.46984631,  0.88256412,  0.01802831],
@@ -61,3 +69,107 @@ def test_scalar_first_scalar_last_quaternions():
     # R = pycolmap.qvec_to_rotmat(wxyz)
     R = qvec2rotmat(wxyz)
     assert np.allclose(R, R_expected)
+
+
+def test_process_video_conversion_with_seed(tmp_path: Path):
+    """
+    Test convert_video_to_images by creating a mock video and ensuring correct frame extraction with seed.
+    """
+
+    # Inner functions needed for the unit tests
+    def create_mock_video(video_path: Path, frame_dir: Path, num_frames=10, frame_rate=1):
+        """Creates a mock video from a series of frames using OpenCV."""
+
+        first_frame = cv2.imread(str(frame_dir / "frame_0.png"))
+        height, width, _ = first_frame.shape
+        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+        out = cv2.VideoWriter(str(video_path), fourcc, frame_rate, (width, height))
+
+        for i in range(num_frames):
+            frame_path = frame_dir / f"frame_{i}.png"
+            frame = cv2.imread(str(frame_path))
+            out.write(frame)
+        out.release()
+
+    def extract_frame_numbers(ffmpeg_command: str):
+        """Extracts the frame numbers from the ffmpeg command"""
+
+        pattern = r"eq\(n\\,(\d+)\)"
+        matches = re.findall(pattern, ffmpeg_command)
+        frame_numbers = [int(match) for match in matches]
+        return frame_numbers
+
+    # Create a video directory with path video
+    video_dir = tmp_path / "video"
+    video_dir.mkdir(exist_ok=True)
+
+    # Set parameters for mock video
+    video_path = video_dir / "mock_video.mp4"
+    num_frames = 10
+    frame_height = 150
+    frame_width = 100
+    frame_rate = 1
+
+    # Create the mock video
+    for i in range(num_frames):
+        img = Image.new("RGB", (frame_width, frame_height), (0, 0, 0))
+        img.save(video_dir / f"frame_{i}.png")
+    create_mock_video(video_path, video_dir, num_frames=num_frames, frame_rate=frame_rate)
+
+    # Call convert_video_to_images
+    image_output_dir = tmp_path / "extracted_images"
+    num_frames_target = 5
+    num_downscales = 1
+    crop_factor = (0.0, 0.0, 0.0, 0.0)
+
+    # Mock missing COLMAP and ffmpeg in the dev env
+    old_path = os.environ.get("PATH", "")
+    os.environ["PATH"] = str(tmp_path / "mocked_bin") + f":{old_path}"
+    (tmp_path / "mocked_bin").mkdir()
+    (tmp_path / "mocked_bin" / "colmap").touch(mode=0o777)
+    (tmp_path / "mocked_bin" / "ffmpeg").touch(mode=0o777)
+
+    # Return value of 10 for the get_num_frames_in_video run_command call
+    with mock.patch("nerfstudio.process_data.process_data_utils.run_command", return_value="10") as mock_run_func:
+        summary_log, extracted_frame_count = convert_video_to_images(
+            video_path=video_path,
+            image_dir=image_output_dir,
+            num_frames_target=num_frames_target,
+            num_downscales=num_downscales,
+            crop_factor=crop_factor,
+            verbose=False,
+            random_seed=42,
+        )
+        assert mock_run_func.call_count == 2, f"Expected 2 calls, but got {mock_run_func.call_count}"
+        first_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
+        assert len(first_frames) == 5, f"Expected 5 frames, but got {len(first_frames)}"
+
+        summary_log, extracted_frame_count = convert_video_to_images(
+            video_path=video_path,
+            image_dir=image_output_dir,
+            num_frames_target=num_frames_target,
+            num_downscales=num_downscales,
+            crop_factor=crop_factor,
+            verbose=False,
+            random_seed=42,
+        )
+
+        assert mock_run_func.call_count == 4, f"Expected 4 total calls, but got {mock_run_func.call_count}"
+        second_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
+        assert len(second_frames) == 5, f"Expected 5 frames, but got {len(first_frames)}"
+        assert first_frames == second_frames
+
+        summary_log, extracted_frame_count = convert_video_to_images(
+            video_path=video_path,
+            image_dir=image_output_dir,
+            num_frames_target=num_frames_target,
+            num_downscales=num_downscales,
+            crop_factor=crop_factor,
+            verbose=False,
+            random_seed=52,
+        )
+
+        assert mock_run_func.call_count == 6, f"Expected 6 total calls, but got {mock_run_func.call_count}"
+        third_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
+        assert len(third_frames) == 5, f"Expected 5 frames, but got {len(first_frames)}"
+        assert first_frames != third_frames