Skip to content

Commit

Permalink
Merge with Main
Browse files Browse the repository at this point in the history
  • Loading branch information
Anthony-Tafoya committed Sep 19, 2024
2 parents e16135e + 516fd7c commit 374e1f9
Show file tree
Hide file tree
Showing 10 changed files with 201 additions and 294 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/build_docker_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ jobs:
- name: Build and push Docker image
id: push
uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
if:
with:
context: .
file: ./Dockerfile
Expand All @@ -55,8 +56,9 @@ jobs:
labels: ${{ steps.meta.outputs.labels }}
- name: Generate artifact attestation
uses: actions/attest-build-provenance@v1
if: ${{ github.event_name != 'pull_request' }}
with:
subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
subject-digest: ${{ steps.push.outputs.digest }}
push-to-registry: ${{ github.event_name != 'pull_request' }}
push-to-registry: true

3 changes: 1 addition & 2 deletions nerfstudio/configs/method_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,8 +663,7 @@
),
model=SplatfactoModelConfig(
cull_alpha_thresh=0.005,
continue_cull_post_densification=False,
densify_grad_thresh=0.0006,
densify_grad_thresh=0.0005,
),
),
optimizers={
Expand Down
3 changes: 2 additions & 1 deletion nerfstudio/engine/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,8 @@ def train(self) -> None:

# Do not perform evaluation if there are no validation images
if self.pipeline.datamanager.eval_dataset:
self.eval_iteration(step)
with self.train_lock:
self.eval_iteration(step)

if step_check(step, self.config.steps_per_save):
self.save_checkpoint(step)
Expand Down
10 changes: 5 additions & 5 deletions nerfstudio/exporter/exporter_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,11 @@ def generate_point_cloud(

if crop_obb is not None:
mask = crop_obb.within(point)
point = point[mask]
rgb = rgb[mask]
view_direction = view_direction[mask]
if normal is not None:
normal = normal[mask]
point = point[mask]
rgb = rgb[mask]
view_direction = view_direction[mask]
if normal is not None:
normal = normal[mask]

points.append(point)
rgbs.append(rgb)
Expand Down
321 changes: 52 additions & 269 deletions nerfstudio/models/splatfacto.py

Large diffs are not rendered by default.

12 changes: 7 additions & 5 deletions nerfstudio/process_data/process_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"""Helper utils for processing data into the nerfstudio format."""

import math
import random
import re
import shutil
import sys
Expand Down Expand Up @@ -126,7 +127,7 @@ def convert_video_to_images(
verbose: bool = False,
image_prefix: str = "frame_",
keep_image_dir: bool = False,
random_seed: Optional[int] = None
random_seed: Optional[int] = None,
) -> Tuple[List[str], int]:
"""Converts a video into a sequence of images.
Expand All @@ -139,6 +140,7 @@ def convert_video_to_images(
verbose: If True, logs the output of the command.
image_prefix: Prefix to use for the image filenames.
keep_image_dir: If True, don't delete the output directory if it already exists.
random_seed: If set, the seed used to randomly choose which frames of the video to extract.
Returns:
A tuple containing summary of the conversion and the number of extracted frames.
"""
Expand Down Expand Up @@ -178,7 +180,7 @@ def convert_video_to_images(
start_x = crop_factor[2]
start_y = crop_factor[0]
crop_cmd = f"crop=w=iw*{width}:h=ih*{height}:x=iw*{start_x}:y=ih*{start_y},"

downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}[out{i}]" for i in range(num_downscales + 1)]
downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
downscale_paths = [downscale_dirs[i] / f"{image_prefix}%05d.png" for i in range(num_downscales + 1)]
Expand All @@ -200,10 +202,10 @@ def convert_video_to_images(
if random_seed:
random.seed(random_seed)
frame_indices = sorted(random.sample(range(num_frames), num_frames_target))
select_cmd = f"select=\'" + "+".join([f"eq(n\,{idx})" for idx in frame_indices]) + "\',setpts=N/TB,"
CONSOLE.print(f"Extracting {num_frames_target} frames using seed-based random selection.")
select_cmd = "select='" + "+".join([f"eq(n\,{idx})" for idx in frame_indices]) + "',setpts=N/TB,"
CONSOLE.print(f"Extracting {num_frames_target} frames using seed {random_seed} random selection.")
elif spacing > 1:
CONSOLE.print("Number of frames to extract:", math.ceil(num_frames / spacing))
CONSOLE.print(f"Extracting {math.ceil(num_frames / spacing)} frames in evenly spaced intervals")
select_cmd = f"thumbnail={spacing},setpts=N/TB,"
else:
CONSOLE.print("[bold red]Can't satisfy requested number of frames. Extracting all frames.")
Expand Down
14 changes: 7 additions & 7 deletions nerfstudio/process_data/video_to_nerfstudio_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import shutil
from dataclasses import dataclass
from typing import Literal
from typing import Literal, Optional

from nerfstudio.process_data import equirect_utils, process_data_utils
from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ColmapConverterToNerfstudioDataset
Expand All @@ -41,9 +41,9 @@ class VideoToNerfstudioDataset(ColmapConverterToNerfstudioDataset):
"""Feature matching method to use. Vocab tree is recommended for a balance of speed
and accuracy. Exhaustive is slower but more accurate. Sequential is faster but
should only be used for videos."""
random_seed: int = None
"""Random seed to select video frames"""
eval_random_seed: int = None
random_seed: Optional[int] = None
"""Random seed to select video frames for training set"""
eval_random_seed: Optional[int] = None
"""Random seed to select video frames for eval set"""

def main(self) -> None:
Expand All @@ -63,7 +63,7 @@ def main(self) -> None:
num_downscales=0,
crop_factor=(0.0, 0.0, 0.0, 0.0),
verbose=self.verbose,
random_seed = self.random_seed
random_seed=self.random_seed,
)
else:
# If we're not dealing with equirects we can downscale in one step.
Expand All @@ -76,7 +76,7 @@ def main(self) -> None:
verbose=self.verbose,
image_prefix="frame_train_" if self.eval_data is not None else "frame_",
keep_image_dir=False,
random_seed = self.random_seed
random_seed=self.random_seed,
)
if self.eval_data is not None:
summary_log_eval, num_extracted_frames_eval = process_data_utils.convert_video_to_images(
Expand All @@ -88,7 +88,7 @@ def main(self) -> None:
verbose=self.verbose,
image_prefix="frame_eval_",
keep_image_dir=True,
random_seed = self.eval_random_seed
random_seed=self.eval_random_seed,
)
summary_log += summary_log_eval
num_extracted_frames += num_extracted_frames_eval
Expand Down
12 changes: 10 additions & 2 deletions nerfstudio/scripts/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ def main(self) -> None:
if not self.output_dir.exists():
self.output_dir.mkdir(parents=True)

_, pipeline, _, _ = eval_setup(self.load_config)
_, pipeline, _, _ = eval_setup(self.load_config, test_mode="inference")

assert isinstance(pipeline.model, SplatfactoModel)

Expand Down Expand Up @@ -620,9 +620,17 @@ def main(self) -> None:
n_after = np.sum(select)
if n_after < n_before:
CONSOLE.print(f"{n_before - n_after} NaN/Inf elements in {k}")
nan_count = np.sum(select) - n

# filter gaussians that have opacities < 1/255, because they are skipped in cuda rasterization
low_opacity_gaussians = (map_to_tensors["opacity"]).squeeze(axis=-1) < -5.5373 # logit(1/255)
lowopa_count = np.sum(low_opacity_gaussians)
select[low_opacity_gaussians] = 0

if np.sum(select) < n:
CONSOLE.print(f"values have NaN/Inf in map_to_tensors, only export {np.sum(select)}/{n}")
CONSOLE.print(
f"{nan_count} Gaussians have NaN/Inf and {lowopa_count} have low opacity, only export {np.sum(select)}/{n}"
)
for k, t in map_to_tensors.items():
map_to_tensors[k] = map_to_tensors[k][select]
count = np.sum(select)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ dependencies = [
"xatlas",
"trimesh>=3.20.2",
"timm==0.6.7",
"gsplat==1.0.0",
"gsplat==1.3.0",
"pytorch-msssim",
"pathos",
"packaging",
Expand Down
114 changes: 113 additions & 1 deletion tests/process_data/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,21 @@
Test misc data utils
"""

import os
import re
from pathlib import Path
from unittest import mock

import cv2
import numpy as np
from PIL import Image
from pyquaternion import Quaternion
from scipy.spatial.transform import Rotation

# TODO(1480) use pycolmap instead of colmap_parsing_utils
# import pycolmap
from nerfstudio.data.utils.colmap_parsing_utils import qvec2rotmat
from nerfstudio.process_data.process_data_utils import convert_video_to_images


def test_scalar_first_scalar_last_quaternions():
Expand Down Expand Up @@ -39,7 +47,7 @@ def test_scalar_first_scalar_last_quaternions():

# Expected Rotation matrix
# fmt: off
R_expected = np.array(
R_expected = np.array(
[
[ 0.81379768, -0.44096961, 0.37852231],
[ 0.46984631, 0.88256412, 0.01802831],
Expand All @@ -61,3 +69,107 @@ def test_scalar_first_scalar_last_quaternions():
# R = pycolmap.qvec_to_rotmat(wxyz)
R = qvec2rotmat(wxyz)
assert np.allclose(R, R_expected)


def test_process_video_conversion_with_seed(tmp_path: Path):
    """
    Test convert_video_to_images by creating a mock video and ensuring correct frame extraction with seed.

    The test checks three things about seeded frame selection:
      * a seeded run selects exactly ``num_frames_target`` frames,
      * repeating the run with the same seed selects the same frames,
      * a different seed selects a different set of frames.

    ``run_command`` is patched, so no real ffmpeg/COLMAP process is started; the
    selected frame indices are read back from the command string passed to the mock.

    Args:
        tmp_path: pytest-provided temporary directory used for all generated files.
    """

    # Inner functions needed for the unit tests
    def create_mock_video(video_path: Path, frame_dir: Path, num_frames=10, frame_rate=1):
        """Creates a mock video from a series of frames using OpenCV."""

        # The first frame determines the video dimensions.
        first_frame = cv2.imread(str(frame_dir / "frame_0.png"))
        height, width, _ = first_frame.shape
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(str(video_path), fourcc, frame_rate, (width, height))

        for i in range(num_frames):
            frame_path = frame_dir / f"frame_{i}.png"
            frame = cv2.imread(str(frame_path))
            out.write(frame)
        out.release()

    def extract_frame_numbers(ffmpeg_command: str):
        """Extracts the frame numbers from the ffmpeg command"""

        # Matches every `eq(n\,<idx>)` term of the select filter.
        pattern = r"eq\(n\\,(\d+)\)"
        return [int(match) for match in re.findall(pattern, ffmpeg_command)]

    # Create a video directory with path video
    video_dir = tmp_path / "video"
    video_dir.mkdir(exist_ok=True)

    # Set parameters for mock video
    video_path = video_dir / "mock_video.mp4"
    num_frames = 10
    frame_height = 150
    frame_width = 100
    frame_rate = 1

    # Create the mock video
    for i in range(num_frames):
        img = Image.new("RGB", (frame_width, frame_height), (0, 0, 0))
        img.save(video_dir / f"frame_{i}.png")
    create_mock_video(video_path, video_dir, num_frames=num_frames, frame_rate=frame_rate)

    # Parameters shared by every convert_video_to_images call below
    image_output_dir = tmp_path / "extracted_images"
    num_frames_target = 5
    num_downscales = 1
    crop_factor = (0.0, 0.0, 0.0, 0.0)

    # Mock missing COLMAP and ffmpeg in the dev env
    mocked_bin = tmp_path / "mocked_bin"
    mocked_bin.mkdir()
    (mocked_bin / "colmap").touch(mode=0o777)
    (mocked_bin / "ffmpeg").touch(mode=0o777)
    patched_path = str(mocked_bin) + os.pathsep + os.environ.get("PATH", "")

    # patch.dict restores the original PATH on exit (even if an assertion fails), so the
    # fake binaries do not leak into other tests running in the same process.
    # Return value of 10 for the get_num_frames_in_video run_command call
    with mock.patch.dict(os.environ, {"PATH": patched_path}), mock.patch(
        "nerfstudio.process_data.process_data_utils.run_command", return_value="10"
    ) as mock_run_func:

        def convert_with_seed(seed: int):
            """Runs the conversion with the given seed and returns the frames from the latest command."""
            convert_video_to_images(
                video_path=video_path,
                image_dir=image_output_dir,
                num_frames_target=num_frames_target,
                num_downscales=num_downscales,
                crop_factor=crop_factor,
                verbose=False,
                random_seed=seed,
            )
            # The most recent run_command call is taken to be the ffmpeg extraction command.
            return extract_frame_numbers(mock_run_func.call_args[0][0])

        # Each conversion is expected to issue two run_command calls.
        first_frames = convert_with_seed(42)
        assert mock_run_func.call_count == 2, f"Expected 2 calls, but got {mock_run_func.call_count}"
        assert len(first_frames) == 5, f"Expected 5 frames, but got {len(first_frames)}"

        # Same seed -> identical frame selection
        second_frames = convert_with_seed(42)
        assert mock_run_func.call_count == 4, f"Expected 4 total calls, but got {mock_run_func.call_count}"
        assert len(second_frames) == 5, f"Expected 5 frames, but got {len(second_frames)}"
        assert first_frames == second_frames

        # Different seed -> different frame selection
        third_frames = convert_with_seed(52)
        assert mock_run_func.call_count == 6, f"Expected 6 total calls, but got {mock_run_func.call_count}"
        assert len(third_frames) == 5, f"Expected 5 frames, but got {len(third_frames)}"
        assert first_frames != third_frames

0 comments on commit 374e1f9

Please sign in to comment.