nerfstudio-project · jkulhanek · Sep 5, 2024 · May 3, 2024 · May 3, 2024 · Jul 2, 2024
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -29,3 +29,25 @@ jobs:
         run: |
           python -m build
           twine upload --username __token__ --password $PYPI_TOKEN dist/*
+
+  deploy-docker-image:
+    runs-on: ubuntu-latest
+    needs: deploy  # start only after the deploy job has finished successfully
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v1
+        with:
+          python-version: '3.8'
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      - name: Build and push
+        env:  # hand the registry token to the script without putting it on a command line
+          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+        run: |
+          python -m pip install --no-dependencies .  # installed only to read the package version
+          ns_version="$(python -c "from importlib.metadata import version; print(version('nerfstudio'))")"
+          echo "${DOCKER_TOKEN}" | docker login -u nerfstudio --password-stdin  # no server argument: Docker Hub is the default registry
+          docker build -f Dockerfile -t "nerfstudio/nerfstudio:${ns_version}" -t nerfstudio/nerfstudio:latest .
+          docker push "nerfstudio/nerfstudio:${ns_version}"
+          docker push nerfstudio/nerfstudio:latest
diff --git a/Dockerfile b/Dockerfile
@@ -1,178 +1,119 @@
-ARG CUDA_VERSION=11.8.0
-ARG OS_VERSION=22.04
-ARG USER_ID=1000
-# Define base image.
-FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${OS_VERSION}
-ARG CUDA_VERSION
-ARG OS_VERSION
-ARG USER_ID
+ARG NVIDIA_CUDA_VERSION=11.8.0
+ARG UBUNTU_VERSION=22.04
+# Semicolon-separated CUDA architectures that Colmap and tiny-cuda-nn are built for; >= 8.0 gives a faster TCNN.
+ARG CUDA_ARCHITECTURES="90;89;86;80;75;70;61"
+ARG NERFSTUDIO_VERSION=main
 
-# metainformation
-LABEL org.opencontainers.image.version = "0.1.18"
-LABEL org.opencontainers.image.source = "https://github.com/nerfstudio-project/nerfstudio"
-LABEL org.opencontainers.image.licenses = "Apache License 2.0"
-LABEL org.opencontainers.image.base.name="docker.io/library/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${OS_VERSION}"
+FROM nvidia/cuda:${NVIDIA_CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS builder
+ARG NVIDIA_CUDA_VERSION
+ARG UBUNTU_VERSION
+ARG CUDA_ARCHITECTURES
 
-# Variables used at build time.
-## CUDA architectures, required by Colmap and tiny-cuda-nn.
-## NOTE: All commonly used GPU architectures are included and supported here. To speedup the image build process remove all architectures but the one of your explicit GPU. Find details here: https://developer.nvidia.com/cuda-gpus (8.6 translates to 86 in the line below) or in the docs.
-ARG CUDA_ARCHITECTURES=90;89;86;80;75;70;61;52;37
-
-# Set environment variables.
-## Set non-interactive to prevent asking for user inputs blocking image creation.
 ENV DEBIAN_FRONTEND=noninteractive
-## Set timezone as it is required by some packages.
-ENV TZ=Europe/Berlin
-## CUDA Home, required to find CUDA in some packages.
-ENV CUDA_HOME="/usr/local/cuda"
-
-# Install required apt packages and clear cache afterwards.
+ENV QT_XCB_GL_INTEGRATION=xcb_egl
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-    build-essential \
-    cmake \
-    curl \
-    ffmpeg \
-    git \
-    libatlas-base-dev \
-    libboost-filesystem-dev \
-    libboost-graph-dev \
-    libboost-program-options-dev \
-    libboost-system-dev \
-    libboost-test-dev \
-    libhdf5-dev \
-    libcgal-dev \
-    libeigen3-dev \
-    libflann-dev \
-    libfreeimage-dev \
-    libgflags-dev \
-    libglew-dev \
-    libgoogle-glog-dev \
-    libmetis-dev \
-    libprotobuf-dev \
-    libqt5opengl5-dev \
-    libsqlite3-dev \
-    libsuitesparse-dev \
-    nano \
-    protobuf-compiler \
-    python-is-python3 \
-    python3.10-dev \
-    python3-pip \
-    qtbase5-dev \
-    sudo \
-    vim-tiny \
-    wget && \
-    rm -rf /var/lib/apt/lists/*
-
-
-# Install GLOG (required by ceres).
-RUN git clone --branch v0.6.0 https://github.com/google/glog.git --single-branch && \
-    cd glog && \
-    mkdir build && \
-    cd build && \
-    cmake .. && \
-    make -j `nproc` && \
-    make install && \
-    cd ../.. && \
-    rm -rf glog
-# Add glog path to LD_LIBRARY_PATH.
-ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib"
-
-# Install Ceres-solver (required by colmap).
-RUN git clone --branch 2.1.0 https://ceres-solver.googlesource.com/ceres-solver.git --single-branch && \
-    cd ceres-solver && \
-    git checkout $(git describe --tags) && \
-    mkdir build && \
-    cd build && \
-    cmake .. -DBUILD_TESTING=OFF -DBUILD_EXAMPLES=OFF && \
-    make -j `nproc` && \
-    make install && \
-    cd ../.. && \
-    rm -rf ceres-solver
-
-# Install colmap.
-RUN git clone --branch 3.8 https://github.com/colmap/colmap.git --single-branch && \
+    apt-get install -y --no-install-recommends --no-install-suggests \
+        build-essential \
+        cmake \
+        git \
+        libboost-filesystem-dev \
+        libboost-graph-dev \
+        libboost-program-options-dev \
+        libboost-system-dev \
+        libceres-dev \
+        libcgal-dev \
+        libeigen3-dev \
+        libflann-dev \
+        libfreeimage-dev \
+        libglew-dev \
+        libgoogle-glog-dev \
+        libgtest-dev \
+        libmetis-dev \
+        libqt5opengl5-dev \
+        libsqlite3-dev \
+        ninja-build \
+        python3-pip \
+        python3.10-dev \
+        qtbase5-dev
+
+# Build and install COLMAP.
+RUN git clone https://github.com/colmap/colmap.git && \
     cd colmap && \
+    git checkout "3.9.1" && \
     mkdir build && \
     cd build && \
-    cmake .. -DCUDA_ENABLED=ON \
-             -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES} && \
-    make -j `nproc` && \
-    make install && \
-    cd ../.. && \
-    rm -rf colmap
-
-# Create non root user and setup environment.
-RUN useradd -m -d /home/user -g root -G sudo -u ${USER_ID} user
-RUN usermod -aG sudo user
-# Set user password
-RUN echo "user:user" | chpasswd
-# Ensure sudo group users are not asked for a password when using sudo command by ammending sudoers file
-RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
-
-# Switch to new uer and workdir.
-USER ${USER_ID}
-WORKDIR /home/user
-
-# Add local user binary folder to PATH variable.
-ENV PATH="${PATH}:/home/user/.local/bin"
-SHELL ["/bin/bash", "-c"]
-
-# Upgrade pip and install packages.
-RUN python3.10 -m pip install --no-cache-dir --upgrade pip setuptools pathtools promise pybind11
-# Install pytorch and submodules
-RUN CUDA_VER=${CUDA_VERSION%.*} && CUDA_VER=${CUDA_VER//./} && python3.10 -m pip install --no-cache-dir \
-    torch==2.0.1+cu${CUDA_VER} \
-    torchvision==0.15.2+cu${CUDA_VER} \
-        --extra-index-url https://download.pytorch.org/whl/cu${CUDA_VER}
-# Install tynyCUDNN (we need to set the target architectures as environment variable first).
-ENV TCNN_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES}
-RUN python3.10 -m pip install --no-cache-dir git+https://github.com/NVlabs/[email protected]#subdirectory=bindings/torch
-
-# Install pycolmap, required by hloc.
-RUN git clone --branch v0.4.0 --recursive https://github.com/colmap/pycolmap.git && \
-    cd pycolmap && \
-    python3.10 -m pip install --no-cache-dir . && \
-    cd ..
-
-# Install hloc 1.4 as alternative feature detector and matcher option for nerfstudio.
-RUN git clone --branch master --recursive https://github.com/cvg/Hierarchical-Localization.git && \
-    cd Hierarchical-Localization && \
-    git checkout v1.4 && \
-    python3.10 -m pip install --no-cache-dir -e . && \
-    cd ..
-
-# Install pyceres from source
-RUN git clone --branch v1.0 --recursive https://github.com/cvg/pyceres.git && \
-    cd pyceres && \
-    python3.10 -m pip install --no-cache-dir -e . && \
-    cd ..
-
-# Install pixel perfect sfm.
-RUN git clone --recursive https://github.com/cvg/pixel-perfect-sfm.git && \
-    cd pixel-perfect-sfm && \
-    git reset --hard 40f7c1339328b2a0c7cf71f76623fb848e0c0357 && \
-    git clean -df && \
-    python3.10 -m pip install --no-cache-dir -e . && \
-    cd ..
-
-RUN python3.10 -m pip install --no-cache-dir omegaconf
-# Copy nerfstudio folder and give ownership to user.
-ADD . /home/user/nerfstudio
-USER root
-RUN chown -R user /home/user/nerfstudio
-USER ${USER_ID}
-
-# Install nerfstudio dependencies.
-RUN cd nerfstudio && \
-    python3.10 -m pip install --no-cache-dir -e . && \
-    cd ..
+    mkdir -p /build && \
+    cmake .. -GNinja "-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES}" \
+        -DCMAKE_INSTALL_PREFIX=/build/colmap && \
+    ninja install -j1 && \
+    cd ~
+
+# Upgrade pip, then install PyTorch (cu118 wheels), hloc v1.4, tiny-cuda-nn, pycolmap, pyceres and omegaconf.
+# Alternative (disabled): torch==2.2.2 torchvision==0.17.2 from --index-url https://download.pytorch.org/whl/cu118
+RUN pip install --no-cache-dir --upgrade pip 'setuptools<70.0.0' && \
+    pip install --no-cache-dir torch==2.1.2+cu118 torchvision==0.16.2+cu118 'numpy<2.0.0' --extra-index-url https://download.pytorch.org/whl/cu118 && \
+    git clone --branch master --recursive https://github.com/cvg/Hierarchical-Localization.git /opt/hloc && \
+    cd /opt/hloc && git checkout v1.4 && python3.10 -m pip install --no-cache-dir . && cd ~ && \
+    TCNN_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" pip install --no-cache-dir "git+https://github.com/NVlabs/tiny-cuda-nn.git#subdirectory=bindings/torch" && \
+    pip install --no-cache-dir pycolmap==0.6.1 pyceres==2.1 omegaconf==2.3.0
+
+# Build and install the newest gsplat, then NerfStudio from the build context.
+ARG NERFSTUDIO_VERSION
+COPY . /tmp/nerfstudio/
+# COPY . keeps the source tree intact; COPY * would copy the contents of every top-level directory and flatten it.
+# Fall back to cloning NERFSTUDIO_VERSION from GitHub when the build context holds no Python project.
+RUN if [ ! -f /tmp/nerfstudio/pyproject.toml ] && [ ! -f /tmp/nerfstudio/setup.py ]; then rm -rf /tmp/nerfstudio && git clone --branch "${NERFSTUDIO_VERSION}" --recursive https://github.com/nerfstudio-project/nerfstudio.git /tmp/nerfstudio && echo "Cloned NerfStudio from GitHub (version ${NERFSTUDIO_VERSION})."; fi && \
+    export TORCH_CUDA_ARCH_LIST="$(echo "$CUDA_ARCHITECTURES" | tr ';' '\n' | awk '$0 > 70 {print substr($0,1,1)"."substr($0,2)}' | tr '\n' ' ' | sed 's/ $//')" && \
+    pip install --no-cache-dir git+https://github.com/nerfstudio-project/gsplat.git && \
+    pip install --no-cache-dir /tmp/nerfstudio 'numpy<2.0.0' && \
+    rm -rf /tmp/nerfstudio
+
+# Give group and others the owner's permissions on the /build prefix and the Python library tree.
+RUN chmod -R go=u /build && \
+    chmod -R go=u /usr/local/lib/python3.10
+
+#
+# Docker runtime stage.
+#
+FROM nvidia/cuda:${NVIDIA_CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime
+ARG CUDA_ARCHITECTURES
+ARG NVIDIA_CUDA_VERSION
+ARG UBUNTU_VERSION
+# Image metadata. LABEL needs key=value with no spaces around '=', otherwise the value starts with '= '.
+# LABEL org.opencontainers.image.version="0.1.18"
+LABEL org.opencontainers.image.source="https://github.com/nerfstudio-project/nerfstudio"
+LABEL org.opencontainers.image.licenses="Apache-2.0"
+LABEL org.opencontainers.image.base.name="docker.io/nvidia/cuda:${NVIDIA_CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}"
+LABEL org.opencontainers.image.documentation="https://docs.nerf.studio/"
 
-# Change working directory
-WORKDIR /workspace
+# Minimal dependencies to run the COLMAP binary compiled in the builder stage; leaving out the build
+# dependencies and removing the apt lists in the same layer keeps the final image considerably smaller.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends --no-install-suggests \
+        libboost-filesystem1.74.0 \
+        libboost-program-options1.74.0 \
+        libc6 \
+        libceres2 \
+        libfreeimage3 \
+        libgcc-s1 \
+        libgl1 \
+        libglew2.2 \
+        libgoogle-glog0v5 \
+        libqt5core5a \
+        libqt5gui5 \
+        libqt5widgets5 \
+        python3.10 \
+        python-is-python3 \
+        ffmpeg && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy the COLMAP install prefix, the Python dist-packages and the ns* executables from the builder stage.
+COPY --from=builder /build/colmap/ /usr/local/
+COPY --from=builder /usr/local/lib/python3.10/dist-packages/ /usr/local/lib/python3.10/dist-packages/
+COPY --from=builder /usr/local/bin/ns* /usr/local/bin/
 
 # Install nerfstudio cli auto completion
-RUN ns-install-cli --mode install
+RUN /bin/bash -c 'ns-install-cli --mode install'
 
 # Bash as default entrypoint.
 CMD /bin/bash -l
diff --git a/nerfstudio/scripts/train.py b/nerfstudio/scripts/train.py
@@ -95,6 +95,7 @@ def train_loop(local_rank: int, world_size: int, config: TrainerConfig, global_r
         config: config file specifying training regimen
     """
     _set_random_seed(config.machine.seed + global_rank)
+    torch.cuda.set_device(local_rank)
     trainer = config.setup(local_rank=local_rank, world_size=world_size)
     trainer.setup()
     trainer.train()