From f7b18d9d7b677169e2242dbafca01005d3a7f6c5 Mon Sep 17 00:00:00 2001
From: Jonathan Swartz
Date: Tue, 3 Dec 2024 20:11:34 +1300
Subject: [PATCH] fVDB: Fix missing fvdb.utils.data modules

Expose various ELU non-linearities (ELU, CELU, GELU, SiLU)
Integer JaggedReduce support
Updated build TORCH_CUDA_ARCH_LIST for supported CUDA compute architectures, fixed Docker dev and CI images
Updated README's env and build instructions

Signed-off-by: Jonathan Swartz
---
 fvdb/.clang-format                            |   2 +-
 fvdb/.github/workflows/tests.yml              | 133 +++-
 fvdb/.gitignore                               |   9 +-
 fvdb/.vscode/c_cpp_properties.json            |   1 +
 fvdb/.vscode/settings.json                    |  17 +-
 fvdb/Dockerfile                               |   4 +-
 fvdb/README.md                                |  36 +-
 fvdb/env/build_requirements.txt               |   1 -
 fvdb/env/dev_environment.yml                  |   1 +
 fvdb/fvdb/nn/__init__.py                      |   8 +
 fvdb/fvdb/nn/modules.py                       |  36 +
 fvdb/fvdb/utils/data/__init__.py              |   6 +
 .../fvdb/utils/data/_colmap_utils/__init__.py |   5 +
 fvdb/fvdb/utils/data/_colmap_utils/camera.py  | 268 +++++++
 .../fvdb/utils/data/_colmap_utils/database.py | 338 +++++++++
 fvdb/fvdb/utils/data/_colmap_utils/image.py   |  36 +
 .../fvdb/utils/data/_colmap_utils/rotation.py | 344 +++++++++
 .../utils/data/_colmap_utils/scene_manager.py | 651 ++++++++++++++++++
 .../data/_colmap_utils/tools/colmap_to_nvm.py |  64 ++
 .../data/_colmap_utils/tools/delete_images.py |  37 +
 .../tools/impute_missing_cameras.py           | 174 +++++
 .../tools/save_cameras_as_ply.py              |  86 +++
 .../_colmap_utils/tools/transform_model.py    |  49 ++
 .../tools/write_camera_track_to_bundler.py    |  59 ++
 .../tools/write_depthmap_to_ply.py            | 137 ++++
 fvdb/fvdb/utils/data/colmap_dataset.py        | 474 +++++++++++++
 fvdb/setup.py                                 |   7 +
 fvdb/src/detail/build/FineFromCoarse.cpp      |   3 +-
 .../GaussianFullyFusedProjectionJagged.cu     |   4 +-
 .../detail/ops/gsplat/GaussianRasterize.cu    |   4 +-
 fvdb/src/detail/ops/jagged/JaggedReduce.cu    |   2 +-
 fvdb/tests/unit/test_basic_ops.py             |  36 +
 fvdb/tests/unit/test_jagged_tensor.py         |   7 +-
 33 files changed, 2958 insertions(+), 81 deletions(-)
 create mode 100644 fvdb/fvdb/utils/data/__init__.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/__init__.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/camera.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/database.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/image.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/rotation.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/scene_manager.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/tools/colmap_to_nvm.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/tools/delete_images.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/tools/impute_missing_cameras.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/tools/save_cameras_as_ply.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/tools/transform_model.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/tools/write_camera_track_to_bundler.py
 create mode 100644 fvdb/fvdb/utils/data/_colmap_utils/tools/write_depthmap_to_ply.py
 create mode 100644 fvdb/fvdb/utils/data/colmap_dataset.py

diff --git a/fvdb/.clang-format b/fvdb/.clang-format
index 744e9d16e8..0e90463ff5 100644
--- a/fvdb/.clang-format
+++ b/fvdb/.clang-format
@@ -77,7 +77,7 @@ Cpp11BracedListStyle: false
 IncludeBlocks: Preserve
 IncludeIsMainRegex: "$"
 IncludeCategories:
-  - Regex: '^<.*\.h>'
+  - Regex: '^<.*\.(h|cuh)>'
     Priority: 1
   - Regex: ".*"
     Priority: 2
diff --git a/fvdb/.github/workflows/tests.yml b/fvdb/.github/workflows/tests.yml
index a21f764413..e64b3bf953 100644
--- a/fvdb/.github/workflows/tests.yml
+++ 
b/fvdb/.github/workflows/tests.yml @@ -10,6 +10,11 @@ on: - 'notebooks/**' - 'scripts/**' +# Allow subsequent pushes to the same PR or REF to cancel any previous jobs. +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + permissions: contents: write deployments: write @@ -21,37 +26,43 @@ jobs: runs-on: - self-hosted container: - image: aswf/ci-openvdb:2023-clang15.0 + image: aswf/ci-openvdb:2024 + env: + PYTHONPATH: "" + options: --rm + defaults: + run: + shell: bash -el {0} steps: - uses: actions/checkout@v4 - name: Set up fvdb_build Conda env uses: conda-incubator/setup-miniconda@v3 with: - miniconda-version: "latest" - channels: conda-forge - channel-priority: strict + miniforge-version: latest + conda-remove-defaults: "true" activate-environment: fvdb_build environment-file: env/build_environment.yml - name: Buid fvdb run: | - TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6+PTX" MAX_JOBS=$(($(nproc) < $(free -g | awk '/^Mem:/{jobs=int($7/2.5); if(jobs<1) jobs=1; print jobs}') ? $(nproc) : $(free -g | awk '/^Mem:/{jobs=int($7/2.5); if(jobs<1) jobs=1; print jobs}'))) python setup.py bdist_wheel --dist-dir=dist - shell: - bash -el {0} + TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9+PTX" MAX_JOBS=$(($(nproc) < $(free -g | awk '/^Mem:/{jobs=int($7/2.5); if(jobs<1) jobs=1; print jobs}') ? $(nproc) : $(free -g | awk '/^Mem:/{jobs=int($7/2.5); if(jobs<1) jobs=1; print jobs}'))) conda run --no-capture-output -n fvdb_build python setup.py bdist_wheel --dist-dir=dist - name: Upload package uses: actions/upload-artifact@v4 with: name: fvdb-test-package path: dist/*.whl + retention-days: 2 - - name: Clean Conda + - name: Cleanup + if: always() run: | - conda clean -pty - shell: - bash -el {0} + echo "Cleaning up /__w/_temp directory" + sudo rm -rf /__w/_temp/* + echo "Cleanup completed" + fvdb-unit-test: needs: [fvdb-build] @@ -59,15 +70,20 @@ jobs: runs-on: - self-hosted container: - image: aswf/ci-openvdb:2023-clang15.0 + image: aswf/ci-openvdb:2024 + env: + PYTHONPATH: "" + options: --rm + defaults: + run: + shell: bash -el {0} steps: - uses: actions/checkout@v4 - name: Set up fvdb_test Conda env uses: conda-incubator/setup-miniconda@v3 with: - miniconda-version: "latest" - channels: conda-forge - channel-priority: strict + miniforge-version: latest + conda-remove-defaults: "true" activate-environment: fvdb_test environment-file: env/test_environment.yml @@ -81,21 +97,63 @@ jobs: run: | conda activate fvdb_test pip install ./dist/*.whl - shell: - bash -el {0} - name: Run tests run: | cd tests; pytest -v unit - shell: - bash -el {0} - - name: Clean Conda + - name: Cleanup + if: always() run: | - conda clean -pty - shell: - bash -el {0} + echo "Cleaning up /__w/_temp directory" + sudo rm -rf /__w/_temp/* + echo "Cleanup completed" + + fvdb-docs-test: + needs: [fvdb-build] + name: fVDB Documentation Tests + runs-on: + - self-hosted + container: + image: aswf/ci-openvdb:2024 + env: + PYTHONPATH: "" + options: --rm + defaults: + run: + shell: bash -el {0} + steps: + - uses: actions/checkout@v4 + - name: Set up fvdb_test Conda env + uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-version: latest + conda-remove-defaults: "true" + activate-environment: fvdb_test + environment-file: env/test_environment.yml + + - name: Download package + uses: actions/download-artifact@v4 + with: + name: fvdb-test-package + path: ./dist + + - name: Install package + run: | + conda activate fvdb_test + pip install ./dist/*.whl + + - name: Run tests + run: 
| + cd .. && pytest --markdown-docs fvdb/docs + + - name: Cleanup + if: always() + run: | + echo "Cleaning up /__w/_temp directory" + sudo rm -rf /__w/_temp/* + echo "Cleanup completed" fvdb-benchmarks: needs: [fvdb-build] @@ -103,16 +161,20 @@ jobs: runs-on: - self-hosted container: - image: aswf/ci-openvdb:2023-clang15.0 - + image: aswf/ci-openvdb:2024 + env: + PYTHONPATH: "" + options: --rm + defaults: + run: + shell: bash -el {0} steps: - uses: actions/checkout@v4 - name: Set up fvdb_test Conda env uses: conda-incubator/setup-miniconda@v3 with: - miniconda-version: "latest" - channels: conda-forge - channel-priority: strict + miniforge-version: latest + conda-remove-defaults: "true" activate-environment: fvdb_test environment-file: env/test_environment.yml @@ -126,21 +188,15 @@ jobs: run: | conda activate fvdb_test pip install ./dist/*.whl - shell: - bash -el {0} - name: Disable git ownership verification run: | git config --global --add safe.directory "$(pwd)" - shell: - bash -el {0} - name: Run benchmarks run: | cd tests; pytest benchmark --benchmark-json benchmark/output.json - shell: - bash -el {0} - name: Store benchmark result uses: benchmark-action/github-action-benchmark@v1 @@ -157,8 +213,9 @@ jobs: fail-on-alert: true alert-comment-cc-users: '@swahtz' - - name: Clean Conda + - name: Cleanup + if: always() run: | - conda clean -pty - shell: - bash -el {0} \ No newline at end of file + echo "Cleaning up /__w/_temp directory" + sudo rm -rf /__w/_temp/* + echo "Cleanup completed" \ No newline at end of file diff --git a/fvdb/.gitignore b/fvdb/.gitignore index ac041d2381..b3f49659f4 100644 --- a/fvdb/.gitignore +++ b/fvdb/.gitignore @@ -31,23 +31,18 @@ apex/ *.npy *.o *.h5 -data/lego/transforms_test.json -data/lego/transforms_train.json -data/fegr/* -data/fegr2/* docs/_build fvdb/include/ trash scratch -data/output_hi.0.nvdb -data/output_mid.0.nvdb _build docs/build *tmp*.py *.whl tests/benchmark/data releases/ -data/ +/data/ +/data/* !tests/data .vscode/launch.json lightning_logs diff --git a/fvdb/.vscode/c_cpp_properties.json b/fvdb/.vscode/c_cpp_properties.json index 64eee8cadb..733b60b871 100644 --- a/fvdb/.vscode/c_cpp_properties.json +++ b/fvdb/.vscode/c_cpp_properties.json @@ -7,6 +7,7 @@ "${env:CONDA_PREFIX}/envs/fvdb/include/cuda", "${workspaceFolder}/src", "${workspaceFolder}/external/openvdb/nanovdb", + "${workspaceFolder}/external/glm", "${workspaceFolder}/external/cudnn_fe/include", "${workspaceFolder}/external/cutlass/include", "${env:CONDA_PREFIX}/envs/fvdb/include/python3.10", diff --git a/fvdb/.vscode/settings.json b/fvdb/.vscode/settings.json index 3af287bf0e..60e196bb77 100644 --- a/fvdb/.vscode/settings.json +++ b/fvdb/.vscode/settings.json @@ -135,19 +135,6 @@ }, "editor.rulers": [ 120 ] }, - "python.linting.pylintArgs": [ - "--disable", "C0114", - "--disable", "C0115", - "--disable", "C0116", - "--disable", "C0103", - "--disable", "C0209", - "--disable", "C0301", - "--disable", "R0914", - "--disable", "R0913", - "--disable", "W0622", - "--disable", "E1102", - "--generated-members=numpy.* ,torch.*" - ], "python.analysis.typeCheckingMode": "basic", "python.testing.unittestArgs": [ "-v", "-s", "." 
@@ -158,9 +145,13 @@
         "--target-version=py311",
         "--line-length=120"
     ],
+    "python.analysis.diagnosticSeverityOverrides": {
+        "reportPrivateImportUsage": "none",
+    },
     "isort.args": [ "--profile", "black" ],
     "json.format.keepLines": true,
     "C_Cpp.formatting": "clangFormat",
+    "C_Cpp.clang_format_path": "${env:CONDA_PREFIX}/envs/fvdb/bin/clang-format-18",
 }
\ No newline at end of file
diff --git a/fvdb/Dockerfile b/fvdb/Dockerfile
index fdf5e0341a..7bf041e68b 100644
--- a/fvdb/Dockerfile
+++ b/fvdb/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvcr.io/nvidia/pytorch:24.04-py3
+FROM nvcr.io/nvidia/pytorch:24.05-py3
 
 ARG MODE=production
 RUN echo "Building fVDB container in $MODE mode"
@@ -12,6 +12,6 @@ RUN pip install --no-cache-dir -r env/build_requirements.txt
 RUN if [ "$MODE" = "production" ]; then \
         MAX_JOBS=$(free -g | awk '/^Mem:/{jobs=int($4/2.5); if(jobs<1) jobs=1; print jobs}') \
-        TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6+PTX" \
+        TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9+PTX" \
         python setup.py install; \
     fi
\ No newline at end of file
diff --git a/fvdb/README.md b/fvdb/README.md
index 7d1e6e414a..c2ed6bc7fc 100644
--- a/fvdb/README.md
+++ b/fvdb/README.md
@@ -21,13 +21,15 @@ Lastly, our [documentation](docs) provides deeper details on the concepts as wel
 During the project's initial stage of release, it is necessary to [run the build steps](#building-fvdb-from-source) to install ƒVDB. Eventually, ƒVDB will be provided as a pre-built, installable package from anaconda.
 
 We support building the latest ƒVDB version for the following dependent library configurations:
 
-| PyTorch        | Python      | CUDA    |
-| -------------- | ----------- | ------- |
-| 2.4.0-2.4.1    | 3.10 - 3.12 | 12.1    |
+| PyTorch        | Python      | CUDA         |
+| -------------- | ----------- | ------------ |
+| 2.4.0-2.4.1    | 3.10 - 3.12 | 12.1 - 12.4  |
 
-***Note:** Linux is the only platform currently supported (Ubuntu >= 20.04 recommended).
+**Notes:**
+* Linux is the only platform currently supported (Ubuntu >= 20.04 recommended).
+* A CUDA-capable GPU with Ampere architecture or newer (i.e. compute capability >=8.0) is required to run the CUDA-accelerated operations in ƒVDB.
 
 ## Building *f*VDB from Source
@@ -63,6 +65,12 @@ docker run -it --gpus all --rm \
     /bin/bash
 ```
 
+When running the docker container in `dev` mode and when you are ready to build ƒVDB, you can run the following command to build ƒVDB for the recommended set of CUDA architectures:
+```shell
+MAX_JOBS=$(free -g | awk '/^Mem:/{jobs=int($4/2.5); if(jobs<1) jobs=1; print jobs}') \
+    TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9+PTX" \
+    python setup.py install
+```
 
 #### Setting up a Conda Environment
@@ -98,14 +106,17 @@ conda config --set solver libmamba
 
 Next, create the `fvdb` conda environment by running the following command from the root of this repository, and then grabbing a ☕:
 ```shell
-conda env create -f env/test_environment.yml
+conda env create -f env/dev_environment.yml
 ```
 
-**Note:** You can optionally use the `env/build_environment.yml` environment file if you want a minimum set of dependencies needed to build *f*VDB and don't intend to run the tests or the `env/learn_environment` if you would like the additional packages needed to run the examples and view their visualizations. If you intend to use our learning material such as the [notebooks](notebooks) or [examples](examples), we recommend you start from the `fvdb_learn` conda environment which contains all the dependencies needed to run the learning material as well as build *f*VDB from source.
+**Notes:**
+* You can optionally use the `env/build_environment.yml` environment file if you want a minimum set of dependencies needed just to build/package *f*VDB (note this environment won't have all the runtime dependencies needed to `import fvdb`).
+* Use `env/test_environment.yml` for a runtime environment with just the packages required to run the unit tests after building ƒVDB; this is the environment the CI pipeline uses to run the tests after building ƒVDB in the `fvdb_build` environment.
+* Use the `fvdb_learn` environment defined in `env/learn_environment.yml` if you would like an environment with the runtime requirements and the additional packages needed to run the [notebooks](notebooks) or [examples](examples) and view their visualizations.
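+
+For example, to mirror the CI pipeline locally (a sketch; CI builds a wheel in the `fvdb_build` environment and then installs and tests it in `fvdb_test`):
+```shell
+conda env create -f env/build_environment.yml
+conda activate fvdb_build
+python setup.py bdist_wheel --dist-dir=dist
+conda env create -f env/test_environment.yml
+conda activate fvdb_test
+pip install ./dist/*.whl
+```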
 
 Now activate the environment:
 
 ```shell
-conda activate fvdb_test
+conda activate fvdb
 ```
 
@@ -117,22 +128,27 @@ conda activate fvdb_test
 export MAX_JOBS=$(free -g | awk '/^Mem:/{jobs=int($4/2.5); if(jobs<1) jobs=1; print jobs}')
 ```
 
-You could either do an editable install with setuptools:
+You could either perform an editable install with setuptools:
 ```shell
 python setup.py develop
 ```
 
-or directly install it to your site package folder if you are developing extensions:
+or install a 'read-only' copy to your site package folder:
 ```shell
 pip install .
 ```
 
+If you would like to build a packaged wheel for installing in other environments, you can run the following command:
+```shell
+python setup.py bdist_wheel
+```
 
 ### Running Tests
 
 To make sure that everything works by running tests:
 ```shell
-pytest tests/unit
+cd tests
+pytest unit
 ```
 
 ### Building Documentation
diff --git a/fvdb/env/build_requirements.txt b/fvdb/env/build_requirements.txt
index 885008c6df..68f3a64a35 100644
--- a/fvdb/env/build_requirements.txt
+++ b/fvdb/env/build_requirements.txt
@@ -1,4 +1,3 @@
-torch >= 2.2.0+cu121
 pip >= 23.3.1
 setuptools >= 68.2.2
 wheel
diff --git a/fvdb/env/dev_environment.yml b/fvdb/env/dev_environment.yml
index cc97a92f75..1c3b4f8c47 100644
--- a/fvdb/env/dev_environment.yml
+++ b/fvdb/env/dev_environment.yml
@@ -24,6 +24,7 @@ dependencies:
   - cuda-cccl=12.1
   - cuda-libraries-static=12.1
   - cuda-cudart-static=12.1
+  - libcurand-dev
   - gcc_linux-64=11
   - gxx_linux-64=11
   - cxx-compiler
diff --git a/fvdb/fvdb/nn/__init__.py b/fvdb/fvdb/nn/__init__.py
index d7831da56d..b6aedb49a4 100644
--- a/fvdb/fvdb/nn/__init__.py
+++ b/fvdb/fvdb/nn/__init__.py
@@ -3,6 +3,9 @@
 #
 from .gaussian_splatting import GaussianSplat3D
 from .modules import (
+    CELU,
+    ELU,
+    GELU,
     SELU,
     AvgPool,
     BatchNorm,
@@ -14,6 +17,7 @@
     MaxPool,
     ReLU,
     Sigmoid,
+    SiLU,
     SparseConv3d,
     Tanh,
     UpsamplingNearest,
@@ -34,7 +38,11 @@
     "Linear",
     "ReLU",
     "LeakyReLU",
+    "ELU",
+    "CELU",
+    "GELU",
     "SELU",
+    "SiLU",
     "Tanh",
     "Sigmoid",
     "Dropout",
diff --git a/fvdb/fvdb/nn/modules.py b/fvdb/fvdb/nn/modules.py
index 7c6a394f37..f4fa407c68 100644
--- a/fvdb/fvdb/nn/modules.py
+++ b/fvdb/fvdb/nn/modules.py
@@ -428,6 +428,9 @@ def forward(self, input: VDBTensor) -> VDBTensor:
         return VDBTensor(input.grid, input.grid.jagged_like(result_data), input.kmap)
 
 
+# Non-linear Activations
+
+
 @fvnn_module
 class ElementwiseMixin:
     def forward(self, input: VDBTensor) -> VDBTensor:
@@ -436,6 +439,36 @@ def forward(self, input: VDBTensor) -> VDBTensor:
         return VDBTensor(input.grid, input.data.jagged_like(res), input.kmap)
 
 
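+# A minimal usage sketch for the activation modules below (hypothetical
+# example; assumes a built fvdb and mirrors the public grid-building API):
+#
+#   import torch
+#   import fvdb
+#   from fvdb.nn import VDBTensor, ELU
+#
+#   ijk = fvdb.JaggedTensor([torch.randint(0, 16, (100, 3), dtype=torch.int32)])
+#   grid = fvdb.gridbatch_from_ijk(ijk)
+#   feats = grid.jagged_like(torch.randn(grid.total_voxels, 8))
+#   out = ELU(alpha=1.0)(VDBTensor(grid, feats))  # same grid, activated features
+
+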
+class ELU(ElementwiseMixin, nn.ELU):
+    r"""
+    Applies the Exponential Linear Unit function element-wise:
+
+    .. math::
+        \text{ELU}(x) = \begin{cases}
+        x, & \text{ if } x > 0\\
+        \alpha * (\exp(x) - 1), & \text{ if } x \leq 0
+        \end{cases}
+    """
+
+
+class CELU(ElementwiseMixin, nn.CELU):
+    r"""
+    Applies the CELU function element-wise.
+
+    .. math::
+        \text{CELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x/\alpha) - 1))
+    """
+
+
+class GELU(ElementwiseMixin, nn.GELU):
+    r"""
+    Applies the Gaussian Error Linear Units function.
+
+    .. math:: \text{GELU}(x) = x * \Phi(x)
+
+    where :math:`\Phi(x)` is the Cumulative Distribution Function for Gaussian Distribution.
+    """
+
+
 class Linear(ElementwiseMixin, nn.Linear):
     r"""
     Applies a linear transformation to the incoming data: :math:`y = xA^T + b`.
@@ -483,6 +516,9 @@ class Sigmoid(ElementwiseMixin, nn.Sigmoid):
     """
 
 
+# Dropout Layers
+
+
 class Dropout(ElementwiseMixin, nn.Dropout):
     r"""
     During training, randomly zeroes some of the elements of the input tensor with probability :attr:`p`
diff --git a/fvdb/fvdb/utils/data/__init__.py b/fvdb/fvdb/utils/data/__init__.py
new file mode 100644
index 0000000000..b048e66635
--- /dev/null
+++ b/fvdb/fvdb/utils/data/__init__.py
@@ -0,0 +1,6 @@
+# Copyright Contributors to the OpenVDB Project
+# SPDX-License-Identifier: Apache-2.0
+#
+from .colmap_dataset import ColmapDataset, ColmapParser
+
+__all__ = ["ColmapParser", "ColmapDataset"]
diff --git a/fvdb/fvdb/utils/data/_colmap_utils/__init__.py b/fvdb/fvdb/utils/data/_colmap_utils/__init__.py
new file mode 100644
index 0000000000..7044ce39dc
--- /dev/null
+++ b/fvdb/fvdb/utils/data/_colmap_utils/__init__.py
@@ -0,0 +1,5 @@
+from .camera import Camera
+from .database import COLMAPDatabase
+from .image import Image
+from .rotation import DualQuaternion, Quaternion
+from .scene_manager import SceneManager
diff --git a/fvdb/fvdb/utils/data/_colmap_utils/camera.py b/fvdb/fvdb/utils/data/_colmap_utils/camera.py
new file mode 100644
index 0000000000..e40eb3a5f8
--- /dev/null
+++ b/fvdb/fvdb/utils/data/_colmap_utils/camera.py
@@ -0,0 +1,268 @@
+# Author: True Price
+
+import numpy as np
+from scipy.optimize import root
+
+# -------------------------------------------------------------------------------
+#
+# camera distortion functions for arrays of size (..., 2)
+#
+# -------------------------------------------------------------------------------
+
+
+def simple_radial_distortion(camera, x):
+    return x * (1.0 + camera.k1 * np.square(x).sum(axis=-1, keepdims=True))
+
+
+def radial_distortion(camera, x):
+    r_sq = np.square(x).sum(axis=-1, keepdims=True)
+    return x * (1.0 + r_sq * (camera.k1 + camera.k2 * r_sq))
+
+
+def opencv_distortion(camera, x):
+    x_sq = np.square(x)
+    xy = np.prod(x, axis=-1, keepdims=True)
+    r_sq = x_sq.sum(axis=-1, keepdims=True)
+
+    # tangential terms use x^2 and y^2 separately, taken from the last axis
+    return x * (1.0 + r_sq * (camera.k1 + camera.k2 * r_sq)) + np.concatenate(
+        (
+            2.0 * camera.p1 * xy + camera.p2 * (r_sq + 2.0 * x_sq[..., :1]),
+            camera.p1 * (r_sq + 2.0 * x_sq[..., 1:]) + 2.0 * camera.p2 * xy,
+        ),
+        axis=-1,
+    )
+
+
+# -------------------------------------------------------------------------------
+#
+# Camera
+#
+# -------------------------------------------------------------------------------
+
+
+class Camera:
+    @staticmethod
+    def GetNumParams(type_):
+        if type_ == 0 or type_ == "SIMPLE_PINHOLE":
+            return 3
+        if type_ == 1 or type_ == "PINHOLE":
+            return 4
+        if type_ == 2 or type_ == "SIMPLE_RADIAL":
+            return 4
+        if type_ == 3 or type_ == "RADIAL":
+            return 5
+        if type_ == 4 or type_ == "OPENCV":
+            return 8
+        if type_ == 5 or type_ == "OPENCV_FISHEYE":
+            return 8
+        # if type_ == 6 or type_ == 'FULL_OPENCV':
+        #     return
12 + # if type_ == 7 or type_ == 'FOV': + # return 5 + # if type_ == 8 or type_ == 'SIMPLE_RADIAL_FISHEYE': + # return 4 + # if type_ == 9 or type_ == 'RADIAL_FISHEYE': + # return 5 + # if type_ == 10 or type_ == 'THIN_PRISM_FISHEYE': + # return 12 + + # TODO: not supporting other camera types, currently + raise Exception("Camera type not supported") + + # --------------------------------------------------------------------------- + + @staticmethod + def GetNameFromType(type_): + if type_ == 0: + return "SIMPLE_PINHOLE" + if type_ == 1: + return "PINHOLE" + if type_ == 2: + return "SIMPLE_RADIAL" + if type_ == 3: + return "RADIAL" + if type_ == 4: + return "OPENCV" + if type_ == 5: + return "OPENCV_FISHEYE" + # if type_ == 6: return 'FULL_OPENCV' + # if type_ == 7: return 'FOV' + # if type_ == 8: return 'SIMPLE_RADIAL_FISHEYE' + # if type_ == 9: return 'RADIAL_FISHEYE' + # if type_ == 10: return 'THIN_PRISM_FISHEYE' + + raise Exception("Camera type not supported") + + # --------------------------------------------------------------------------- + + def __init__(self, type_, width_, height_, params): + self.width = width_ + self.height = height_ + + if type_ == 0 or type_ == "SIMPLE_PINHOLE": + self.fx, self.cx, self.cy = params + self.fy = self.fx + self.distortion_func = None + self.camera_type = 0 + + elif type_ == 1 or type_ == "PINHOLE": + self.fx, self.fy, self.cx, self.cy = params + self.distortion_func = None + self.camera_type = 1 + + elif type_ == 2 or type_ == "SIMPLE_RADIAL": + self.fx, self.cx, self.cy, self.k1 = params + self.fy = self.fx + self.distortion_func = simple_radial_distortion + self.camera_type = 2 + + elif type_ == 3 or type_ == "RADIAL": + self.fx, self.cx, self.cy, self.k1, self.k2 = params + self.fy = self.fx + self.distortion_func = radial_distortion + self.camera_type = 3 + + elif type_ == 4 or type_ == "OPENCV": + self.fx, self.fy, self.cx, self.cy = params[:4] + self.k1, self.k2, self.p1, self.p2 = params[4:] + self.distortion_func = opencv_distortion + self.camera_type = 4 + + elif type_ == 5 or type_ == "OPENCV_FISHEYE": + self.fx, self.fy, self.cx, self.cy = params[:4] + self.k1, self.k2, self.k3, self.k4 = params[4:] + + def error_fn(camera, x): + raise Exception("Fisheye distortion not supported") + + self.distortion_func = error_fn + self.camera_type = 5 + + else: + raise Exception("Camera type not supported") + + # --------------------------------------------------------------------------- + + def __str__(self): + s = self.GetNameFromType(self.camera_type) + " {} {} {}".format(self.width, self.height, self.fx) + + if self.camera_type in (1, 4): # PINHOLE, OPENCV + s += " {}".format(self.fy) + + s += " {} {}".format(self.cx, self.cy) + + if self.camera_type == 2: # SIMPLE_RADIAL + s += " {}".format(self.k1) + + elif self.camera_type == 3: # RADIAL + s += " {} {}".format(self.k1, self.k2) + + elif self.camera_type == 4: # OPENCV + s += " {} {} {} {}".format(self.k1, self.k2, self.p1, self.p2) + + elif self.camera_type == 5: # OPENCV_FISHEYE + s += " {} {} {} {}".format(self.k1, self.k2, self.k3, self.k4) + + return s + + # --------------------------------------------------------------------------- + + # return the camera parameters in the same order as the colmap output format + def get_params(self): + if self.camera_type == 0: + return np.array((self.fx, self.cx, self.cy)) + if self.camera_type == 1: + return np.array((self.fx, self.fy, self.cx, self.cy)) + if self.camera_type == 2: + return np.array((self.fx, self.cx, self.cy, self.k1)) + if 
self.camera_type == 3: + return np.array((self.fx, self.cx, self.cy, self.k1, self.k2)) + if self.camera_type == 4: + return np.array((self.fx, self.fy, self.cx, self.cy, self.k1, self.k2, self.p1, self.p2)) + if self.camera_type == 5: + return np.array((self.fx, self.fy, self.cx, self.cy, self.k1, self.k2, self.k3, self.k4)) + + # --------------------------------------------------------------------------- + + def get_camera_matrix(self): + return np.array(((self.fx, 0, self.cx), (0, self.fy, self.cy), (0, 0, 1))) + + def get_inverse_camera_matrix(self): + return np.array(((1.0 / self.fx, 0, -self.cx / self.fx), (0, 1.0 / self.fy, -self.cy / self.fy), (0, 0, 1))) + + @property + def K(self): + return self.get_camera_matrix() + + @property + def K_inv(self): + return self.get_inverse_camera_matrix() + + # --------------------------------------------------------------------------- + + # return the inverse camera matrix + def get_inv_camera_matrix(self): + inv_fx, inv_fy = 1.0 / self.fx, 1.0 / self.fy + return np.array(((inv_fx, 0, -inv_fx * self.cx), (0, inv_fy, -inv_fy * self.cy), (0, 0, 1))) + + # --------------------------------------------------------------------------- + + # return an (x, y) pixel coordinate grid for this camera + def get_image_grid(self): + xmin = (0.5 - self.cx) / self.fx + xmax = (self.width - 0.5 - self.cx) / self.fx + ymin = (0.5 - self.cy) / self.fy + ymax = (self.height - 0.5 - self.cy) / self.fy + return np.meshgrid(np.linspace(xmin, xmax, self.width), np.linspace(ymin, ymax, self.height)) + + # --------------------------------------------------------------------------- + + # x: array of shape (N,2) or (2,) + # normalized: False if the input points are in pixel coordinates + # denormalize: True if the points should be put back into pixel coordinates + def distort_points(self, x, normalized=True, denormalize=True): + x = np.atleast_2d(x) + + # put the points into normalized camera coordinates + if not normalized: + x -= np.array([[self.cx, self.cy]]) + x /= np.array([[self.fx, self.fy]]) + + # distort, if necessary + if self.distortion_func is not None: + x = self.distortion_func(self, x) + + if denormalize: + x *= np.array([[self.fx, self.fy]]) + x += np.array([[self.cx, self.cy]]) + + return x + + # --------------------------------------------------------------------------- + + # x: array of shape (N1,N2,...,2), (N,2), or (2,) + # normalized: False if the input points are in pixel coordinates + # denormalize: True if the points should be put back into pixel coordinates + def undistort_points(self, x, normalized=False, denormalize=True): + x = np.atleast_2d(x) + + # put the points into normalized camera coordinates + if not normalized: + x = x - np.array([self.cx, self.cy]) # creates a copy + x /= np.array([self.fx, self.fy]) + + # undistort, if necessary + if self.distortion_func is not None: + + def objective(xu): + return (x - self.distortion_func(self, xu.reshape(*x.shape))).ravel() + + xu = root(objective, x).x.reshape(*x.shape) + else: + xu = x + + if denormalize: + xu *= np.array([[self.fx, self.fy]]) + xu += np.array([[self.cx, self.cy]]) + + return xu diff --git a/fvdb/fvdb/utils/data/_colmap_utils/database.py b/fvdb/fvdb/utils/data/_colmap_utils/database.py new file mode 100644 index 0000000000..ec150ffbea --- /dev/null +++ b/fvdb/fvdb/utils/data/_colmap_utils/database.py @@ -0,0 +1,338 @@ +import os +import sqlite3 + +import numpy as np + +# ------------------------------------------------------------------------------- +# convert SQLite BLOBs to/from 
numpy arrays
+
+
+def array_to_blob(arr):
+    # np.getbuffer() is Python 2 only; tobytes() yields the equivalent bytes
+    return arr.tobytes()
+
+
+def blob_to_array(blob, dtype, shape=(-1,)):
+    return np.frombuffer(blob, dtype).reshape(*shape)
+
+
+# -------------------------------------------------------------------------------
+# convert to/from image pair ids
+
+MAX_IMAGE_ID = 2**31 - 1
+
+
+def get_pair_id(image_id1, image_id2):
+    if image_id1 > image_id2:
+        image_id1, image_id2 = image_id2, image_id1
+    return image_id1 * MAX_IMAGE_ID + image_id2
+
+
+def get_image_ids_from_pair_id(pair_id):
+    image_id2 = pair_id % MAX_IMAGE_ID
+    # integer division so that the recovered ids are ints, not floats
+    return (pair_id - image_id2) // MAX_IMAGE_ID, image_id2
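+
+
+# A quick sanity sketch of the pair-id mapping (hypothetical values):
+#   get_pair_id(7, 3) == get_pair_id(3, 7) == 3 * MAX_IMAGE_ID + 7
+#   get_image_ids_from_pair_id(3 * MAX_IMAGE_ID + 7) == (3, 7)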
+
+
+# -------------------------------------------------------------------------------
+# create table commands
+
+CREATE_CAMERAS_TABLE = """CREATE TABLE IF NOT EXISTS cameras (
+    camera_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    model INTEGER NOT NULL,
+    width INTEGER NOT NULL,
+    height INTEGER NOT NULL,
+    params BLOB,
+    prior_focal_length INTEGER NOT NULL)"""
+
+CREATE_DESCRIPTORS_TABLE = """CREATE TABLE IF NOT EXISTS descriptors (
+    image_id INTEGER PRIMARY KEY NOT NULL,
+    rows INTEGER NOT NULL,
+    cols INTEGER NOT NULL,
+    data BLOB,
+    FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)"""
+
+CREATE_IMAGES_TABLE = """CREATE TABLE IF NOT EXISTS images (
+    image_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    name TEXT NOT NULL UNIQUE,
+    camera_id INTEGER NOT NULL,
+    prior_qw REAL,
+    prior_qx REAL,
+    prior_qy REAL,
+    prior_qz REAL,
+    prior_tx REAL,
+    prior_ty REAL,
+    prior_tz REAL,
+    CONSTRAINT image_id_check CHECK(image_id >= 0 and image_id < 2147483647),
+    FOREIGN KEY(camera_id) REFERENCES cameras(camera_id))"""
+
+CREATE_INLIER_MATCHES_TABLE = """CREATE TABLE IF NOT EXISTS two_view_geometries (
+    pair_id INTEGER PRIMARY KEY NOT NULL,
+    rows INTEGER NOT NULL,
+    cols INTEGER NOT NULL,
+    data BLOB,
+    config INTEGER NOT NULL,
+    F BLOB,
+    E BLOB,
+    H BLOB)"""
+
+CREATE_KEYPOINTS_TABLE = """CREATE TABLE IF NOT EXISTS keypoints (
+    image_id INTEGER PRIMARY KEY NOT NULL,
+    rows INTEGER NOT NULL,
+    cols INTEGER NOT NULL,
+    data BLOB,
+    FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)"""
+
+CREATE_MATCHES_TABLE = """CREATE TABLE IF NOT EXISTS matches (
+    pair_id INTEGER PRIMARY KEY NOT NULL,
+    rows INTEGER NOT NULL,
+    cols INTEGER NOT NULL,
+    data BLOB)"""
+
+CREATE_NAME_INDEX = "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)"
+
+CREATE_ALL = "; ".join(
+    [
+        CREATE_CAMERAS_TABLE,
+        CREATE_DESCRIPTORS_TABLE,
+        CREATE_IMAGES_TABLE,
+        CREATE_INLIER_MATCHES_TABLE,
+        CREATE_KEYPOINTS_TABLE,
+        CREATE_MATCHES_TABLE,
+        CREATE_NAME_INDEX,
+    ]
+)
+
+
+# -------------------------------------------------------------------------------
+# functional interface for adding objects
+
+
+def add_camera(db, model, width, height, params, prior_focal_length=False, camera_id=None):
+    # TODO: Parameter count checks
+    params = np.asarray(params, np.float64)
+    db.execute(
+        "INSERT INTO cameras VALUES (?, ?, ?, ?, ?, ?)",
+        (camera_id, model, width, height, array_to_blob(params), prior_focal_length),
+    )
+
+
+def add_descriptors(db, image_id, descriptors):
+    descriptors = np.ascontiguousarray(descriptors, np.uint8)
+    db.execute(
+        "INSERT INTO descriptors VALUES (?, ?, ?, ?)", (image_id,) + descriptors.shape + (array_to_blob(descriptors),)
+    )
+
+
+def add_image(db, name, camera_id, prior_q=np.zeros(4), prior_t=np.zeros(3), image_id=None):
+    db.execute(
+        "INSERT INTO images VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+        (image_id, name, camera_id, prior_q[0], prior_q[1], prior_q[2], prior_q[3], prior_t[0], prior_t[1], prior_t[2]),
+    )
+
+
+# config: defaults to fundamental matrix
+def add_inlier_matches(db, image_id1, image_id2, matches, config=2, F=None, E=None, H=None):
+    assert len(matches.shape) == 2
+    assert matches.shape[1] == 2
+
+    if image_id1 > image_id2:
+        matches = matches[:, ::-1]
+
+    # F/E/H are stored as BLOBs; raw ndarrays cannot be bound by sqlite3
+    if F is not None:
+        F = array_to_blob(np.asarray(F, np.float64))
+    if E is not None:
+        E = array_to_blob(np.asarray(E, np.float64))
+    if H is not None:
+        H = array_to_blob(np.asarray(H, np.float64))
+
+    pair_id = get_pair_id(image_id1, image_id2)
+    matches = np.asarray(matches, np.uint32)
+    db.execute(
+        "INSERT INTO two_view_geometries VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+        (pair_id,) + matches.shape + (array_to_blob(matches), config, F, E, H),
+    )
+
+
+def add_keypoints(db, image_id, keypoints):
+    assert len(keypoints.shape) == 2
+    assert keypoints.shape[1] in [2, 4, 6]
+
+    keypoints = np.asarray(keypoints, np.float32)
+    db.execute("INSERT INTO keypoints VALUES (?, ?, ?, ?)", (image_id,) + keypoints.shape + (array_to_blob(keypoints),))
+
+
+def add_matches(db, image_id1, image_id2, matches):
+    assert len(matches.shape) == 2
+    assert matches.shape[1] == 2
+
+    if image_id1 > image_id2:
+        matches = matches[:, ::-1]
+
+    pair_id = get_pair_id(image_id1, image_id2)
+    matches = np.asarray(matches, np.uint32)
+    db.execute("INSERT INTO matches VALUES (?, ?, ?, ?)", (pair_id,) + matches.shape + (array_to_blob(matches),))
+
+
+# -------------------------------------------------------------------------------
+# simple functional interface
+
+
+class COLMAPDatabase(sqlite3.Connection):
+    @staticmethod
+    def connect(database_path):
+        return sqlite3.connect(database_path, factory=COLMAPDatabase)
+
+    def __init__(self, *args, **kwargs):
+        super(COLMAPDatabase, self).__init__(*args, **kwargs)
+
+        self.initialize_tables = lambda: self.executescript(CREATE_ALL)
+
+        self.initialize_cameras = lambda: self.executescript(CREATE_CAMERAS_TABLE)
+        self.initialize_descriptors = lambda: self.executescript(CREATE_DESCRIPTORS_TABLE)
+        self.initialize_images = lambda: self.executescript(CREATE_IMAGES_TABLE)
+        self.initialize_inlier_matches = lambda: self.executescript(CREATE_INLIER_MATCHES_TABLE)
+        self.initialize_keypoints = lambda: self.executescript(CREATE_KEYPOINTS_TABLE)
+        self.initialize_matches = lambda: self.executescript(CREATE_MATCHES_TABLE)
+
+        self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX)
+
+    add_camera = add_camera
+    add_descriptors = add_descriptors
+    add_image = add_image
+    add_inlier_matches = add_inlier_matches
+    add_keypoints = add_keypoints
+    add_matches = add_matches
+
+
+# -------------------------------------------------------------------------------
+
+
+def main(args):
+    import os
+
+    if os.path.exists(args.database_path):
+        print("Error: database path already exists -- will not modify it.")
+        exit()
+
+    db = COLMAPDatabase.connect(args.database_path)
+
+    #
+    # for convenience, try creating all the tables upfront
+    #
+
+    db.initialize_tables()
+
+    #
+    # create dummy cameras
+    #
+
+    model1, w1, h1, params1 = 0, 1024, 768, np.array((1024.0, 512.0, 384.0))
+    model2, w2, h2, params2 = 2, 1024, 768, np.array((1024.0, 512.0, 384.0, 0.1))
+
+    db.add_camera(model1, w1, h1, params1)
+    db.add_camera(model2, w2, h2, params2)
+
+    #
+    # create dummy images
+    #
+
+    db.add_image("image1.png", 0)
+    db.add_image("image2.png", 0)
+    db.add_image("image3.png", 2)
+    db.add_image("image4.png", 2)
+
+    #
+    # create dummy keypoints; note that COLMAP supports 2D keypoints (x, y),
+    # 4D keypoints (x, y, theta, scale), and 6D affine keypoints
+    # (x, y, a_11, a_12, a_21, a_22)
+    #
+
+    N = 1000
+    kp1 = np.random.rand(N, 2) * (1024.0, 768.0)
+    kp2 = np.random.rand(N, 2) * (1024.0, 768.0)
+    kp3 = np.random.rand(N, 2) * (1024.0, 768.0)
+    kp4 = np.random.rand(N, 2) * (1024.0, 768.0)
+
+    db.add_keypoints(1, kp1)
+    db.add_keypoints(2, kp2)
+    db.add_keypoints(3, kp3)
+    db.add_keypoints(4, kp4)
+
+    #
+    # create dummy matches
+    #
+
+    M = 50
+    m12 = np.random.randint(N, size=(M, 2))
+    m23 = np.random.randint(N, size=(M, 2))
+    m34 = np.random.randint(N, size=(M, 2))
+
+    db.add_matches(1, 2, m12)
+    db.add_matches(2, 3, m23)
+    db.add_matches(3, 4, m34)
+
+    #
+    # check cameras
+    #
+
+    rows = db.execute("SELECT * FROM cameras")
+
+    camera_id, model, width, height, params, prior = next(rows)
+    # camera params are stored as float64 blobs
+    params = blob_to_array(params, np.float64)
+    assert model == model1 and width == w1 and height == h1
+    assert np.allclose(params, params1)
+
+    camera_id, model, width, height, params, prior = next(rows)
+    params = blob_to_array(params, np.float64)
+    assert model == model2 and width == w2 and height == h2
+    assert np.allclose(params, params2)
+
+    #
+    # check keypoints
+    #
+
+    kps = dict(
+        (image_id, blob_to_array(data, np.float32, (-1, 2)))
+        for image_id, data in db.execute("SELECT image_id, data FROM keypoints")
+    )
+
+    assert np.allclose(kps[1], kp1)
+    assert np.allclose(kps[2], kp2)
+    assert np.allclose(kps[3], kp3)
+    assert np.allclose(kps[4], kp4)
+
+    #
+    # check matches
+    #
+
+    pair_ids = [get_pair_id(*pair) for pair in [(1, 2), (2, 3), (3, 4)]]
+
+    matches = dict(
+        (get_image_ids_from_pair_id(pair_id), blob_to_array(data, np.uint32, (-1, 2)))
+        for pair_id, data in db.execute("SELECT pair_id, data FROM matches")
+    )
+
+    assert np.all(matches[(1, 2)] == m12)
+    assert np.all(matches[(2, 3)] == m23)
+    assert np.all(matches[(3, 4)] == m34)
+
+    #
+    # clean up
+    #
+
+    db.close()
+    os.remove(args.database_path)
+
+
+# -------------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument("--database_path", type=str, default="database.db")
+
+    args = parser.parse_args()
+
+    main(args)
diff --git a/fvdb/fvdb/utils/data/_colmap_utils/image.py b/fvdb/fvdb/utils/data/_colmap_utils/image.py
new file mode 100644
index 0000000000..eb6a226832
--- /dev/null
+++ b/fvdb/fvdb/utils/data/_colmap_utils/image.py
@@ -0,0 +1,36 @@
+# Author: True Price
+
+import numpy as np
+
+# -------------------------------------------------------------------------------
+#
+# Image
+#
+# -------------------------------------------------------------------------------
+
+
+class Image:
+    def __init__(self, name_, camera_id_, q_, tvec_):
+        self.name = name_
+        self.camera_id = camera_id_
+        self.q = q_
+        self.tvec = tvec_
+
+        self.points2D = np.empty((0, 2), dtype=np.float64)
+        self.point3D_ids = np.empty((0,), dtype=np.uint64)
+
+    # ---------------------------------------------------------------------------
+
+    def R(self):
+        return self.q.ToR()
+
+    # ---------------------------------------------------------------------------
+
+    def C(self):
+        return -self.R().T.dot(self.tvec)
+
+    # ---------------------------------------------------------------------------
+
+    @property
+    def t(self):
+        return self.tvec
diff --git a/fvdb/fvdb/utils/data/_colmap_utils/rotation.py b/fvdb/fvdb/utils/data/_colmap_utils/rotation.py
new file mode
100644 index 0000000000..90ec9533bf --- /dev/null +++ b/fvdb/fvdb/utils/data/_colmap_utils/rotation.py @@ -0,0 +1,344 @@ +# Author: True Price + +import numpy as np + +# ------------------------------------------------------------------------------- +# +# Axis-Angle Functions +# +# ------------------------------------------------------------------------------- + + +# returns the cross product matrix representation of a 3-vector v +def cross_prod_matrix(v): + return np.array(((0.0, -v[2], v[1]), (v[2], 0.0, -v[0]), (-v[1], v[0], 0.0))) + + +# ------------------------------------------------------------------------------- + + +# www.euclideanspace.com/maths/geometry/rotations/conversions/angleToMatrix/ +# if angle is None, assume ||axis|| == angle, in radians +# if angle is not None, assume that axis is a unit vector +def axis_angle_to_rotation_matrix(axis, angle=None): + if angle is None: + angle = np.linalg.norm(axis) + if np.abs(angle) > np.finfo("float").eps: + axis = axis / angle + + cp_axis = cross_prod_matrix(axis) + return np.eye(3) + (np.sin(angle) * cp_axis + (1.0 - np.cos(angle)) * cp_axis.dot(cp_axis)) + + +# ------------------------------------------------------------------------------- + + +# after some deliberation, I've decided the easiest way to do this is to use +# quaternions as an intermediary +def rotation_matrix_to_axis_angle(R): + return Quaternion.FromR(R).ToAxisAngle() + + +# ------------------------------------------------------------------------------- +# +# Quaternion +# +# ------------------------------------------------------------------------------- + + +class Quaternion: + # create a quaternion from an existing rotation matrix + # euclideanspace.com/maths/geometry/rotations/conversions/matrixToQuaternion/ + @staticmethod + def FromR(R): + trace = np.trace(R) + + if trace > 0: + qw = 0.5 * np.sqrt(1.0 + trace) + qx = (R[2, 1] - R[1, 2]) * 0.25 / qw + qy = (R[0, 2] - R[2, 0]) * 0.25 / qw + qz = (R[1, 0] - R[0, 1]) * 0.25 / qw + elif R[0, 0] > R[1, 1] and R[0, 0] > R[2, 2]: + s = 2.0 * np.sqrt(1.0 + R[0, 0] - R[1, 1] - R[2, 2]) + qw = (R[2, 1] - R[1, 2]) / s + qx = 0.25 * s + qy = (R[0, 1] + R[1, 0]) / s + qz = (R[0, 2] + R[2, 0]) / s + elif R[1, 1] > R[2, 2]: + s = 2.0 * np.sqrt(1.0 + R[1, 1] - R[0, 0] - R[2, 2]) + qw = (R[0, 2] - R[2, 0]) / s + qx = (R[0, 1] + R[1, 0]) / s + qy = 0.25 * s + qz = (R[1, 2] + R[2, 1]) / s + else: + s = 2.0 * np.sqrt(1.0 + R[2, 2] - R[0, 0] - R[1, 1]) + qw = (R[1, 0] - R[0, 1]) / s + qx = (R[0, 2] + R[2, 0]) / s + qy = (R[1, 2] + R[2, 1]) / s + qz = 0.25 * s + + return Quaternion(np.array((qw, qx, qy, qz))) + + # if angle is None, assume ||axis|| == angle, in radians + # if angle is not None, assume that axis is a unit vector + @staticmethod + def FromAxisAngle(axis, angle=None): + if angle is None: + angle = np.linalg.norm(axis) + if np.abs(angle) > np.finfo("float").eps: + axis = axis / angle + + qw = np.cos(0.5 * angle) + axis = axis * np.sin(0.5 * angle) + + return Quaternion(np.array((qw, axis[0], axis[1], axis[2]))) + + # --------------------------------------------------------------------------- + + def __init__(self, q=np.array((1.0, 0.0, 0.0, 0.0))): + if isinstance(q, Quaternion): + self.q = q.q.copy() + else: + q = np.asarray(q) + if q.size == 4: + self.q = q.copy() + elif q.size == 3: # convert from a 3-vector to a quaternion + self.q = np.empty(4) + self.q[0], self.q[1:] = 0.0, q.ravel() + else: + raise Exception("Input quaternion should be a 3- or 4-vector") + + def __add__(self, other): + return Quaternion(self.q + 
other.q) + + def __iadd__(self, other): + self.q += other.q + return self + + # conjugation via the ~ operator + def __invert__(self): + return Quaternion(np.array((self.q[0], -self.q[1], -self.q[2], -self.q[3]))) + + # returns: self.q * other.q if other is a Quaternion; otherwise performs + # scalar multiplication + def __mul__(self, other): + if isinstance(other, Quaternion): # quaternion multiplication + return Quaternion( + np.array( + ( + self.q[0] * other.q[0] + - self.q[1] * other.q[1] + - self.q[2] * other.q[2] + - self.q[3] * other.q[3], + self.q[0] * other.q[1] + + self.q[1] * other.q[0] + + self.q[2] * other.q[3] + - self.q[3] * other.q[2], + self.q[0] * other.q[2] + - self.q[1] * other.q[3] + + self.q[2] * other.q[0] + + self.q[3] * other.q[1], + self.q[0] * other.q[3] + + self.q[1] * other.q[2] + - self.q[2] * other.q[1] + + self.q[3] * other.q[0], + ) + ) + ) + else: # scalar multiplication (assumed) + return Quaternion(other * self.q) + + def __rmul__(self, other): + return self * other + + def __imul__(self, other): + self.q[:] = (self * other).q + return self + + def __irmul__(self, other): + self.q[:] = (self * other).q + return self + + def __neg__(self): + return Quaternion(-self.q) + + def __sub__(self, other): + return Quaternion(self.q - other.q) + + def __isub__(self, other): + self.q -= other.q + return self + + def __str__(self): + return str(self.q) + + def copy(self): + return Quaternion(self) + + def dot(self, other): + return self.q.dot(other.q) + + # assume the quaternion is nonzero! + def inverse(self): + return Quaternion((~self).q / self.q.dot(self.q)) + + def norm(self): + return np.linalg.norm(self.q) + + def normalize(self): + self.q /= np.linalg.norm(self.q) + return self + + # assume x is a Nx3 numpy array or a numpy 3-vector + def rotate_points(self, x): + x = np.atleast_2d(x) + return x.dot(self.ToR().T) + + # convert to a rotation matrix + def ToR(self): + return np.eye(3) + 2 * np.array( + ( + ( + -self.q[2] * self.q[2] - self.q[3] * self.q[3], + self.q[1] * self.q[2] - self.q[3] * self.q[0], + self.q[1] * self.q[3] + self.q[2] * self.q[0], + ), + ( + self.q[1] * self.q[2] + self.q[3] * self.q[0], + -self.q[1] * self.q[1] - self.q[3] * self.q[3], + self.q[2] * self.q[3] - self.q[1] * self.q[0], + ), + ( + self.q[1] * self.q[3] - self.q[2] * self.q[0], + self.q[2] * self.q[3] + self.q[1] * self.q[0], + -self.q[1] * self.q[1] - self.q[2] * self.q[2], + ), + ) + ) + + # convert to axis-angle representation, with angle encoded by the length + def ToAxisAngle(self): + # recall that for axis-angle representation (a, angle), with "a" unit: + # q = (cos(angle/2), a * sin(angle/2)) + # below, for readability, "theta" actually means half of the angle + + sin_sq_theta = self.q[1:].dot(self.q[1:]) + + # if theta is non-zero, then we can compute a unique rotation + if np.abs(sin_sq_theta) > np.finfo("float").eps: + sin_theta = np.sqrt(sin_sq_theta) + cos_theta = self.q[0] + + # atan2 is more stable, so we use it to compute theta + # note that we multiply by 2 to get the actual angle + angle = 2.0 * (np.arctan2(-sin_theta, -cos_theta) if cos_theta < 0.0 else np.arctan2(sin_theta, cos_theta)) + + return self.q[1:] * (angle / sin_theta) + + # otherwise, the result is singular, and we avoid dividing by + # sin(angle/2) = 0 + return np.zeros(3) + + # euclideanspace.com/maths/geometry/rotations/conversions/quaternionToEuler + # this assumes the quaternion is non-zero + # returns yaw, pitch, roll, with application in that order + def ToEulerAngles(self): + qsq = 
self.q**2
+        k = 2.0 * (self.q[0] * self.q[3] + self.q[1] * self.q[2]) / qsq.sum()
+
+        if (1.0 - k) < np.finfo("float").eps:  # north pole singularity
+            return 2.0 * np.arctan2(self.q[1], self.q[0]), 0.5 * np.pi, 0.0
+        if (1.0 + k) < np.finfo("float").eps:  # south pole singularity
+            return -2.0 * np.arctan2(self.q[1], self.q[0]), -0.5 * np.pi, 0.0
+
+        yaw = np.arctan2(2.0 * (self.q[0] * self.q[2] - self.q[1] * self.q[3]), qsq[0] + qsq[1] - qsq[2] - qsq[3])
+        pitch = np.arcsin(k)
+        roll = np.arctan2(2.0 * (self.q[0] * self.q[1] - self.q[2] * self.q[3]), qsq[0] - qsq[1] + qsq[2] - qsq[3])
+
+        return yaw, pitch, roll
+
+
+# -------------------------------------------------------------------------------
+#
+# DualQuaternion
+#
+# -------------------------------------------------------------------------------
+
+
+class DualQuaternion:
+    # DualQuaternion from an existing rotation + translation
+    @staticmethod
+    def FromQT(q, t):
+        return DualQuaternion(qe=(0.5 * np.asarray(t))) * DualQuaternion(q)
+
+    def __init__(self, q0=np.array((1.0, 0.0, 0.0, 0.0)), qe=np.zeros(4)):
+        self.q0, self.qe = Quaternion(q0), Quaternion(qe)
+
+    def __add__(self, other):
+        return DualQuaternion(self.q0 + other.q0, self.qe + other.qe)
+
+    def __iadd__(self, other):
+        self.q0 += other.q0
+        self.qe += other.qe
+        return self
+
+    # conjugation via the ~ operator
+    def __invert__(self):
+        return DualQuaternion(~self.q0, ~self.qe)
+
+    def __mul__(self, other):
+        if isinstance(other, DualQuaternion):
+            return DualQuaternion(self.q0 * other.q0, self.q0 * other.qe + self.qe * other.q0)
+        elif isinstance(other, complex):  # multiplication by a dual number
+            return DualQuaternion(self.q0 * other.real, self.q0 * other.imag + self.qe * other.real)
+        else:  # scalar multiplication (assumed)
+            return DualQuaternion(other * self.q0, other * self.qe)
+
+    def __rmul__(self, other):
+        return self.__mul__(other)
+
+    def __imul__(self, other):
+        tmp = self * other
+        self.q0, self.qe = tmp.q0, tmp.qe
+        return self
+
+    def __neg__(self):
+        return DualQuaternion(-self.q0, -self.qe)
+
+    def __sub__(self, other):
+        return DualQuaternion(self.q0 - other.q0, self.qe - other.qe)
+
+    def __isub__(self, other):
+        self.q0 -= other.q0
+        self.qe -= other.qe
+        return self
+
+    # q^-1 = q* / ||q||^2
+    # assume that q0 is nonzero!
+    def inverse(self):
+        normsq = complex(self.q0.dot(self.q0), 2.0 * self.q0.q.dot(self.qe.q))
+        inv_len_real = 1.0 / normsq.real
+        return ~self * complex(inv_len_real, -normsq.imag * inv_len_real * inv_len_real)
+
+    # returns a complex representation of the real and imaginary parts of the norm
+    # assume that q0 is nonzero!
+    def norm(self):
+        q0_norm = self.q0.norm()
+        return complex(q0_norm, self.q0.dot(self.qe) / q0_norm)
+
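+    # usage sketch (hypothetical values): a rigid motion packs into a dual
+    # quaternion and unpacks again, up to floating point error:
+    #   dq = DualQuaternion.FromQT(Quaternion(), np.array((1.0, 2.0, 3.0)))
+    #   q, t = dq.ToQT()  # q is the identity rotation, t is (1.0, 2.0, 3.0)
+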
+    # assume that q0 is nonzero!
+    def normalize(self):
+        # current length is ||q0|| + eps * (<q0, qe> / ||q0||)
+        # writing this as a + eps * b, the inverse is
+        # 1/||q|| = 1/a - eps * b / a^2
+        norm = self.norm()
+        inv_len_real = 1.0 / norm.real
+        self *= complex(inv_len_real, -norm.imag * inv_len_real * inv_len_real)
+        return self
+
+    # return the translation vector for this dual quaternion
+    def getT(self):
+        return 2 * (self.qe * ~self.q0).q[1:]
+
+    def ToQT(self):
+        return self.q0, self.getT()
diff --git a/fvdb/fvdb/utils/data/_colmap_utils/scene_manager.py b/fvdb/fvdb/utils/data/_colmap_utils/scene_manager.py
new file mode 100644
index 0000000000..7cd2db9fd1
--- /dev/null
+++ b/fvdb/fvdb/utils/data/_colmap_utils/scene_manager.py
@@ -0,0 +1,651 @@
+# Author: True Price
+
+import array
+import os
+import struct
+from collections import OrderedDict
+from itertools import combinations
+
+import numpy as np
+
+from .camera import Camera
+from .image import Image
+from .rotation import Quaternion
+
+# -------------------------------------------------------------------------------
+#
+# SceneManager
+#
+# -------------------------------------------------------------------------------
+
+
+class SceneManager:
+    INVALID_POINT3D = np.iinfo(np.uint64).max
+
+    def __init__(self, colmap_results_folder, image_path=None):
+        self.folder = colmap_results_folder
+        if not self.folder.endswith("/"):
+            self.folder += "/"
+
+        self.image_path = None
+        self.load_colmap_project_file(image_path=image_path)
+
+        self.cameras = OrderedDict()
+        self.images = OrderedDict()
+        self.name_to_image_id = dict()
+
+        self.last_camera_id = 0
+        self.last_image_id = 0
+
+        # Nx3 array of point3D xyz's
+        self.points3D = np.zeros((0, 3))
+
+        # for each element in points3D, stores the id of the point
+        self.point3D_ids = np.empty(0)
+
+        # point3D_id => index in self.points3D
+        self.point3D_id_to_point3D_idx = dict()
+
+        # point3D_id => [(image_id, point2D idx in image)]
+        self.point3D_id_to_images = dict()
+
+        self.point3D_colors = np.zeros((0, 3), dtype=np.uint8)
+        self.point3D_errors = np.zeros(0)
+
+    # ---------------------------------------------------------------------------
+
+    def load_colmap_project_file(self, project_file=None, image_path=None):
+        if project_file is None:
+            project_file = self.folder + "project.ini"
+
+        self.image_path = image_path
+
+        if self.image_path is None:
+            try:
+                with open(project_file, "r") as f:
+                    for line in iter(f.readline, ""):
+                        if line.startswith("image_path"):
+                            self.image_path = line[11:].strip()
+                            break
+            except:
+                pass
+
+        if self.image_path is None:
+            print("Warning: image_path not found for reconstruction")
+        elif not self.image_path.endswith("/"):
+            self.image_path += "/"
+
+    # ---------------------------------------------------------------------------
+
+    def load(self):
+        self.load_cameras()
+        self.load_images()
+        self.load_points3D()
+
+    # ---------------------------------------------------------------------------
+
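+    # Typical usage sketch (path hypothetical):
+    #   scene = SceneManager("/path/to/colmap/sparse/0/")
+    #   scene.load()
+    #   cam = scene.cameras[1]     # Camera, keyed by camera_id
+    #   img = scene.images[1]      # Image, keyed by image_id
+    #   pts3d, pts2d = scene.get_points3D(1, return_points2D=True)
+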
+    def load_cameras(self, input_file=None):
+        if input_file is None:
+            input_file = self.folder + "cameras.bin"
+            if os.path.exists(input_file):
+                self._load_cameras_bin(input_file)
+            else:
+                input_file = self.folder + "cameras.txt"
+                if os.path.exists(input_file):
+                    self._load_cameras_txt(input_file)
+                else:
+                    raise IOError("no cameras file found")
+
+    def _load_cameras_bin(self, input_file):
+        self.cameras = OrderedDict()
+
+        with open(input_file, "rb") as f:
+            num_cameras = struct.unpack("L", f.read(8))[0]
+
+            for _ in range(num_cameras):
+                camera_id, camera_type, w, h = struct.unpack("IiLL", f.read(24))
+                num_params = Camera.GetNumParams(camera_type)
+                params = struct.unpack("d" * num_params, f.read(8 * num_params))
+                self.cameras[camera_id] = Camera(camera_type, w, h, params)
+                self.last_camera_id = max(self.last_camera_id, camera_id)
+
+    def _load_cameras_txt(self, input_file):
+        self.cameras = OrderedDict()
+
+        with open(input_file, "r") as f:
+            for line in iter(lambda: f.readline().strip(), ""):
+                if not line or line.startswith("#"):
+                    continue
+
+                data = line.split()
+                camera_id = int(data[0])
+                # list(...) so the params support slicing in Camera.__init__
+                self.cameras[camera_id] = Camera(data[1], int(data[2]), int(data[3]), list(map(float, data[4:])))
+                self.last_camera_id = max(self.last_camera_id, camera_id)
+
+    # ---------------------------------------------------------------------------
+
+    def load_images(self, input_file=None):
+        if input_file is None:
+            input_file = self.folder + "images.bin"
+            if os.path.exists(input_file):
+                self._load_images_bin(input_file)
+            else:
+                input_file = self.folder + "images.txt"
+                if os.path.exists(input_file):
+                    self._load_images_txt(input_file)
+                else:
+                    raise IOError("no images file found")
+
+    def _load_images_bin(self, input_file):
+        self.images = OrderedDict()
+
+        with open(input_file, "rb") as f:
+            num_images = struct.unpack("L", f.read(8))[0]
+            image_struct = struct.Struct("<I 4d 3d I")
+            for _ in range(num_images):
+                image_id, qw, qx, qy, qz, tx, ty, tz, camera_id = image_struct.unpack(f.read(image_struct.size))
+                image_name = b"".join(c for c in iter(lambda: f.read(1), b"\x00")).decode()
+
+                image = Image(image_name, camera_id, Quaternion(np.array((qw, qx, qy, qz))), np.array((tx, ty, tz)))
+
+                num_points2D = struct.unpack("L", f.read(8))[0]
+
+                # read the 2D points in one chunk; this gives > 7x improvements in 60 image model, 23s -> 3s.
+                points_array = array.array("d")
+                points_array.fromfile(f, 3 * num_points2D)
+                points_elements = np.array(points_array).reshape((num_points2D, 3))
+                image.points2D = points_elements[:, :2]
+
+                ids_array = array.array("Q")
+                ids_array.frombytes(points_elements[:, 2].tobytes())
+                image.point3D_ids = np.array(ids_array, dtype=np.uint64).reshape((num_points2D,))
+
+                # automatically remove points without an associated 3D point
+                # mask = (image.point3D_ids != SceneManager.INVALID_POINT3D)
+                # image.points2D = image.points2D[mask]
+                # image.point3D_ids = image.point3D_ids[mask]
+
+                self.images[image_id] = image
+                self.name_to_image_id[image.name] = image_id
+
+                self.last_image_id = max(self.last_image_id, image_id)
+
+    def _load_images_txt(self, input_file):
+        self.images = OrderedDict()
+
+        with open(input_file, "r") as f:
+            is_camera_description_line = False
+
+            for line in iter(lambda: f.readline().strip(), ""):
+                if not line or line.startswith("#"):
+                    continue
+
+                is_camera_description_line = not is_camera_description_line
+
+                data = line.split()
+
+                if is_camera_description_line:
+                    image_id = int(data[0])
+                    image = Image(
+                        data[-1],
+                        int(data[-2]),
+                        Quaternion(np.array(list(map(float, data[1:5])))),
+                        np.array(list(map(float, data[5:8]))),
+                    )
+                else:
+                    image.points2D = np.array([list(map(float, data[::3])), list(map(float, data[1::3]))]).T
+                    image.point3D_ids = np.array(list(map(np.uint64, data[2::3])))
+
+                    # automatically remove points without an associated 3D point
+                    # mask = (image.point3D_ids != SceneManager.INVALID_POINT3D)
+                    # image.points2D = image.points2D[mask]
+                    # image.point3D_ids = image.point3D_ids[mask]
+
+                    self.images[image_id] = image
+                    self.name_to_image_id[image.name] = image_id
+
+                    self.last_image_id = max(self.last_image_id, image_id)
+
+    # ---------------------------------------------------------------------------
+
+    def load_points3D(self, input_file=None):
+        if input_file is None:
+            input_file = self.folder + "points3D.bin"
+            if os.path.exists(input_file):
+                self._load_points3D_bin(input_file)
+            else:
+                input_file = self.folder + "points3D.txt"
+                if os.path.exists(input_file):
+                    self._load_points3D_txt(input_file)
+                else:
+                    raise IOError("no points3D file found")
+
+    def
_load_points3D_bin(self, input_file): + with open(input_file, "rb") as f: + num_points3D = struct.unpack("L", f.read(8))[0] + + self.points3D = np.empty((num_points3D, 3)) + self.point3D_ids = np.empty(num_points3D, dtype=np.uint64) + self.point3D_colors = np.empty((num_points3D, 3), dtype=np.uint8) + self.point3D_id_to_point3D_idx = dict() + self.point3D_id_to_images = dict() + self.point3D_errors = np.empty(num_points3D) + + data_struct = struct.Struct("> fid, "# Camera list with one line of data per camera:" + print >> fid, "# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]" + print >> fid, "# Number of cameras:", len(self.cameras) + + for camera_id, camera in sorted(self.cameras.iteritems()): + print >> fid, camera_id, camera + + # --------------------------------------------------------------------------- + + def save_images(self, output_folder, output_file=None, binary=True): + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + if output_file is None: + output_file = "images.bin" if binary else "images.txt" + + output_file = os.path.join(output_folder, output_file) + + if binary: + self._save_images_bin(output_file) + else: + self._save_images_txt(output_file) + + def _save_images_bin(self, output_file): + with open(output_file, "wb") as fid: + fid.write(struct.pack("L", len(self.images))) + + for image_id, image in self.images.iteritems(): + fid.write(struct.pack("I", image_id)) + fid.write(image.q.q.tobytes()) + fid.write(image.tvec.tobytes()) + fid.write(struct.pack("I", image.camera_id)) + fid.write(image.name + "\0") + fid.write(struct.pack("L", len(image.points2D))) + data = np.rec.fromarrays((image.points2D[:, 0], image.points2D[:, 1], image.point3D_ids)) + fid.write(data.tobytes()) + + def _save_images_txt(self, output_file): + with open(output_file, "w") as fid: + print >> fid, "# Image list with two lines of data per image:" + print >> fid, "# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME" + print >> fid, "# POINTS2D[] as (X, Y, POINT3D_ID)" + print >> fid, "# Number of images: {},".format(len(self.images)), + print >> fid, "mean observations per image: unknown" + + for image_id, image in self.images.iteritems(): + print >> fid, image_id, + print >> fid, " ".join(str(qi) for qi in image.q.q), + print >> fid, " ".join(str(ti) for ti in image.tvec), + print >> fid, image.camera_id, image.name + + data = np.rec.fromarrays( + (image.points2D[:, 0], image.points2D[:, 1], image.point3D_ids.astype(np.int64)) + ) + if len(data) > 0: + np.savetxt(fid, data, "%.2f %.2f %d", newline=" ") + fid.seek(-1, os.SEEK_CUR) + fid.write("\n") + + # --------------------------------------------------------------------------- + + def save_points3D(self, output_folder, output_file=None, binary=True): + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + if output_file is None: + output_file = "points3D.bin" if binary else "points3D.txt" + + output_file = os.path.join(output_folder, output_file) + + if binary: + self._save_points3D_bin(output_file) + else: + self._save_points3D_txt(output_file) + + def _save_points3D_bin(self, output_file): + num_valid_points3D = sum( + 1 + for point3D_idx in self.point3D_id_to_point3D_idx.itervalues() + if point3D_idx != SceneManager.INVALID_POINT3D + ) + + iter_point3D_id_to_point3D_idx = self.point3D_id_to_point3D_idx.iteritems() + + with open(output_file, "wb") as fid: + fid.write(struct.pack("L", num_valid_points3D)) + + for point3D_id, point3D_idx in iter_point3D_id_to_point3D_idx: + if point3D_idx == 
SceneManager.INVALID_POINT3D: + continue + + fid.write(struct.pack("L", point3D_id)) + fid.write(self.points3D[point3D_idx].tobytes()) + fid.write(self.point3D_colors[point3D_idx].tobytes()) + fid.write(self.point3D_errors[point3D_idx].tobytes()) + fid.write(struct.pack("L", len(self.point3D_id_to_images[point3D_id]))) + fid.write(self.point3D_id_to_images[point3D_id].tobytes()) + + def _save_points3D_txt(self, output_file): + num_valid_points3D = sum( + 1 + for point3D_idx in self.point3D_id_to_point3D_idx.itervalues() + if point3D_idx != SceneManager.INVALID_POINT3D + ) + + array_to_string = lambda arr: " ".join(str(x) for x in arr) + + iter_point3D_id_to_point3D_idx = self.point3D_id_to_point3D_idx.iteritems() + + with open(output_file, "w") as fid: + print >> fid, "# 3D point list with one line of data per point:" + print >> fid, "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as ", + print >> fid, "(IMAGE_ID, POINT2D_IDX)" + print >> fid, "# Number of points: {},".format(num_valid_points3D), + print >> fid, "mean track length: unknown" + + for point3D_id, point3D_idx in iter_point3D_id_to_point3D_idx: + if point3D_idx == SceneManager.INVALID_POINT3D: + continue + + print >> fid, point3D_id, + print >> fid, array_to_string(self.points3D[point3D_idx]), + print >> fid, array_to_string(self.point3D_colors[point3D_idx]), + print >> fid, self.point3D_errors[point3D_idx], + print >> fid, array_to_string(self.point3D_id_to_images[point3D_id].flat) + + # --------------------------------------------------------------------------- + + # return the image id associated with a given image file + def get_image_from_name(self, image_name): + image_id = self.name_to_image_id[image_name] + return image_id, self.images[image_id] + + # --------------------------------------------------------------------------- + + def get_camera(self, camera_id): + return self.cameras[camera_id] + + # --------------------------------------------------------------------------- + + def get_points3D(self, image_id, return_points2D=True, return_colors=False): + image = self.images[image_id] + + mask = image.point3D_ids != SceneManager.INVALID_POINT3D + + point3D_idxs = np.array([self.point3D_id_to_point3D_idx[point3D_id] for point3D_id in image.point3D_ids[mask]]) + # detect filtered points + filter_mask = point3D_idxs != SceneManager.INVALID_POINT3D + point3D_idxs = point3D_idxs[filter_mask] + result = [self.points3D[point3D_idxs, :]] + + if return_points2D: + mask[mask] &= filter_mask + result += [image.points2D[mask]] + if return_colors: + result += [self.point3D_colors[point3D_idxs, :]] + + return result if len(result) > 1 else result[0] + + # --------------------------------------------------------------------------- + + def point3D_valid(self, point3D_id): + return self.point3D_id_to_point3D_idx[point3D_id] != SceneManager.INVALID_POINT3D + + # --------------------------------------------------------------------------- + + def get_filtered_points3D(self, return_colors=False): + point3D_idxs = [idx for idx in self.point3D_id_to_point3D_idx.values() if idx != SceneManager.INVALID_POINT3D] + result = [self.points3D[point3D_idxs, :]] + + if return_colors: + result += [self.point3D_colors[point3D_idxs, :]] + + return result if len(result) > 1 else result[0] + + # --------------------------------------------------------------------------- + + # return 3D points shared by two images + def get_shared_points3D(self, image_id1, image_id2): + point3D_ids = set(self.images[image_id1].point3D_ids) & 
set(self.images[image_id2].point3D_ids) + point3D_ids.discard(SceneManager.INVALID_POINT3D) + + point3D_idxs = np.array([self.point3D_id_to_point3D_idx[point3D_id] for point3D_id in point3D_ids]) + + return self.points3D[point3D_idxs, :] + + # --------------------------------------------------------------------------- + + # project *all* 3D points into image, return their projection coordinates, + # as well as their 3D positions + def get_viewed_points(self, image_id): + image = self.images[image_id] + + # get unfiltered points + point3D_idxs = set(self.point3D_id_to_point3D_idx.itervalues()) + point3D_idxs.discard(SceneManager.INVALID_POINT3D) + point3D_idxs = list(point3D_idxs) + points3D = self.points3D[point3D_idxs, :] + + # orient points relative to camera + R = image.q.ToR() + points3D = points3D.dot(R.T) + image.tvec[np.newaxis, :] + points3D = points3D[points3D[:, 2] > 0, :] # keep points with positive z + + # put points into image coordinates + camera = self.cameras[image.camera_id] + points2D = points3D.dot(camera.get_camera_matrix().T) + points2D = points2D[:, :2] / points2D[:, 2][:, np.newaxis] + + # keep points that are within the image + mask = ( + (points2D[:, 0] >= 0) + & (points2D[:, 1] >= 0) + & (points2D[:, 0] < camera.width - 1) + & (points2D[:, 1] < camera.height - 1) + ) + + return points2D[mask, :], points3D[mask, :] + + # --------------------------------------------------------------------------- + + def add_camera(self, camera): + self.last_camera_id += 1 + self.cameras[self.last_camera_id] = camera + return self.last_camera_id + + # --------------------------------------------------------------------------- + + def add_image(self, image): + self.last_image_id += 1 + self.images[self.last_image_id] = image + return self.last_image_id + + # --------------------------------------------------------------------------- + + def delete_images(self, image_list): + # delete specified images + for image_id in image_list: + if image_id in self.images: + del self.images[image_id] + + keep_set = set(self.images.iterkeys()) + + # delete references to specified images, and ignore any points that are + # invalidated + iter_point3D_id_to_point3D_idx = self.point3D_id_to_point3D_idx.iteritems() + + for point3D_id, point3D_idx in iter_point3D_id_to_point3D_idx: + if point3D_idx == SceneManager.INVALID_POINT3D: + continue + + mask = np.array([image_id in keep_set for image_id in self.point3D_id_to_images[point3D_id][:, 0]]) + if np.any(mask): + self.point3D_id_to_images[point3D_id] = self.point3D_id_to_images[point3D_id][mask] + else: + self.point3D_id_to_point3D_idx[point3D_id] = SceneManager.INVALID_POINT3D + + # --------------------------------------------------------------------------- + + # camera_list: set of cameras whose points we'd like to keep + # min/max triangulation angle: in degrees + def filter_points3D(self, min_track_len=0, max_error=np.inf, min_tri_angle=0, max_tri_angle=180, image_set=set()): + + image_set = set(image_set) + + check_triangulation_angles = min_tri_angle > 0 or max_tri_angle < 180 + if check_triangulation_angles: + max_tri_prod = np.cos(np.radians(min_tri_angle)) + min_tri_prod = np.cos(np.radians(max_tri_angle)) + + iter_point3D_id_to_point3D_idx = self.point3D_id_to_point3D_idx.iteritems() + + image_ids = [] + + for point3D_id, point3D_idx in iter_point3D_id_to_point3D_idx: + if point3D_idx == SceneManager.INVALID_POINT3D: + continue + + if image_set or min_track_len > 0: + image_ids = set(self.point3D_id_to_images[point3D_id][:, 0]) + + # check if 
error and min track length are sufficient, or if none of + # the selected cameras see the point + if ( + len(image_ids) < min_track_len + or self.point3D_errors[point3D_idx] > max_error + or image_set + and image_set.isdisjoint(image_ids) + ): + self.point3D_id_to_point3D_idx[point3D_id] = SceneManager.INVALID_POINT3D + + # find dot product between all camera viewing rays + elif check_triangulation_angles: + xyz = self.points3D[point3D_idx, :] + tvecs = np.array([(self.images[image_id].tvec - xyz) for image_id in image_ids]) + tvecs /= np.linalg.norm(tvecs, axis=-1)[:, np.newaxis] + + cos_theta = np.array([u.dot(v) for u, v in combinations(tvecs, 2)]) + + # min_prod = cos(maximum viewing angle), and vice versa + # if maximum viewing angle is too small or too large, + # don't add this point + if np.min(cos_theta) > max_tri_prod or np.max(cos_theta) < min_tri_prod: + self.point3D_id_to_point3D_idx[point3D_id] = SceneManager.INVALID_POINT3D + + # apply the filters to the image point3D_ids + for image in self.images.itervalues(): + mask = np.array( + [ + self.point3D_id_to_point3D_idx.get(point3D_id, 0) == SceneManager.INVALID_POINT3D + for point3D_id in image.point3D_ids + ] + ) + image.point3D_ids[mask] = SceneManager.INVALID_POINT3D + + # --------------------------------------------------------------------------- + + # scene graph: {image_id: [image_id: #shared points]} + def build_scene_graph(self): + self.scene_graph = defaultdict(lambda: defaultdict(int)) + point3D_iter = self.point3D_id_to_images.iteritems() + + for i, (point3D_id, images) in enumerate(point3D_iter): + if not self.point3D_valid(point3D_id): + continue + + for image_id1, image_id2 in combinations(images[:, 0], 2): + self.scene_graph[image_id1][image_id2] += 1 + self.scene_graph[image_id2][image_id1] += 1 diff --git a/fvdb/fvdb/utils/data/_colmap_utils/tools/colmap_to_nvm.py b/fvdb/fvdb/utils/data/_colmap_utils/tools/colmap_to_nvm.py new file mode 100644 index 0000000000..e7c8db5029 --- /dev/null +++ b/fvdb/fvdb/utils/data/_colmap_utils/tools/colmap_to_nvm.py @@ -0,0 +1,64 @@ +import itertools +import sys + +sys.path.append("..") + +import numpy as np + +from .. 
import Quaternion, SceneManager + +# ------------------------------------------------------------------------------- + + +def main(args): + scene_manager = SceneManager(args.input_folder) + scene_manager.load() + + with open(args.output_file, "w") as fid: + fid.write("NVM_V3\n \n{:d}\n".format(len(scene_manager.images))) + + image_fmt_str = " {:.3f} " + 7 * "{:.7f} " + for image_id, image in scene_manager.images.iteritems(): + camera = scene_manager.cameras[image.camera_id] + f = 0.5 * (camera.fx + camera.fy) + fid.write(args.image_name_prefix + image.name) + fid.write(image_fmt_str.format(*((f,) + tuple(image.q.q) + tuple(image.C())))) + if camera.distortion_func is None: + fid.write("0 0\n") + else: + fid.write("{:.7f} 0\n".format(-camera.k1)) + + image_id_to_idx = dict((image_id, i) for i, image_id in enumerate(scene_manager.images)) + + fid.write("{:d}\n".format(len(scene_manager.points3D))) + for i, point3D_id in enumerate(scene_manager.point3D_ids): + fid.write("{:.7f} {:.7f} {:.7f} ".format(*scene_manager.points3D[i])) + fid.write("{:d} {:d} {:d} ".format(*scene_manager.point3D_colors[i])) + keypoints = [ + (image_id_to_idx[image_id], kp_idx) + tuple(scene_manager.images[image_id].points2D[kp_idx]) + for image_id, kp_idx in scene_manager.point3D_id_to_images[point3D_id] + ] + fid.write("{:d}".format(len(keypoints))) + fid.write((len(keypoints) * " {:d} {:d} {:.3f} {:.3f}" + "\n").format(*itertools.chain(*keypoints))) + + +# ------------------------------------------------------------------------------- + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Save a COLMAP reconstruction in the NVM format " "(http://ccwu.me/vsfm/doc.html#nvm).", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument("input_folder") + parser.add_argument("output_file") + + parser.add_argument( + "--image_name_prefix", type=str, default="", help="prefix image names with this string (e.g., 'images/')" + ) + + args = parser.parse_args() + + main(args) diff --git a/fvdb/fvdb/utils/data/_colmap_utils/tools/delete_images.py b/fvdb/fvdb/utils/data/_colmap_utils/tools/delete_images.py new file mode 100644 index 0000000000..80f9bc9a86 --- /dev/null +++ b/fvdb/fvdb/utils/data/_colmap_utils/tools/delete_images.py @@ -0,0 +1,37 @@ +import sys + +sys.path.append("..") + +import numpy as np + +from .. 
import DualQuaternion, Image, SceneManager + +# ------------------------------------------------------------------------------- + + +def main(args): + scene_manager = SceneManager(args.input_folder) + scene_manager.load() + + image_ids = map(scene_manager.get_image_from_name, iter(lambda: sys.stdin.readline().strip(), "")) + scene_manager.delete_images(image_ids) + + scene_manager.save(args.output_folder) + + +# ------------------------------------------------------------------------------- + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Deletes images (filenames read from stdin) from a model.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument("input_folder") + parser.add_argument("output_folder") + + args = parser.parse_args() + + main(args) diff --git a/fvdb/fvdb/utils/data/_colmap_utils/tools/impute_missing_cameras.py b/fvdb/fvdb/utils/data/_colmap_utils/tools/impute_missing_cameras.py new file mode 100644 index 0000000000..f579fa8ac5 --- /dev/null +++ b/fvdb/fvdb/utils/data/_colmap_utils/tools/impute_missing_cameras.py @@ -0,0 +1,174 @@ +import sys + +sys.path.append("..") + +import numpy as np + +from .. import DualQuaternion, Image, SceneManager + +# ------------------------------------------------------------------------------- + +image_to_idx = lambda im: int(im.name[: im.name.rfind(".")]) + + +# ------------------------------------------------------------------------------- + + +def interpolate_linear(images, camera_id, file_format): + if len(images) < 2: + raise ValueError("Need at least two images for linear interpolation!") + + prev_image = images[0] + prev_idx = image_to_idx(prev_image) + prev_dq = DualQuaternion.FromQT(prev_image.q, prev_image.t) + start = prev_idx + + new_images = [] + + for image in images[1:]: + curr_idx = image_to_idx(image) + curr_dq = DualQuaternion.FromQT(image.q, image.t) + T = curr_idx - prev_idx + Tinv = 1.0 / T + + # like quaternions, dq(x) = -dq(x), so we'll need to pick the one more + # appropriate for interpolation by taking -dq if the dot product of the + # two q-vectors is negative + if prev_dq.q0.dot(curr_dq.q0) < 0: + curr_dq = -curr_dq + + for i in xrange(1, T): + t = i * Tinv + dq = t * prev_dq + (1.0 - t) * curr_dq + q, t = dq.ToQT() + new_images.append(Image(file_format.format(prev_idx + i), args.camera_id, q, t)) + + prev_idx = curr_idx + prev_dq = curr_dq + + return new_images + + +# ------------------------------------------------------------------------------- + + +def interpolate_hermite(images, camera_id, file_format): + if len(images) < 4: + raise ValueError("Need at least four images for Hermite spline interpolation!") + + new_images = [] + + # linear blending for the first frames + T0 = image_to_idx(images[0]) + dq0 = DualQuaternion.FromQT(images[0].q, images[0].t) + T1 = image_to_idx(images[1]) + dq1 = DualQuaternion.FromQT(images[1].q, images[1].t) + + if dq0.q0.dot(dq1.q0) < 0: + dq1 = -dq1 + dT = 1.0 / float(T1 - T0) + for j in xrange(1, T1 - T0): + t = j * dT + dq = ((1.0 - t) * dq0 + t * dq1).normalize() + new_images.append(Image(file_format.format(T0 + j), camera_id, *dq.ToQT())) + + T2 = image_to_idx(images[2]) + dq2 = DualQuaternion.FromQT(images[2].q, images[2].t) + if dq1.q0.dot(dq2.q0) < 0: + dq2 = -dq2 + + # Hermite spline interpolation of dual quaternions + # pdfs.semanticscholar.org/05b1/8ede7f46c29c2722fed3376d277a1d286c55.pdf + for i in xrange(1, len(images) - 2): + T3 = image_to_idx(images[i + 2]) + dq3 = 
DualQuaternion.FromQT(images[i + 2].q, images[i + 2].t) + if dq2.q0.dot(dq3.q0) < 0: + dq3 = -dq3 + + prev_duration = T1 - T0 + current_duration = T2 - T1 + next_duration = T3 - T2 + + # approximate the derivatives at dq1 and dq2 using weighted central + # differences + dt1 = 1.0 / float(T2 - T0) + dt2 = 1.0 / float(T3 - T1) + + m1 = (current_duration * dt1) * (dq2 - dq1) + (prev_duration * dt1) * (dq1 - dq0) + m2 = (next_duration * dt2) * (dq3 - dq2) + (current_duration * dt2) * (dq2 - dq1) + + dT = 1.0 / float(current_duration) + + for j in xrange(1, current_duration): + t = j * dT # 0 to 1 + t2 = t * t # t squared + t3 = t2 * t # t cubed + + # coefficients of the Hermite spline (a=>dq and b=>m) + a1 = 2.0 * t3 - 3.0 * t2 + 1.0 + b1 = t3 - 2.0 * t2 + t + a2 = -2.0 * t3 + 3.0 * t2 + b2 = t3 - t2 + + dq = (a1 * dq1 + b1 * m1 + a2 * dq2 + b2 * m2).normalize() + + new_images.append(Image(file_format.format(T1 + j), camera_id, *dq.ToQT())) + + T0, T1, T2 = T1, T2, T3 + dq0, dq1, dq2 = dq1, dq2, dq3 + + # linear blending for the last frames + dT = 1.0 / float(T2 - T1) + for j in xrange(1, T2 - T1): + t = j * dT # 0 to 1 + dq = ((1.0 - t) * dq1 + t * dq2).normalize() + new_images.append(Image(file_format.format(T1 + j), camera_id, *dq.ToQT())) + + return new_images + + +# ------------------------------------------------------------------------------- + + +def main(args): + scene_manager = SceneManager(args.input_folder) + scene_manager.load() + + images = sorted(scene_manager.images.itervalues(), key=image_to_idx) + + if args.method.lower() == "linear": + new_images = interpolate_linear(images, args.camera_id, args.format) + else: + new_images = interpolate_hermite(images, args.camera_id, args.format) + + map(scene_manager.add_image, new_images) + + scene_manager.save(args.output_folder) + + +# ------------------------------------------------------------------------------- + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Given a reconstruction with ordered images *with integer " + "filenames* like '000100.png', fill in missing camera positions for " + "intermediate frames.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument("input_folder") + parser.add_argument("output_folder") + + parser.add_argument("--camera_id", type=int, default=1, help="camera id to use for the missing images") + + parser.add_argument("--format", type=str, default="{:06d}.png", help="filename format to use for added images") + + parser.add_argument( + "--method", type=str.lower, choices=("linear", "hermite"), default="hermite", help="Pose imputation method" + ) + + args = parser.parse_args() + + main(args) diff --git a/fvdb/fvdb/utils/data/_colmap_utils/tools/save_cameras_as_ply.py b/fvdb/fvdb/utils/data/_colmap_utils/tools/save_cameras_as_ply.py new file mode 100644 index 0000000000..5e6c9e8eac --- /dev/null +++ b/fvdb/fvdb/utils/data/_colmap_utils/tools/save_cameras_as_ply.py @@ -0,0 +1,86 @@ +import sys + +sys.path.append("..") + +import os + +import numpy as np + +from .. 
import SceneManager + +# ------------------------------------------------------------------------------- + + +# Saves the cameras as a mesh +# +# inputs: +# - ply_file: output file +# - images: ordered array of pycolmap Image objects +# - color: color string for the camera +# - scale: amount to shrink/grow the camera model +def save_camera_ply(ply_file, images, scale): + points3D = scale * np.array( + ((0.0, 0.0, 0.0), (-1.0, -1.0, 1.0), (-1.0, 1.0, 1.0), (1.0, -1.0, 1.0), (1.0, 1.0, 1.0)) + ) + + faces = np.array(((0, 2, 1), (0, 4, 2), (0, 3, 4), (0, 1, 3), (1, 2, 4), (1, 4, 3))) + + r = np.linspace(0, 255, len(images), dtype=np.uint8) + g = 255 - r + b = r - np.linspace(0, 128, len(images), dtype=np.uint8) + color = np.column_stack((r, g, b)) + + with open(ply_file, "w") as fid: + print >> fid, "ply" + print >> fid, "format ascii 1.0" + print >> fid, "element vertex", len(points3D) * len(images) + print >> fid, "property float x" + print >> fid, "property float y" + print >> fid, "property float z" + print >> fid, "property uchar red" + print >> fid, "property uchar green" + print >> fid, "property uchar blue" + print >> fid, "element face", len(faces) * len(images) + print >> fid, "property list uchar int vertex_index" + print >> fid, "end_header" + + for image, c in zip(images, color): + for p3D in points3D.dot(image.R()) + image.C(): + print >> fid, p3D[0], p3D[1], p3D[2], c[0], c[1], c[2] + + for i in xrange(len(images)): + for f in faces + len(points3D) * i: + print >> fid, "3 {} {} {}".format(*f) + + +# ------------------------------------------------------------------------------- + + +def main(args): + scene_manager = SceneManager(args.input_folder) + scene_manager.load_images() + + images = sorted(scene_manager.images.itervalues(), key=lambda image: image.name) + + save_camera_ply(args.output_file, images, args.scale) + + +# ------------------------------------------------------------------------------- + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Saves camera positions to a PLY for easy viewing outside " + "of COLMAP. Currently, camera FoV is not reflected in the output.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument("input_folder") + parser.add_argument("output_file") + + parser.add_argument("--scale", type=float, default=1.0, help="Scaling factor for the camera mesh.") + + args = parser.parse_args() + + main(args) diff --git a/fvdb/fvdb/utils/data/_colmap_utils/tools/transform_model.py b/fvdb/fvdb/utils/data/_colmap_utils/tools/transform_model.py new file mode 100644 index 0000000000..ea398ef5cd --- /dev/null +++ b/fvdb/fvdb/utils/data/_colmap_utils/tools/transform_model.py @@ -0,0 +1,49 @@ +import sys + +sys.path.append("..") + +import numpy as np + +from .. 
import Quaternion, SceneManager + +# ------------------------------------------------------------------------------- + + +def main(args): + scene_manager = SceneManager(args.input_folder) + scene_manager.load() + + # expect each line of input corresponds to one row + P = np.array([map(float, sys.stdin.readline().strip().split()) for _ in xrange(3)]) + + scene_manager.points3D[:] = scene_manager.points3D.dot(P[:, :3].T) + P[:, 3] + + # get rotation without any global scaling (assuming isotropic scaling) + scale = np.cbrt(np.linalg.det(P[:, :3])) + q_old_from_new = ~Quaternion.FromR(P[:, :3] / scale) + + for image in scene_manager.images.itervalues(): + image.q *= q_old_from_new + image.tvec = scale * image.tvec - image.R().dot(P[:, 3]) + + scene_manager.save(args.output_folder) + + +# ------------------------------------------------------------------------------- + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Apply a 3x4 transformation matrix to a COLMAP model and " + "save the result as a new model. Row-major input can be piped in from " + "a file or entered via the command line.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument("input_folder") + parser.add_argument("output_folder") + + args = parser.parse_args() + + main(args) diff --git a/fvdb/fvdb/utils/data/_colmap_utils/tools/write_camera_track_to_bundler.py b/fvdb/fvdb/utils/data/_colmap_utils/tools/write_camera_track_to_bundler.py new file mode 100644 index 0000000000..43a721c162 --- /dev/null +++ b/fvdb/fvdb/utils/data/_colmap_utils/tools/write_camera_track_to_bundler.py @@ -0,0 +1,59 @@ +import sys + +sys.path.append("..") + +import numpy as np + +from .. import SceneManager + +# ------------------------------------------------------------------------------- + + +def main(args): + scene_manager = SceneManager(args.input_folder) + scene_manager.load_cameras() + scene_manager.load_images() + + if args.sort: + images = sorted(scene_manager.images.itervalues(), key=lambda im: im.name) + else: + images = scene_manager.images.values() + + fid = open(args.output_file, "w") + fid_filenames = open(args.output_file + ".list.txt", "w") + + print >> fid, "# Bundle file v0.3" + print >> fid, len(images), 0 + + for image in images: + print >> fid_filenames, image.name + camera = scene_manager.cameras[image.camera_id] + print >> fid, 0.5 * (camera.fx + camera.fy), 0, 0 + R, t = image.R(), image.t + print >> fid, R[0, 0], R[0, 1], R[0, 2] + print >> fid, -R[1, 0], -R[1, 1], -R[1, 2] + print >> fid, -R[2, 0], -R[2, 1], -R[2, 2] + print >> fid, t[0], -t[1], -t[2] + + fid.close() + fid_filenames.close() + + +# ------------------------------------------------------------------------------- + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Saves the camera positions in the Bundler format. 
Note " "that 3D points are not saved.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument("input_folder") + parser.add_argument("output_file") + + parser.add_argument("--sort", default=False, action="store_true", help="sort the images by their filename") + + args = parser.parse_args() + + main(args) diff --git a/fvdb/fvdb/utils/data/_colmap_utils/tools/write_depthmap_to_ply.py b/fvdb/fvdb/utils/data/_colmap_utils/tools/write_depthmap_to_ply.py new file mode 100644 index 0000000000..0dba35c870 --- /dev/null +++ b/fvdb/fvdb/utils/data/_colmap_utils/tools/write_depthmap_to_ply.py @@ -0,0 +1,137 @@ +import sys + +sys.path.append("..") + +import os + +import imageio +import numpy as np +from plyfile import PlyData, PlyElement +from scipy.ndimage.interpolation import zoom + +from .. import SceneManager + +# ------------------------------------------------------------------------------- + + +def main(args): + suffix = ".photometric.bin" if args.photometric else ".geometric.bin" + + image_file = os.path.join(args.dense_folder, "images", args.image_filename) + depth_file = os.path.join(args.dense_folder, args.stereo_folder, "depth_maps", args.image_filename + suffix) + if args.save_normals: + normals_file = os.path.join(args.dense_folder, args.stereo_folder, "normal_maps", args.image_filename + suffix) + + # load camera intrinsics from the COLMAP reconstruction + scene_manager = SceneManager(os.path.join(args.dense_folder, "sparse")) + scene_manager.load_cameras() + scene_manager.load_images() + + image_id, image = scene_manager.get_image_from_name(args.image_filename) + camera = scene_manager.cameras[image.camera_id] + rotation_camera_from_world = image.R() + camera_center = image.C() + + # load image, depth map, and normal map + image = imageio.imread(image_file) + + with open(depth_file, "rb") as fid: + w = int("".join(iter(lambda: fid.read(1), "&"))) + h = int("".join(iter(lambda: fid.read(1), "&"))) + c = int("".join(iter(lambda: fid.read(1), "&"))) + depth_map = np.fromfile(fid, np.float32).reshape(h, w) + if (h, w) != image.shape[:2]: + depth_map = zoom(depth_map, (float(image.shape[0]) / h, float(image.shape[1]) / w), order=0) + + if args.save_normals: + with open(normals_file, "rb") as fid: + w = int("".join(iter(lambda: fid.read(1), "&"))) + h = int("".join(iter(lambda: fid.read(1), "&"))) + c = int("".join(iter(lambda: fid.read(1), "&"))) + normals = np.fromfile(fid, np.float32).reshape(c, h, w).transpose([1, 2, 0]) + if (h, w) != image.shape[:2]: + normals = zoom(normals, (float(image.shape[0]) / h, float(image.shape[1]) / w, 1.0), order=0) + + if args.min_depth is not None: + depth_map[depth_map < args.min_depth] = 0.0 + if args.max_depth is not None: + depth_map[depth_map > args.max_depth] = 0.0 + + # create 3D points + # depth_map = np.minimum(depth_map, 100.) 
+ points3D = np.dstack(camera.get_image_grid() + [depth_map]) + points3D[:, :, :2] *= depth_map[:, :, np.newaxis] + + # save + points3D = points3D.astype(np.float32).reshape(-1, 3) + if args.save_normals: + normals = normals.astype(np.float32).reshape(-1, 3) + image = image.reshape(-1, 3) + if image.dtype != np.uint8: + if image.max() <= 1: + image = (image * 255.0).astype(np.uint8) + else: + image = image.astype(np.uint8) + + if args.world_space: + points3D = points3D.dot(rotation_camera_from_world) + camera_center + if args.save_normals: + normals = normals.dot(rotation_camera_from_world) + + if args.save_normals: + vertices = np.rec.fromarrays( + tuple(points3D.T) + tuple(normals.T) + tuple(image.T), names="x,y,z,nx,ny,nz,red,green,blue" + ) + else: + vertices = np.rec.fromarrays(tuple(points3D.T) + tuple(image.T), names="x,y,z,red,green,blue") + vertices = PlyElement.describe(vertices, "vertex") + PlyData([vertices]).write(args.output_filename) + + +# ------------------------------------------------------------------------------- + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument("dense_folder", type=str) + parser.add_argument("image_filename", type=str) + parser.add_argument("output_filename", type=str) + + parser.add_argument( + "--photometric", default=False, action="store_true", help="use photometric depthmap instead of geometric" + ) + + parser.add_argument( + "--world_space", + default=False, + action="store_true", + help="apply the camera->world extrinsic transformation to the result", + ) + + parser.add_argument( + "--save_normals", + default=False, + action="store_true", + help="load the estimated normal map and save as part of the PLY", + ) + + parser.add_argument( + "--stereo_folder", + type=str, + default="stereo", + help="folder in the dense workspace containing depth and normal maps", + ) + + parser.add_argument( + "--min_depth", type=float, default=None, help="set pixels with depth less than this value to zero depth" + ) + + parser.add_argument( + "--max_depth", type=float, default=None, help="set pixels with depth greater than this value to zero depth" + ) + + args = parser.parse_args() + + main(args) diff --git a/fvdb/fvdb/utils/data/colmap_dataset.py b/fvdb/fvdb/utils/data/colmap_dataset.py new file mode 100644 index 0000000000..4bce06a547 --- /dev/null +++ b/fvdb/fvdb/utils/data/colmap_dataset.py @@ -0,0 +1,474 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 +# +import os +from typing import Any, Dict, List, Optional + +import cv2 +import imageio.v2 as imageio +import numpy as np +import torch +import torch.utils.data + +from ._colmap_utils import SceneManager + + +def similarity_from_cameras(c2w, strict_scaling=False, center_method="focus"): + """ + reference: nerf-factory + Get a similarity transform to normalize dataset + from c2w (OpenCV convention) cameras + :param c2w: (N, 4) + :return T (4,4) , scale (float) + """ + t = c2w[:, :3, 3] + R = c2w[:, :3, :3] + + # (1) Rotate the world so that z+ is the up axis + # we estimate the up axis by averaging the camera up axes + ups = np.sum(R * np.array([0, -1.0, 0]), axis=-1) + world_up = np.mean(ups, axis=0) + world_up /= np.linalg.norm(world_up) + + up_camspace = np.array([0.0, -1.0, 0.0]) + c = (up_camspace * world_up).sum() + cross = np.cross(world_up, up_camspace) + skew = np.array( + [ + [0.0, -cross[2], cross[1]], + [cross[2], 0.0, -cross[0]], + [-cross[1], 
cross[0], 0.0], + ] + ) + if c > -1: + R_align = np.eye(3) + skew + (skew @ skew) * 1 / (1 + c) + else: + # In the unlikely case the original data has y+ up axis, + # rotate 180-deg about x axis + R_align = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) + + # R_align = np.eye(3) # DEBUG + R = R_align @ R + fwds = np.sum(R * np.array([0, 0.0, 1.0]), axis=-1) + t = (R_align @ t[..., None])[..., 0] + + # (2) Recenter the scene. + if center_method == "focus": + # find the closest point to the origin for each camera's center ray + nearest = t + (fwds * -t).sum(-1)[:, None] * fwds + translate = -np.median(nearest, axis=0) + elif center_method == "poses": + # use center of the camera positions + translate = -np.median(t, axis=0) + else: + raise ValueError(f"Unknown center_method {center_method}") + + transform = np.eye(4) + transform[:3, 3] = translate + transform[:3, :3] = R_align + + # (3) Rescale the scene using camera distances + scale_fn = np.max if strict_scaling else np.median + scale = 1.0 / scale_fn(np.linalg.norm(t + translate, axis=-1)) + transform[:3, :] *= scale + + return transform + + +def align_principle_axes(point_cloud): + # Compute centroid + centroid = np.median(point_cloud, axis=0) + + # Translate point cloud to centroid + translated_point_cloud = point_cloud - centroid + + # Compute covariance matrix + covariance_matrix = np.cov(translated_point_cloud, rowvar=False) + + # Compute eigenvectors and eigenvalues + eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix) + + # Sort eigenvectors by eigenvalues (descending order) so that the z-axis + # is the principal axis with the smallest eigenvalue. + sort_indices = eigenvalues.argsort()[::-1] + eigenvectors = eigenvectors[:, sort_indices] + + # Check orientation of eigenvectors. If the determinant of the eigenvectors is + # negative, then we need to flip the sign of one of the eigenvectors. + if np.linalg.det(eigenvectors) < 0: + eigenvectors[:, 0] *= -1 + + # Create rotation matrix + rotation_matrix = eigenvectors.T + + # Create SE(3) matrix (4x4 transformation matrix) + transform = np.eye(4) + transform[:3, :3] = rotation_matrix + transform[:3, 3] = -rotation_matrix @ centroid + + return transform + + +def transform_points(matrix, points): + """Transform points using an SE(3) matrix. + + Args: + matrix: 4x4 SE(3) matrix + points: Nx3 array of points + + Returns: + Nx3 array of transformed points + """ + assert matrix.shape == (4, 4) + assert len(points.shape) == 2 and points.shape[1] == 3 + return points @ matrix[:3, :3].T + matrix[:3, 3] + + +def transform_cameras(matrix, camtoworlds): + """Transform cameras using an SE(3) matrix. 
+ + Args: + matrix: 4x4 SE(3) matrix + camtoworlds: Nx4x4 array of camera-to-world matrices + + Returns: + Nx4x4 array of transformed camera-to-world matrices + """ + assert matrix.shape == (4, 4) + assert len(camtoworlds.shape) == 3 and camtoworlds.shape[1:] == (4, 4) + camtoworlds = np.einsum("nij, ki -> nkj", camtoworlds, matrix) + scaling = np.linalg.norm(camtoworlds[:, 0, :3], axis=1) + camtoworlds[:, :3, :3] = camtoworlds[:, :3, :3] / scaling[:, None, None] + return camtoworlds + + +def normalize(camtoworlds, points=None): + T1 = similarity_from_cameras(camtoworlds) + camtoworlds = transform_cameras(T1, camtoworlds) + if points is not None: + points = transform_points(T1, points) + T2 = align_principle_axes(points) + camtoworlds = transform_cameras(T2, camtoworlds) + points = transform_points(T2, points) + return camtoworlds, points, T2 @ T1 + else: + return camtoworlds, T1 + + +class ColmapParser: + """COLMAP parser.""" + + def __init__( + self, + data_dir: str, + factor: int = 1, + normalize: bool = False, + test_every: int = 8, + ): + self.data_dir = data_dir + self.factor = factor + self.normalize = normalize + self.test_every = test_every + + colmap_dir = os.path.join(data_dir, "sparse/0/") + if not os.path.exists(colmap_dir): + colmap_dir = os.path.join(data_dir, "sparse") + assert os.path.exists(colmap_dir), f"COLMAP directory {colmap_dir} does not exist." + + manager = SceneManager(colmap_dir) + manager.load_cameras() + manager.load_images() + manager.load_points3D() + + # Extract extrinsic matrices in world-to-camera format. + imdata = manager.images + w2c_mats = [] + camera_ids = [] + Ks_dict = dict() + params_dict = dict() + imsize_dict = dict() # width, height + bottom = np.array([0, 0, 0, 1]).reshape(1, 4) + for k in imdata: + im = imdata[k] + rot = im.R() + trans = im.tvec.reshape(3, 1) + w2c = np.concatenate([np.concatenate([rot, trans], 1), bottom], axis=0) + w2c_mats.append(w2c) + + # support different camera intrinsics + camera_id = im.camera_id + camera_ids.append(camera_id) + + # camera intrinsics + cam = manager.cameras[camera_id] + fx, fy, cx, cy = cam.fx, cam.fy, cam.cx, cam.cy + K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) + K[:2, :] /= factor + Ks_dict[camera_id] = K + + # Get distortion parameters. + type_ = cam.camera_type + if type_ == 0 or type_ == "SIMPLE_PINHOLE": + params = np.empty(0, dtype=np.float32) + camtype = "perspective" + elif type_ == 1 or type_ == "PINHOLE": + params = np.empty(0, dtype=np.float32) + camtype = "perspective" + if type_ == 2 or type_ == "SIMPLE_RADIAL": + params = np.array([cam.k1], dtype=np.float32) + camtype = "perspective" + elif type_ == 3 or type_ == "RADIAL": + params = np.array([cam.k1, cam.k2, 0.0, 0.0], dtype=np.float32) + camtype = "perspective" + elif type_ == 4 or type_ == "OPENCV": + params = np.array([cam.k1, cam.k2, cam.p1, cam.p2], dtype=np.float32) + camtype = "perspective" + elif type_ == 5 or type_ == "OPENCV_FISHEYE": + params = np.array([cam.k1, cam.k2, cam.k3, cam.k4], dtype=np.float32) + camtype = "fisheye" + assert camtype == "perspective", f"Only support perspective camera model, got {type_}" + + params_dict[camera_id] = params + + # image size + imsize_dict[camera_id] = (cam.width // factor, cam.height // factor) + + print(f"[Parser] {len(imdata)} images, taken by {len(set(camera_ids))} cameras.") + + if len(imdata) == 0: + raise ValueError("No images found in COLMAP.") + if not (type_ == 0 or type_ == 1): + print("Warning: COLMAP Camera is not PINHOLE. 
Images have distortion.") + + w2c_mats = np.stack(w2c_mats, axis=0) + + # Convert extrinsics to camera-to-world. + camtoworlds = np.linalg.inv(w2c_mats) + + # Image names from COLMAP. No need for permuting the poses according to + # image names anymore. + image_names = [imdata[k].name for k in imdata] + + # Previous Nerf results were generated with images sorted by filename, + # ensure metrics are reported on the same test set. + inds = np.argsort(image_names) + image_names = [image_names[i] for i in inds] + camtoworlds = camtoworlds[inds] + camera_ids = [camera_ids[i] for i in inds] + + # Load images. + if factor > 1: + image_dir_suffix = f"_{factor}" + else: + image_dir_suffix = "" + colmap_image_dir = os.path.join(data_dir, "images") + image_dir = os.path.join(data_dir, "images" + image_dir_suffix) + for d in [image_dir, colmap_image_dir]: + if not os.path.exists(d): + raise ValueError(f"Image folder {d} does not exist.") + + # Downsampled images may have different names vs images used for COLMAP, + # so we need to map between the two sorted lists of files. + colmap_files = sorted(self._get_rel_paths(colmap_image_dir)) + image_files = sorted(self._get_rel_paths(image_dir)) + colmap_to_image = dict(zip(colmap_files, image_files)) + image_paths = [os.path.join(image_dir, colmap_to_image[f]) for f in image_names] + + # 3D points and {image_name -> [point_idx]} + points: np.ndarray = manager.points3D.astype(np.float32) # type: ignore + points_err: np.ndarray = manager.point3D_errors.astype(np.float32) # type: ignore + points_rgb: np.ndarray = manager.point3D_colors.astype(np.uint8) # type: ignore + point_indices = dict() + + image_id_to_name = {v: k for k, v in manager.name_to_image_id.items()} + for point_id, data in manager.point3D_id_to_images.items(): + for image_id, _ in data: + image_name = image_id_to_name[image_id] + point_idx = manager.point3D_id_to_point3D_idx[point_id] + point_indices.setdefault(image_name, []).append(point_idx) + point_indices = {k: np.array(v).astype(np.int32) for k, v in point_indices.items()} + + # Normalize the world space. 
+ if normalize: + T1 = similarity_from_cameras(camtoworlds) + camtoworlds = transform_cameras(T1, camtoworlds) + points = transform_points(T1, points) + + T2 = align_principle_axes(points) + camtoworlds = transform_cameras(T2, camtoworlds) + points = transform_points(T2, points) + + transform = T2 @ T1 + else: + transform = np.eye(4) + + self.image_names = image_names # List[str], (num_images,) + self.image_paths = image_paths # List[str], (num_images,) + self.camtoworlds = camtoworlds # np.ndarray, (num_images, 4, 4) + self.camera_ids = camera_ids # List[int], (num_images,) + self.Ks_dict = Ks_dict # Dict of camera_id -> K + self.params_dict = params_dict # Dict of camera_id -> params + self.imsize_dict = imsize_dict # Dict of camera_id -> (width, height) + self.points = points # np.ndarray, (num_points, 3) + self.points_err = points_err # np.ndarray, (num_points,) + self.points_rgb = points_rgb # np.ndarray, (num_points, 3) + self.point_indices = point_indices # Dict[str, np.ndarray], image_name -> [M,] + self.transform = transform # np.ndarray, (4, 4) + + # undistortion + self.mapx_dict = dict() + self.mapy_dict = dict() + self.roi_undist_dict = dict() + for camera_id in self.params_dict.keys(): + params = self.params_dict[camera_id] + if len(params) == 0: + continue # no distortion + assert camera_id in self.Ks_dict, f"Missing K for camera {camera_id}" + assert camera_id in self.params_dict, f"Missing params for camera {camera_id}" + K = self.Ks_dict[camera_id] + width, height = self.imsize_dict[camera_id] + K_undist, roi_undist = cv2.getOptimalNewCameraMatrix(K, params, (width, height), 0) + mapx, mapy = cv2.initUndistortRectifyMap(K, params, None, K_undist, (width, height), cv2.CV_32FC1) # type: ignore + self.Ks_dict[camera_id] = K_undist + self.mapx_dict[camera_id] = mapx + self.mapy_dict[camera_id] = mapy + self.roi_undist_dict[camera_id] = roi_undist + + # size of the scene measured by cameras + camera_locations = camtoworlds[:, :3, 3] + scene_center = np.mean(camera_locations, axis=0) + dists = np.linalg.norm(camera_locations - scene_center, axis=1) + self.scene_scale = np.max(dists) + + @staticmethod + def _get_rel_paths(path_dir: str) -> List[str]: + """Recursively get relative paths of files in a directory.""" + paths = [] + for dp, dn, fn in os.walk(path_dir): + for f in fn: + paths.append(os.path.relpath(os.path.join(dp, f), path_dir)) + return paths + + +class ColmapDataset(torch.utils.data.Dataset): + """A simple dataset class.""" + + def __init__( + self, + parser: ColmapParser, + split: str = "train", + patch_size: Optional[int] = None, + load_depths: bool = False, + ): + self.parser = parser + self.split = split + self.patch_size = patch_size + self.load_depths = load_depths + indices = np.arange(len(self.parser.image_names)) + if split == "train": + self.indices = indices[indices % self.parser.test_every != 0] + else: + self.indices = indices[indices % self.parser.test_every == 0] + + def __len__(self): + return len(self.indices) + + def __getitem__(self, item: int) -> Dict[str, Any]: + index = self.indices[item] + image = imageio.imread(self.parser.image_paths[index])[..., :3] + camera_id = self.parser.camera_ids[index] + K = self.parser.Ks_dict[camera_id].copy() # undistorted K + params = self.parser.params_dict[camera_id] + camtoworlds = self.parser.camtoworlds[index] + + if len(params) > 0: + # Images are distorted. Undistort them. 
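For context on the undistortion step that follows: the expensive part, cv2.getOptimalNewCameraMatrix plus cv2.initUndistortRectifyMap, is precomputed once per camera in ColmapParser above, so each image load only pays for a cv2.remap and an ROI crop. A self-contained sketch with made-up intrinsics and OpenCV-ordered (k1, k2, p1, p2) coefficients:

    import cv2
    import numpy as np

    # One-time per-camera setup (mirrors the parser); values are examples only.
    K = np.array([[1000.0, 0.0, 640.0], [0.0, 1000.0, 360.0], [0.0, 0.0, 1.0]])
    params = np.array([-0.1, 0.01, 0.0, 0.0], dtype=np.float32)
    width, height = 1280, 720
    K_undist, roi = cv2.getOptimalNewCameraMatrix(K, params, (width, height), 0)
    mapx, mapy = cv2.initUndistortRectifyMap(K, params, None, K_undist, (width, height), cv2.CV_32FC1)

    # Per-image work at load time: one remap plus a crop to the valid ROI.
    image = np.zeros((height, width, 3), np.uint8)  # stand-in for a loaded image
    undistorted = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR)
    x, y, w, h = roi
    undistorted = undistorted[y : y + h, x : x + w]
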
+ mapx, mapy = ( + self.parser.mapx_dict[camera_id], + self.parser.mapy_dict[camera_id], + ) + image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR) + x, y, w, h = self.parser.roi_undist_dict[camera_id] + image = image[y : y + h, x : x + w] + + if self.patch_size is not None: + # Random crop. + h, w = image.shape[:2] + x = np.random.randint(0, max(w - self.patch_size, 1)) + y = np.random.randint(0, max(h - self.patch_size, 1)) + image = image[y : y + self.patch_size, x : x + self.patch_size] + K[0, 2] -= x + K[1, 2] -= y + + data = { + "K": torch.from_numpy(K).float(), + "camtoworld": torch.from_numpy(camtoworlds).float(), + "image": torch.from_numpy(image).float(), + "image_id": item, # the index of the image in the dataset + "image_path": self.parser.image_paths[index], + } + + if self.load_depths: + # projected points to image plane to get depths + worldtocams = np.linalg.inv(camtoworlds) + image_name = self.parser.image_names[index] + point_indices = self.parser.point_indices[image_name] + points_world = self.parser.points[point_indices] + points_cam = (worldtocams[:3, :3] @ points_world.T + worldtocams[:3, 3:4]).T + points_proj = (K @ points_cam.T).T + points = points_proj[:, :2] / points_proj[:, 2:3] # (M, 2) + depths = points_cam[:, 2] # (M,) + if self.patch_size is not None: + points[:, 0] -= x + points[:, 1] -= y + # filter out points outside the image + selector = ( + (points[:, 0] >= 0) + & (points[:, 0] < image.shape[1]) + & (points[:, 1] >= 0) + & (points[:, 1] < image.shape[0]) + & (depths > 0) + ) + points = points[selector] + depths = depths[selector] + data["points"] = torch.from_numpy(points).float() + data["depths"] = torch.from_numpy(depths).float() + + return data + + +__all__ = ["ColmapParser", "ColmapDataset"] + +if __name__ == "__main__": + import argparse + + import imageio.v2 as imageio + import tqdm + + parser = argparse.ArgumentParser() + parser.add_argument("--data_dir", type=str, default="data/360_v2/garden") + parser.add_argument("--factor", type=int, default=4) + args = parser.parse_args() + + # Parse COLMAP data. + parser = ColmapParser(data_dir=args.data_dir, factor=args.factor, normalize=True, test_every=8) + dataset = ColmapDataset(parser, split="train", load_depths=True) + print(f"Dataset: {len(dataset)} images.") + + imsize = None + writer = imageio.get_writer("results/points.mp4", fps=30) + for data in tqdm.tqdm(dataset, desc="Plotting points"): # type: ignore + image = data["image"].numpy().astype(np.uint8) + # Make sure all images we write are the same size. We use the first image to determine the size of the video. + # This is done because some images have slightly different sizes due to undistortion. 
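An aside before the size-padding logic continues: outside this visualization demo, ColmapDataset drops straight into a standard torch DataLoader. A minimal usage sketch, with a hypothetical data directory:

    import torch
    from fvdb.utils.data.colmap_dataset import ColmapDataset, ColmapParser

    # "data/my_scene" is a placeholder; any COLMAP capture with a sparse/0
    # (or sparse) reconstruction folder works.
    parser = ColmapParser(data_dir="data/my_scene", factor=4, normalize=True, test_every=8)
    trainset = ColmapDataset(parser, split="train", load_depths=False)
    loader = torch.utils.data.DataLoader(trainset, batch_size=1, shuffle=True)

    for batch in loader:
        K = batch["K"]                # (1, 3, 3) undistorted intrinsics
        c2w = batch["camtoworld"]     # (1, 4, 4) camera-to-world pose
        rgb = batch["image"] / 255.0  # (1, H, W, 3) float image scaled to [0, 1]
        break
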
+ imsize = image.shape if imsize is None else imsize + if image.shape != imsize: + new_image = np.zeros(imsize, dtype=np.uint8) + new_image[: image.shape[0], : image.shape[1]] = image[: imsize[0], : imsize[1]] + image = new_image + points = data["points"].numpy() + depths = data["depths"].numpy() + for x, y in points: # type: ignore + cv2.circle(image, (int(x), int(y)), 2, (255, 0, 0), -1) + writer.append_data(image) + writer.close() diff --git a/fvdb/setup.py b/fvdb/setup.py index d7f6b4b74f..5d1f545e2a 100644 --- a/fvdb/setup.py +++ b/fvdb/setup.py @@ -261,6 +261,13 @@ def download_and_install_cudnn() -> Tuple[List[str], List[str]]: if __name__ == "__main__": + # check we will be compiling for a supported compute architecture + for arch_flag in cpp_extension._get_cuda_arch_flags(): + match = re.search(r"code=sm_(\d+)", arch_flag) + if match: + if int(match.group(1)) < 80: + raise RuntimeError("ƒVDB requires a minimum compute capability of 8.0 (Ampere)") + external_dir = get_external_dir() # Use new C++ standard for newer NVCC versions diff --git a/fvdb/src/detail/build/FineFromCoarse.cpp b/fvdb/src/detail/build/FineFromCoarse.cpp index 7deede22ed..d9355a5e82 100644 --- a/fvdb/src/detail/build/FineFromCoarse.cpp +++ b/fvdb/src/detail/build/FineFromCoarse.cpp @@ -36,12 +36,13 @@ buildFineGridFromCoarseGridCPU(const GridBatchImpl &coarseBatchHdl, const torch: auto proxyGrid = std::make_shared(-1.0f); auto proxyGridAccessor = proxyGrid->getWriteAccessor(); + const int64_t joffset = coarseBatchHdl.cumVoxels(bidx); for (auto it = ActiveVoxelIterator(coarseTree); it.isValid(); it++) { const nanovdb::Coord baseIjk(it->first[0] * subdivisionFactor[0], it->first[1] * subdivisionFactor[1], it->first[2] * subdivisionFactor[2]); - if (subdivMaskAcc.size(0) > 0 && !subdivMaskAcc[it->second]) { + if (subdivMaskAcc.size(0) > 0 && !subdivMaskAcc[it->second + joffset]) { continue; } diff --git a/fvdb/src/detail/ops/gsplat/GaussianFullyFusedProjectionJagged.cu b/fvdb/src/detail/ops/gsplat/GaussianFullyFusedProjectionJagged.cu index 224dfa0f81..19b5239ac0 100644 --- a/fvdb/src/detail/ops/gsplat/GaussianFullyFusedProjectionJagged.cu +++ b/fvdb/src/detail/ops/gsplat/GaussianFullyFusedProjectionJagged.cu @@ -411,7 +411,7 @@ dispatchGaussianFullyFusedProjectionJaggedForward( const torch::Tensor &Ks, // [ccz, 3, 3] const uint32_t image_width, const uint32_t image_height, const float eps2d, const float near_plane, const float far_plane, const float radius_clip) { - TORCH_CHECK(false, "CPU implementation not available"); + TORCH_CHECK_NOT_IMPLEMENTED(false, "CPU implementation not available"); } template <> @@ -525,7 +525,7 @@ dispatchGaussianFullyFusedProjectionJaggedBackward( const torch::Tensor &v_depths, // [nnz] const torch::Tensor &v_conics, // [nnz, 3] const bool viewmats_requires_grad) { - TORCH_CHECK(false, "CPU implementation not available"); + TORCH_CHECK_NOT_IMPLEMENTED(false, "CPU implementation not available"); } } // namespace ops diff --git a/fvdb/src/detail/ops/gsplat/GaussianRasterize.cu b/fvdb/src/detail/ops/gsplat/GaussianRasterize.cu index e2e24f7af4..100f83d168 100644 --- a/fvdb/src/detail/ops/gsplat/GaussianRasterize.cu +++ b/fvdb/src/detail/ops/gsplat/GaussianRasterize.cu @@ -782,7 +782,7 @@ dispatchGaussianRasterizeForward( const torch::Tensor &tile_offsets, // [C, tile_height, tile_width] const torch::Tensor &flatten_ids // [n_isects] ) { - TORCH_CHECK(false, "CPU implementation not available"); + TORCH_CHECK_NOT_IMPLEMENTED(false, "CPU implementation not available"); } template <> @@ 
-838,7 +838,7 @@ dispatchGaussianRasterizeBackward( const torch::Tensor &v_render_alphas, // [C, image_height, image_width, 1] // options bool absgrad) { - TORCH_CHECK(false, "CPU implementation not available"); + TORCH_CHECK_NOT_IMPLEMENTED(false, "CPU implementation not available"); } } // namespace ops diff --git a/fvdb/src/detail/ops/jagged/JaggedReduce.cu b/fvdb/src/detail/ops/jagged/JaggedReduce.cu index c1abd2345f..1788a279b8 100644 --- a/fvdb/src/detail/ops/jagged/JaggedReduce.cu +++ b/fvdb/src/detail/ops/jagged/JaggedReduce.cu @@ -107,7 +107,7 @@ JaggedReduce(const torch::Tensor &jdataRaw, const torch::Tensor &jidx, auto joffsetsAccessor = tensorAccessor(joffsets); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(jdata.scalar_type(), "JaggedReduce", [&]() { + AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, jdata.scalar_type(), "JaggedReduce", [&]() { out.fill_(Reducer::init()); auto outAccessor = tensorAccessor(out); diff --git a/fvdb/tests/unit/test_basic_ops.py b/fvdb/tests/unit/test_basic_ops.py index 03c6b3cdef..0f7f2fd339 100644 --- a/fvdb/tests/unit/test_basic_ops.py +++ b/fvdb/tests/unit/test_basic_ops.py @@ -42,6 +42,42 @@ def setUp(self): np.random.seed(0) pass + @parameterized.expand(all_device_dtype_combos) + def test_subdivide_1x_with_mask(self, device, dtype, mutable): + def get_point_list(npc: list, device: torch.device | str) -> list[torch.Tensor]: + batch_size = len(npc) + plist = [] + for i in range(batch_size): + ni = npc[i] + plist.append(torch.randn((ni, 3), dtype=torch.float32, device=device, requires_grad=False)) + return plist + + batch_size = 5 + vxl_size = 0.4 + npc = torch.randint(low=0, high=100, size=(batch_size,), device=device).tolist() + plist = get_point_list(npc, device) + pc_jagged = fvdb.JaggedTensor(plist) + grid_batch = fvdb.gridbatch_from_points(pc_jagged, voxel_sizes=[[vxl_size] * 3] * batch_size) + + random_mask = ( + torch.randn(grid_batch.total_voxels, device=device) + ) > 0.5 # random mask that selects voxels randomly from different grids + random_mask = grid_batch.jagged_like(random_mask) + filtered_grid_batch = grid_batch.subdivided_grid(1, random_mask) + sum = 0 + for i in range(batch_size): + si = grid_batch.joffsets[i] + ei = grid_batch.joffsets[i + 1] + ri = random_mask.jdata[si:ei] + self.assertEqual(ri.sum().item(), filtered_grid_batch.num_voxels_at(i)) + sum += torch.sum(ri) + + self.assertEqual(sum, torch.sum(random_mask.jdata)) + self.assertEqual(torch.sum(random_mask.jdata), filtered_grid_batch.total_voxels) + self.assertTrue( + torch.all(random_mask.int().jsum().jdata == filtered_grid_batch.num_voxels.int()).item(), + ) + @parameterized.expand(["cpu", "cuda"]) def test_is_same(self, device): grid = GridBatch(device=device) diff --git a/fvdb/tests/unit/test_jagged_tensor.py b/fvdb/tests/unit/test_jagged_tensor.py index ae4d8b7212..6972b6a02a 100644 --- a/fvdb/tests/unit/test_jagged_tensor.py +++ b/fvdb/tests/unit/test_jagged_tensor.py @@ -401,8 +401,8 @@ def test_jagged_tensor_one_element(self, device, dtype): data_path = get_fvdb_test_data_path() ray_o_path = data_path / "jagged_tensor" / "ray_orig.pt" ray_d_path = data_path / "jagged_tensor" / "ray_dir.pt" - ray_o = torch.load(ray_o_path).to(device=device, dtype=dtype) - ray_d = torch.load(ray_d_path).to(device=device, dtype=dtype) + ray_o = torch.load(ray_o_path, weights_only=True).to(device=device, dtype=dtype) + ray_d = torch.load(ray_d_path, weights_only=True).to(device=device, dtype=dtype) ray_orig = fvdb.JaggedTensor([ray_o]) ray_dir = fvdb.JaggedTensor([ray_d]) 
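For context on the weights_only=True change above: it restricts torch.load's unpickling to tensors and other allow-listed types, so loading a .pt file cannot execute arbitrary code, and it is sufficient for plain tensor fixtures like these ray origins/directions. A small self-contained illustration:

    import os
    import tempfile

    import torch

    t = torch.randn(4, 3)
    path = os.path.join(tempfile.gettempdir(), "ray_orig.pt")
    torch.save(t, path)
    # Safe load: only tensors and allow-listed types may be unpickled.
    safe = torch.load(path, weights_only=True)
    assert torch.equal(t, safe)
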
self.check_lshape(ray_orig, [ray_o]) @@ -1098,11 +1098,12 @@ def test_batch_size_one(self, device, dtype): pass_percentage=80, conditional_args=[ ["cuda"], - [torch.float16], + [torch.float16, torch.float32], ], ) def test_jsum(self, device, dtype): torch.random.manual_seed(111) + np.random.seed(111) if dtype == torch.float16: min_num = 100 else:
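Tying the JaggedReduce.cu dispatch change to the Python surface: switching to AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, ...) means jagged reductions such as jsum now cover integer dtypes as well as floating point. A short sketch using the same jsum/jdata API exercised in the tests above, assuming a CUDA device is available:

    import torch
    import fvdb

    jt = fvdb.JaggedTensor([
        torch.tensor([1, 2, 3], dtype=torch.int32, device="cuda"),
        torch.tensor([10, 20], dtype=torch.int32, device="cuda"),
    ])
    # Per-element sums over the jagged structure; expected jdata: [6, 30]
    print(jt.jsum().jdata)
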