diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml new file mode 100644 index 00000000..05d61c16 --- /dev/null +++ b/.github/workflows/python-tests.yml @@ -0,0 +1,30 @@ +name: Python Tests +on: + push: + branches: + - main + pull_request: + types: [ assigned, opened, synchronize, reopened ] +jobs: + build: + name: Run Python Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + cache: 'pip' + - name: Install dependencies + run: | + sudo apt-get remove libunwind-14-dev || true + sudo apt-get install -y libceres-dev libeigen3-dev + python -m pip install --upgrade pip + python -m pip install pytest pytest-cov + python -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu + python -m pip install -e .[dev] + python -m pip install -e .[extra] + - name: Test with pytest + run: | + set -o pipefail + pytest --junitxml=pytest.xml --cov=gluefactory tests/ \ No newline at end of file diff --git a/README.md b/README.md index f1132ca5..95f060d4 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ python3 -m pip install -e .[extra] All models and datasets in gluefactory have auto-downloaders, so you can get started right away! ## License -The code and trained models in Glue Factory are released with an Apache-2.0 license. This includes LightGlue trained with an [open version of SuperPoint](https://github.com/rpautrat/SuperPoint). Third-party models that are not compatible with this license, such as SuperPoint (original) and SuperGlue, are provided in `gluefactory_nonfree`, where each model might follow its own, restrictive license. +The code and trained models in Glue Factory are released with an Apache-2.0 license. This includes LightGlue and an [open version of SuperPoint](https://github.com/rpautrat/SuperPoint). Third-party models that are not compatible with this license, such as SuperPoint (original) and SuperGlue, are provided in `gluefactory_nonfree`, where each model might follow its own, restrictive license. 
## Evaluation @@ -66,8 +66,8 @@ Here are the results as Area Under the Curve (AUC) of the homography error at 1 | Methods | DLT | [OpenCV](../gluefactory/robust_estimators/homography/opencv.py) | [PoseLib](../gluefactory/robust_estimators/homography/poselib.py) | | ------------------------------------------------------------ | ------------------ | ------------------ | ------------------ | -| [SuperPoint + SuperGlue](../gluefactory/configs/superpoint+superglue.yaml) | 32.1 / 65.0 / 75.7 | 32.9 / 55.7 / 68.0 | 37.0 / 68.2 / 78.7 | -| [SuperPoint + LightGlue](../gluefactory/configs/superpoint+lightglue.yaml) | 35.1 / 67.2 / 77.6 | 34.2 / 57.9 / 69.9 | 37.1 / 67.4 / 77.8 | +| [SuperPoint + SuperGlue](gluefactory/configs/superpoint+superglue-official.yaml) | 32.1 / 65.0 / 75.7 | 32.9 / 55.7 / 68.0 | 37.0 / 68.2 / 78.7 | +| [SuperPoint + LightGlue](gluefactory/configs/superpoint+lightglue-official.yaml) | 35.1 / 67.2 / 77.6 | 34.2 / 57.9 / 69.9 | 37.1 / 67.4 / 77.8 | @@ -159,9 +159,12 @@ Here are the results as Area Under the Curve (AUC) of the pose error at 5/10/20 | Methods | [pycolmap](../gluefactory/robust_estimators/relative_pose/pycolmap.py) | [OpenCV](../gluefactory/robust_estimators/relative_pose/opencv.py) | [PoseLib](../gluefactory/robust_estimators/relative_pose/poselib.py) | | ------------------------------------------------------------ | ------------------ | ------------------ | ------------------ | -| [SuperPoint + SuperGlue](../gluefactory/configs/superpoint+superglue.yaml) | 54.4 / 70.4 / 82.4 | 48.7 / 65.6 / 79.0 | 64.8 / 77.9 / 87.0 | -| [SuperPoint + LightGlue](../gluefactory/configs/superpoint+lightglue.yaml) | 56.7 / 72.4 / 83.7 | 51.0 / 68.1 / 80.7 | 66.8 / 79.3 / 87.9 | -| [SuperPoint + GlueStick](../gluefactory/configs/superpoint+lsd+gluestick.yaml) | 53.2 / 69.8 / 81.9 | 46.3 / 64.2 / 78.1 | 64.4 / 77.5 / 86.5 | +| [SuperPoint + SuperGlue](gluefactory/configs/superpoint+superglue-official.yaml) | 54.4 / 70.4 / 82.4 | 48.7 / 65.6 / 79.0 | 64.8 / 77.9 / 87.0 | +| [SuperPoint + LightGlue](gluefactory/configs/superpoint+lightglue-official.yaml) | 56.7 / 72.4 / 83.7 | 51.0 / 68.1 / 80.7 | 66.8 / 79.3 / 87.9 | +| [SIFT (2K) + LightGlue](gluefactory/configs/sift+lightglue-official.yaml) | ? / ? / ? | 43.5 / 61.5 / 75.9 | 60.4 / 74.3 / 84.5 | +| [SIFT (4K) + LightGlue](gluefactory/configs/sift+lightglue-official.yaml) | ? / ? / ? | 49.9 / 67.3 / 80.3 | 65.9 / 78.6 / 87.4 | +| [ALIKED + LightGlue](gluefactory/configs/aliked+lightglue-official.yaml) | ? / ? / ? | 51.5 / 68.1 / 80.4 | 66.3 / 78.7 / 87.5 | +| [SuperPoint + GlueStick](gluefactory/configs/superpoint+lsd+gluestick.yaml) | 53.2 / 69.8 / 81.9 | 46.3 / 64.2 / 78.1 | 64.4 / 77.5 / 86.5 | @@ -223,18 +226,18 @@ All training commands automatically download the datasets.
[Training LightGlue] -We show how to train LightGlue with [SuperPoint open](https://github.com/rpautrat/SuperPoint). +We show how to train LightGlue with [SuperPoint](https://github.com/magicleap/SuperPointPretrainedNetwork). We first pre-train LightGlue on the homography dataset: ```bash python -m gluefactory.train sp+lg_homography \ # experiment name - --conf gluefactory/configs/superpoint-open+lightglue_homography.yaml + --conf gluefactory/configs/superpoint+lightglue_homography.yaml ``` Feel free to use any other experiment name. By default the checkpoints are written to `outputs/training/`. The default batch size of 128 corresponds to the results reported in the paper and requires 2x 3090 GPUs with 24GB of VRAM each as well as PyTorch >= 2.0 (FlashAttention). Configurations are managed by [OmegaConf](https://omegaconf.readthedocs.io/) so any entry can be overridden from the command line. If you have PyTorch < 2.0 or weaker GPUs, you may thus need to reduce the batch size via: ```bash python -m gluefactory.train sp+lg_homography \ - --conf gluefactory/configs/superpoint-open+lightglue_homography.yaml \ + --conf gluefactory/configs/superpoint+lightglue_homography.yaml \ data.batch_size=32 # for 1x 1080 GPU ``` Be aware that this can impact the overall performance. You might need to adjust the learning rate accordingly. @@ -242,17 +245,17 @@ Be aware that this can impact the overall performance. You might need to adjust We then fine-tune the model on the MegaDepth dataset: ```bash python -m gluefactory.train sp+lg_megadepth \ - --conf gluefactory/configs/superpoint-open+lightglue_megadepth.yaml \ + --conf gluefactory/configs/superpoint+lightglue_megadepth.yaml \ train.load_experiment=sp+lg_homography ``` Here the default batch size is 32. To speed up training on MegaDepth, we suggest to cache the local features before training (requires around 150 GB of disk space): ```bash # extract features -python -m gluefactory.scripts.export_megadepth --method sp_open --num_workers 8 +python -m gluefactory.scripts.export_megadepth --method sp --num_workers 8 # run training with cached features python -m gluefactory.train sp+lg_megadepth \ - --conf gluefactory/configs/superpoint-open+lightglue_megadepth.yaml \ + --conf gluefactory/configs/superpoint+lightglue_megadepth.yaml \ train.load_experiment=sp+lg_homography \ data.load_features.do=True ``` @@ -297,10 +300,10 @@ Using the following local feature extractors: | Model | LightGlue config | | --------- | --------- | | [SuperPoint (open)](https://github.com/rpautrat/SuperPoint) | `superpoint-open+lightglue_{homography,megadepth}.yaml` | -| [SuperPoint (official)](https://github.com/magicleap/SuperPointPretrainedNetwork) | ❌ TODO | +| [SuperPoint (official)](https://github.com/magicleap/SuperPointPretrainedNetwork) | `superpoint+lightglue_{homography,megadepth}.yaml` | | SIFT (via [pycolmap](https://github.com/colmap/pycolmap)) | `sift+lightglue_{homography,megadepth}.yaml` | | [ALIKED](https://github.com/Shiaoming/ALIKED) | `aliked+lightglue_{homography,megadepth}.yaml` | -| [DISK](https://github.com/cvlab-epfl/disk) | ❌ TODO | +| [DISK](https://github.com/cvlab-epfl/disk) | `disk+lightglue_{homography,megadepth}.yaml` | | Key.Net + HardNet | ❌ TODO | ## Coming soon diff --git a/assets/boat1.png b/assets/boat1.png new file mode 100644 index 00000000..89cca50e Binary files /dev/null and b/assets/boat1.png differ diff --git a/assets/boat2.png b/assets/boat2.png new file mode 100644 index 00000000..5fb961bc Binary files /dev/null and b/assets/boat2.png 
differ diff --git a/gluefactory/configs/aliked+lightglue-official.yaml b/gluefactory/configs/aliked+lightglue-official.yaml new file mode 100644 index 00000000..47bd8266 --- /dev/null +++ b/gluefactory/configs/aliked+lightglue-official.yaml @@ -0,0 +1,28 @@ +model: + name: two_view_pipeline + extractor: + name: extractors.aliked + max_num_keypoints: 2048 + detection_threshold: 0.0 + matcher: + name: matchers.lightglue_pretrained + features: aliked + depth_confidence: -1 + width_confidence: -1 + filter_threshold: 0.1 +benchmarks: + megadepth1500: + data: + preprocessing: + side: long + resize: 1600 + eval: + estimator: opencv + ransac_th: 0.5 + hpatches: + eval: + estimator: opencv + ransac_th: 0.5 + model: + extractor: + max_num_keypoints: 1024 # overwrite config above diff --git a/gluefactory/configs/disk+lightglue_homography.yaml b/gluefactory/configs/disk+lightglue_homography.yaml new file mode 100644 index 00000000..867b1a2b --- /dev/null +++ b/gluefactory/configs/disk+lightglue_homography.yaml @@ -0,0 +1,47 @@ +data: + name: homographies + data_dir: revisitop1m + train_size: 150000 + val_size: 2000 + batch_size: 128 + num_workers: 14 + homography: + difficulty: 0.7 + max_angle: 45 + photometric: + name: lg +model: + name: two_view_pipeline + extractor: + name: extractors.disk_kornia + max_num_keypoints: 512 + force_num_keypoints: True + detection_threshold: 0.0 + trainable: False + ground_truth: + name: matchers.homography_matcher + th_positive: 3 + th_negative: 3 + matcher: + name: matchers.lightglue + filter_threshold: 0.1 + input_dim: 128 + flash: false + checkpointed: true +train: + seed: 0 + epochs: 40 + log_every_iter: 100 + eval_every_iter: 500 + lr: 1e-4 + lr_schedule: + start: 20 + type: exp + on_epoch: true + exp_div_10: 10 + plot: [5, 'gluefactory.visualization.visualize_batch.make_match_figures'] +benchmarks: + hpatches: + eval: + estimator: opencv + ransac_th: 0.5 diff --git a/gluefactory/configs/disk+lightglue_megadepth.yaml b/gluefactory/configs/disk+lightglue_megadepth.yaml new file mode 100644 index 00000000..0beb3794 --- /dev/null +++ b/gluefactory/configs/disk+lightglue_megadepth.yaml @@ -0,0 +1,70 @@ +data: + name: megadepth + preprocessing: + resize: 1024 + side: long + square_pad: True + train_split: train_scenes_clean.txt + train_num_per_scene: 300 + val_split: valid_scenes_clean.txt + val_pairs: valid_pairs.txt + min_overlap: 0.1 + max_overlap: 0.7 + num_overlap_bins: 3 + read_depth: true + read_image: true + batch_size: 32 + num_workers: 14 + load_features: + do: false # enable this if you have cached predictions + path: exports/megadepth-undist-depth-r1024_DISK-k2048-nms5/{scene}.h5 + padding_length: 2048 + padding_fn: pad_local_features +model: + name: two_view_pipeline + extractor: + name: extractors.disk_kornia + max_num_keypoints: 2048 + force_num_keypoints: True + detection_threshold: 0.0 + trainable: False + ground_truth: + name: matchers.depth_matcher + th_positive: 3 + th_negative: 5 + th_epi: 5 + matcher: + name: matchers.lightglue + filter_threshold: 0.1 + input_dim: 128 + flash: false + checkpointed: true + allow_no_extract: True +train: + seed: 0 + epochs: 50 + log_every_iter: 100 + eval_every_iter: 1000 + lr: 1e-4 + lr_schedule: + start: 30 + type: exp + on_epoch: true + exp_div_10: 10 + dataset_callback_fn: sample_new_items + plot: [5, 'gluefactory.visualization.visualize_batch.make_match_figures'] +benchmarks: + megadepth1500: + data: + preprocessing: + side: long + resize: 1024 + eval: + estimator: opencv + ransac_th: 0.5 + hpatches: + eval:
estimator: opencv + ransac_th: 0.5 + model: + extractor: + max_num_keypoints: 1024 diff --git a/gluefactory/configs/sift+lightglue-official.yaml b/gluefactory/configs/sift+lightglue-official.yaml new file mode 100644 index 00000000..7d22df58 --- /dev/null +++ b/gluefactory/configs/sift+lightglue-official.yaml @@ -0,0 +1,28 @@ +model: + name: two_view_pipeline + extractor: + name: extractors.sift + backend: pycolmap_cuda + max_num_keypoints: 4096 + matcher: + name: matchers.lightglue_pretrained + features: sift + depth_confidence: -1 + width_confidence: -1 + filter_threshold: 0.1 +benchmarks: + megadepth1500: + data: + preprocessing: + side: long + resize: 1600 + eval: + estimator: opencv + ransac_th: 0.5 + hpatches: + eval: + estimator: opencv + ransac_th: 0.5 + model: + extractor: + max_num_keypoints: 1024 # overwrite config above diff --git a/gluefactory/configs/sift+lightglue_homography.yaml b/gluefactory/configs/sift+lightglue_homography.yaml index b42c0e7c..2822a4f8 100644 --- a/gluefactory/configs/sift+lightglue_homography.yaml +++ b/gluefactory/configs/sift+lightglue_homography.yaml @@ -14,10 +14,10 @@ model: name: two_view_pipeline extractor: name: extractors.sift - detector: pycolmap_cuda + backend: pycolmap_cuda max_num_keypoints: 1024 force_num_keypoints: True - detection_threshold: 0.0001 + nms_radius: 3 trainable: False ground_truth: name: matchers.homography_matcher @@ -46,3 +46,6 @@ benchmarks: eval: estimator: opencv ransac_th: 0.5 + model: + extractor: + nms_radius: 0 diff --git a/gluefactory/configs/sift+lightglue_megadepth.yaml b/gluefactory/configs/sift+lightglue_megadepth.yaml index dca53c8a..bc8c87b3 100644 --- a/gluefactory/configs/sift+lightglue_megadepth.yaml +++ b/gluefactory/configs/sift+lightglue_megadepth.yaml @@ -25,10 +25,10 @@ model: name: two_view_pipeline extractor: name: extractors.sift - detector: pycolmap_cuda + backend: pycolmap_cuda max_num_keypoints: 2048 force_num_keypoints: True - detection_threshold: 0.0001 + nms_radius: 3 trainable: False matcher: name: matchers.lightglue @@ -62,6 +62,9 @@ benchmarks: preprocessing: side: long resize: 1600 + model: + extractor: + nms_radius: 0 eval: estimator: opencv ransac_th: 0.5 @@ -72,3 +75,4 @@ benchmarks: model: extractor: max_num_keypoints: 1024 + nms_radius: 0 diff --git a/gluefactory/configs/superpoint+lightglue_homography.yaml b/gluefactory/configs/superpoint+lightglue_homography.yaml new file mode 100644 index 00000000..1f353b33 --- /dev/null +++ b/gluefactory/configs/superpoint+lightglue_homography.yaml @@ -0,0 +1,47 @@ +data: + name: homographies + data_dir: revisitop1m + train_size: 150000 + val_size: 2000 + batch_size: 128 + num_workers: 14 + homography: + difficulty: 0.7 + max_angle: 45 + photometric: + name: lg +model: + name: two_view_pipeline + extractor: + name: gluefactory_nonfree.superpoint + max_num_keypoints: 512 + force_num_keypoints: True + detection_threshold: 0.0 + nms_radius: 3 + trainable: False + ground_truth: + name: matchers.homography_matcher + th_positive: 3 + th_negative: 3 + matcher: + name: matchers.lightglue + filter_threshold: 0.1 + flash: false + checkpointed: true +train: + seed: 0 + epochs: 40 + log_every_iter: 100 + eval_every_iter: 500 + lr: 1e-4 + lr_schedule: + start: 20 + type: exp + on_epoch: true + exp_div_10: 10 + plot: [5, 'gluefactory.visualization.visualize_batch.make_match_figures'] +benchmarks: + hpatches: + eval: + estimator: opencv + ransac_th: 0.5 diff --git a/gluefactory/configs/superpoint+lightglue_megadepth.yaml 
b/gluefactory/configs/superpoint+lightglue_megadepth.yaml new file mode 100644 index 00000000..6e3a982a --- /dev/null +++ b/gluefactory/configs/superpoint+lightglue_megadepth.yaml @@ -0,0 +1,71 @@ +data: + name: megadepth + preprocessing: + resize: 1024 + side: long + square_pad: True + train_split: train_scenes_clean.txt + train_num_per_scene: 300 + val_split: valid_scenes_clean.txt + val_pairs: valid_pairs.txt + min_overlap: 0.1 + max_overlap: 0.7 + num_overlap_bins: 3 + read_depth: true + read_image: true + batch_size: 32 + num_workers: 14 + load_features: + do: false # enable this if you have cached predictions + path: exports/megadepth-undist-depth-r1024_SP-k2048-nms3/{scene}.h5 + padding_length: 2048 + padding_fn: pad_local_features +model: + name: two_view_pipeline + extractor: + name: gluefactory_nonfree.superpoint + max_num_keypoints: 2048 + force_num_keypoints: True + detection_threshold: 0.0 + nms_radius: 3 + trainable: False + matcher: + name: matchers.lightglue + filter_threshold: 0.1 + flash: false + checkpointed: true + ground_truth: + name: matchers.depth_matcher + th_positive: 3 + th_negative: 5 + th_epi: 5 + allow_no_extract: True +train: + seed: 0 + epochs: 50 + log_every_iter: 100 + eval_every_iter: 1000 + lr: 1e-4 + lr_schedule: + start: 30 + type: exp + on_epoch: true + exp_div_10: 10 + dataset_callback_fn: sample_new_items + plot: [5, 'gluefactory.visualization.visualize_batch.make_match_figures'] +benchmarks: + megadepth1500: + data: + preprocessing: + side: long + resize: 1600 + eval: + estimator: opencv + ransac_th: 0.5 + hpatches: + eval: + estimator: opencv + ransac_th: 0.5 + model: + extractor: + max_num_keypoints: 1024 diff --git a/gluefactory/eval/hpatches.py b/gluefactory/eval/hpatches.py index 8be7b704..bcd799c3 100644 --- a/gluefactory/eval/hpatches.py +++ b/gluefactory/eval/hpatches.py @@ -5,6 +5,7 @@ import matplotlib.pyplot as plt import numpy as np +import torch from omegaconf import OmegaConf from tqdm import tqdm @@ -12,6 +13,7 @@ from ..models.cache_loader import CacheLoader from ..settings import EVAL_PATH from ..utils.export_predictions import export_predictions +from ..utils.tensor import map_tensor from ..utils.tools import AUCMetric from ..visualization.viz2d import plot_cumulative from .eval_pipeline import EvalPipeline @@ -105,9 +107,11 @@ def run_eval(self, loader, pred_file): cache_loader = CacheLoader({"path": str(pred_file), "collate": None}).eval() for i, data in enumerate(tqdm(loader)): pred = cache_loader(data) + # Remove batch dimension + data = map_tensor(data, lambda t: torch.squeeze(t, dim=0)) # add custom evaluations here if "keypoints0" in pred: - results_i = eval_matches_homography(data, pred, {}) + results_i = eval_matches_homography(data, pred) results_i = {**results_i, **eval_homography_dlt(data, pred)} else: results_i = {} diff --git a/gluefactory/eval/io.py b/gluefactory/eval/io.py index 067e8456..6a55d59e 100644 --- a/gluefactory/eval/io.py +++ b/gluefactory/eval/io.py @@ -89,6 +89,11 @@ def load_model(model_conf, checkpoint): model = load_experiment(checkpoint, conf=model_conf).eval() else: model = get_model("two_view_pipeline")(model_conf).eval() + if not model.is_initialized(): + raise ValueError( + "The provided model has non-initialized parameters. " + + "Try to load a checkpoint instead." 
+ ) return model diff --git a/gluefactory/eval/utils.py b/gluefactory/eval/utils.py index c6e6f006..b89fe792 100644 --- a/gluefactory/eval/utils.py +++ b/gluefactory/eval/utils.py @@ -1,11 +1,12 @@ -import kornia import numpy as np import torch +from kornia.geometry.homography import find_homography_dlt from ..geometry.epipolar import generalized_epi_dist, relative_pose_error from ..geometry.gt_generation import IGNORE_FEATURE from ..geometry.homography import homography_corner_error, sym_homography_error from ..robust_estimators import load_estimator +from ..utils.tensor import index_batch from ..utils.tools import AUCMetric @@ -26,6 +27,16 @@ def get_matches_scores(kpts0, kpts1, matches0, mscores0): return pts0, pts1, scores +def eval_per_batch_item(data: dict, pred: dict, eval_f, *args, **kwargs): + # Batched data + results = [ + eval_f(data_i, pred_i, *args, **kwargs) + for data_i, pred_i in zip(index_batch(data), index_batch(pred)) + ] + # Return a dictionary of lists with the evaluation of each item + return {k: [r[k] for r in results] for k in results[0].keys()} + + def eval_matches_epipolar(data: dict, pred: dict) -> dict: check_keys_recursive(data, ["view0", "view1", "T_0to1"]) check_keys_recursive( @@ -58,23 +69,25 @@ def eval_matches_epipolar(data: dict, pred: dict) -> dict: return results -def eval_matches_homography(data: dict, pred: dict, conf) -> dict: +def eval_matches_homography(data: dict, pred: dict) -> dict: check_keys_recursive(data, ["H_0to1"]) check_keys_recursive( pred, ["keypoints0", "keypoints1", "matches0", "matching_scores0"] ) H_gt = data["H_0to1"] + if H_gt.ndim > 2: + return eval_per_batch_item(data, pred, eval_matches_homography) + kp0, kp1 = pred["keypoints0"], pred["keypoints1"] m0, scores0 = pred["matches0"], pred["matching_scores0"] pts0, pts1, scores = get_matches_scores(kp0, kp1, m0, scores0) - err = sym_homography_error(pts0, pts1, H_gt[0]) + err = sym_homography_error(pts0, pts1, H_gt) results = {} results["prec@1px"] = (err < 1).float().mean().nan_to_num().item() results["prec@3px"] = (err < 3).float().mean().nan_to_num().item() results["num_matches"] = pts0.shape[0] results["num_keypoints"] = (kp0.shape[0] + kp1.shape[0]) / 2.0 - return results @@ -84,7 +97,7 @@ def eval_relative_pose_robust(data, pred, conf): pred, ["keypoints0", "keypoints1", "matches0", "matching_scores0"] ) - T_gt = data["T_0to1"][0] + T_gt = data["T_0to1"] kp0, kp1 = pred["keypoints0"], pred["keypoints1"] m0, scores0 = pred["matches0"], pred["matching_scores0"] pts0, pts1, scores = get_matches_scores(kp0, kp1, m0, scores0) @@ -107,9 +120,8 @@ else: # R, t, inl = ret M = est["M_0to1"] - R, t = M.numpy() inl = est["inliers"].numpy() - r_error, t_error = relative_pose_error(T_gt, R, t) + t_error, r_error = relative_pose_error(T_gt, M.R, M.t) results["rel_pose_error"] = max(r_error, t_error) results["ransac_inl"] = np.sum(inl) results["ransac_inl%"] = np.mean(inl) @@ -119,6 +131,9 @@ def eval_homography_robust(data, pred, conf): H_gt = data["H_0to1"] + if H_gt.ndim > 2: + return eval_per_batch_item(data, pred, eval_homography_robust, conf) + estimator = load_estimator("homography", conf["estimator"])(conf) data_ = {} @@ -158,24 +173,26 @@ def eval_homography_robust(data, pred, conf): return results -def eval_homography_dlt(data, pred, *args): +def eval_homography_dlt(data, pred): H_gt = data["H_0to1"] H_inf = torch.ones_like(H_gt) * float("inf") kp0, kp1 = pred["keypoints0"],
pred["keypoints1"] m0, scores0 = pred["matches0"], pred["matching_scores0"] pts0, pts1, scores = get_matches_scores(kp0, kp1, m0, scores0) + scores = scores.to(pts0) results = {} try: - Hdlt = kornia.geometry.homography.find_homography_dlt( - pts0[None], pts1[None], scores[None].to(pts0) - )[0] + if H_gt.ndim == 2: + pts0, pts1, scores = pts0[None], pts1[None], scores[None] + h_dlt = find_homography_dlt(pts0, pts1, scores) + if H_gt.ndim == 2: + h_dlt = h_dlt[0] except AssertionError: - Hdlt = H_inf + h_dlt = H_inf - error_dlt = homography_corner_error(Hdlt, H_gt, data["view0"]["image_size"]) + error_dlt = homography_corner_error(h_dlt, H_gt, data["view0"]["image_size"]) results["H_error_dlt"] = error_dlt.item() - return results diff --git a/gluefactory/geometry/epipolar.py b/gluefactory/geometry/epipolar.py index 7e1507c0..1f7bb9ce 100644 --- a/gluefactory/geometry/epipolar.py +++ b/gluefactory/geometry/epipolar.py @@ -1,4 +1,3 @@ -import numpy as np import torch from .utils import skew_symmetric, to_homogeneous @@ -124,39 +123,33 @@ def decompose_essential_matrix(E): # pose errors -# TODO: port to torch and batch +# TODO: test for batched data def angle_error_mat(R1, R2): - cos = (np.trace(np.dot(R1.T, R2)) - 1) / 2 - cos = np.clip(cos, -1.0, 1.0) # numercial errors can make it out of bounds - return np.rad2deg(np.abs(np.arccos(cos))) + cos = (torch.trace(torch.einsum("...ij, ...jk -> ...ik", R1.T, R2)) - 1) / 2 + cos = torch.clip(cos, -1.0, 1.0) # numerical errors can make it out of bounds + return torch.rad2deg(torch.abs(torch.arccos(cos))) -def angle_error_vec(v1, v2): - n = np.linalg.norm(v1) * np.linalg.norm(v2) - return np.rad2deg(np.arccos(np.clip(np.dot(v1, v2) / n, -1.0, 1.0))) +def angle_error_vec(v1, v2, eps=1e-10): + n = torch.clip(v1.norm(dim=-1) * v2.norm(dim=-1), min=eps) + v1v2 = (v1 * v2).sum(dim=-1) # dot product in the last dimension + return torch.rad2deg(torch.arccos(torch.clip(v1v2 / n, -1.0, 1.0))) -def compute_pose_error(T_0to1, R, t): - R_gt = T_0to1[:3, :3] - t_gt = T_0to1[:3, 3] - error_t = angle_error_vec(t, t_gt) - error_t = np.minimum(error_t, 180 - error_t) # ambiguity of E estimation - error_R = angle_error_mat(R, R_gt) - return error_t, error_R - +def relative_pose_error(T_0to1, R, t, ignore_gt_t_thr=0.0, eps=1e-10): + if isinstance(T_0to1, torch.Tensor): + R_gt, t_gt = T_0to1[:3, :3], T_0to1[:3, 3] + else: + R_gt, t_gt = T_0to1.R, T_0to1.t + R_gt, t_gt = torch.squeeze(R_gt), torch.squeeze(t_gt) -def relative_pose_error(T_0to1, R, t, ignore_gt_t_thr=0.0): # angle error between 2 vectors - R_gt, t_gt = T_0to1.numpy() - n = np.linalg.norm(t) * np.linalg.norm(t_gt) - t_err = np.rad2deg(np.arccos(np.clip(np.dot(t, t_gt) / n, -1.0, 1.0))) - t_err = np.minimum(t_err, 180 - t_err) # handle E ambiguity - if np.linalg.norm(t_gt) < ignore_gt_t_thr: # pure rotation is challenging + t_err = angle_error_vec(t, t_gt, eps) + t_err = torch.minimum(t_err, 180 - t_err) # handle E ambiguity + if t_gt.norm() < ignore_gt_t_thr: # pure rotation is challenging t_err = 0 # angle error between 2 rotation matrices - cos = (np.trace(np.dot(R.T, R_gt)) - 1) / 2 - cos = np.clip(cos, -1.0, 1.0) # handle numercial errors - R_err = np.rad2deg(np.abs(np.arccos(cos))) + r_err = angle_error_mat(R, R_gt) - return t_err, R_err + return t_err, r_err diff --git a/gluefactory/geometry/homography.py b/gluefactory/geometry/homography.py index 3acb9307..f87b9f90 100644 --- a/gluefactory/geometry/homography.py +++ b/gluefactory/geometry/homography.py @@ -164,7 +164,8 @@ def 
warp_points_torch(points, H, inverse=True): The inverse is used to be coherent with tf.contrib.image.transform Arguments: points: batched list of N points, shape (B, N, 2). - homography: batched or not (shapes (B, 3, 3) and (3, 3) respectively). + H: batched or not (shapes (B, 3, 3) and (3, 3) respectively). + inverse: Whether to multiply the points by H or the inverse of H Returns: a Tensor of shape (B, N, 2) containing the new coordinates of the warps. """ @@ -333,7 +334,7 @@ def sym_homography_error_all(kpts0, kpts1, H): def homography_corner_error(T, T_gt, image_size): - W, H = image_size[:, 0], image_size[:, 1] + W, H = image_size[..., 0], image_size[..., 1] corners0 = torch.Tensor([[0, 0], [W, 0], [W, H], [0, H]]).float().to(T) corners1_gt = from_homogeneous(to_homogeneous(corners0) @ T_gt.transpose(-1, -2)) corners1 = from_homogeneous(to_homogeneous(corners0) @ T.transpose(-1, -2)) diff --git a/gluefactory/geometry/utils.py b/gluefactory/geometry/utils.py index eec330a9..4734e341 100644 --- a/gluefactory/geometry/utils.py +++ b/gluefactory/geometry/utils.py @@ -23,6 +23,7 @@ def from_homogeneous(points, eps=0.0): """Remove the homogeneous dimension of N-dimensional points. Args: points: torch.Tensor or numpy.ndarray with size (..., N+1). + eps: Epsilon value to prevent zero division. Returns: A torch.Tensor or numpy ndarray with size (..., N). """ diff --git a/gluefactory/models/backbones/dinov2.py b/gluefactory/models/backbones/dinov2.py index 48a48b59..cf828523 100644 --- a/gluefactory/models/backbones/dinov2.py +++ b/gluefactory/models/backbones/dinov2.py @@ -10,6 +10,7 @@ class DinoV2(BaseModel): def _init(self, conf): self.net = torch.hub.load("facebookresearch/dinov2", conf.weights) + self.set_initialized() def _forward(self, data): img = data["image"] diff --git a/gluefactory/models/base_model.py b/gluefactory/models/base_model.py index 7313d986..b4f66288 100644 --- a/gluefactory/models/base_model.py +++ b/gluefactory/models/base_model.py @@ -60,6 +60,8 @@ class BaseModel(nn.Module, metaclass=MetaModel): required_data_keys = [] strict_conf = False + are_weights_initialized = False + def __init__(self, conf): """Perform some logic and call the _init method of the child model.""" super().__init__() @@ -125,3 +127,31 @@ def _forward(self, data): def loss(self, pred, data): """To be implemented by the child class.""" raise NotImplementedError + + def load_state_dict(self, *args, **kwargs): + """Load the state dict of the model, and set the model to initialized.""" + ret = super().load_state_dict(*args, **kwargs) + self.set_initialized() + return ret + + def is_initialized(self): + """Recursively check if the model is initialized, i.e. 
weights are loaded""" + is_initialized = True # initialize to true and perform recursive and + for _, w in self.named_children(): + if isinstance(w, BaseModel): + # if children is BaseModel, we perform recursive check + is_initialized = is_initialized and w.is_initialized() + else: + # else, we check if self is initialized or the children has no params + n_params = len(list(w.parameters())) + is_initialized = is_initialized and ( + n_params == 0 or self.are_weights_initialized + ) + return is_initialized + + def set_initialized(self, to: bool = True): + """Recursively set the initialization state.""" + self.are_weights_initialized = to + for _, w in self.named_parameters(): + if isinstance(w, BaseModel): + w.set_initialized(to) diff --git a/gluefactory/models/cache_loader.py b/gluefactory/models/cache_loader.py index 3fbf0f71..b345a997 100644 --- a/gluefactory/models/cache_loader.py +++ b/gluefactory/models/cache_loader.py @@ -29,6 +29,15 @@ def pad_local_features(pred: dict, seq_l: int): pred["scales"] = pad_to_length(pred["scales"], seq_l, -1, mode="zeros") if "oris" in pred.keys(): pred["oris"] = pad_to_length(pred["oris"], seq_l, -1, mode="zeros") + + if "depth_keypoints" in pred.keys(): + pred["depth_keypoints"] = pad_to_length( + pred["depth_keypoints"], seq_l, -1, mode="zeros" + ) + if "valid_depth_keypoints" in pred.keys(): + pred["valid_depth_keypoints"] = pad_to_length( + pred["valid_depth_keypoints"], seq_l, -1, mode="zeros" + ) return pred diff --git a/gluefactory/models/extractors/disk_kornia.py b/gluefactory/models/extractors/disk_kornia.py index 4d60973d..e01ab89d 100644 --- a/gluefactory/models/extractors/disk_kornia.py +++ b/gluefactory/models/extractors/disk_kornia.py @@ -21,6 +21,7 @@ class DISK(BaseModel): def _init(self, conf): self.model = kornia.feature.DISK.from_pretrained(conf.weights) + self.set_initialized() def _get_dense_outputs(self, images): B = images.shape[0] diff --git a/gluefactory/models/extractors/keynet_affnet_hardnet.py b/gluefactory/models/extractors/keynet_affnet_hardnet.py index b9091ea4..419ee972 100644 --- a/gluefactory/models/extractors/keynet_affnet_hardnet.py +++ b/gluefactory/models/extractors/keynet_affnet_hardnet.py @@ -21,6 +21,7 @@ def _init(self, conf): upright=conf.upright, scale_laf=conf.scale_laf, ) + self.set_initialized() def _forward(self, data): image = data["image"] diff --git a/gluefactory/models/extractors/sift.py b/gluefactory/models/extractors/sift.py index 5eb0c956..9f07725d 100644 --- a/gluefactory/models/extractors/sift.py +++ b/gluefactory/models/extractors/sift.py @@ -1,238 +1,233 @@ +import warnings + import cv2 import numpy as np -import pycolmap import torch -from omegaconf import OmegaConf -from scipy.spatial import KDTree +from kornia.color import rgb_to_grayscale +from packaging import version + +try: + import pycolmap +except ImportError: + pycolmap = None from ..base_model import BaseModel from ..utils.misc import pad_to_length -EPS = 1e-6 - - -def sift_to_rootsift(x): - x = x / (np.linalg.norm(x, ord=1, axis=-1, keepdims=True) + EPS) - x = np.sqrt(x.clip(min=EPS)) - x = x / (np.linalg.norm(x, axis=-1, keepdims=True) + EPS) - return x - - -# from OpenGlue -def nms_keypoints(kpts: np.ndarray, responses: np.ndarray, radius: float) -> np.ndarray: - # TODO: add approximate tree - kd_tree = KDTree(kpts) - - sorted_idx = np.argsort(-responses) - kpts_to_keep_idx = [] - removed_idx = set() - - for idx in sorted_idx: - # skip point if it was already removed - if idx in removed_idx: - continue - - 
kpts_to_keep_idx.append(idx) - point = kpts[idx] - neighbors = kd_tree.query_ball_point(point, r=radius) - # Variable `neighbors` contains the `point` itself - removed_idx.update(neighbors) - - mask = np.zeros((kpts.shape[0],), dtype=bool) - mask[kpts_to_keep_idx] = True - return mask - -def detect_kpts_opencv( - features: cv2.Feature2D, image: np.ndarray, describe: bool = True -) -> np.ndarray: +def filter_dog_point(points, scales, angles, image_shape, nms_radius, scores=None): + h, w = image_shape + ij = np.round(points - 0.5).astype(int).T[::-1] + + # Remove duplicate points (identical coordinates). + # Pick highest scale or score + s = scales if scores is None else scores + buffer = np.zeros((h, w)) + np.maximum.at(buffer, tuple(ij), s) + keep = np.where(buffer[tuple(ij)] == s)[0] + + # Pick lowest angle (arbitrary). + ij = ij[:, keep] + buffer[:] = np.inf + o_abs = np.abs(angles[keep]) + np.minimum.at(buffer, tuple(ij), o_abs) + mask = buffer[tuple(ij)] == o_abs + ij = ij[:, mask] + keep = keep[mask] + + if nms_radius > 0: + # Apply NMS on the remaining points + buffer[:] = 0 + buffer[tuple(ij)] = s[keep] # scores or scale + + local_max = torch.nn.functional.max_pool2d( + torch.from_numpy(buffer).unsqueeze(0), + kernel_size=nms_radius * 2 + 1, + stride=1, + padding=nms_radius, + ).squeeze(0) + is_local_max = buffer == local_max.numpy() + keep = keep[is_local_max[tuple(ij)]] + return keep + + +def sift_to_rootsift(x: torch.Tensor, eps=1e-6) -> torch.Tensor: + x = torch.nn.functional.normalize(x, p=1, dim=-1, eps=eps) + x.clip_(min=eps).sqrt_() + return torch.nn.functional.normalize(x, p=2, dim=-1, eps=eps) + + +def run_opencv_sift(features: cv2.Feature2D, image: np.ndarray) -> np.ndarray: """ Detect keypoints using OpenCV Detector. - Optionally, perform NMS and filter top-response keypoints. Optionally, perform description. Args: features: OpenCV based keypoints detector and descriptor image: Grayscale image of uint8 data type - describe: flag indicating whether to simultaneously compute descriptors Returns: - kpts: 1D array of detected cv2.KeyPoint + keypoints: 1D array of detected cv2.KeyPoint + scores: 1D array of responses + descriptors: 1D array of descriptors """ - if describe: - kpts, descriptors = features.detectAndCompute(image, None) - else: - kpts = features.detect(image, None) - kpts = np.array(kpts) - - responses = np.array([k.response for k in kpts], dtype=np.float32) - - # select all - top_score_idx = ... 
- pts = np.array([k.pt for k in kpts], dtype=np.float32) - scales = np.array([k.size for k in kpts], dtype=np.float32) - angles = np.array([k.angle for k in kpts], dtype=np.float32) - spts = np.concatenate([pts, scales[..., None], angles[..., None]], -1) - - if describe: - return spts[top_score_idx], responses[top_score_idx], descriptors[top_score_idx] - else: - return spts[top_score_idx], responses[top_score_idx] + detections, descriptors = features.detectAndCompute(image, None) + points = np.array([k.pt for k in detections], dtype=np.float32) + scores = np.array([k.response for k in detections], dtype=np.float32) + scales = np.array([k.size for k in detections], dtype=np.float32) + angles = np.deg2rad(np.array([k.angle for k in detections], dtype=np.float32)) + return points, scores, scales, angles, descriptors class SIFT(BaseModel): default_conf = { - "has_detector": True, - "has_descriptor": True, - "descriptor_dim": 128, - "pycolmap_options": { - "first_octave": 0, - "peak_threshold": 0.005, - "edge_threshold": 10, - }, "rootsift": True, - "nms_radius": None, - "max_num_keypoints": -1, - "max_num_keypoints_val": None, + "nms_radius": 0, # None to disable filtering entirely. + "max_num_keypoints": 4096, + "backend": "opencv", # in {opencv, pycolmap, pycolmap_cpu, pycolmap_cuda} + "detection_threshold": 0.0066667, # from COLMAP + "edge_threshold": 10, + "first_octave": -1, # only used by pycolmap, the default of COLMAP + "num_octaves": 4, "force_num_keypoints": False, - "randomize_keypoints_training": False, - "detector": "pycolmap", # ['pycolmap', 'pycolmap_cpu', 'pycolmap_cuda', 'cv2'] - "detection_threshold": None, } required_data_keys = ["image"] def _init(self, conf): - self.sift = None # lazy loading - - @torch.no_grad() - def extract_features(self, image): - image_np = image.cpu().numpy()[0] - assert image.shape[0] == 1 - assert image_np.min() >= -EPS and image_np.max() <= 1 + EPS - - detector = str(self.conf.detector) - - if self.sift is None and detector.startswith("pycolmap"): - options = OmegaConf.to_container(self.conf.pycolmap_options) + backend = self.conf.backend + if backend.startswith("pycolmap"): + if pycolmap is None: + raise ImportError( + "Cannot find module pycolmap: install it with pip " + "or use backend=opencv." + ) + options = { + "peak_threshold": self.conf.detection_threshold, + "edge_threshold": self.conf.edge_threshold, + "first_octave": self.conf.first_octave, + "num_octaves": self.conf.num_octaves, + "normalization": pycolmap.Normalization.L2, # L1_ROOT is buggy.
+ } device = ( - "auto" if detector == "pycolmap" else detector.replace("pycolmap_", "") + "auto" if backend == "pycolmap" else backend.replace("pycolmap_", "") ) - if self.conf.rootsift == "rootsift": - options["normalization"] = pycolmap.Normalization.L1_ROOT + if ( + backend == "pycolmap_cpu" or not pycolmap.has_cuda + ) and pycolmap.__version__ < "0.5.0": + warnings.warn( + "The pycolmap CPU SIFT is buggy in version < 0.5.0, " + "consider upgrading pycolmap or use the CUDA version.", + stacklevel=1, + ) else: - options["normalization"] = pycolmap.Normalization.L2 - if self.conf.detection_threshold is not None: - options["peak_threshold"] = self.conf.detection_threshold - options["max_num_features"] = self.conf.max_num_keypoints + options["max_num_features"] = self.conf.max_num_keypoints self.sift = pycolmap.Sift(options=options, device=device) - elif self.sift is None and self.conf.detector == "cv2": - self.sift = cv2.SIFT_create(contrastThreshold=self.conf.detection_threshold) + elif backend == "opencv": + self.sift = cv2.SIFT_create( + contrastThreshold=self.conf.detection_threshold, + nfeatures=self.conf.max_num_keypoints, + edgeThreshold=self.conf.edge_threshold, + nOctaveLayers=self.conf.num_octaves, + ) + else: + backends = {"opencv", "pycolmap", "pycolmap_cpu", "pycolmap_cuda"} + raise ValueError( + f"Unknown backend: {backend} not in " f"{{{','.join(backends)}}}." + ) - if detector.startswith("pycolmap"): - keypoints, scores, descriptors = self.sift.extract(image_np) - elif detector == "cv2": + def extract_single_image(self, image: torch.Tensor): + image_np = image.cpu().numpy().squeeze(0) + + if self.conf.backend.startswith("pycolmap"): + if version.parse(pycolmap.__version__) >= version.parse("0.5.0"): + detections, descriptors = self.sift.extract(image_np) + scores = None # Scores are not exposed by COLMAP anymore. + else: + detections, scores, descriptors = self.sift.extract(image_np) + keypoints = detections[:, :2] # Keep only (x, y). + scales, angles = detections[:, -2:].T + if scores is not None and ( + self.conf.backend == "pycolmap_cpu" or not pycolmap.has_cuda + ): + # Set the scores as a combination of abs. response and scale. + scores = np.abs(scores) * scales + elif self.conf.backend == "opencv": # TODO: Check if opencv keypoints are already in corner convention - keypoints, scores, descriptors = detect_kpts_opencv( + keypoints, scores, scales, angles, descriptors = run_opencv_sift( self.sift, (image_np * 255.0).astype(np.uint8) ) + pred = { + "keypoints": keypoints, + "scales": scales, + "oris": angles, + "descriptors": descriptors, + } + if scores is not None: + pred["keypoint_scores"] = scores + + # sometimes pycolmap returns points outside the image. 
We remove them + if self.conf.backend.startswith("pycolmap"): + is_inside = ( + pred["keypoints"] + 0.5 < np.array([image_np.shape[-2:][::-1]]) + ).all(-1) + pred = {k: v[is_inside] for k, v in pred.items()} if self.conf.nms_radius is not None: - mask = nms_keypoints(keypoints[:, :2], scores, self.conf.nms_radius) - keypoints = keypoints[mask] - scores = scores[mask] - descriptors = descriptors[mask] - - scales = keypoints[:, 2] - oris = np.rad2deg(keypoints[:, 3]) - - if self.conf.has_descriptor: - # We still renormalize because COLMAP does not normalize well, - # maybe due to numerical errors - if self.conf.rootsift: - descriptors = sift_to_rootsift(descriptors) - descriptors = torch.from_numpy(descriptors) - keypoints = torch.from_numpy(keypoints[:, :2]) # keep only x, y - scales = torch.from_numpy(scales) - oris = torch.from_numpy(oris) - scores = torch.from_numpy(scores) - - # Keep the k keypoints with highest score - max_kps = self.conf.max_num_keypoints - - # for val we allow different - if not self.training and self.conf.max_num_keypoints_val is not None: - max_kps = self.conf.max_num_keypoints_val - - if max_kps is not None and max_kps > 0: - if self.conf.randomize_keypoints_training and self.training: - # instead of selecting top-k, sample k by score weights - raise NotImplementedError - elif max_kps < scores.shape[0]: - # TODO: check that the scores from PyCOLMAP are 100% correct, - # follow https://github.com/mihaidusmanu/pycolmap/issues/8 - indices = torch.topk(scores, max_kps).indices - keypoints = keypoints[indices] - scales = scales[indices] - oris = oris[indices] - scores = scores[indices] - if self.conf.has_descriptor: - descriptors = descriptors[indices] + keep = filter_dog_point( + pred["keypoints"], + pred["scales"], + pred["oris"], + image_np.shape, + self.conf.nms_radius, + pred.get("keypoint_scores"), + ) + pred = {k: v[keep] for k, v in pred.items()} + + pred = {k: torch.from_numpy(v) for k, v in pred.items()} + if scores is not None: + # Keep the k keypoints with highest score + num_points = self.conf.max_num_keypoints + if num_points is not None and len(pred["keypoints"]) > num_points: + indices = torch.topk(pred["keypoint_scores"], num_points).indices + pred = {k: v[indices] for k, v in pred.items()} if self.conf.force_num_keypoints: - keypoints = pad_to_length( - keypoints, - max_kps, + num_points = min(self.conf.max_num_keypoints, len(pred["keypoints"])) + pred["keypoints"] = pad_to_length( + pred["keypoints"], + num_points, -2, mode="random_c", bounds=(0, min(image.shape[1:])), ) - scores = pad_to_length(scores, max_kps, -1, mode="zeros") - scales = pad_to_length(scales, max_kps, -1, mode="zeros") - oris = pad_to_length(oris, max_kps, -1, mode="zeros") - if self.conf.has_descriptor: - descriptors = pad_to_length(descriptors, max_kps, -2, mode="zeros") - - pred = { - "keypoints": keypoints, - "scales": scales, - "oris": oris, - "keypoint_scores": scores, - } - - if self.conf.has_descriptor: - pred["descriptors"] = descriptors + pred["scales"] = pad_to_length(pred["scales"], num_points, -1, mode="zeros") + pred["oris"] = pad_to_length(pred["oris"], num_points, -1, mode="zeros") + pred["descriptors"] = pad_to_length( + pred["descriptors"], num_points, -2, mode="zeros" + ) + if "keypoint_scores" in pred: + pred["keypoint_scores"] = pad_to_length( + pred["keypoint_scores"], num_points, -1, mode="zeros" + ) return pred - @torch.no_grad() - def _forward(self, data): - pred = { - "keypoints": [], - "scales": [], - "oris": [], - "keypoint_scores": [], - "descriptors": [], - } -
+ def _forward(self, data: dict) -> dict: image = data["image"] - if image.shape[1] == 3: # RGB - scale = image.new_tensor([0.299, 0.587, 0.114]).view(1, 3, 1, 1) - image = (image * scale).sum(1, keepdim=True).cpu() - - for k in range(image.shape[0]): + if image.shape[1] == 3: + image = rgb_to_grayscale(image) + device = image.device + image = image.cpu() + pred = [] + for k in range(len(image)): img = image[k] if "image_size" in data.keys(): # avoid extracting points in padded areas w, h = data["image_size"][k] img = img[:, :h, :w] - p = self.extract_features(img) - for k, v in p.items(): - pred[k].append(v) - - if (image.shape[0] == 1) or self.conf.force_num_keypoints: - pred = {k: torch.stack(pred[k], 0) for k in pred.keys()} - - pred = {k: pred[k].to(device=data["image"].device) for k in pred.keys()} - - pred["oris"] = torch.deg2rad(pred["oris"]) + p = self.extract_single_image(img) + pred.append(p) + pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]} + if self.conf.rootsift: + pred["descriptors"] = sift_to_rootsift(pred["descriptors"]) return pred def loss(self, pred, data): diff --git a/gluefactory/models/extractors/sift_kornia.py b/gluefactory/models/extractors/sift_kornia.py index 78810e66..699e5a26 100644 --- a/gluefactory/models/extractors/sift_kornia.py +++ b/gluefactory/models/extractors/sift_kornia.py @@ -19,12 +19,13 @@ def _init(self, conf): self.sift = kornia.feature.SIFTFeature( num_features=self.conf.max_num_keypoints, rootsift=self.conf.rootsift ) + self.set_initialized() def _forward(self, data): lafs, scores, descriptors = self.sift(data["image"]) keypoints = kornia.feature.get_laf_center(lafs) - scales = kornia.feature.get_laf_scale(lafs) - oris = kornia.feature.get_laf_orientation(lafs) + scales = kornia.feature.get_laf_scale(lafs).squeeze(-1).squeeze(-1) + oris = kornia.feature.get_laf_orientation(lafs).squeeze(-1) pred = { "keypoints": keypoints, # @TODO: confirm keypoints are in corner convention "scales": scales, diff --git a/gluefactory/models/lines/deeplsd.py b/gluefactory/models/lines/deeplsd.py index c35aa01e..122f4b4f 100644 --- a/gluefactory/models/lines/deeplsd.py +++ b/gluefactory/models/lines/deeplsd.py @@ -34,6 +34,7 @@ def _init(self, conf): ckpt = torch.load(ckpt, map_location="cpu") self.net = deeplsd_inference.DeepLSD(conf.model_conf).eval() self.net.load_state_dict(ckpt["model"]) + self.set_initialized() def download_model(self, path): import subprocess diff --git a/gluefactory/models/matchers/gluestick.py b/gluefactory/models/matchers/gluestick.py index 0187e0c3..e16a8a52 100644 --- a/gluefactory/models/matchers/gluestick.py +++ b/gluefactory/models/matchers/gluestick.py @@ -119,7 +119,7 @@ def _init(self, conf): "Loading GlueStick model from " f'"{self.url.format(conf.version)}"' ) state_dict = torch.hub.load_state_dict_from_url( - self.url.format(conf.version), file_name=fname + self.url.format(conf.version), file_name=fname, map_location="cpu" ) if "model" in state_dict: diff --git a/gluefactory/models/matchers/kornia_loftr.py b/gluefactory/models/matchers/kornia_loftr.py index 45a20b7a..6fbd47b0 100644 --- a/gluefactory/models/matchers/kornia_loftr.py +++ b/gluefactory/models/matchers/kornia_loftr.py @@ -13,6 +13,7 @@ class LoFTRModule(BaseModel): def _init(self, conf): self.net = kornia.feature.LoFTR(pretrained="outdoor") + self.set_initialized() def _forward(self, data): image0 = data["view0"]["image"] diff --git a/gluefactory/models/matchers/lightglue_pretrained.py 
b/gluefactory/models/matchers/lightglue_pretrained.py index 2e7c71b6..275a9d54 100644 --- a/gluefactory/models/matchers/lightglue_pretrained.py +++ b/gluefactory/models/matchers/lightglue_pretrained.py @@ -17,17 +17,18 @@ class LightGlue(BaseModel): def _init(self, conf): dconf = OmegaConf.to_container(conf) - self.net = LightGlue_(dconf.pop("features"), **dconf).cuda() - # self.net.compile() + self.net = LightGlue_(dconf.pop("features"), **dconf) + self.set_initialized() def _forward(self, data): + required_keys = ["keypoints", "descriptors", "scales", "oris"] view0 = { - **{k: data[k + "0"] for k in ["keypoints", "descriptors"]}, **data["view0"], + **{k: data[k + "0"] for k in required_keys if (k + "0") in data}, } view1 = { - **{k: data[k + "1"] for k in ["keypoints", "descriptors"]}, **data["view1"], + **{k: data[k + "1"] for k in required_keys if (k + "1") in data}, } return self.net({"image0": view0, "image1": view1}) diff --git a/gluefactory/robust_estimators/homography/homography_est.py b/gluefactory/robust_estimators/homography/homography_est.py index 510650c4..780011ee 100644 --- a/gluefactory/robust_estimators/homography/homography_est.py +++ b/gluefactory/robust_estimators/homography/homography_est.py @@ -7,6 +7,7 @@ ransac_point_line_homography, ) +from ...utils.tensor import batch_to_numpy from ..base_estimator import BaseEstimator @@ -50,19 +51,20 @@ def _init(self, conf): pass def _forward(self, data): + feat = data["m_kpts0"] if "m_kpts0" in data else data["m_lines0"] + data = batch_to_numpy(data) m_features = { - "kpts0": data["m_kpts1"].numpy() if "m_kpts1" in data else None, - "kpts1": data["m_kpts0"].numpy() if "m_kpts0" in data else None, - "lines0": data["m_lines1"].numpy() if "m_lines1" in data else None, - "lines1": data["m_lines0"].numpy() if "m_lines0" in data else None, + "kpts0": data["m_kpts1"] if "m_kpts1" in data else None, + "kpts1": data["m_kpts0"] if "m_kpts0" in data else None, + "lines0": data["m_lines1"] if "m_lines1" in data else None, + "lines1": data["m_lines0"] if "m_lines0" in data else None, } - feat = data["m_kpts0"] if "m_kpts0" in data else data["m_lines0"] M = H_estimation_hybrid(**m_features, tol_px=self.conf.ransac_th) success = M is not None if not success: M = torch.eye(3, device=feat.device, dtype=feat.dtype) else: - M = torch.tensor(M).to(feat) + M = torch.from_numpy(M).to(feat) estimation = { "success": success, diff --git a/gluefactory/robust_estimators/homography/poselib.py b/gluefactory/robust_estimators/homography/poselib.py index e99e9493..6aa71496 100644 --- a/gluefactory/robust_estimators/homography/poselib.py +++ b/gluefactory/robust_estimators/homography/poselib.py @@ -16,8 +16,8 @@ def _init(self, conf): def _forward(self, data): pts0, pts1 = data["m_kpts0"], data["m_kpts1"] M, info = poselib.estimate_homography( - pts0.numpy(), - pts1.numpy(), + pts0.detach().cpu().numpy(), + pts1.detach().cpu().numpy(), { "max_reproj_error": self.conf.ransac_th, **OmegaConf.to_container(self.conf.options), diff --git a/gluefactory/scripts/export_megadepth.py b/gluefactory/scripts/export_megadepth.py index 95e89d81..84ae8dfb 100644 --- a/gluefactory/scripts/export_megadepth.py +++ b/gluefactory/scripts/export_megadepth.py @@ -37,14 +37,13 @@ }, }, "cv2-sift": { - "name": f"r{resize}_cv2-SIFT-k{n_kpts}", + "name": f"r{resize}_opencv-SIFT-k{n_kpts}", "keys": ["keypoints", "descriptors", "keypoint_scores", "oris", "scales"], "gray": True, "conf": { "name": "extractors.sift", "max_num_keypoints": 4096, - "detection_threshold": 0.001, - 
"detector": "cv2", + "backend": "opencv", }, }, "pycolmap-sift": { @@ -54,11 +53,7 @@ "conf": { "name": "extractors.sift", "max_num_keypoints": n_kpts, - "detection_threshold": 0.0001, - "detector": "pycolmap", - "pycolmap_options": { - "first_octave": -1, - }, + "backend": "pycolmap", }, }, "pycolmap-sift-gpu": { @@ -68,11 +63,7 @@ "conf": { "name": "extractors.sift", "max_num_keypoints": n_kpts, - "detection_threshold": 0.0066666, - "detector": "pycolmap_cuda", - "pycolmap_options": { - "first_octave": -1, - }, + "backend": "pycolmap_cuda", "nms_radius": 3, }, }, @@ -133,15 +124,18 @@ def run_export(feature_file, scene, args): conf = OmegaConf.create(conf) - keys = configs[args.method]["keys"] + ["depth_keypoints", "valid_depth_keypoints"] + keys = configs[args.method]["keys"] dataset = get_dataset(conf.data.name)(conf.data) loader = dataset.get_data_loader(conf.split or "test") device = "cuda" if torch.cuda.is_available() else "cpu" model = get_model(conf.model.name)(conf.model).eval().to(device) - callback_fn = None - # callback_fn=get_kp_depth # use this to store the depth of each keypoint + if args.export_sparse_depth: + callback_fn = get_kp_depth # use this to store the depth of each keypoint + keys = keys + ["depth_keypoints", "valid_depth_keypoints"] + else: + callback_fn = None export_predictions( loader, model, feature_file, as_half=True, keys=keys, callback_fn=callback_fn ) @@ -153,6 +147,7 @@ def run_export(feature_file, scene, args): parser.add_argument("--method", type=str, default="sp") parser.add_argument("--scenes", type=str, default=None) parser.add_argument("--num_workers", type=int, default=0) + parser.add_argument("--export_sparse_depth", action="store_true") args = parser.parse_args() export_name = configs[args.method]["name"] diff --git a/gluefactory/utils/tensor.py b/gluefactory/utils/tensor.py index f31bb580..d0a8ca50 100644 --- a/gluefactory/utils/tensor.py +++ b/gluefactory/utils/tensor.py @@ -40,3 +40,9 @@ def rbd(data: dict) -> dict: k: v[0] if isinstance(v, (torch.Tensor, np.ndarray, list)) else v for k, v in data.items() } + + +def index_batch(tensor_dict): + batch_size = len(next(iter(tensor_dict.values()))) + for i in range(batch_size): + yield map_tensor(tensor_dict, lambda t: t[i]) diff --git a/gluefactory/visualization/viz2d.py b/gluefactory/visualization/viz2d.py index 42a000a3..bfa64735 100644 --- a/gluefactory/visualization/viz2d.py +++ b/gluefactory/visualization/viz2d.py @@ -208,14 +208,14 @@ def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, a=1.0, labels=None, axe kpts0[:, 1], c=color, s=ps, - label=None if labels is None else labels[0], + label=None if labels is None or len(labels) == 0 else labels[0], ) ax1.scatter( kpts1[:, 0], kpts1[:, 1], c=color, s=ps, - label=None if labels is None else labels[1], + label=None if labels is None or len(labels) == 0 else labels[1], ) diff --git a/pyproject.toml b/pyproject.toml index 5185a753..b740a956 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,12 +38,12 @@ urls = {Repository = "https://github.com/cvg/glue-factory"} [project.optional-dependencies] extra = [ "pycolmap", - "poselib @ git+https://github.com/PoseLib/PoseLib.git", - "pytlsd @ git+https://github.com/iago-suarez/pytlsd.git", + "poselib @ git+https://github.com/PoseLib/PoseLib.git@9c8f3ca1baba69e19726cc7caded574873ec1f9e", + "pytlsd @ git+https://github.com/iago-suarez/pytlsd.git@v0.0.5", "deeplsd @ git+https://github.com/cvg/DeepLSD.git", - "homography_est @ git+https://github.com/rpautrat/homography_est.git", + "homography_est @ 
git+https://github.com/rpautrat/homography_est.git@17b200d528e6aa8ac61a878a29265bf5f9d36c41", ] -dev = ["black", "flake8", "isort"] +dev = ["black", "flake8", "isort", "parameterized"] [tool.setuptools.packages.find] include = ["gluefactory*"] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_eval_utils.py b/tests/test_eval_utils.py new file mode 100644 index 00000000..fead8964 --- /dev/null +++ b/tests/test_eval_utils.py @@ -0,0 +1,88 @@ +import unittest + +import torch + +from gluefactory.eval.utils import eval_matches_homography +from gluefactory.geometry.homography import warp_points_torch + + +class TestEvalUtils(unittest.TestCase): + @staticmethod + def default_pts(): + return torch.tensor( + [ + [10.0, 10.0], + [10.0, 20.0], + [20.0, 20.0], + [20.0, 10.0], + ] + ) + + @staticmethod + def default_pred(kps0, kps1): + return { + "keypoints0": kps0, + "keypoints1": kps1, + "matches0": torch.arange(len(kps0)), + "matching_scores0": torch.ones(len(kps1)), + } + + def test_eval_matches_homography_trivial(self): + data = {"H_0to1": torch.eye(3)} + kps = self.default_pts() + pred = self.default_pred(kps, kps) + + results = eval_matches_homography(data, pred) + + self.assertEqual(results["prec@1px"], 1) + self.assertEqual(results["prec@3px"], 1) + self.assertEqual(results["num_matches"], 4) + self.assertEqual(results["num_keypoints"], 4) + + def test_eval_matches_homography_real(self): + data = {"H_0to1": torch.tensor([[1.5, 0.2, 21], [-0.3, 1.6, 33], [0, 0, 1.0]])} + kps0 = self.default_pts() + kps1 = warp_points_torch(kps0, data["H_0to1"], inverse=False) + pred = self.default_pred(kps0, kps1) + + results = eval_matches_homography(data, pred) + + self.assertEqual(results["prec@1px"], 1) + self.assertEqual(results["prec@3px"], 1) + + def test_eval_matches_homography_real_outliers(self): + data = {"H_0to1": torch.tensor([[1.5, 0.2, 21], [-0.3, 1.6, 33], [0, 0, 1.0]])} + kps0 = self.default_pts() + kps0 = torch.cat([kps0, torch.tensor([[5.0, 5.0]])]) + kps1 = warp_points_torch(kps0, data["H_0to1"], inverse=False) + # Move one keypoint 1.5 pixels away in x and y + kps1[-1] += 1.5 + pred = self.default_pred(kps0, kps1) + + results = eval_matches_homography(data, pred) + self.assertAlmostEqual(results["prec@1px"], 0.8) + self.assertAlmostEqual(results["prec@3px"], 1.0) + + def test_eval_matches_homography_batched(self): + H0 = torch.tensor([[1.5, 0.2, 21], [-0.3, 1.6, 33], [0, 0, 1.0]]) + H1 = torch.tensor([[0.7, 0.1, -5], [-0.1, 0.65, 13], [0, 0, 1.0]]) + data = {"H_0to1": torch.stack([H0, H1])} + kps0 = torch.stack([self.default_pts(), self.default_pts().flip(0)]) + kps1 = warp_points_torch(kps0, data["H_0to1"], inverse=False) + # In the first element of the batch there is one outlier + kps1[0, -1] += 5 + matches0 = torch.stack([torch.arange(4), torch.arange(4)]) + # In the second element of the batch there is only 2 matches + matches0[1, :2] = -1 + pred = { + "keypoints0": kps0, + "keypoints1": kps1, + "matches0": matches0, + "matching_scores0": torch.ones_like(matches0), + } + + results = eval_matches_homography(data, pred) + self.assertAlmostEqual(results["prec@1px"][0], 0.75) + self.assertAlmostEqual(results["prec@1px"][1], 1.0) + self.assertAlmostEqual(results["num_matches"][0], 4) + self.assertAlmostEqual(results["num_matches"][1], 2) diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 00000000..e459ada5 --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,132 @@ +import 
unittest +from collections import namedtuple +from os.path import splitext + +import cv2 +import matplotlib.pyplot as plt +import torch.cuda +from kornia import image_to_tensor +from omegaconf import OmegaConf +from parameterized import parameterized +from torch import Tensor + +from gluefactory import logger +from gluefactory.eval.utils import ( + eval_homography_dlt, + eval_homography_robust, + eval_matches_homography, +) +from gluefactory.models.two_view_pipeline import TwoViewPipeline +from gluefactory.settings import root +from gluefactory.utils.image import ImagePreprocessor +from gluefactory.utils.tensor import map_tensor +from gluefactory.utils.tools import set_seed +from gluefactory.visualization.viz2d import ( + plot_color_line_matches, + plot_images, + plot_matches, +) + + +def create_input_data(cv_img0, cv_img1, device): + img0 = image_to_tensor(cv_img0).float() / 255 + img1 = image_to_tensor(cv_img1).float() / 255 + ip = ImagePreprocessor({}) + data = {"view0": ip(img0), "view1": ip(img1)} + data = map_tensor( + data, + lambda t: t[None].to(device) + if isinstance(t, Tensor) + else torch.from_numpy(t)[None].to(device), + ) + return data + + +ExpectedResults = namedtuple("ExpectedResults", ("num_matches", "prec3px", "h_error")) + + +class TestIntegration(unittest.TestCase): + methods_to_test = [ + ("superpoint+NN.yaml", "poselib", ExpectedResults(1300, 0.8, 1.0)), + ("superpoint-open+NN.yaml", "poselib", ExpectedResults(1300, 0.8, 1.0)), + ( + "superpoint+lsd+gluestick.yaml", + "homography_est", + ExpectedResults(1300, 0.8, 1.0), + ), + ( + "superpoint+lightglue-official.yaml", + "poselib", + ExpectedResults(1300, 0.8, 1.0), + ), + ] + + visualize = False + + @parameterized.expand(methods_to_test) + @torch.no_grad() + def test_real_homography(self, conf_file, estimator, exp_results): + set_seed(0) + model_path = root / "gluefactory" / "configs" / conf_file + img_path0 = root / "assets" / "boat1.png" + img_path1 = root / "assets" / "boat2.png" + h_gt = torch.tensor( + [ + [0.85799, 0.21669, 9.4839], + [-0.21177, 0.85855, 130.48], + [1.5015e-06, 9.2033e-07, 1], + ] + ) + + device = "cuda" if torch.cuda.is_available() else "cpu" + gs = TwoViewPipeline(OmegaConf.load(model_path).model).to(device).eval() + + cv_img0, cv_img1 = cv2.imread(str(img_path0)), cv2.imread(str(img_path1)) + data = create_input_data(cv_img0, cv_img1, device) + pred = gs(data) + pred = map_tensor( + pred, lambda t: torch.squeeze(t, dim=0) if isinstance(t, Tensor) else t + ) + data["H_0to1"] = h_gt.to(device) + data["H_1to0"] = torch.linalg.inv(h_gt).to(device) + + results = eval_matches_homography(data, pred) + results = {**results, **eval_homography_dlt(data, pred)} + + results = { + **results, + **eval_homography_robust( + data, + pred, + {"estimator": estimator}, + ), + } + + logger.info(results) + self.assertGreater(results["num_matches"], exp_results.num_matches) + self.assertGreater(results["prec@3px"], exp_results.prec3px) + self.assertLess(results["H_error_ransac"], exp_results.h_error) + + if self.visualize: + pred = map_tensor( + pred, lambda t: t.cpu().numpy() if isinstance(t, Tensor) else t + ) + kp0, kp1 = pred["keypoints0"], pred["keypoints1"] + m0 = pred["matches0"] + valid0 = m0 != -1 + kpm0, kpm1 = kp0[valid0], kp1[m0[valid0]] + + plot_images([cv_img0, cv_img1]) + plot_matches(kpm0, kpm1, a=0.0) + plt.savefig(f"{splitext(conf_file)[0]}_point_matches.svg") + + if "lines0" in pred and "lines1" in pred: + lines0, lines1 = pred["lines0"], pred["lines1"] + lm0 = pred["line_matches0"] + lvalid0 = 
lm0 != -1 + linem0, linem1 = lines0[lvalid0], lines1[lm0[lvalid0]] + + plot_images([cv_img0, cv_img1]) + plot_color_line_matches([linem0, linem1]) + plt.savefig(f"{splitext(conf_file)[0]}_line_matches.svg") + plt.show()
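
A note on the AUC numbers in the README tables above: each entry is the area under the cumulative error curve (the fraction of pairs with error below a threshold, integrated up to 1/3/5 px for homographies or 5/10/20 degrees for relative pose), normalized so a perfect method scores 1. A minimal NumPy sketch of that standard computation (illustrative only; the repo computes it through AUCMetric in gluefactory.utils.tools):

```python
import numpy as np

def error_auc(errors, thresholds):
    """Area under the recall-vs-error curve, one value per threshold."""
    errors = np.sort(np.nan_to_num(np.asarray(errors, dtype=float), nan=np.inf))
    recall = (np.arange(len(errors)) + 1) / len(errors)
    errors = np.concatenate(([0.0], errors))
    recall = np.concatenate(([0.0], recall))
    aucs = []
    for t in thresholds:
        last = np.searchsorted(errors, t)  # errors[:last] lie below t
        recall_t = np.concatenate((recall[:last], [recall[last - 1]]))
        error_t = np.concatenate((errors[:last], [t]))
        aucs.append(np.trapz(recall_t, x=error_t) / t)  # normalize to [0, 1]
    return aucs

# e.g. homography corner errors in pixels, evaluated at 1/3/5 px:
print(error_auc([0.5, 1.5, 2.5, 20.0], [1, 3, 5]))
```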
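
The batched evaluation added in gluefactory/eval/utils.py follows a single pattern: if the ground truth carries a batch dimension, slice every entry of the data and prediction dicts along it, run the per-item metric, and regroup the results into a dictionary of lists. A self-contained sketch of that pattern with a toy metric (simplified to flat dicts of tensors; the repo's index_batch walks nested dicts via map_tensor):

```python
import torch

def index_batch(tensor_dict):
    # Yield one sub-dict per batch item.
    batch_size = len(next(iter(tensor_dict.values())))
    for i in range(batch_size):
        yield {k: v[i] for k, v in tensor_dict.items()}

def eval_per_batch_item(data, pred, eval_f, *args, **kwargs):
    results = [
        eval_f(data_i, pred_i, *args, **kwargs)
        for data_i, pred_i in zip(index_batch(data), index_batch(pred))
    ]
    # Dictionary of lists with the evaluation of each item.
    return {k: [r[k] for r in results] for k in results[0]}

def toy_metric(data_i, pred_i):  # stand-in for e.g. eval_matches_homography
    return {"num_matches": int((pred_i["matches0"] >= 0).sum())}

data = {"H_0to1": torch.eye(3).expand(2, 3, 3)}
pred = {"matches0": torch.tensor([[0, 1, -1], [0, -1, -1]])}
print(eval_per_batch_item(data, pred, toy_metric))  # {'num_matches': [2, 1]}
```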
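
Finally, the rewritten SIFT extractor replaces the old detector and pycolmap_options entries with a single backend switch and flat threshold options. A hedged usage sketch (the config keys come from the new default_conf above; get_model and the {"image": ...} input follow the patterns visible in gluefactory/eval/io.py and gluefactory/scripts/export_megadepth.py):

```python
import torch
from omegaconf import OmegaConf

from gluefactory.models import get_model

conf = OmegaConf.create(
    {
        "name": "extractors.sift",
        "backend": "opencv",  # or pycolmap / pycolmap_cpu / pycolmap_cuda
        "max_num_keypoints": 4096,
        "nms_radius": 0,  # 0 = only DoG duplicate filtering, None = no filtering
        "rootsift": True,
    }
)
model = get_model(conf.name)(conf).eval()

# Toy input: (B, C, H, W) tensor in [0, 1]; RGB is converted to grayscale internally.
image = torch.rand(1, 1, 480, 640)
pred = model({"image": image})
print(pred["keypoints"].shape, pred["descriptors"].shape)
```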