diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml new file mode 100644 index 00000000..05d61c16 --- /dev/null +++ b/.github/workflows/python-tests.yml @@ -0,0 +1,30 @@ +name: Python Tests +on: + push: + branches: + - main + pull_request: + types: [ assigned, opened, synchronize, reopened ] +jobs: + build: + name: Run Python Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + cache: 'pip' + - name: Install dependencies + run: | + sudo apt-get remove libunwind-14-dev || true + sudo apt-get install -y libceres-dev libeigen3-dev + python -m pip install --upgrade pip + python -m pip install pytest pytest-cov + python -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu + python -m pip install -e .[dev] + python -m pip install -e .[extra] + - name: Test with pytest + run: | + set -o pipefail + pytest --junitxml=pytest.xml --cov=gluefactory tests/ \ No newline at end of file diff --git a/README.md b/README.md index f1132ca5..95f060d4 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ python3 -m pip install -e .[extra] All models and datasets in gluefactory have auto-downloaders, so you can get started right away! ## License -The code and trained models in Glue Factory are released with an Apache-2.0 license. This includes LightGlue trained with an [open version of SuperPoint](https://github.com/rpautrat/SuperPoint). Third-party models that are not compatible with this license, such as SuperPoint (original) and SuperGlue, are provided in `gluefactory_nonfree`, where each model might follow its own, restrictive license. +The code and trained models in Glue Factory are released with an Apache-2.0 license. This includes LightGlue and an [open version of SuperPoint](https://github.com/rpautrat/SuperPoint). Third-party models that are not compatible with this license, such as SuperPoint (original) and SuperGlue, are provided in `gluefactory_nonfree`, where each model might follow its own, restrictive license. 
## Evaluation @@ -66,8 +66,8 @@ Here are the results as Area Under the Curve (AUC) of the homography error at 1 | Methods | DLT | [OpenCV](../gluefactory/robust_estimators/homography/opencv.py) | [PoseLib](../gluefactory/robust_estimators/homography/poselib.py) | | ------------------------------------------------------------ | ------------------ | ------------------ | ------------------ | -| [SuperPoint + SuperGlue](../gluefactory/configs/superpoint+superglue.yaml) | 32.1 / 65.0 / 75.7 | 32.9 / 55.7 / 68.0 | 37.0 / 68.2 / 78.7 | -| [SuperPoint + LightGlue](../gluefactory/configs/superpoint+lightglue.yaml) | 35.1 / 67.2 / 77.6 | 34.2 / 57.9 / 69.9 | 37.1 / 67.4 / 77.8 | +| [SuperPoint + SuperGlue](gluefactory/configs/superpoint+superglue-official.yaml) | 32.1 / 65.0 / 75.7 | 32.9 / 55.7 / 68.0 | 37.0 / 68.2 / 78.7 | +| [SuperPoint + LightGlue](gluefactory/configs/superpoint+lightglue-official.yaml) | 35.1 / 67.2 / 77.6 | 34.2 / 57.9 / 69.9 | 37.1 / 67.4 / 77.8 | @@ -159,9 +159,12 @@ Here are the results as Area Under the Curve (AUC) of the pose error at 5/10/20 | Methods | [pycolmap](../gluefactory/robust_estimators/relative_pose/pycolmap.py) | [OpenCV](../gluefactory/robust_estimators/relative_pose/opencv.py) | [PoseLib](../gluefactory/robust_estimators/relative_pose/poselib.py) | | ------------------------------------------------------------ | ------------------ | ------------------ | ------------------ | -| [SuperPoint + SuperGlue](../gluefactory/configs/superpoint+superglue.yaml) | 54.4 / 70.4 / 82.4 | 48.7 / 65.6 / 79.0 | 64.8 / 77.9 / 87.0 | -| [SuperPoint + LightGlue](../gluefactory/configs/superpoint+lightglue.yaml) | 56.7 / 72.4 / 83.7 | 51.0 / 68.1 / 80.7 | 66.8 / 79.3 / 87.9 | -| [SuperPoint + GlueStick](../gluefactory/configs/superpoint+lsd+gluestick.yaml) | 53.2 / 69.8 / 81.9 | 46.3 / 64.2 / 78.1 | 64.4 / 77.5 / 86.5 | +| [SuperPoint + SuperGlue](gluefactory/configs/superpoint+superglue-official.yaml) | 54.4 / 70.4 / 82.4 | 48.7 / 65.6 / 79.0 | 64.8 / 77.9 / 87.0 | +| [SuperPoint + LightGlue](gluefactory/configs/superpoint+lightglue-official.yaml) | 56.7 / 72.4 / 83.7 | 51.0 / 68.1 / 80.7 | 66.8 / 79.3 / 87.9 | +| [SIFT (2K) + LightGlue](gluefactory/configs/sift+lightglue-official.yaml) | ? / ? / ? | 43.5 / 61.5 / 75.9 | 60.4 / 74.3 / 84.5 | +| [SIFT (4K) + LightGlue](gluefactory/configs/sift+lightglue-official.yaml) | ? / ? / ? | 49.9 / 67.3 / 80.3 | 65.9 / 78.6 / 87.4 | +| [ALIKED + LightGlue](gluefactory/configs/aliked+lightglue-official.yaml) | ? / ? / ? | 51.5 / 68.1 / 80.4 | 66.3 / 78.7 / 87.5 | +| [SuperPoint + GlueStick](gluefactory/configs/superpoint+lsd+gluestick.yaml) | 53.2 / 69.8 / 81.9 | 46.3 / 64.2 / 78.1 | 64.4 / 77.5 / 86.5 | @@ -223,18 +226,18 @@ All training commands automatically download the datasets.
[Training LightGlue] -We show how to train LightGlue with [SuperPoint open](https://github.com/rpautrat/SuperPoint). +We show how to train LightGlue with [SuperPoint](https://github.com/magicleap/SuperPointPretrainedNetwork). We first pre-train LightGlue on the homography dataset: ```bash python -m gluefactory.train sp+lg_homography \ # experiment name - --conf gluefactory/configs/superpoint-open+lightglue_homography.yaml + --conf gluefactory/configs/superpoint+lightglue_homography.yaml ``` Feel free to use any other experiment name. By default the checkpoints are written to `outputs/training/`. The default batch size of 128 corresponds to the results reported in the paper and requires 2x 3090 GPUs with 24GB of VRAM each as well as PyTorch >= 2.0 (FlashAttention). Configurations are managed by [OmegaConf](https://omegaconf.readthedocs.io/) so any entry can be overridden from the command line. If you have PyTorch < 2.0 or weaker GPUs, you may thus need to reduce the batch size via: ```bash python -m gluefactory.train sp+lg_homography \ - --conf gluefactory/configs/superpoint-open+lightglue_homography.yaml \ + --conf gluefactory/configs/superpoint+lightglue_homography.yaml \ data.batch_size=32 # for 1x 1080 GPU ``` Be aware that this can impact the overall performance. You might need to adjust the learning rate accordingly. @@ -242,17 +245,17 @@ Be aware that this can impact the overall performance. You might need to adjust We then fine-tune the model on the MegaDepth dataset: ```bash python -m gluefactory.train sp+lg_megadepth \ - --conf gluefactory/configs/superpoint-open+lightglue_megadepth.yaml \ + --conf gluefactory/configs/superpoint+lightglue_megadepth.yaml \ train.load_experiment=sp+lg_homography ``` Here the default batch size is 32. To speed up training on MegaDepth, we suggest to cache the local features before training (requires around 150 GB of disk space): ```bash # extract features -python -m gluefactory.scripts.export_megadepth --method sp_open --num_workers 8 +python -m gluefactory.scripts.export_megadepth --method sp --num_workers 8 # run training with cached features python -m gluefactory.train sp+lg_megadepth \ - --conf gluefactory/configs/superpoint-open+lightglue_megadepth.yaml \ + --conf gluefactory/configs/superpoint+lightglue_megadepth.yaml \ train.load_experiment=sp+lg_homography \ data.load_features.do=True ``` @@ -297,10 +300,10 @@ Using the following local feature extractors: | Model | LightGlue config | | --------- | --------- | | [SuperPoint (open)](https://github.com/rpautrat/SuperPoint) | `superpoint-open+lightglue_{homography,megadepth}.yaml` | -| [SuperPoint (official)](https://github.com/magicleap/SuperPointPretrainedNetwork) | ❌ TODO | +| [SuperPoint (official)](https://github.com/magicleap/SuperPointPretrainedNetwork) | `superpoint+lightglue_{homography,megadepth}.yaml` | | SIFT (via [pycolmap](https://github.com/colmap/pycolmap)) | `sift+lightglue_{homography,megadepth}.yaml` | | [ALIKED](https://github.com/Shiaoming/ALIKED) | `aliked+lightglue_{homography,megadepth}.yaml` | -| [DISK](https://github.com/cvlab-epfl/disk) | ❌ TODO | +| [DISK](https://github.com/cvlab-epfl/disk) | `disk+lightglue_{homography,megadepth}.yaml` | | Key.Net + HardNet | ❌ TODO | ## Coming soon diff --git a/assets/boat1.png b/assets/boat1.png new file mode 100644 index 00000000..89cca50e Binary files /dev/null and b/assets/boat1.png differ diff --git a/assets/boat2.png b/assets/boat2.png new file mode 100644 index 00000000..5fb961bc Binary files /dev/null and b/assets/boat2.png 
differ diff --git a/gluefactory/configs/aliked+lightglue-official.yaml b/gluefactory/configs/aliked+lightglue-official.yaml new file mode 100644 index 00000000..47bd8266 --- /dev/null +++ b/gluefactory/configs/aliked+lightglue-official.yaml @@ -0,0 +1,28 @@ +model: + name: two_view_pipeline + extractor: + name: extractors.aliked + max_num_keypoints: 2048 + detection_threshold: 0.0 + matcher: + name: matchers.lightglue_pretrained + features: aliked + depth_confidence: -1 + width_confidence: -1 + filter_threshold: 0.1 +benchmarks: + megadepth1500: + data: + preprocessing: + side: long + resize: 1600 + eval: + estimator: opencv + ransac_th: 0.5 + hpatches: + eval: + estimator: opencv + ransac_th: 0.5 + model: + extractor: + max_num_keypoints: 1024 # overwrite config above diff --git a/gluefactory/configs/disk+lightglue_homography.yaml b/gluefactory/configs/disk+lightglue_homography.yaml new file mode 100644 index 00000000..867b1a2b --- /dev/null +++ b/gluefactory/configs/disk+lightglue_homography.yaml @@ -0,0 +1,47 @@ +data: + name: homographies + data_dir: revisitop1m + train_size: 150000 + val_size: 2000 + batch_size: 128 + num_workers: 14 + homography: + difficulty: 0.7 + max_angle: 45 + photometric: + name: lg +model: + name: two_view_pipeline + extractor: + name: extractors.disk_kornia + max_num_keypoints: 512 + force_num_keypoints: True + detection_threshold: 0.0 + trainable: False + ground_truth: + name: matchers.homography_matcher + th_positive: 3 + th_negative: 3 + matcher: + name: matchers.lightglue + filter_threshold: 0.1 + input_dim: 128 + flash: false + checkpointed: true +train: + seed: 0 + epochs: 40 + log_every_iter: 100 + eval_every_iter: 500 + lr: 1e-4 + lr_schedule: + start: 20 + type: exp + on_epoch: true + exp_div_10: 10 + plot: [5, 'gluefactory.visualization.visualize_batch.make_match_figures'] +benchmarks: + hpatches: + eval: + estimator: opencv + ransac_th: 0.5 diff --git a/gluefactory/configs/disk+lightglue_megadepth.yaml b/gluefactory/configs/disk+lightglue_megadepth.yaml new file mode 100644 index 00000000..0beb3794 --- /dev/null +++ b/gluefactory/configs/disk+lightglue_megadepth.yaml @@ -0,0 +1,70 @@ +data: + name: megadepth + preprocessing: + resize: 1024 + side: long + square_pad: True + train_split: train_scenes_clean.txt + train_num_per_scene: 300 + val_split: valid_scenes_clean.txt + val_pairs: valid_pairs.txt + min_overlap: 0.1 + max_overlap: 0.7 + num_overlap_bins: 3 + read_depth: true + read_image: true + batch_size: 32 + num_workers: 14 + load_features: + do: false # enable this if you have cached predictions + path: exports/megadepth-undist-depth-r1024_DISK-k2048-nms5/{scene}.h5 + padding_length: 2048 + padding_fn: pad_local_features +model: + name: two_view_pipeline + extractor: + name: extractors.disk_kornia + max_num_keypoints: 2048 + force_num_keypoints: True + detection_threshold: 0.0 + trainable: False + ground_truth: + name: matchers.depth_matcher + th_positive: 3 + th_negative: 5 + th_epi: 5 + matcher: + name: matchers.lightglue + filter_threshold: 0.1 + input_dim: 128 + flash: false + checkpointed: true + allow_no_extract: True +train: + seed: 0 + epochs: 50 + log_every_iter: 100 + eval_every_iter: 1000 + lr: 1e-4 + lr_schedule: + start: 30 + type: exp + on_epoch: true + exp_div_10: 10 + dataset_callback_fn: sample_new_items + plot: [5, 'gluefactory.visualization.visualize_batch.make_match_figures'] +benchmarks: + megadepth1500: + data: + preprocessing: + side: long + resize: 1024 + eval: + estimator: opencv + ransac_th: 0.5 + hpatches: + eval:
estimator: opencv + ransac_th: 0.5 + model: + extractor: + max_num_keypoints: 1024 diff --git a/gluefactory/configs/sift+lightglue-official.yaml b/gluefactory/configs/sift+lightglue-official.yaml new file mode 100644 index 00000000..7d22df58 --- /dev/null +++ b/gluefactory/configs/sift+lightglue-official.yaml @@ -0,0 +1,28 @@ +model: + name: two_view_pipeline + extractor: + name: extractors.sift + backend: pycolmap_cuda + max_num_keypoints: 4096 + matcher: + name: matchers.lightglue_pretrained + features: sift + depth_confidence: -1 + width_confidence: -1 + filter_threshold: 0.1 +benchmarks: + megadepth1500: + data: + preprocessing: + side: long + resize: 1600 + eval: + estimator: opencv + ransac_th: 0.5 + hpatches: + eval: + estimator: opencv + ransac_th: 0.5 + model: + extractor: + max_num_keypoints: 1024 # overwrite config above diff --git a/gluefactory/configs/sift+lightglue_homography.yaml b/gluefactory/configs/sift+lightglue_homography.yaml index b42c0e7c..2822a4f8 100644 --- a/gluefactory/configs/sift+lightglue_homography.yaml +++ b/gluefactory/configs/sift+lightglue_homography.yaml @@ -14,10 +14,10 @@ model: name: two_view_pipeline extractor: name: extractors.sift - detector: pycolmap_cuda + backend: pycolmap_cuda max_num_keypoints: 1024 force_num_keypoints: True - detection_threshold: 0.0001 + nms_radius: 3 trainable: False ground_truth: name: matchers.homography_matcher @@ -46,3 +46,6 @@ benchmarks: eval: estimator: opencv ransac_th: 0.5 + model: + extractor: + nms_radius: 0 diff --git a/gluefactory/configs/sift+lightglue_megadepth.yaml b/gluefactory/configs/sift+lightglue_megadepth.yaml index dca53c8a..bc8c87b3 100644 --- a/gluefactory/configs/sift+lightglue_megadepth.yaml +++ b/gluefactory/configs/sift+lightglue_megadepth.yaml @@ -25,10 +25,10 @@ model: name: two_view_pipeline extractor: name: extractors.sift - detector: pycolmap_cuda + backend: pycolmap_cuda max_num_keypoints: 2048 force_num_keypoints: True - detection_threshold: 0.0001 + nms_radius: 3 trainable: False matcher: name: matchers.lightglue @@ -62,6 +62,9 @@ benchmarks: preprocessing: side: long resize: 1600 + model: + extractor: + nms_radius: 0 eval: estimator: opencv ransac_th: 0.5 @@ -72,3 +75,4 @@ benchmarks: model: extractor: max_num_keypoints: 1024 + nms_radius: 0 diff --git a/gluefactory/configs/superpoint+lightglue_homography.yaml b/gluefactory/configs/superpoint+lightglue_homography.yaml new file mode 100644 index 00000000..1f353b33 --- /dev/null +++ b/gluefactory/configs/superpoint+lightglue_homography.yaml @@ -0,0 +1,47 @@ +data: + name: homographies + data_dir: revisitop1m + train_size: 150000 + val_size: 2000 + batch_size: 128 + num_workers: 14 + homography: + difficulty: 0.7 + max_angle: 45 + photometric: + name: lg +model: + name: two_view_pipeline + extractor: + name: gluefactory_nonfree.superpoint + max_num_keypoints: 512 + force_num_keypoints: True + detection_threshold: 0.0 + nms_radius: 3 + trainable: False + ground_truth: + name: matchers.homography_matcher + th_positive: 3 + th_negative: 3 + matcher: + name: matchers.lightglue + filter_threshold: 0.1 + flash: false + checkpointed: true +train: + seed: 0 + epochs: 40 + log_every_iter: 100 + eval_every_iter: 500 + lr: 1e-4 + lr_schedule: + start: 20 + type: exp + on_epoch: true + exp_div_10: 10 + plot: [5, 'gluefactory.visualization.visualize_batch.make_match_figures'] +benchmarks: + hpatches: + eval: + estimator: opencv + ransac_th: 0.5 diff --git a/gluefactory/configs/superpoint+lightglue_megadepth.yaml 
b/gluefactory/configs/superpoint+lightglue_megadepth.yaml new file mode 100644 index 00000000..6e3a982a --- /dev/null +++ b/gluefactory/configs/superpoint+lightglue_megadepth.yaml @@ -0,0 +1,71 @@ +data: + name: megadepth + preprocessing: + resize: 1024 + side: long + square_pad: True + train_split: train_scenes_clean.txt + train_num_per_scene: 300 + val_split: valid_scenes_clean.txt + val_pairs: valid_pairs.txt + min_overlap: 0.1 + max_overlap: 0.7 + num_overlap_bins: 3 + read_depth: true + read_image: true + batch_size: 32 + num_workers: 14 + load_features: + do: false # enable this if you have cached predictions + path: exports/megadepth-undist-depth-r1024_SP-k2048-nms3/{scene}.h5 + padding_length: 2048 + padding_fn: pad_local_features +model: + name: two_view_pipeline + extractor: + name: gluefactory_nonfree.superpoint + max_num_keypoints: 2048 + force_num_keypoints: True + detection_threshold: 0.0 + nms_radius: 3 + trainable: False + matcher: + name: matchers.lightglue + filter_threshold: 0.1 + flash: false + checkpointed: true + ground_truth: + name: matchers.depth_matcher + th_positive: 3 + th_negative: 5 + th_epi: 5 + allow_no_extract: True +train: + seed: 0 + epochs: 50 + log_every_iter: 100 + eval_every_iter: 1000 + lr: 1e-4 + lr_schedule: + start: 30 + type: exp + on_epoch: true + exp_div_10: 10 + dataset_callback_fn: sample_new_items + plot: [5, 'gluefactory.visualization.visualize_batch.make_match_figures'] +benchmarks: + megadepth1500: + data: + preprocessing: + side: long + resize: 1600 + eval: + estimator: opencv + ransac_th: 0.5 + hpatches: + eval: + estimator: opencv + ransac_th: 0.5 + model: + extractor: + max_num_keypoints: 1024 diff --git a/gluefactory/eval/hpatches.py b/gluefactory/eval/hpatches.py index 8be7b704..bcd799c3 100644 --- a/gluefactory/eval/hpatches.py +++ b/gluefactory/eval/hpatches.py @@ -5,6 +5,7 @@ import matplotlib.pyplot as plt import numpy as np +import torch from omegaconf import OmegaConf from tqdm import tqdm @@ -12,6 +13,7 @@ from ..models.cache_loader import CacheLoader from ..settings import EVAL_PATH from ..utils.export_predictions import export_predictions +from ..utils.tensor import map_tensor from ..utils.tools import AUCMetric from ..visualization.viz2d import plot_cumulative from .eval_pipeline import EvalPipeline @@ -105,9 +107,11 @@ def run_eval(self, loader, pred_file): cache_loader = CacheLoader({"path": str(pred_file), "collate": None}).eval() for i, data in enumerate(tqdm(loader)): pred = cache_loader(data) + # Remove batch dimension + data = map_tensor(data, lambda t: torch.squeeze(t, dim=0)) # add custom evaluations here if "keypoints0" in pred: - results_i = eval_matches_homography(data, pred, {}) + results_i = eval_matches_homography(data, pred) results_i = {**results_i, **eval_homography_dlt(data, pred)} else: results_i = {} diff --git a/gluefactory/eval/io.py b/gluefactory/eval/io.py index 067e8456..6a55d59e 100644 --- a/gluefactory/eval/io.py +++ b/gluefactory/eval/io.py @@ -89,6 +89,11 @@ def load_model(model_conf, checkpoint): model = load_experiment(checkpoint, conf=model_conf).eval() else: model = get_model("two_view_pipeline")(model_conf).eval() + if not model.is_initialized(): + raise ValueError( + "The provided model has non-initialized parameters. " + + "Try to load a checkpoint instead." 
+ ) return model diff --git a/gluefactory/eval/utils.py b/gluefactory/eval/utils.py index c6e6f006..b89fe792 100644 --- a/gluefactory/eval/utils.py +++ b/gluefactory/eval/utils.py @@ -1,11 +1,12 @@ -import kornia import numpy as np import torch +from kornia.geometry.homography import find_homography_dlt from ..geometry.epipolar import generalized_epi_dist, relative_pose_error from ..geometry.gt_generation import IGNORE_FEATURE from ..geometry.homography import homography_corner_error, sym_homography_error from ..robust_estimators import load_estimator +from ..utils.tensor import index_batch from ..utils.tools import AUCMetric @@ -26,6 +27,16 @@ def get_matches_scores(kpts0, kpts1, matches0, mscores0): return pts0, pts1, scores +def eval_per_batch_item(data: dict, pred: dict, eval_f, *args, **kwargs): + # Batched data + results = [ + eval_f(data_i, pred_i, *args, **kwargs) + for data_i, pred_i in zip(index_batch(data), index_batch(pred)) + ] + # Return a dictionary of lists with the evaluation of each item + return {k: [r[k] for r in results] for k in results[0].keys()} + + def eval_matches_epipolar(data: dict, pred: dict) -> dict: check_keys_recursive(data, ["view0", "view1", "T_0to1"]) check_keys_recursive( @@ -58,23 +69,25 @@ def eval_matches_epipolar(data: dict, pred: dict) -> dict: return results -def eval_matches_homography(data: dict, pred: dict, conf) -> dict: +def eval_matches_homography(data: dict, pred: dict) -> dict: check_keys_recursive(data, ["H_0to1"]) check_keys_recursive( pred, ["keypoints0", "keypoints1", "matches0", "matching_scores0"] ) H_gt = data["H_0to1"] + if H_gt.ndim > 2: + return eval_per_batch_item(data, pred, eval_matches_homography) + kp0, kp1 = pred["keypoints0"], pred["keypoints1"] m0, scores0 = pred["matches0"], pred["matching_scores0"] pts0, pts1, scores = get_matches_scores(kp0, kp1, m0, scores0) - err = sym_homography_error(pts0, pts1, H_gt[0]) + err = sym_homography_error(pts0, pts1, H_gt) results = {} results["prec@1px"] = (err < 1).float().mean().nan_to_num().item() results["prec@3px"] = (err < 3).float().mean().nan_to_num().item() results["num_matches"] = pts0.shape[0] results["num_keypoints"] = (kp0.shape[0] + kp1.shape[0]) / 2.0 - return results @@ -84,7 +97,7 @@ def eval_relative_pose_robust(data, pred, conf): pred, ["keypoints0", "keypoints1", "matches0", "matching_scores0"] ) - T_gt = data["T_0to1"][0] + T_gt = data["T_0to1"] kp0, kp1 = pred["keypoints0"], pred["keypoints1"] m0, scores0 = pred["matches0"], pred["matching_scores0"] pts0, pts1, scores = get_matches_scores(kp0, kp1, m0, scores0) @@ -107,9 +120,8 @@ else: # R, t, inl = ret M = est["M_0to1"] - R, t = M.numpy() inl = est["inliers"].numpy() - r_error, t_error = relative_pose_error(T_gt, R, t) + t_error, r_error = relative_pose_error(T_gt, M.R, M.t) results["rel_pose_error"] = max(r_error, t_error) results["ransac_inl"] = np.sum(inl) results["ransac_inl%"] = np.mean(inl) @@ -119,6 +131,9 @@ def eval_homography_robust(data, pred, conf): H_gt = data["H_0to1"] + if H_gt.ndim > 2: + return eval_per_batch_item(data, pred, eval_homography_robust, conf) + estimator = load_estimator("homography", conf["estimator"])(conf) data_ = {} @@ -158,24 +173,26 @@ def eval_homography_robust(data, pred, conf): return results -def eval_homography_dlt(data, pred, *args): +def eval_homography_dlt(data, pred): H_gt = data["H_0to1"] H_inf = torch.ones_like(H_gt) * float("inf") kp0, kp1 = pred["keypoints0"],
pred["keypoints1"] m0, scores0 = pred["matches0"], pred["matching_scores0"] pts0, pts1, scores = get_matches_scores(kp0, kp1, m0, scores0) + scores = scores.to(pts0) results = {} try: - Hdlt = kornia.geometry.homography.find_homography_dlt( - pts0[None], pts1[None], scores[None].to(pts0) - )[0] + if H_gt.ndim == 2: + pts0, pts1, scores = pts0[None], pts1[None], scores[None] + h_dlt = find_homography_dlt(pts0, pts1, scores) + if H_gt.ndim == 2: + h_dlt = h_dlt[0] except AssertionError: - Hdlt = H_inf + h_dlt = H_inf - error_dlt = homography_corner_error(Hdlt, H_gt, data["view0"]["image_size"]) + error_dlt = homography_corner_error(h_dlt, H_gt, data["view0"]["image_size"]) results["H_error_dlt"] = error_dlt.item() - return results diff --git a/gluefactory/geometry/epipolar.py b/gluefactory/geometry/epipolar.py index 7e1507c0..1f7bb9ce 100644 --- a/gluefactory/geometry/epipolar.py +++ b/gluefactory/geometry/epipolar.py @@ -1,4 +1,3 @@ -import numpy as np import torch from .utils import skew_symmetric, to_homogeneous @@ -124,39 +123,33 @@ def decompose_essential_matrix(E): # pose errors -# TODO: port to torch and batch +# TODO: test for batched data def angle_error_mat(R1, R2): - cos = (np.trace(np.dot(R1.T, R2)) - 1) / 2 - cos = np.clip(cos, -1.0, 1.0) # numercial errors can make it out of bounds - return np.rad2deg(np.abs(np.arccos(cos))) + cos = (torch.trace(torch.einsum("...ij, ...jk -> ...ik", R1.T, R2)) - 1) / 2 + cos = torch.clip(cos, -1.0, 1.0) # numerical errors can make it out of bounds + return torch.rad2deg(torch.abs(torch.arccos(cos))) -def angle_error_vec(v1, v2): - n = np.linalg.norm(v1) * np.linalg.norm(v2) - return np.rad2deg(np.arccos(np.clip(np.dot(v1, v2) / n, -1.0, 1.0))) +def angle_error_vec(v1, v2, eps=1e-10): + n = torch.clip(v1.norm(dim=-1) * v2.norm(dim=-1), min=eps) + v1v2 = (v1 * v2).sum(dim=-1) # dot product in the last dimension + return torch.rad2deg(torch.arccos(torch.clip(v1v2 / n, -1.0, 1.0))) -def compute_pose_error(T_0to1, R, t): - R_gt = T_0to1[:3, :3] - t_gt = T_0to1[:3, 3] - error_t = angle_error_vec(t, t_gt) - error_t = np.minimum(error_t, 180 - error_t) # ambiguity of E estimation - error_R = angle_error_mat(R, R_gt) - return error_t, error_R - +def relative_pose_error(T_0to1, R, t, ignore_gt_t_thr=0.0, eps=1e-10): + if isinstance(T_0to1, torch.Tensor): + R_gt, t_gt = T_0to1[:3, :3], T_0to1[:3, 3] + else: + R_gt, t_gt = T_0to1.R, T_0to1.t + R_gt, t_gt = torch.squeeze(R_gt), torch.squeeze(t_gt) -def relative_pose_error(T_0to1, R, t, ignore_gt_t_thr=0.0): # angle error between 2 vectors - R_gt, t_gt = T_0to1.numpy() - n = np.linalg.norm(t) * np.linalg.norm(t_gt) - t_err = np.rad2deg(np.arccos(np.clip(np.dot(t, t_gt) / n, -1.0, 1.0))) - t_err = np.minimum(t_err, 180 - t_err) # handle E ambiguity - if np.linalg.norm(t_gt) < ignore_gt_t_thr: # pure rotation is challenging + t_err = angle_error_vec(t, t_gt, eps) + t_err = torch.minimum(t_err, 180 - t_err) # handle E ambiguity + if t_gt.norm() < ignore_gt_t_thr: # pure rotation is challenging t_err = 0 # angle error between 2 rotation matrices - cos = (np.trace(np.dot(R.T, R_gt)) - 1) / 2 - cos = np.clip(cos, -1.0, 1.0) # handle numercial errors - R_err = np.rad2deg(np.abs(np.arccos(cos))) + r_err = angle_error_mat(R, R_gt) - return t_err, R_err + return t_err, r_err diff --git a/gluefactory/geometry/homography.py b/gluefactory/geometry/homography.py index 3acb9307..f87b9f90 100644 --- a/gluefactory/geometry/homography.py +++ b/gluefactory/geometry/homography.py @@ -164,7 +164,8 @@ def 
warp_points_torch(points, H, inverse=True): The inverse is used to be coherent with tf.contrib.image.transform Arguments: points: batched list of N points, shape (B, N, 2). - homography: batched or not (shapes (B, 3, 3) and (3, 3) respectively). + H: batched or not (shapes (B, 3, 3) and (3, 3) respectively). + inverse: Whether to multiply the points by H or the inverse of H Returns: a Tensor of shape (B, N, 2) containing the new coordinates of the warps. """ @@ -333,7 +334,7 @@ def sym_homography_error_all(kpts0, kpts1, H): def homography_corner_error(T, T_gt, image_size): - W, H = image_size[:, 0], image_size[:, 1] + W, H = image_size[..., 0], image_size[..., 1] corners0 = torch.Tensor([[0, 0], [W, 0], [W, H], [0, H]]).float().to(T) corners1_gt = from_homogeneous(to_homogeneous(corners0) @ T_gt.transpose(-1, -2)) corners1 = from_homogeneous(to_homogeneous(corners0) @ T.transpose(-1, -2)) diff --git a/gluefactory/geometry/utils.py b/gluefactory/geometry/utils.py index eec330a9..4734e341 100644 --- a/gluefactory/geometry/utils.py +++ b/gluefactory/geometry/utils.py @@ -23,6 +23,7 @@ def from_homogeneous(points, eps=0.0): """Remove the homogeneous dimension of N-dimensional points. Args: points: torch.Tensor or numpy.ndarray with size (..., N+1). + eps: Epsilon value to prevent zero division. Returns: A torch.Tensor or numpy ndarray with size (..., N). """ diff --git a/gluefactory/models/backbones/dinov2.py b/gluefactory/models/backbones/dinov2.py index 48a48b59..cf828523 100644 --- a/gluefactory/models/backbones/dinov2.py +++ b/gluefactory/models/backbones/dinov2.py @@ -10,6 +10,7 @@ class DinoV2(BaseModel): def _init(self, conf): self.net = torch.hub.load("facebookresearch/dinov2", conf.weights) + self.set_initialized() def _forward(self, data): img = data["image"] diff --git a/gluefactory/models/base_model.py b/gluefactory/models/base_model.py index 7313d986..b4f66288 100644 --- a/gluefactory/models/base_model.py +++ b/gluefactory/models/base_model.py @@ -60,6 +60,8 @@ class BaseModel(nn.Module, metaclass=MetaModel): required_data_keys = [] strict_conf = False + are_weights_initialized = False + def __init__(self, conf): """Perform some logic and call the _init method of the child model.""" super().__init__() @@ -125,3 +127,31 @@ def _forward(self, data): def loss(self, pred, data): """To be implemented by the child class.""" raise NotImplementedError + + def load_state_dict(self, *args, **kwargs): + """Load the state dict of the model, and set the model to initialized.""" + ret = super().load_state_dict(*args, **kwargs) + self.set_initialized() + return ret + + def is_initialized(self): + """Recursively check if the model is initialized, i.e. 
weights are loaded""" + is_initialized = True # initialize to true and perform recursive and + for _, w in self.named_children(): + if isinstance(w, BaseModel): + # if children is BaseModel, we perform recursive check + is_initialized = is_initialized and w.is_initialized() + else: + # else, we check if self is initialized or the children has no params + n_params = len(list(w.parameters())) + is_initialized = is_initialized and ( + n_params == 0 or self.are_weights_initialized + ) + return is_initialized + + def set_initialized(self, to: bool = True): + """Recursively set the initialization state.""" + self.are_weights_initialized = to + for _, w in self.named_parameters(): + if isinstance(w, BaseModel): + w.set_initialized(to) diff --git a/gluefactory/models/cache_loader.py b/gluefactory/models/cache_loader.py index 3fbf0f71..b345a997 100644 --- a/gluefactory/models/cache_loader.py +++ b/gluefactory/models/cache_loader.py @@ -29,6 +29,15 @@ def pad_local_features(pred: dict, seq_l: int): pred["scales"] = pad_to_length(pred["scales"], seq_l, -1, mode="zeros") if "oris" in pred.keys(): pred["oris"] = pad_to_length(pred["oris"], seq_l, -1, mode="zeros") + + if "depth_keypoints" in pred.keys(): + pred["depth_keypoints"] = pad_to_length( + pred["depth_keypoints"], seq_l, -1, mode="zeros" + ) + if "valid_depth_keypoints" in pred.keys(): + pred["valid_depth_keypoints"] = pad_to_length( + pred["valid_depth_keypoints"], seq_l, -1, mode="zeros" + ) return pred diff --git a/gluefactory/models/extractors/disk_kornia.py b/gluefactory/models/extractors/disk_kornia.py index 4d60973d..e01ab89d 100644 --- a/gluefactory/models/extractors/disk_kornia.py +++ b/gluefactory/models/extractors/disk_kornia.py @@ -21,6 +21,7 @@ class DISK(BaseModel): def _init(self, conf): self.model = kornia.feature.DISK.from_pretrained(conf.weights) + self.set_initialized() def _get_dense_outputs(self, images): B = images.shape[0] diff --git a/gluefactory/models/extractors/keynet_affnet_hardnet.py b/gluefactory/models/extractors/keynet_affnet_hardnet.py index b9091ea4..419ee972 100644 --- a/gluefactory/models/extractors/keynet_affnet_hardnet.py +++ b/gluefactory/models/extractors/keynet_affnet_hardnet.py @@ -21,6 +21,7 @@ def _init(self, conf): upright=conf.upright, scale_laf=conf.scale_laf, ) + self.set_initialized() def _forward(self, data): image = data["image"] diff --git a/gluefactory/models/extractors/sift.py b/gluefactory/models/extractors/sift.py index 5eb0c956..9f07725d 100644 --- a/gluefactory/models/extractors/sift.py +++ b/gluefactory/models/extractors/sift.py @@ -1,238 +1,233 @@ +import warnings + import cv2 import numpy as np -import pycolmap import torch -from omegaconf import OmegaConf -from scipy.spatial import KDTree +from kornia.color import rgb_to_grayscale +from packaging import version + +try: + import pycolmap +except ImportError: + pycolmap = None from ..base_model import BaseModel from ..utils.misc import pad_to_length -EPS = 1e-6 - - -def sift_to_rootsift(x): - x = x / (np.linalg.norm(x, ord=1, axis=-1, keepdims=True) + EPS) - x = np.sqrt(x.clip(min=EPS)) - x = x / (np.linalg.norm(x, axis=-1, keepdims=True) + EPS) - return x - - -# from OpenGlue -def nms_keypoints(kpts: np.ndarray, responses: np.ndarray, radius: float) -> np.ndarray: - # TODO: add approximate tree - kd_tree = KDTree(kpts) - - sorted_idx = np.argsort(-responses) - kpts_to_keep_idx = [] - removed_idx = set() - - for idx in sorted_idx: - # skip point if it was already removed - if idx in removed_idx: - continue - - 
kpts_to_keep_idx.append(idx) - point = kpts[idx] - neighbors = kd_tree.query_ball_point(point, r=radius) - # Variable `neighbors` contains the `point` itself - removed_idx.update(neighbors) - - mask = np.zeros((kpts.shape[0],), dtype=bool) - mask[kpts_to_keep_idx] = True - return mask - -def detect_kpts_opencv( - features: cv2.Feature2D, image: np.ndarray, describe: bool = True -) -> np.ndarray: +def filter_dog_point(points, scales, angles, image_shape, nms_radius, scores=None): + h, w = image_shape + ij = np.round(points - 0.5).astype(int).T[::-1] + + # Remove duplicate points (identical coordinates). + # Pick highest scale or score + s = scales if scores is None else scores + buffer = np.zeros((h, w)) + np.maximum.at(buffer, tuple(ij), s) + keep = np.where(buffer[tuple(ij)] == s)[0] + + # Pick lowest angle (arbitrary). + ij = ij[:, keep] + buffer[:] = np.inf + o_abs = np.abs(angles[keep]) + np.minimum.at(buffer, tuple(ij), o_abs) + mask = buffer[tuple(ij)] == o_abs + ij = ij[:, mask] + keep = keep[mask] + + if nms_radius > 0: + # Apply NMS on the remaining points + buffer[:] = 0 + buffer[tuple(ij)] = s[keep] # scores or scale + + local_max = torch.nn.functional.max_pool2d( + torch.from_numpy(buffer).unsqueeze(0), + kernel_size=nms_radius * 2 + 1, + stride=1, + padding=nms_radius, + ).squeeze(0) + is_local_max = buffer == local_max.numpy() + keep = keep[is_local_max[tuple(ij)]] + return keep + + +def sift_to_rootsift(x: torch.Tensor, eps=1e-6) -> torch.Tensor: + x = torch.nn.functional.normalize(x, p=1, dim=-1, eps=eps) + x.clip_(min=eps).sqrt_() + return torch.nn.functional.normalize(x, p=2, dim=-1, eps=eps) + + +def run_opencv_sift(features: cv2.Feature2D, image: np.ndarray) -> np.ndarray: """ Detect keypoints using OpenCV Detector. - Optionally, perform NMS and filter top-response keypoints. Optionally, perform description. Args: features: OpenCV based keypoints detector and descriptor image: Grayscale image of uint8 data type - describe: flag indicating whether to simultaneously compute descriptors Returns: - kpts: 1D array of detected cv2.KeyPoint + keypoints: 1D array of detected cv2.KeyPoint + scores: 1D array of responses + descriptors: 1D array of descriptors """ - if describe: - kpts, descriptors = features.detectAndCompute(image, None) - else: - kpts = features.detect(image, None) - kpts = np.array(kpts) - - responses = np.array([k.response for k in kpts], dtype=np.float32) - - # select all - top_score_idx = ... 
- pts = np.array([k.pt for k in kpts], dtype=np.float32) - scales = np.array([k.size for k in kpts], dtype=np.float32) - angles = np.array([k.angle for k in kpts], dtype=np.float32) - spts = np.concatenate([pts, scales[..., None], angles[..., None]], -1) - - if describe: - return spts[top_score_idx], responses[top_score_idx], descriptors[top_score_idx] - else: - return spts[top_score_idx], responses[top_score_idx] + detections, descriptors = features.detectAndCompute(image, None) + points = np.array([k.pt for k in detections], dtype=np.float32) + scores = np.array([k.response for k in detections], dtype=np.float32) + scales = np.array([k.size for k in detections], dtype=np.float32) + angles = np.deg2rad(np.array([k.angle for k in detections], dtype=np.float32)) + return points, scores, scales, angles, descriptors class SIFT(BaseModel): default_conf = { - "has_detector": True, - "has_descriptor": True, - "descriptor_dim": 128, - "pycolmap_options": { - "first_octave": 0, - "peak_threshold": 0.005, - "edge_threshold": 10, - }, "rootsift": True, - "nms_radius": None, - "max_num_keypoints": -1, - "max_num_keypoints_val": None, + "nms_radius": 0, # None to disable filtering entirely. + "max_num_keypoints": 4096, + "backend": "opencv", # in {opencv, pycolmap, pycolmap_cpu, pycolmap_cuda} + "detection_threshold": 0.0066667, # from COLMAP + "edge_threshold": 10, + "first_octave": -1, # only used by pycolmap, the default of COLMAP + "num_octaves": 4, "force_num_keypoints": False, - "randomize_keypoints_training": False, - "detector": "pycolmap", # ['pycolmap', 'pycolmap_cpu', 'pycolmap_cuda', 'cv2'] - "detection_threshold": None, } required_data_keys = ["image"] def _init(self, conf): - self.sift = None # lazy loading - - @torch.no_grad() - def extract_features(self, image): - image_np = image.cpu().numpy()[0] - assert image.shape[0] == 1 - assert image_np.min() >= -EPS and image_np.max() <= 1 + EPS - - detector = str(self.conf.detector) - - if self.sift is None and detector.startswith("pycolmap"): - options = OmegaConf.to_container(self.conf.pycolmap_options) + backend = self.conf.backend + if backend.startswith("pycolmap"): + if pycolmap is None: + raise ImportError( + "Cannot find module pycolmap: install it with pip " + "or use backend=opencv." + ) + options = { + "peak_threshold": self.conf.detection_threshold, + "edge_threshold": self.conf.edge_threshold, + "first_octave": self.conf.first_octave, + "num_octaves": self.conf.num_octaves, + "normalization": pycolmap.Normalization.L2, # L1_ROOT is buggy.
+ } device = ( - "auto" if detector == "pycolmap" else detector.replace("pycolmap_", "") + "auto" if backend == "pycolmap" else backend.replace("pycolmap_", "") ) - if self.conf.rootsift == "rootsift": - options["normalization"] = pycolmap.Normalization.L1_ROOT + if ( + backend == "pycolmap_cpu" or not pycolmap.has_cuda + ) and pycolmap.__version__ < "0.5.0": + warnings.warn( + "The pycolmap CPU SIFT is buggy in version < 0.5.0, " + "consider upgrading pycolmap or use the CUDA version.", + stacklevel=1, + ) else: - options["normalization"] = pycolmap.Normalization.L2 - if self.conf.detection_threshold is not None: - options["peak_threshold"] = self.conf.detection_threshold - options["max_num_features"] = self.conf.max_num_keypoints + options["max_num_features"] = self.conf.max_num_keypoints self.sift = pycolmap.Sift(options=options, device=device) - elif self.sift is None and self.conf.detector == "cv2": - self.sift = cv2.SIFT_create(contrastThreshold=self.conf.detection_threshold) + elif backend == "opencv": + self.sift = cv2.SIFT_create( + contrastThreshold=self.conf.detection_threshold, + nfeatures=self.conf.max_num_keypoints, + edgeThreshold=self.conf.edge_threshold, + nOctaveLayers=self.conf.num_octaves, + ) + else: + backends = {"opencv", "pycolmap", "pycolmap_cpu", "pycolmap_cuda"} + raise ValueError( + f"Unknown backend: {backend} not in " f"{{{','.join(backends)}}}." + ) - if detector.startswith("pycolmap"): - keypoints, scores, descriptors = self.sift.extract(image_np) - elif detector == "cv2": + def extract_single_image(self, image: torch.Tensor): + image_np = image.cpu().numpy().squeeze(0) + + if self.conf.backend.startswith("pycolmap"): + if version.parse(pycolmap.__version__) >= version.parse("0.5.0"): + detections, descriptors = self.sift.extract(image_np) + scores = None # Scores are not exposed by COLMAP anymore. + else: + detections, scores, descriptors = self.sift.extract(image_np) + keypoints = detections[:, :2] # Keep only (x, y). + scales, angles = detections[:, -2:].T + if scores is not None and ( + self.conf.backend == "pycolmap_cpu" or not pycolmap.has_cuda + ): + # Set the scores as a combination of abs. response and scale. + scores = np.abs(scores) * scales + elif self.conf.backend == "opencv": # TODO: Check if opencv keypoints are already in corner convention - keypoints, scores, descriptors = detect_kpts_opencv( + keypoints, scores, scales, angles, descriptors = run_opencv_sift( self.sift, (image_np * 255.0).astype(np.uint8) ) + pred = { + "keypoints": keypoints, + "scales": scales, + "oris": angles, + "descriptors": descriptors, + } + if scores is not None: + pred["keypoint_scores"] = scores + + # sometimes pycolmap returns points outside the image. 
We remove them + if self.conf.backend.startswith("pycolmap"): + is_inside = ( + pred["keypoints"] + 0.5 < np.array([image_np.shape[-2:][::-1]]) + ).all(-1) + pred = {k: v[is_inside] for k, v in pred.items()} if self.conf.nms_radius is not None: - mask = nms_keypoints(keypoints[:, :2], scores, self.conf.nms_radius) - keypoints = keypoints[mask] - scores = scores[mask] - descriptors = descriptors[mask] - - scales = keypoints[:, 2] - oris = np.rad2deg(keypoints[:, 3]) - - if self.conf.has_descriptor: - # We still renormalize because COLMAP does not normalize well, - # maybe due to numerical errors - if self.conf.rootsift: - descriptors = sift_to_rootsift(descriptors) - descriptors = torch.from_numpy(descriptors) - keypoints = torch.from_numpy(keypoints[:, :2]) # keep only x, y - scales = torch.from_numpy(scales) - oris = torch.from_numpy(oris) - scores = torch.from_numpy(scores) - - # Keep the k keypoints with highest score - max_kps = self.conf.max_num_keypoints - - # for val we allow different - if not self.training and self.conf.max_num_keypoints_val is not None: - max_kps = self.conf.max_num_keypoints_val - - if max_kps is not None and max_kps > 0: - if self.conf.randomize_keypoints_training and self.training: - # instead of selecting top-k, sample k by score weights - raise NotImplementedError - elif max_kps < scores.shape[0]: - # TODO: check that the scores from PyCOLMAP are 100% correct, - # follow https://github.com/mihaidusmanu/pycolmap/issues/8 - indices = torch.topk(scores, max_kps).indices - keypoints = keypoints[indices] - scales = scales[indices] - oris = oris[indices] - scores = scores[indices] - if self.conf.has_descriptor: - descriptors = descriptors[indices] + keep = filter_dog_point( + pred["keypoints"], + pred["scales"], + pred["oris"], + image_np.shape, + self.conf.nms_radius, + pred.get("keypoint_scores"), + ) + pred = {k: v[keep] for k, v in pred.items()} + + pred = {k: torch.from_numpy(v) for k, v in pred.items()} + if scores is not None: + # Keep the k keypoints with highest score + num_points = self.conf.max_num_keypoints + if num_points is not None and len(pred["keypoints"]) > num_points: + indices = torch.topk(pred["keypoint_scores"], num_points).indices + pred = {k: v[indices] for k, v in pred.items()} if self.conf.force_num_keypoints: - keypoints = pad_to_length( - keypoints, - max_kps, + num_points = min(self.conf.max_num_keypoints, len(pred["keypoints"])) + pred["keypoints"] = pad_to_length( + pred["keypoints"], + num_points, -2, mode="random_c", bounds=(0, min(image.shape[1:])), ) - scores = pad_to_length(scores, max_kps, -1, mode="zeros") - scales = pad_to_length(scales, max_kps, -1, mode="zeros") - oris = pad_to_length(oris, max_kps, -1, mode="zeros") - if self.conf.has_descriptor: - descriptors = pad_to_length(descriptors, max_kps, -2, mode="zeros") - - pred = { - "keypoints": keypoints, - "scales": scales, - "oris": oris, - "keypoint_scores": scores, - } - - if self.conf.has_descriptor: - pred["descriptors"] = descriptors + pred["scales"] = pad_to_length(pred["scales"], num_points, -1, mode="zeros") + pred["oris"] = pad_to_length(pred["oris"], num_points, -1, mode="zeros") + pred["descriptors"] = pad_to_length( + pred["descriptors"], num_points, -2, mode="zeros" + ) + if "keypoint_scores" in pred: + pred["keypoint_scores"] = pad_to_length( + pred["keypoint_scores"], num_points, -1, mode="zeros" + ) return pred - @torch.no_grad() - def _forward(self, data): - pred = { - "keypoints": [], - "scales": [], - "oris": [], - "keypoint_scores": [], - "descriptors": [], - } -
+ def _forward(self, data: dict) -> dict: image = data["image"] - if image.shape[1] == 3: # RGB - scale = image.new_tensor([0.299, 0.587, 0.114]).view(1, 3, 1, 1) - image = (image * scale).sum(1, keepdim=True).cpu() - - for k in range(image.shape[0]): + if image.shape[1] == 3: + image = rgb_to_grayscale(image) + device = image.device + image = image.cpu() + pred = [] + for k in range(len(image)): img = image[k] if "image_size" in data.keys(): # avoid extracting points in padded areas w, h = data["image_size"][k] img = img[:, :h, :w] - p = self.extract_features(img) - for k, v in p.items(): - pred[k].append(v) - - if (image.shape[0] == 1) or self.conf.force_num_keypoints: - pred = {k: torch.stack(pred[k], 0) for k in pred.keys()} - - pred = {k: pred[k].to(device=data["image"].device) for k in pred.keys()} - - pred["oris"] = torch.deg2rad(pred["oris"]) + p = self.extract_single_image(img) + pred.append(p) + pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]} + if self.conf.rootsift: + pred["descriptors"] = sift_to_rootsift(pred["descriptors"]) return pred def loss(self, pred, data): diff --git a/gluefactory/models/extractors/sift_kornia.py b/gluefactory/models/extractors/sift_kornia.py index 78810e66..699e5a26 100644 --- a/gluefactory/models/extractors/sift_kornia.py +++ b/gluefactory/models/extractors/sift_kornia.py @@ -19,12 +19,13 @@ def _init(self, conf): self.sift = kornia.feature.SIFTFeature( num_features=self.conf.max_num_keypoints, rootsift=self.conf.rootsift ) + self.set_initialized() def _forward(self, data): lafs, scores, descriptors = self.sift(data["image"]) keypoints = kornia.feature.get_laf_center(lafs) - scales = kornia.feature.get_laf_scale(lafs) - oris = kornia.feature.get_laf_orientation(lafs) + scales = kornia.feature.get_laf_scale(lafs).squeeze(-1).squeeze(-1) + oris = kornia.feature.get_laf_orientation(lafs).squeeze(-1) pred = { "keypoints": keypoints, # @TODO: confirm keypoints are in corner convention "scales": scales, diff --git a/gluefactory/models/lines/deeplsd.py b/gluefactory/models/lines/deeplsd.py index c35aa01e..122f4b4f 100644 --- a/gluefactory/models/lines/deeplsd.py +++ b/gluefactory/models/lines/deeplsd.py @@ -34,6 +34,7 @@ def _init(self, conf): ckpt = torch.load(ckpt, map_location="cpu") self.net = deeplsd_inference.DeepLSD(conf.model_conf).eval() self.net.load_state_dict(ckpt["model"]) + self.set_initialized() def download_model(self, path): import subprocess diff --git a/gluefactory/models/matchers/gluestick.py b/gluefactory/models/matchers/gluestick.py index 0187e0c3..e16a8a52 100644 --- a/gluefactory/models/matchers/gluestick.py +++ b/gluefactory/models/matchers/gluestick.py @@ -119,7 +119,7 @@ def _init(self, conf): "Loading GlueStick model from " f'"{self.url.format(conf.version)}"' ) state_dict = torch.hub.load_state_dict_from_url( - self.url.format(conf.version), file_name=fname + self.url.format(conf.version), file_name=fname, map_location="cpu" ) if "model" in state_dict: diff --git a/gluefactory/models/matchers/kornia_loftr.py b/gluefactory/models/matchers/kornia_loftr.py index 45a20b7a..6fbd47b0 100644 --- a/gluefactory/models/matchers/kornia_loftr.py +++ b/gluefactory/models/matchers/kornia_loftr.py @@ -13,6 +13,7 @@ class LoFTRModule(BaseModel): def _init(self, conf): self.net = kornia.feature.LoFTR(pretrained="outdoor") + self.set_initialized() def _forward(self, data): image0 = data["view0"]["image"] diff --git a/gluefactory/models/matchers/lightglue_pretrained.py 
b/gluefactory/models/matchers/lightglue_pretrained.py index 2e7c71b6..275a9d54 100644 --- a/gluefactory/models/matchers/lightglue_pretrained.py +++ b/gluefactory/models/matchers/lightglue_pretrained.py @@ -17,17 +17,18 @@ class LightGlue(BaseModel): def _init(self, conf): dconf = OmegaConf.to_container(conf) - self.net = LightGlue_(dconf.pop("features"), **dconf).cuda() - # self.net.compile() + self.net = LightGlue_(dconf.pop("features"), **dconf) + self.set_initialized() def _forward(self, data): + required_keys = ["keypoints", "descriptors", "scales", "oris"] view0 = { - **{k: data[k + "0"] for k in ["keypoints", "descriptors"]}, **data["view0"], + **{k: data[k + "0"] for k in required_keys if (k + "0") in data}, } view1 = { - **{k: data[k + "1"] for k in ["keypoints", "descriptors"]}, **data["view1"], + **{k: data[k + "1"] for k in required_keys if (k + "1") in data}, } return self.net({"image0": view0, "image1": view1}) diff --git a/gluefactory/robust_estimators/homography/homography_est.py b/gluefactory/robust_estimators/homography/homography_est.py index 510650c4..780011ee 100644 --- a/gluefactory/robust_estimators/homography/homography_est.py +++ b/gluefactory/robust_estimators/homography/homography_est.py @@ -7,6 +7,7 @@ ransac_point_line_homography, ) +from ...utils.tensor import batch_to_numpy from ..base_estimator import BaseEstimator @@ -50,19 +51,20 @@ def _init(self, conf): pass def _forward(self, data): + feat = data["m_kpts0"] if "m_kpts0" in data else data["m_lines0"] + data = batch_to_numpy(data) m_features = { - "kpts0": data["m_kpts1"].numpy() if "m_kpts1" in data else None, - "kpts1": data["m_kpts0"].numpy() if "m_kpts0" in data else None, - "lines0": data["m_lines1"].numpy() if "m_lines1" in data else None, - "lines1": data["m_lines0"].numpy() if "m_lines0" in data else None, + "kpts0": data["m_kpts1"] if "m_kpts1" in data else None, + "kpts1": data["m_kpts0"] if "m_kpts0" in data else None, + "lines0": data["m_lines1"] if "m_lines1" in data else None, + "lines1": data["m_lines0"] if "m_lines0" in data else None, } - feat = data["m_kpts0"] if "m_kpts0" in data else data["m_lines0"] M = H_estimation_hybrid(**m_features, tol_px=self.conf.ransac_th) success = M is not None if not success: M = torch.eye(3, device=feat.device, dtype=feat.dtype) else: - M = torch.tensor(M).to(feat) + M = torch.from_numpy(M).to(feat) estimation = { "success": success, diff --git a/gluefactory/robust_estimators/homography/poselib.py b/gluefactory/robust_estimators/homography/poselib.py index e99e9493..6aa71496 100644 --- a/gluefactory/robust_estimators/homography/poselib.py +++ b/gluefactory/robust_estimators/homography/poselib.py @@ -16,8 +16,8 @@ def _init(self, conf): def _forward(self, data): pts0, pts1 = data["m_kpts0"], data["m_kpts1"] M, info = poselib.estimate_homography( - pts0.numpy(), - pts1.numpy(), + pts0.detach().cpu().numpy(), + pts1.detach().cpu().numpy(), { "max_reproj_error": self.conf.ransac_th, **OmegaConf.to_container(self.conf.options), diff --git a/gluefactory/scripts/export_megadepth.py b/gluefactory/scripts/export_megadepth.py index 95e89d81..84ae8dfb 100644 --- a/gluefactory/scripts/export_megadepth.py +++ b/gluefactory/scripts/export_megadepth.py @@ -37,14 +37,13 @@ }, }, "cv2-sift": { - "name": f"r{resize}_cv2-SIFT-k{n_kpts}", + "name": f"r{resize}_opencv-SIFT-k{n_kpts}", "keys": ["keypoints", "descriptors", "keypoint_scores", "oris", "scales"], "gray": True, "conf": { "name": "extractors.sift", "max_num_keypoints": 4096, - "detection_threshold": 0.001, - 
"detector": "cv2", + "backend": "opencv", }, }, "pycolmap-sift": { @@ -54,11 +53,7 @@ "conf": { "name": "extractors.sift", "max_num_keypoints": n_kpts, - "detection_threshold": 0.0001, - "detector": "pycolmap", - "pycolmap_options": { - "first_octave": -1, - }, + "backend": "pycolmap", }, }, "pycolmap-sift-gpu": { @@ -68,11 +63,7 @@ "conf": { "name": "extractors.sift", "max_num_keypoints": n_kpts, - "detection_threshold": 0.0066666, - "detector": "pycolmap_cuda", - "pycolmap_options": { - "first_octave": -1, - }, + "backend": "pycolmap_cuda", "nms_radius": 3, }, }, @@ -133,15 +124,18 @@ def run_export(feature_file, scene, args): conf = OmegaConf.create(conf) - keys = configs[args.method]["keys"] + ["depth_keypoints", "valid_depth_keypoints"] + keys = configs[args.method]["keys"] dataset = get_dataset(conf.data.name)(conf.data) loader = dataset.get_data_loader(conf.split or "test") device = "cuda" if torch.cuda.is_available() else "cpu" model = get_model(conf.model.name)(conf.model).eval().to(device) - callback_fn = None - # callback_fn=get_kp_depth # use this to store the depth of each keypoint + if args.export_sparse_depth: + callback_fn = get_kp_depth # use this to store the depth of each keypoint + keys = keys + ["depth_keypoints", "valid_depth_keypoints"] + else: + callback_fn = None export_predictions( loader, model, feature_file, as_half=True, keys=keys, callback_fn=callback_fn ) @@ -153,6 +147,7 @@ def run_export(feature_file, scene, args): parser.add_argument("--method", type=str, default="sp") parser.add_argument("--scenes", type=str, default=None) parser.add_argument("--num_workers", type=int, default=0) + parser.add_argument("--export_sparse_depth", action="store_true") args = parser.parse_args() export_name = configs[args.method]["name"] diff --git a/gluefactory/utils/tensor.py b/gluefactory/utils/tensor.py index f31bb580..d0a8ca50 100644 --- a/gluefactory/utils/tensor.py +++ b/gluefactory/utils/tensor.py @@ -40,3 +40,9 @@ def rbd(data: dict) -> dict: k: v[0] if isinstance(v, (torch.Tensor, np.ndarray, list)) else v for k, v in data.items() } + + +def index_batch(tensor_dict): + batch_size = len(next(iter(tensor_dict.values()))) + for i in range(batch_size): + yield map_tensor(tensor_dict, lambda t: t[i]) diff --git a/gluefactory/visualization/viz2d.py b/gluefactory/visualization/viz2d.py index 42a000a3..bfa64735 100644 --- a/gluefactory/visualization/viz2d.py +++ b/gluefactory/visualization/viz2d.py @@ -208,14 +208,14 @@ def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, a=1.0, labels=None, axe kpts0[:, 1], c=color, s=ps, - label=None if labels is None else labels[0], + label=None if labels is None or len(labels) == 0 else labels[0], ) ax1.scatter( kpts1[:, 0], kpts1[:, 1], c=color, s=ps, - label=None if labels is None else labels[1], + label=None if labels is None or len(labels) == 0 else labels[1], ) diff --git a/pyproject.toml b/pyproject.toml index 5185a753..b740a956 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,12 +38,12 @@ urls = {Repository = "https://github.com/cvg/glue-factory"} [project.optional-dependencies] extra = [ "pycolmap", - "poselib @ git+https://github.com/PoseLib/PoseLib.git", - "pytlsd @ git+https://github.com/iago-suarez/pytlsd.git", + "poselib @ git+https://github.com/PoseLib/PoseLib.git@9c8f3ca1baba69e19726cc7caded574873ec1f9e", + "pytlsd @ git+https://github.com/iago-suarez/pytlsd.git@v0.0.5", "deeplsd @ git+https://github.com/cvg/DeepLSD.git", - "homography_est @ git+https://github.com/rpautrat/homography_est.git", + "homography_est @ 
git+https://github.com/rpautrat/homography_est.git@17b200d528e6aa8ac61a878a29265bf5f9d36c41", ] -dev = ["black", "flake8", "isort"] +dev = ["black", "flake8", "isort", "parameterized"] [tool.setuptools.packages.find] include = ["gluefactory*"] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_eval_utils.py b/tests/test_eval_utils.py new file mode 100644 index 00000000..fead8964 --- /dev/null +++ b/tests/test_eval_utils.py @@ -0,0 +1,88 @@ +import unittest + +import torch + +from gluefactory.eval.utils import eval_matches_homography +from gluefactory.geometry.homography import warp_points_torch + + +class TestEvalUtils(unittest.TestCase): + @staticmethod + def default_pts(): + return torch.tensor( + [ + [10.0, 10.0], + [10.0, 20.0], + [20.0, 20.0], + [20.0, 10.0], + ] + ) + + @staticmethod + def default_pred(kps0, kps1): + return { + "keypoints0": kps0, + "keypoints1": kps1, + "matches0": torch.arange(len(kps0)), + "matching_scores0": torch.ones(len(kps1)), + } + + def test_eval_matches_homography_trivial(self): + data = {"H_0to1": torch.eye(3)} + kps = self.default_pts() + pred = self.default_pred(kps, kps) + + results = eval_matches_homography(data, pred) + + self.assertEqual(results["prec@1px"], 1) + self.assertEqual(results["prec@3px"], 1) + self.assertEqual(results["num_matches"], 4) + self.assertEqual(results["num_keypoints"], 4) + + def test_eval_matches_homography_real(self): + data = {"H_0to1": torch.tensor([[1.5, 0.2, 21], [-0.3, 1.6, 33], [0, 0, 1.0]])} + kps0 = self.default_pts() + kps1 = warp_points_torch(kps0, data["H_0to1"], inverse=False) + pred = self.default_pred(kps0, kps1) + + results = eval_matches_homography(data, pred) + + self.assertEqual(results["prec@1px"], 1) + self.assertEqual(results["prec@3px"], 1) + + def test_eval_matches_homography_real_outliers(self): + data = {"H_0to1": torch.tensor([[1.5, 0.2, 21], [-0.3, 1.6, 33], [0, 0, 1.0]])} + kps0 = self.default_pts() + kps0 = torch.cat([kps0, torch.tensor([[5.0, 5.0]])]) + kps1 = warp_points_torch(kps0, data["H_0to1"], inverse=False) + # Move one keypoint 1.5 pixels away in x and y + kps1[-1] += 1.5 + pred = self.default_pred(kps0, kps1) + + results = eval_matches_homography(data, pred) + self.assertAlmostEqual(results["prec@1px"], 0.8) + self.assertAlmostEqual(results["prec@3px"], 1.0) + + def test_eval_matches_homography_batched(self): + H0 = torch.tensor([[1.5, 0.2, 21], [-0.3, 1.6, 33], [0, 0, 1.0]]) + H1 = torch.tensor([[0.7, 0.1, -5], [-0.1, 0.65, 13], [0, 0, 1.0]]) + data = {"H_0to1": torch.stack([H0, H1])} + kps0 = torch.stack([self.default_pts(), self.default_pts().flip(0)]) + kps1 = warp_points_torch(kps0, data["H_0to1"], inverse=False) + # In the first element of the batch there is one outlier + kps1[0, -1] += 5 + matches0 = torch.stack([torch.arange(4), torch.arange(4)]) + # In the second element of the batch there is only 2 matches + matches0[1, :2] = -1 + pred = { + "keypoints0": kps0, + "keypoints1": kps1, + "matches0": matches0, + "matching_scores0": torch.ones_like(matches0), + } + + results = eval_matches_homography(data, pred) + self.assertAlmostEqual(results["prec@1px"][0], 0.75) + self.assertAlmostEqual(results["prec@1px"][1], 1.0) + self.assertAlmostEqual(results["num_matches"][0], 4) + self.assertAlmostEqual(results["num_matches"][1], 2) diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 00000000..e459ada5 --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,132 @@ +import 
unittest +from collections import namedtuple +from os.path import splitext + +import cv2 +import matplotlib.pyplot as plt +import torch.cuda +from kornia import image_to_tensor +from omegaconf import OmegaConf +from parameterized import parameterized +from torch import Tensor + +from gluefactory import logger +from gluefactory.eval.utils import ( + eval_homography_dlt, + eval_homography_robust, + eval_matches_homography, +) +from gluefactory.models.two_view_pipeline import TwoViewPipeline +from gluefactory.settings import root +from gluefactory.utils.image import ImagePreprocessor +from gluefactory.utils.tensor import map_tensor +from gluefactory.utils.tools import set_seed +from gluefactory.visualization.viz2d import ( + plot_color_line_matches, + plot_images, + plot_matches, +) + + +def create_input_data(cv_img0, cv_img1, device): + img0 = image_to_tensor(cv_img0).float() / 255 + img1 = image_to_tensor(cv_img1).float() / 255 + ip = ImagePreprocessor({}) + data = {"view0": ip(img0), "view1": ip(img1)} + data = map_tensor( + data, + lambda t: t[None].to(device) + if isinstance(t, Tensor) + else torch.from_numpy(t)[None].to(device), + ) + return data + + +ExpectedResults = namedtuple("ExpectedResults", ("num_matches", "prec3px", "h_error")) + + +class TestIntegration(unittest.TestCase): + methods_to_test = [ + ("superpoint+NN.yaml", "poselib", ExpectedResults(1300, 0.8, 1.0)), + ("superpoint-open+NN.yaml", "poselib", ExpectedResults(1300, 0.8, 1.0)), + ( + "superpoint+lsd+gluestick.yaml", + "homography_est", + ExpectedResults(1300, 0.8, 1.0), + ), + ( + "superpoint+lightglue-official.yaml", + "poselib", + ExpectedResults(1300, 0.8, 1.0), + ), + ] + + visualize = False + + @parameterized.expand(methods_to_test) + @torch.no_grad() + def test_real_homography(self, conf_file, estimator, exp_results): + set_seed(0) + model_path = root / "gluefactory" / "configs" / conf_file + img_path0 = root / "assets" / "boat1.png" + img_path1 = root / "assets" / "boat2.png" + h_gt = torch.tensor( + [ + [0.85799, 0.21669, 9.4839], + [-0.21177, 0.85855, 130.48], + [1.5015e-06, 9.2033e-07, 1], + ] + ) + + device = "cuda" if torch.cuda.is_available() else "cpu" + gs = TwoViewPipeline(OmegaConf.load(model_path).model).to(device).eval() + + cv_img0, cv_img1 = cv2.imread(str(img_path0)), cv2.imread(str(img_path1)) + data = create_input_data(cv_img0, cv_img1, device) + pred = gs(data) + pred = map_tensor( + pred, lambda t: torch.squeeze(t, dim=0) if isinstance(t, Tensor) else t + ) + data["H_0to1"] = h_gt.to(device) + data["H_1to0"] = torch.linalg.inv(h_gt).to(device) + + results = eval_matches_homography(data, pred) + results = {**results, **eval_homography_dlt(data, pred)} + + results = { + **results, + **eval_homography_robust( + data, + pred, + {"estimator": estimator}, + ), + } + + logger.info(results) + self.assertGreater(results["num_matches"], exp_results.num_matches) + self.assertGreater(results["prec@3px"], exp_results.prec3px) + self.assertLess(results["H_error_ransac"], exp_results.h_error) + + if self.visualize: + pred = map_tensor( + pred, lambda t: t.cpu().numpy() if isinstance(t, Tensor) else t + ) + kp0, kp1 = pred["keypoints0"], pred["keypoints1"] + m0 = pred["matches0"] + valid0 = m0 != -1 + kpm0, kpm1 = kp0[valid0], kp1[m0[valid0]] + + plot_images([cv_img0, cv_img1]) + plot_matches(kpm0, kpm1, a=0.0) + plt.savefig(f"{splitext(conf_file)[0]}_point_matches.svg") + + if "lines0" in pred and "lines1" in pred: + lines0, lines1 = pred["lines0"], pred["lines1"] + lm0 = pred["line_matches0"] + lvalid0 = 
lm0 != -1 + linem0, linem1 = lines0[lvalid0], lines1[lm0[lvalid0]] + + plot_images([cv_img0, cv_img1]) + plot_color_line_matches([linem0, linem1]) + plt.savefig(f"{splitext(conf_file)[0]}_line_matches.svg") + plt.show()
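
A note on the AUC numbers in the README tables above: each entry is the area under the cumulative error curve (the fraction of pairs with error below a threshold, integrated up to 1/3/5 px for homographies or 5/10/20 degrees for relative pose), normalized so a perfect method scores 1. A minimal NumPy sketch of that standard computation (illustrative only; the repo computes it through AUCMetric in gluefactory.utils.tools):

```python
import numpy as np

def error_auc(errors, thresholds):
    """Area under the recall-vs-error curve, one value per threshold."""
    errors = np.sort(np.nan_to_num(np.asarray(errors, dtype=float), nan=np.inf))
    recall = (np.arange(len(errors)) + 1) / len(errors)
    errors = np.concatenate(([0.0], errors))
    recall = np.concatenate(([0.0], recall))
    aucs = []
    for t in thresholds:
        last = np.searchsorted(errors, t)  # errors[:last] lie below t
        recall_t = np.concatenate((recall[:last], [recall[last - 1]]))
        error_t = np.concatenate((errors[:last], [t]))
        aucs.append(np.trapz(recall_t, x=error_t) / t)  # normalize to [0, 1]
    return aucs

# e.g. homography corner errors in pixels, evaluated at 1/3/5 px:
print(error_auc([0.5, 1.5, 2.5, 20.0], [1, 3, 5]))
```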
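
The batched evaluation added in gluefactory/eval/utils.py follows a single pattern: if the ground truth carries a batch dimension, slice every entry of the data and prediction dicts along it, run the per-item metric, and regroup the results into a dictionary of lists. A self-contained sketch of that pattern with a toy metric (simplified to flat dicts of tensors; the repo's index_batch walks nested dicts via map_tensor):

```python
import torch

def index_batch(tensor_dict):
    # Yield one sub-dict per batch item.
    batch_size = len(next(iter(tensor_dict.values())))
    for i in range(batch_size):
        yield {k: v[i] for k, v in tensor_dict.items()}

def eval_per_batch_item(data, pred, eval_f, *args, **kwargs):
    results = [
        eval_f(data_i, pred_i, *args, **kwargs)
        for data_i, pred_i in zip(index_batch(data), index_batch(pred))
    ]
    # Dictionary of lists with the evaluation of each item.
    return {k: [r[k] for r in results] for k in results[0]}

def toy_metric(data_i, pred_i):  # stand-in for e.g. eval_matches_homography
    return {"num_matches": int((pred_i["matches0"] >= 0).sum())}

data = {"H_0to1": torch.eye(3).expand(2, 3, 3)}
pred = {"matches0": torch.tensor([[0, 1, -1], [0, -1, -1]])}
print(eval_per_batch_item(data, pred, toy_metric))  # {'num_matches': [2, 1]}
```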
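
Finally, the rewritten SIFT extractor replaces the old detector and pycolmap_options entries with a single backend switch and flat threshold options. A hedged usage sketch (the config keys come from the new default_conf above; get_model and the {"image": ...} input follow the patterns visible in gluefactory/eval/io.py and gluefactory/scripts/export_megadepth.py):

```python
import torch
from omegaconf import OmegaConf

from gluefactory.models import get_model

conf = OmegaConf.create(
    {
        "name": "extractors.sift",
        "backend": "opencv",  # or pycolmap / pycolmap_cpu / pycolmap_cuda
        "max_num_keypoints": 4096,
        "nms_radius": 0,  # 0 = only DoG duplicate filtering, None = no filtering
        "rootsift": True,
    }
)
model = get_model(conf.name)(conf).eval()

# Toy input: (B, C, H, W) tensor in [0, 1]; RGB is converted to grayscale internally.
image = torch.rand(1, 1, 480, 640)
pred = model({"image": image})
print(pred["keypoints"].shape, pred["descriptors"].shape)
```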