From 35acf351d8fcbddddb58476cdc7de3012a13cef4 Mon Sep 17 00:00:00 2001
From: Paris Morgan <jparismorgan@gmail.com>
Date: Mon, 16 Sep 2024 16:04:07 +0200
Subject: [PATCH] Update IVF_PQ to set memory_budget in constructor, support
 preload feature_vectors and metadata only modes (#518)

---
 apis/python/src/tiledb/vector_search/index.py |   5 +
 .../src/tiledb/vector_search/ivf_pq_index.py  |  49 ++-
 .../python/src/tiledb/vector_search/module.cc |   9 +
 .../vector_search/type_erased_module.cc       |  66 ++-
 apis/python/test/local-benchmarks.py          |  24 +-
 apis/python/test/test_cloud.py                |   1 +
 apis/python/test/test_type_erased_module.py   |  48 +-
 src/include/api/ivf_pq_index.h                |  66 +--
 src/include/index/index_defs.h                |  10 +
 src/include/index/ivf_pq_index.h              | 363 +++++++---------
 src/include/test/unit_api_ivf_pq_index.cc     | 409 +++++++++---------
 src/include/test/unit_ivf_pq_index.cc         | 159 ++++---
 12 files changed, 620 insertions(+), 589 deletions(-)

diff --git a/apis/python/src/tiledb/vector_search/index.py b/apis/python/src/tiledb/vector_search/index.py
index 4a2d3c014..e99765b0b 100644
--- a/apis/python/src/tiledb/vector_search/index.py
+++ b/apis/python/src/tiledb/vector_search/index.py
@@ -192,6 +192,7 @@ def _query_with_driver(
         def query_udf(index_type, index_open_kwargs, query_kwargs):
             from tiledb.vector_search.flat_index import FlatIndex
             from tiledb.vector_search.ivf_flat_index import IVFFlatIndex
+            from tiledb.vector_search.ivf_pq_index import IVFPQIndex
             from tiledb.vector_search.vamana_index import VamanaIndex
 
             # Open index
@@ -199,8 +200,12 @@ def query_udf(index_type, index_open_kwargs, query_kwargs):
                 index = FlatIndex(**index_open_kwargs)
             elif index_type == "IVF_FLAT":
                 index = IVFFlatIndex(**index_open_kwargs)
+            elif index_type == "IVF_PQ":
+                index = IVFPQIndex(**index_open_kwargs)
             elif index_type == "VAMANA":
                 index = VamanaIndex(**index_open_kwargs)
+            else:
+                raise ValueError(f"Unsupported index_type: {index_type}")
 
             # Query index
             return index.query(**query_kwargs)
diff --git a/apis/python/src/tiledb/vector_search/ivf_pq_index.py b/apis/python/src/tiledb/vector_search/ivf_pq_index.py
index cbcd4d48e..78a04f7e6 100644
--- a/apis/python/src/tiledb/vector_search/ivf_pq_index.py
+++ b/apis/python/src/tiledb/vector_search/ivf_pq_index.py
@@ -37,6 +37,12 @@ class IVFPQIndex(index.Index):
         If not provided, all index data are loaded in main memory.
         Otherwise, no index data are loaded in main memory and this memory budget is
         applied during queries.
+    preload_k_factor_vectors: bool
+        When using `k_factor` in a query, we first query for `k_factor * k` pq-encoded vectors,
+        and then do a re-ranking step using the original input vectors for the top `k` vectors.
+        If `True`, we will load all the input vectors in main memory. This can only be used with
+        `memory_budget` set to `-1`, and is useful when the input vectors are small enough to fit in
+        memory and you want to speed up re-ranking.
     open_for_remote_query_execution: bool
         If `True`, do not load any index data in main memory locally, and instead load index data in the TileDB Cloud taskgraph created when a non-`None` `driver_mode` is passed to `query()`.
         If `False`, load index data in main memory locally. Note that you can still use a taskgraph for query execution, you'll just end up loading the data both on your local machine and in the cloud taskgraph.
@@ -48,15 +54,26 @@ def __init__(
         config: Optional[Mapping[str, Any]] = None,
         timestamp=None,
         memory_budget: int = -1,
+        preload_k_factor_vectors: bool = False,
         open_for_remote_query_execution: bool = False,
         group: tiledb.Group = None,
         **kwargs,
     ):
+        if preload_k_factor_vectors and memory_budget != -1:
+            raise ValueError(
+                "preload_k_factor_vectors can only be used with memory_budget set to -1."
+            )
+        if preload_k_factor_vectors and open_for_remote_query_execution:
+            raise ValueError(
+                "preload_k_factor_vectors can only be used with open_for_remote_query_execution set to False."
+            )
+
         self.index_open_kwargs = {
             "uri": uri,
             "config": config,
             "timestamp": timestamp,
             "memory_budget": memory_budget,
+            "preload_k_factor_vectors": preload_k_factor_vectors,
         }
         self.index_open_kwargs.update(kwargs)
         self.index_type = INDEX_TYPE
@@ -67,8 +84,21 @@ def __init__(
             open_for_remote_query_execution=open_for_remote_query_execution,
             group=group,
         )
-        # TODO(SC-48710): Add support for `open_for_remote_query_execution`. We don't leave `self.index`` as `None` because we need to be able to call index.dimensions().
-        self.index = vspy.IndexIVFPQ(self.ctx, uri, to_temporal_policy(timestamp))
+        strategy = (
+            vspy.IndexLoadStrategy.PQ_INDEX_AND_RERANKING_VECTORS
+            if preload_k_factor_vectors
+            else vspy.IndexLoadStrategy.PQ_OOC
+            if open_for_remote_query_execution
+            or (memory_budget != -1 and memory_budget != 0)
+            else vspy.IndexLoadStrategy.PQ_INDEX
+        )
+        self.index = vspy.IndexIVFPQ(
+            self.ctx,
+            uri,
+            strategy,
+            0 if memory_budget == -1 else memory_budget,
+            to_temporal_policy(timestamp),
+        )
         self.db_uri = self.group[
             storage_formats[self.storage_version]["PARTS_ARRAY_NAME"]
         ].uri
@@ -127,16 +157,9 @@ def query_internal(
         if not queries.flags.f_contiguous:
             queries = queries.copy(order="F")
         queries_feature_vector_array = vspy.FeatureVectorArray(queries)
-
-        if self.memory_budget == -1:
-            distances, ids = self.index.query_infinite_ram(
-                queries_feature_vector_array, k, nprobe, k_factor
-            )
-        else:
-            distances, ids = self.index.query_finite_ram(
-                queries_feature_vector_array, k, nprobe, self.memory_budget, k_factor
-            )
-
+        distances, ids = self.index.query(
+            queries_feature_vector_array, k=k, nprobe=nprobe, k_factor=k_factor
+        )
         return np.array(distances, copy=False), np.array(ids, copy=False)
 
 
@@ -203,7 +226,7 @@ def create(
         id_type=np.dtype(np.uint64).name,
         partitioning_index_type=np.dtype(np.uint64).name,
         dimensions=dimensions,
-        n_list=partitions if (partitions is not None and partitions is not -1) else 0,
+        n_list=partitions if (partitions is not None and partitions != -1) else 0,
         num_subspaces=num_subspaces,
         distance_metric=int(distance_metric),
     )
diff --git a/apis/python/src/tiledb/vector_search/module.cc b/apis/python/src/tiledb/vector_search/module.cc
index f32b045e5..496c047c4 100644
--- a/apis/python/src/tiledb/vector_search/module.cc
+++ b/apis/python/src/tiledb/vector_search/module.cc
@@ -18,6 +18,7 @@
 #include "detail/linalg/tdb_matrix.h"
 #include "detail/linalg/tdb_partitioned_matrix.h"
 #include "detail/time/temporal_policy.h"
+#include "index/index_defs.h"
 #include "utils/seeder.h"
 
 namespace py = pybind11;
@@ -1096,6 +1097,14 @@ PYBIND11_MODULE(_tiledbvspy, m) {
       .value("L2", DistanceMetric::L2)
       .export_values();
 
+  py::enum_<IndexLoadStrategy>(m, "IndexLoadStrategy")
+      .value("PQ_OOC", IndexLoadStrategy::PQ_OOC)
+      .value("PQ_INDEX", IndexLoadStrategy::PQ_INDEX)
+      .value(
+          "PQ_INDEX_AND_RERANKING_VECTORS",
+          IndexLoadStrategy::PQ_INDEX_AND_RERANKING_VECTORS)
+      .export_values();
+
   /* === Module inits === */
 
   init_kmeans(m);
diff --git a/apis/python/src/tiledb/vector_search/type_erased_module.cc b/apis/python/src/tiledb/vector_search/type_erased_module.cc
index 0beeaf84e..031235a01 100644
--- a/apis/python/src/tiledb/vector_search/type_erased_module.cc
+++ b/apis/python/src/tiledb/vector_search/type_erased_module.cc
@@ -368,10 +368,8 @@ void init_type_erased_module(py::module_& m) {
       .def("dimensions", &IndexFlatL2::dimensions)
       .def(
           "query",
-          [](IndexFlatL2& index,
-             const FeatureVectorArray& vectors,
-             size_t top_k) {
-            auto r = index.query(vectors, top_k);
+          [](IndexFlatL2& index, const FeatureVectorArray& vectors, size_t k) {
+            auto r = index.query(vectors, k);
             return make_python_pair(std::move(r));
           });
 
@@ -422,13 +420,13 @@ void init_type_erased_module(py::module_& m) {
           "query",
           [](IndexVamana& index,
              const FeatureVectorArray& vectors,
-             size_t top_k,
+             size_t k,
              uint32_t l_search) {
-            auto r = index.query(vectors, top_k, l_search);
+            auto r = index.query(vectors, k, l_search);
             return make_python_pair(std::move(r));
           },
           py::arg("vectors"),
-          py::arg("top_k"),
+          py::arg("k"),
           py::arg("l_search"))
       .def(
           "write_index",
@@ -467,12 +465,21 @@ void init_type_erased_module(py::module_& m) {
           [](IndexIVFPQ& instance,
              const tiledb::Context& ctx,
              const std::string& group_uri,
+             IndexLoadStrategy index_load_strategy,
+             size_t memory_budget,
              std::optional<TemporalPolicy> temporal_policy) {
-            new (&instance) IndexIVFPQ(ctx, group_uri, temporal_policy);
+            new (&instance) IndexIVFPQ(
+                ctx,
+                group_uri,
+                index_load_strategy,
+                memory_budget,
+                temporal_policy);
           },
           py::keep_alive<1, 2>(),  // IndexIVFPQ should keep ctx alive.
           py::arg("ctx"),
           py::arg("group_uri"),
+          py::arg("index_load_strategy") = IndexLoadStrategy::PQ_INDEX,
+          py::arg("memory_budget") = 0,
           py::arg("temporal_policy") = std::nullopt)
       .def(
           "__init__",
@@ -494,41 +501,18 @@ void init_type_erased_module(py::module_& m) {
           },
           py::arg("vectors"))
       .def(
-          "query_infinite_ram",
-          [](IndexIVFPQ& index,
-             const FeatureVectorArray& vectors,
-             size_t top_k,
-             size_t nprobe,
-             float k_factor) {
-            auto r = index.query(
-                QueryType::InfiniteRAM, vectors, top_k, nprobe, 0, k_factor);
-            return make_python_pair(std::move(r));
-          },
-          py::arg("vectors"),
-          py::arg("top_k"),
-          py::arg("nprobe"),
-          py::arg("k_factor") = 1.f)
-      .def(
-          "query_finite_ram",
+          "query",
           [](IndexIVFPQ& index,
              const FeatureVectorArray& vectors,
-             size_t top_k,
+             size_t k,
              size_t nprobe,
-             size_t memory_budget,
              float k_factor) {
-            auto r = index.query(
-                QueryType::FiniteRAM,
-                vectors,
-                top_k,
-                nprobe,
-                memory_budget,
-                k_factor);
+            auto r = index.query(vectors, k, nprobe, k_factor);
             return make_python_pair(std::move(r));
           },
           py::arg("vectors"),
-          py::arg("top_k"),
+          py::arg("k"),
           py::arg("nprobe"),
-          py::arg("memory_budget"),
           py::arg("k_factor") = 1.f)
       .def(
           "write_index",
@@ -603,24 +587,24 @@ void init_type_erased_module(py::module_& m) {
           "query_infinite_ram",
           [](IndexIVFFlat& index,
              const FeatureVectorArray& query,
-             size_t top_k,
+             size_t k,
              size_t nprobe) {
-            auto r = index.query_infinite_ram(query, top_k, nprobe);
+            auto r = index.query_infinite_ram(query, k, nprobe);
             return make_python_pair(std::move(r));
-          })  //  , py::arg("vectors"), py::arg("top_k") = 1, py::arg("nprobe")
+          })  //  , py::arg("vectors"), py::arg("k") = 1, py::arg("nprobe")
               //  = 10)
       .def(
           "query_finite_ram",
           [](IndexIVFFlat& index,
              const FeatureVectorArray& query,
-             size_t top_k,
+             size_t k,
              size_t nprobe,
              size_t upper_bound) {
-            auto r = index.query_finite_ram(query, top_k, nprobe, upper_bound);
+            auto r = index.query_finite_ram(query, k, nprobe, upper_bound);
             return make_python_pair(std::move(r));
           },
           py::arg("vectors"),
-          py::arg("top_k") = 1,
+          py::arg("k") = 1,
           py::arg("nprobe") = 10,
           py::arg("upper_bound") = 0)
       .def("feature_type_string", &IndexIVFFlat::feature_type_string)
diff --git a/apis/python/test/local-benchmarks.py b/apis/python/test/local-benchmarks.py
index 048a35e61..0c2628306 100644
--- a/apis/python/test/local-benchmarks.py
+++ b/apis/python/test/local-benchmarks.py
@@ -22,7 +22,10 @@
 from tiledb.vector_search.index import Index
 from tiledb.vector_search.ingestion import TrainingSamplingPolicy
 from tiledb.vector_search.ingestion import ingest
+from tiledb.vector_search.ivf_flat_index import IVFFlatIndex
+from tiledb.vector_search.ivf_pq_index import IVFPQIndex
 from tiledb.vector_search.utils import load_fvecs
+from tiledb.vector_search.vamana_index import VamanaIndex
 
 
 class RemoteURIType(Enum):
@@ -252,7 +255,7 @@ def save_charts(self):
         plt.xlabel("Average Query Accuracy")
         plt.ylabel("Time (seconds)")
         plt.title(f"Ingestion Time vs Average Query Accuracy {sift_string()}")
-        for idx, timer in self.timers:
+        for idx, timer in enumerate(self.timers):
             timer.add_data_to_ingestion_time_vs_average_query_accuracy(
                 markers[idx % len(markers)]
             )
@@ -265,7 +268,7 @@ def save_charts(self):
         plt.xlabel("Accuracy")
         plt.ylabel("Time (seconds)")
         plt.title(f"Query Time vs Accuracy {sift_string()}")
-        for idx, timer in self.timers:
+        for idx, timer in enumerate(self.timers):
             timer.add_data_to_query_time_vs_accuracy(markers[idx % len(markers)])
         plt.legend()
         plt.savefig(os.path.join(RESULTS_DIR, "query_time_vs_accuracy.png"))
@@ -295,6 +298,7 @@ def download_and_extract(url, download_path, extract_path):
 
 
 def get_uri(tag):
+    global config
     index_name = f"index_{tag.replace('=', '_')}"
     index_uri = ""
     if REMOTE_URI_TYPE == RemoteURIType.LOCAL:
@@ -346,7 +350,7 @@ def benchmark_ivf_flat():
         index_uri = get_uri(tag)
 
         timer.start(tag, TimerMode.INGESTION)
-        index = ingest(
+        ingest(
             index_type=index_type,
             index_uri=index_uri,
             source_uri=SIFT_BASE_PATH,
@@ -356,6 +360,10 @@ def benchmark_ivf_flat():
         )
         ingest_time = timer.stop(tag, TimerMode.INGESTION)
 
+        # The index returned by ingest() automatically has memory_budget=1000000 set. Open
+        # a fresh index so it's clear what config is being used.
+        index = IVFFlatIndex(index_uri, config)
+
         for nprobe in [1, 2, 3, 4, 5, 10, 20]:
             timer.start(tag, TimerMode.QUERY)
             _, result = index.query(queries, k=k, nprobe=nprobe)
@@ -386,7 +394,7 @@ def benchmark_vamana():
             index_uri = get_uri(tag)
 
             timer.start(tag, TimerMode.INGESTION)
-            index = ingest(
+            ingest(
                 index_type=index_type,
                 index_uri=index_uri,
                 source_uri=SIFT_BASE_PATH,
@@ -397,6 +405,8 @@ def benchmark_vamana():
             )
             ingest_time = timer.stop(tag, TimerMode.INGESTION)
 
+            index = VamanaIndex(index_uri, config)
+
             for l_search in [k, k + 50, k + 100, k + 200, k + 400]:
                 timer.start(tag, TimerMode.QUERY)
                 _, result = index.query(queries, k=k, l_search=l_search)
@@ -429,7 +439,7 @@ def benchmark_ivf_pq():
                 index_uri = get_uri(tag)
 
                 timer.start(tag, TimerMode.INGESTION)
-                index = ingest(
+                ingest(
                     index_type=index_type,
                     index_uri=index_uri,
                     source_uri=SIFT_BASE_PATH,
@@ -440,6 +450,10 @@ def benchmark_ivf_pq():
                 )
                 ingest_time = timer.stop(tag, TimerMode.INGESTION)
 
+                # The index returned by ingest() automatically has memory_budget=1000000 set. Open
+                # a fresh index so it's clear what config is being used.
+                index = IVFPQIndex(index_uri, config)
+
                 for nprobe in [5, 10, 20, 40, 60]:
                     timer.start(tag, TimerMode.QUERY)
                     _, result = index.query(
diff --git a/apis/python/test/test_cloud.py b/apis/python/test/test_cloud.py
index 0a7e06416..099756602 100644
--- a/apis/python/test/test_cloud.py
+++ b/apis/python/test/test_cloud.py
@@ -63,6 +63,7 @@ def run_cloud_test(self, index_uri, index_type, index_class):
             input_vectors_per_work_item=5000,
             config=tiledb.cloud.Config().dict(),
             mode=Mode.BATCH,
+            verbose=True,
         )
         tiledb_index_uri = groups.info(index_uri).tiledb_uri
 
diff --git a/apis/python/test/test_type_erased_module.py b/apis/python/test/test_type_erased_module.py
index 207364a78..f4e858185 100644
--- a/apis/python/test/test_type_erased_module.py
+++ b/apis/python/test/test_type_erased_module.py
@@ -499,19 +499,27 @@ def test_construct_IndexIVFPQ_with_empty_vector(tmp_path):
     a.train(training_set)
     a.add(training_set)
 
-    _, ids = a.query_infinite_ram(query_set, k_nn, nprobe)
-    assert recall(ids, groundtruth_set, k_nn) >= 0.87
+    _, ids = a.query(query_set, k_nn, nprobe)
+    accuracy = recall(ids, groundtruth_set, k_nn)
+    assert accuracy >= 0.87
 
     index_uri = os.path.join(tmp_path, "array")
     a.write_index(ctx, index_uri)
 
-    b = vspy.IndexIVFPQ(ctx, index_uri)
+    b_infinite = vspy.IndexIVFPQ(ctx, index_uri)
+    _, ids_infinite = b_infinite.query(query_set, k_nn, nprobe)
+    accuracy_infinite = recall(ids_infinite, groundtruth_set, k_nn)
+    assert accuracy == accuracy_infinite
 
-    _, ids = b.query_infinite_ram(query_set, k_nn, nprobe)
-    assert recall(ids, groundtruth_set, k_nn) >= 0.87
-
-    _, ids = b.query_finite_ram(query_set, k_nn, nprobe, 500)
-    assert recall(ids, groundtruth_set, k_nn) >= 0.87
+    b_finite = vspy.IndexIVFPQ(
+        ctx,
+        index_uri,
+        index_load_strategy=vspy.IndexLoadStrategy.PQ_OOC,
+        memory_budget=1000,
+    )
+    _, ids_finite = b_finite.query(query_set, k_nn, nprobe)
+    accuracy_finite = recall(ids_finite, groundtruth_set, k_nn)
+    assert accuracy == accuracy_finite
 
 
 def test_inplace_build_query_IndexIVFPQ(tmp_path):
@@ -535,19 +543,27 @@ def test_inplace_build_query_IndexIVFPQ(tmp_path):
     a.train(training_set)
     a.add(training_set)
 
-    _, ids = a.query_infinite_ram(query_set, k_nn, nprobe)
-    assert recall(ids, groundtruth_set, k_nn) >= 0.87
+    _, ids = a.query(query_set, k_nn, nprobe)
+    accuracy = recall(ids, groundtruth_set, k_nn)
+    assert accuracy >= 0.87
 
     index_uri = os.path.join(tmp_path, "array")
     a.write_index(ctx, index_uri)
 
-    b = vspy.IndexIVFPQ(ctx, index_uri)
+    b_infinite = vspy.IndexIVFPQ(ctx, index_uri)
+    _, ids_infinite = b_infinite.query(query_set, k_nn, nprobe)
+    accuracy_infinite = recall(ids_infinite, groundtruth_set, k_nn)
+    assert accuracy == accuracy_infinite
 
-    _, ids = b.query_infinite_ram(query_set, k_nn, nprobe)
-    assert recall(ids, groundtruth_set, k_nn) >= 0.87
-
-    _, ids = b.query_finite_ram(query_set, k_nn, nprobe, 500)
-    assert recall(ids, groundtruth_set, k_nn) >= 0.87
+    b_finite = vspy.IndexIVFPQ(
+        ctx,
+        index_uri,
+        index_load_strategy=vspy.IndexLoadStrategy.PQ_OOC,
+        memory_budget=999,
+    )
+    _, ids_finite = b_finite.query(query_set, k_nn, nprobe)
+    accuracy_finite = recall(ids_finite, groundtruth_set, k_nn)
+    assert accuracy == accuracy_finite
 
 
 def test_construct_IndexIVFFlat():
diff --git a/src/include/api/ivf_pq_index.h b/src/include/api/ivf_pq_index.h
index 5703a61b0..20959c798 100644
--- a/src/include/api/ivf_pq_index.h
+++ b/src/include/api/ivf_pq_index.h
@@ -144,6 +144,8 @@ class IndexIVFPQ {
   IndexIVFPQ(
       const tiledb::Context& ctx,
       const URI& group_uri,
+      IndexLoadStrategy index_load_strategy = IndexLoadStrategy::PQ_INDEX,
+      size_t upper_bound = 0,
       std::optional<TemporalPolicy> temporal_policy = std::nullopt) {
     read_types(
         ctx,
@@ -157,13 +159,15 @@ class IndexIVFPQ {
     if (uri_dispatch_table.find(type) == uri_dispatch_table.end()) {
       throw std::runtime_error("Unsupported datatype combination");
     }
-    index_ = uri_dispatch_table.at(type)(ctx, group_uri, temporal_policy);
+    index_ = uri_dispatch_table.at(type)(
+        ctx, group_uri, index_load_strategy, upper_bound, temporal_policy);
     n_list_ = index_->nlist();
     num_subspaces_ = index_->num_subspaces();
     max_iterations_ = index_->max_iterations();
     convergence_tolerance_ = index_->convergence_tolerance();
     reassign_ratio_ = index_->reassign_ratio();
     distance_metric_ = index_->distance_metric();
+    upper_bound_ = index_->upper_bound();
 
     if (dimensions_ != 0 && dimensions_ != index_->dimensions()) {
       throw std::runtime_error(
@@ -244,17 +248,14 @@ class IndexIVFPQ {
   }
 
   [[nodiscard]] auto query(
-      QueryType queryType,
       const QueryVectorArray& vectors,
       size_t top_k,
       size_t nprobe,
-      size_t upper_bound = 0,
       float k_factor = 1.f) {
     if (!index_) {
       throw std::runtime_error("Cannot query() because there is no index.");
     }
-    return index_->query(
-        queryType, vectors, top_k, nprobe, upper_bound, k_factor);
+    return index_->query(vectors, top_k, nprobe, k_factor);
   }
 
   void write_index(
@@ -304,6 +305,10 @@ class IndexIVFPQ {
     return dimensions_;
   }
 
+  constexpr size_t upper_bound() const {
+    return upper_bound_;
+  }
+
   constexpr auto n_list() const {
     return n_list_;
   }
@@ -396,11 +401,9 @@ class IndexIVFPQ {
 
     [[nodiscard]] virtual std::tuple<FeatureVectorArray, FeatureVectorArray>
     query(
-        QueryType queryType,
         const QueryVectorArray& vectors,
         size_t top_k,
         size_t nprobe,
-        size_t upper_bound,
         float k_factor) = 0;
 
     virtual void write_index(
@@ -410,6 +413,7 @@ class IndexIVFPQ {
         const std::string& storage_version) = 0;
 
     [[nodiscard]] virtual uint64_t dimensions() const = 0;
+    [[nodiscard]] virtual size_t upper_bound() const = 0;
     [[nodiscard]] virtual TemporalPolicy temporal_policy() const = 0;
     [[nodiscard]] virtual uint64_t nlist() const = 0;
     [[nodiscard]] virtual uint32_t num_subspaces() const = 0;
@@ -450,8 +454,15 @@ class IndexIVFPQ {
     index_impl(
         const tiledb::Context& ctx,
         const URI& index_uri,
+        IndexLoadStrategy index_load_strategy,
+        size_t upper_bound,
         std::optional<TemporalPolicy> temporal_policy)
-        : impl_index_(ctx, index_uri, temporal_policy) {
+        : impl_index_(
+              ctx,
+              index_uri,
+              index_load_strategy,
+              upper_bound,
+              temporal_policy) {
     }
 
     void train(const FeatureVectorArray& training_set) override {
@@ -507,11 +518,9 @@ class IndexIVFPQ {
      * @todo Make sure the extents of the returned arrays are used correctly.
      */
     [[nodiscard]] std::tuple<FeatureVectorArray, FeatureVectorArray> query(
-        QueryType queryType,
         const QueryVectorArray& vectors,
         size_t top_k,
         size_t nprobe,
-        size_t upper_bound,
         float k_factor) override {
       // @todo using index_type = size_t;
       auto dtype = vectors.feature_type();
@@ -523,8 +532,7 @@ class IndexIVFPQ {
               (float*)vectors.data(),
               extents(vectors)[0],
               extents(vectors)[1]};  // @todo ??
-          auto [s, t] = impl_index_.query(
-              queryType, qspan, top_k, nprobe, upper_bound, k_factor);
+          auto [s, t] = impl_index_.query(qspan, top_k, nprobe, k_factor);
           auto x = FeatureVectorArray{std::move(s)};
           auto y = FeatureVectorArray{std::move(t)};
           return {std::move(x), std::move(y)};
@@ -534,8 +542,7 @@ class IndexIVFPQ {
               (uint8_t*)vectors.data(),
               extents(vectors)[0],
               extents(vectors)[1]};  // @todo ??
-          auto [s, t] = impl_index_.query(
-              queryType, qspan, top_k, nprobe, upper_bound, k_factor);
+          auto [s, t] = impl_index_.query(qspan, top_k, nprobe, k_factor);
           auto x = FeatureVectorArray{std::move(s)};
           auto y = FeatureVectorArray{std::move(t)};
           return {std::move(x), std::move(y)};
@@ -557,6 +564,10 @@ class IndexIVFPQ {
       return ::dimensions(impl_index_);
     }
 
+    size_t upper_bound() const override {
+      return impl_index_.upper_bound();
+    }
+
     TemporalPolicy temporal_policy() const override {
       return impl_index_.temporal_policy();
     }
@@ -597,7 +608,7 @@ class IndexIVFPQ {
   using table_type = std::map<std::tuple<tiledb_datatype_t, tiledb_datatype_t, tiledb_datatype_t>, constructor_function>;
   static const table_type dispatch_table;
 
-  using uri_constructor_function = std::function<std::unique_ptr<index_base>(const tiledb::Context&, const std::string&, std::optional<TemporalPolicy>)>;
+  using uri_constructor_function = std::function<std::unique_ptr<index_base>(const tiledb::Context&, const std::string&, IndexLoadStrategy, size_t, std::optional<TemporalPolicy>)>;
   using uri_table_type = std::map<std::tuple<tiledb_datatype_t, tiledb_datatype_t, tiledb_datatype_t>, uri_constructor_function>;
   static const uri_table_type uri_dispatch_table;
 
@@ -607,6 +618,7 @@ class IndexIVFPQ {
   // clang-format on
 
   uint64_t dimensions_{0};
+  size_t upper_bound_{0};
   size_t n_list_{0};
   uint32_t num_subspaces_{16};
   uint32_t max_iterations_{2};
@@ -636,18 +648,18 @@ const IndexIVFPQ::table_type IndexIVFPQ::dispatch_table = {
 };
 
 const IndexIVFPQ::uri_table_type IndexIVFPQ::uri_dispatch_table = {
-  {{TILEDB_INT8,    TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<int8_t,  uint32_t, uint32_t>>>(ctx, uri, temporal_policy); }},
-  {{TILEDB_UINT8,   TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<uint8_t, uint32_t, uint32_t>>>(ctx, uri, temporal_policy); }},
-  {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<float,   uint32_t, uint32_t>>>(ctx, uri, temporal_policy); }},
-  {{TILEDB_INT8,    TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<int8_t,  uint32_t, uint64_t>>>(ctx, uri, temporal_policy); }},
-  {{TILEDB_UINT8,   TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<uint8_t, uint32_t, uint64_t>>>(ctx, uri, temporal_policy); }},
-  {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<float,   uint32_t, uint64_t>>>(ctx, uri, temporal_policy); }},
-  {{TILEDB_INT8,    TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<int8_t,  uint64_t, uint32_t>>>(ctx, uri, temporal_policy); }},
-  {{TILEDB_UINT8,   TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<uint8_t, uint64_t, uint32_t>>>(ctx, uri, temporal_policy); }},
-  {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<float,   uint64_t, uint32_t>>>(ctx, uri, temporal_policy); }},
-  {{TILEDB_INT8,    TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<int8_t,  uint64_t, uint64_t>>>(ctx, uri, temporal_policy); }},
-  {{TILEDB_UINT8,   TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<uint8_t, uint64_t, uint64_t>>>(ctx, uri, temporal_policy); }},
-  {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<float,   uint64_t, uint64_t>>>(ctx, uri, temporal_policy); }},
+  {{TILEDB_INT8,    TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<int8_t,  uint32_t, uint32_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
+  {{TILEDB_UINT8,   TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<uint8_t, uint32_t, uint32_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
+  {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<float,   uint32_t, uint32_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
+  {{TILEDB_INT8,    TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<int8_t,  uint32_t, uint64_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
+  {{TILEDB_UINT8,   TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<uint8_t, uint32_t, uint64_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
+  {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<float,   uint32_t, uint64_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
+  {{TILEDB_INT8,    TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<int8_t,  uint64_t, uint32_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
+  {{TILEDB_UINT8,   TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<uint8_t, uint64_t, uint32_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
+  {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<float,   uint64_t, uint32_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
+  {{TILEDB_INT8,    TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<int8_t,  uint64_t, uint64_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
+  {{TILEDB_UINT8,   TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<uint8_t, uint64_t, uint64_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
+  {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional<TemporalPolicy> temporal_policy) { return std::make_unique<index_impl<ivf_pq_index<float,   uint64_t, uint64_t>>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }},
 };
 
 const IndexIVFPQ::clear_history_table_type IndexIVFPQ::clear_history_dispatch_table = {
diff --git a/src/include/index/index_defs.h b/src/include/index/index_defs.h
index 59affe3b9..81706b8c7 100644
--- a/src/include/index/index_defs.h
+++ b/src/include/index/index_defs.h
@@ -57,6 +57,16 @@ enum class IndexKind { FlatL2, IVFFlat, Vamana, IVFPQ };
 
 enum class QueryType { FiniteRAM, InfiniteRAM };
 
+enum class IndexLoadStrategy {
+  // Will load the index as an OOC index.
+  PQ_OOC,
+  // Load the PQ index data as well as metadata.
+  PQ_INDEX,
+  // Load everything DEFAULT does, but also preload feature vectors for
+  // reranking.
+  PQ_INDEX_AND_RERANKING_VECTORS,
+};
+
 /******************************************************************************
  * Static info for arrays associated with an index group
  ******************************************************************************/
diff --git a/src/include/index/ivf_pq_index.h b/src/include/index/ivf_pq_index.h
index 8d3378161..c5e09cc3f 100644
--- a/src/include/index/ivf_pq_index.h
+++ b/src/include/index/ivf_pq_index.h
@@ -108,6 +108,7 @@
 #include "detail/ivf/partition.h"
 #include "detail/ivf/qv.h"
 #include "detail/linalg/tdb_matrix_multi_range.h"
+#include "detail/linalg/tdb_matrix_with_ids.h"
 
 #include <tiledb/tiledb>
 #include <type_traits>
@@ -189,7 +190,9 @@ class ivf_pq_index {
   /****************************************************************************
    * Index group information
    ****************************************************************************/
+  size_t upper_bound_{0};
   TemporalPolicy temporal_policy_;
+  IndexLoadStrategy index_load_strategy_{IndexLoadStrategy::PQ_INDEX};
   std::unique_ptr<ivf_pq_group<ivf_pq_index>> group_;
 
   /****************************************************************************
@@ -198,6 +201,7 @@ class ivf_pq_index {
 
   // Cached information about the partitioned vectors in the index
   uint64_t dimensions_{0};
+  uint64_t num_vectors_{0};
   uint64_t num_partitions_{0};
 
   // Cached information about the pq encoding
@@ -350,16 +354,29 @@ class ivf_pq_index {
   ivf_pq_index(
       const tiledb::Context& ctx,
       const std::string& uri,
+      IndexLoadStrategy index_load_strategy = IndexLoadStrategy::PQ_INDEX,
+      size_t upper_bound = 0,
       std::optional<TemporalPolicy> temporal_policy = std::nullopt)
-      : temporal_policy_{temporal_policy.has_value() ? *temporal_policy : TemporalPolicy()}
+      : upper_bound_{upper_bound}
+      , temporal_policy_{temporal_policy.has_value() ? *temporal_policy : TemporalPolicy()}
+      , index_load_strategy_{index_load_strategy}
       , group_{std::make_unique<ivf_pq_group<ivf_pq_index>>(
             ctx, uri, TILEDB_READ, temporal_policy_)} {
+    if (upper_bound != 0 && index_load_strategy_ != IndexLoadStrategy::PQ_OOC) {
+      throw std::runtime_error(
+          "With upper_bound > 0 you must use IndexLoadStrategy::PQ_OOC.");
+    }
+    if (upper_bound == 0 && index_load_strategy_ == IndexLoadStrategy::PQ_OOC) {
+      throw std::runtime_error(
+          "With IndexLoadStrategy::PQ_OOC you must have an upper_bound > 0.");
+    }
     /**
      * Read the centroids. How the partitioned_pq_vectors_ are read in will be
      * determined by the type of query we are doing. But they will be read
      * in at this same timestamp.
      */
     dimensions_ = group_->get_dimensions();
+    num_vectors_ = group_->get_base_size();
     num_partitions_ = group_->get_num_partitions();
     num_subspaces_ = group_->get_num_subspaces();
     sub_dimensions_ = dimensions_ / num_subspaces_;
@@ -385,6 +402,54 @@ class ivf_pq_index {
             std::nullopt,
             num_clusters_,
             temporal_policy_);
+
+    if (upper_bound == 0) {
+      // Read the the complete index arrays into ("infinite") memory. This will
+      // read the centroids, indices, partitioned_ids, and and the complete set
+      // of partitioned_pq_vectors, along with metadata from a group_uri. Load
+      // all partitions for infinite query Note that the constructor will move
+      // the infinite_parts vector
+      auto infinite_parts =
+          std::vector<indices_type>(::num_vectors(flat_ivf_centroids_));
+      std::iota(begin(infinite_parts), end(infinite_parts), 0);
+      partitioned_pq_vectors_ = std::make_unique<tdb_pq_storage_type>(
+          group_->cached_ctx(),
+          group_->pq_ivf_vectors_uri(),
+          group_->pq_ivf_indices_uri(),
+          group_->get_num_partitions() + 1,
+          group_->pq_ivf_ids_uri(),
+          infinite_parts,
+          0,
+          temporal_policy_);
+
+      partitioned_pq_vectors_->load();
+
+      if (::num_vectors(*partitioned_pq_vectors_) !=
+          size(partitioned_pq_vectors_->ids())) {
+        throw std::runtime_error(
+            "[ivf_flat_index@ivf_pq_index] "
+            "::num_vectors(*partitioned_pq_vectors_) != "
+            "size(partitioned_pq_vectors_->ids())");
+      }
+      if (size(partitioned_pq_vectors_->indices()) !=
+          ::num_vectors(flat_ivf_centroids_) + 1) {
+        throw std::runtime_error(
+            "[ivf_flat_index@ivf_pq_index] "
+            "size(partitioned_pq_vectors_->indices()) != "
+            "::num_vectors(flat_ivf_centroids_) + 1");
+      }
+    }
+    if (index_load_strategy_ ==
+        IndexLoadStrategy::PQ_INDEX_AND_RERANKING_VECTORS) {
+      feature_vectors_ = tdbColMajorPreLoadMatrixWithIds<feature_type, id_type>(
+          group_->cached_ctx(),
+          group_->feature_vectors_uri(),
+          group_->ids_uri(),
+          dimensions_,
+          num_vectors_,
+          0,
+          temporal_policy_);
+    }
   }
 
   /****************************************************************************
@@ -608,7 +673,7 @@ class ivf_pq_index {
       const V& training_set, kmeans_init init = kmeans_init::random) {
     dimensions_ = ::dimensions(training_set);
     if (num_partitions_ == 0) {
-      num_partitions_ = std::sqrt(num_vectors(training_set));
+      num_partitions_ = std::sqrt(::num_vectors(training_set));
     }
 
     flat_ivf_centroids_ =
@@ -678,6 +743,8 @@ class ivf_pq_index {
       const Array& training_set,
       const Vector& training_set_ids,
       Distance distance = Distance{}) {
+    num_vectors_ = ::num_vectors(training_set);
+
     // 1. Fill in cluster_centroids_.
     // cluster_centroids_ holds the num_clusters_ (256) centroids for each
     // subspace.
@@ -774,9 +841,9 @@ class ivf_pq_index {
       class Distance = uncached_sub_sum_of_squares_distance>
   auto pq_encode(const U& training_set, Distance distance = Distance{}) const {
     auto pq_vectors =
-        std::make_unique<Matrix>(num_subspaces_, num_vectors(training_set));
+        std::make_unique<Matrix>(num_subspaces_, ::num_vectors(training_set));
     auto& pqv = *pq_vectors;
-    for (size_t i = 0; i < num_vectors(training_set); ++i) {
+    for (size_t i = 0; i < ::num_vectors(training_set); ++i) {
       pq_encode_one(training_set[i], pqv[i], distance);
     }
     return pq_vectors;
@@ -810,10 +877,10 @@ class ivf_pq_index {
     // vector, the distance from the second subspace to each of the pq
     // centroids.
     auto pq_vectors = std::make_unique<Matrix>(
-        num_subspaces_ * num_clusters_, num_vectors(query_vectors));
+        num_subspaces_ * num_clusters_, ::num_vectors(query_vectors));
     auto& pqv = *pq_vectors;
     auto local_distance = Distance{};
-    for (size_t i = 0; i < num_vectors(query_vectors); ++i) {
+    for (size_t i = 0; i < ::num_vectors(query_vectors); ++i) {
       auto sub_begin = 0;
       auto sub_id = 0;
       for (size_t subspace = 0; subspace < num_subspaces_; ++subspace) {
@@ -834,105 +901,6 @@ class ivf_pq_index {
     return pq_vectors;
   }
 
-  /*****************************************************************************
-   * Methods for reading and reading the index from a group.
-   *****************************************************************************/
-
-  /**
-   * @brief Read the the complete index arrays into ("infinite") memory.
-   * This will read the centroids, indices, partitioned_ids, and
-   * and the complete set of partitioned_pq_vectors, along with metadata
-   * from a group_uri.
-   */
-  auto read_index_infinite() {
-    if (!group_) {
-      if (!partitioned_pq_vectors_) {
-        throw std::runtime_error(
-            "[ivf_pq_index@read_index_infinite] Neither partitioned_pq_vectors "
-            "nor the group have been initialized");
-      }
-      // If we have created an empty index and then try to query it,
-      // partitioned_pq_vectors_ will be empty so we will try to read here, but
-      // we won't have a group_. Just return and leave partitioned_pq_vectors_
-      // empty.
-      return;
-    }
-
-    // Load all partitions for infinite query
-    // Note that the constructor will move the infinite_parts vector
-    auto infinite_parts =
-        std::vector<indices_type>(::num_vectors(flat_ivf_centroids_));
-    std::iota(begin(infinite_parts), end(infinite_parts), 0);
-    partitioned_pq_vectors_ = std::make_unique<tdb_pq_storage_type>(
-        group_->cached_ctx(),
-        group_->pq_ivf_vectors_uri(),
-        group_->pq_ivf_indices_uri(),
-        group_->get_num_partitions() + 1,
-        group_->pq_ivf_ids_uri(),
-        infinite_parts,
-        0,
-        temporal_policy_);
-
-    partitioned_pq_vectors_->load();
-
-    if (::num_vectors(*partitioned_pq_vectors_) !=
-        size(partitioned_pq_vectors_->ids())) {
-      throw std::runtime_error(
-          "[ivf_flat_index@read_index_infinite] "
-          "::num_vectors(*partitioned_pq_vectors_) != "
-          "size(partitioned_pq_vectors_->ids())");
-    }
-    if (size(partitioned_pq_vectors_->indices()) !=
-        ::num_vectors(flat_ivf_centroids_) + 1) {
-      throw std::runtime_error(
-          "[ivf_flat_index@read_index_infinite] "
-          "size(partitioned_pq_vectors_->indices()) != "
-          "::num_vectors(flat_ivf_centroids_) + 1");
-    }
-  }
-
-  /**
-   * @brief Open the index from the arrays contained in the group_uri.
-   * The "finite" queries only load as much data (ids and vectors) as are
-   * necessary for a given query -- so we can't load any data until we
-   * know what the query is. So, here we would have read the centroids and
-   * indices into memory, when creating the index but would not have read
-   * the partitioned_ids or partitioned_pq_vectors.
-   *
-   * @param group_uri
-   * @return bool indicating success or failure of read
-   */
-  template <feature_vector_array Q>
-  auto read_index_finite(
-      const Q& query_vectors, size_t nprobe, size_t upper_bound) {
-    if (!group_) {
-      throw std::runtime_error(
-          "[ivf_pq_index@read_index_finite] group_ is not initialized. This "
-          "happens if you do not load an index by URI. Please close the index "
-          "and re-open it by URI.");
-    }
-
-    auto&& [active_partitions, active_queries] =
-        detail::ivf::partition_ivf_flat_index<indices_type>(
-            flat_ivf_centroids_, query_vectors, nprobe, num_threads_);
-    auto partitioned_pq_vectors = std::make_unique<tdb_pq_storage_type>(
-        group_->cached_ctx(),
-        group_->pq_ivf_vectors_uri(),
-        group_->pq_ivf_indices_uri(),
-        group_->get_num_partitions() + 1,
-        group_->pq_ivf_ids_uri(),
-        active_partitions,
-        upper_bound,
-        temporal_policy_);
-
-    // NB: We don't load the partitioned_pq_vectors here. We will load them
-    // when we do the query.
-    return std::make_tuple(
-        std::move(active_partitions),
-        std::move(active_queries),
-        std::move(partitioned_pq_vectors));
-  }
-
   /**
    * @brief Write the index to storage. This would typically be done after a
    * set of input vectors has been read and a new group is created. Or after
@@ -1140,48 +1108,25 @@ class ivf_pq_index {
    *
    ****************************************************************************/
 
-  template <feature_vector_array Q>
-  auto query(
-      QueryType queryType,
-      const Q& query_vectors,
-      size_t k_nn,
-      size_t nprobe,
-      size_t upper_bound = 0,
-      float k_factor = 1.f) {
-    switch (queryType) {
-      case QueryType::InfiniteRAM:
-        return query_infinite_ram(query_vectors, k_nn, nprobe, k_factor);
-      case QueryType::FiniteRAM:
-        return query_finite_ram(
-            query_vectors, k_nn, nprobe, upper_bound, k_factor);
-      default:
-        throw std::runtime_error("Invalid query type");
-    }
-  }
-
   /**
    * @brief Perform a query on the index, returning the nearest neighbors
    * and distances. The function returns a matrix containing k_nn nearest
    * neighbors for each given query and a matrix containing the distances
    * corresponding to each returned neighbor.
    *
-   * This function searches for the nearest neighbors using "infinite RAM",
-   * that is, it loads the entire IVF index into memory and then applies the
-   * query.
-   *
    * @tparam Q Type of query vectors.
    * @param query_vectors Array of (uncompressed) vectors to query.
    * @param k_nn Number of nearest neighbors to return.
+   * @param nprobe Number of centroids to search.
    * @param k_factor Specifies the multiplier on k_nn for the initial IVF_PQ
    * query, after which re-ranking is run.
-   * @param nprobe Number of centroids to search.
    *
    * @return A tuple containing a matrix of nearest neighbors and a matrix
    * of the corresponding distances.
    *
    */
   template <feature_vector_array Q>
-  auto query_infinite_ram(
+  auto query(
       const Q& query_vectors,
       size_t k_nn,
       size_t nprobe,
@@ -1192,13 +1137,68 @@ class ivf_pq_index {
     if (::num_vectors(flat_ivf_centroids_) < nprobe) {
       nprobe = ::num_vectors(flat_ivf_centroids_);
     }
-    if (!partitioned_pq_vectors_ ||
-        partitioned_pq_vectors_->num_vectors() == 0 ||
-        partitioned_pq_vectors_->num_vectors() !=
-            partitioned_pq_vectors_->total_num_vectors()) {
-      read_index_infinite();
+
+    if (upper_bound_ > 0) {
+      // Searches for the nearest neighbors using "finite RAM",
+      // that is, it only loads that portion of the IVF index into memory that
+      // is necessary for the given query. In addition, it supports out of core
+      // operation, meaning that only a subset of the necessary partitions are
+      // loaded into memory at any one time.
+      if (!group_) {
+        throw std::runtime_error(
+            "[ivf_pq_index@read_index_finite] group_ is not initialized. This "
+            "happens if you do not load an index by URI. Please close the "
+            "index "
+            "and re-open it by URI.");
+      }
+
+      // Open the index from the arrays contained in the group_uri.
+      // The "finite" queries only load as much data (ids and vectors) as are
+      // necessary for a given query -- so we can't load any data until we
+      // know what the query is. So, here we would have read the centroids and
+      // indices into memory, when creating the index but would not have read
+      // the partitioned_ids or partitioned_pq_vectors.
+      auto&& [active_partitions, active_queries] =
+          detail::ivf::partition_ivf_flat_index<indices_type>(
+              flat_ivf_centroids_, query_vectors, nprobe, num_threads_);
+      auto partitioned_pq_vectors = std::make_unique<tdb_pq_storage_type>(
+          group_->cached_ctx(),
+          group_->pq_ivf_vectors_uri(),
+          group_->pq_ivf_indices_uri(),
+          group_->get_num_partitions() + 1,
+          group_->pq_ivf_ids_uri(),
+          active_partitions,
+          upper_bound_,
+          temporal_policy_);
+
+      auto query_to_pq_centroid_distance_tables =
+          std::move(*generate_query_to_pq_centroid_distance_tables<
+                    Q,
+                    ColMajorMatrix<float>>(query_vectors));
+      size_t k_initial = static_cast<size_t>(k_nn * k_factor);
+      auto&& [initial_distances, initial_ids, initial_indices] =
+          detail::ivf::query_finite_ram(
+              *partitioned_pq_vectors,
+              query_to_pq_centroid_distance_tables,
+              active_queries,
+              k_initial,
+              upper_bound_,
+              num_threads_,
+              make_pq_distance_query_to_pq_centroid_distance_tables<
+                  std::span<float>,
+                  decltype(pq_storage_type{}[0])>());
+      return rerank(
+          std::move(initial_distances),
+          std::move(initial_ids),
+          std::move(initial_indices),
+          query_vectors,
+          k_initial,
+          k_nn);
     }
 
+    // This function searches for the nearest neighbors using "infinite RAM". We
+    // have already loaded the partitioned_pq_vectors_ into memory in the
+    // constructor, so we can just run the query.
     auto&& [active_partitions, active_queries] =
         detail::ivf::partition_ivf_flat_index<indices_type>(
             flat_ivf_centroids_, query_vectors, nprobe, num_threads_);
@@ -1230,78 +1230,6 @@ class ivf_pq_index {
         k_nn);
   }
 
-  /**
-   * @brief Perform a query on the index, returning the nearest neighbors
-   * and distances. The function returns a matrix containing k_nn nearest
-   * neighbors for each given query and a matrix containing the distances
-   * corresponding to each returned neighbor.
-   *
-   * This function searches for the nearest neighbors using "finite RAM",
-   * that is, it only loads that portion of the IVF index into memory that
-   * is necessary for the given query. In addition, it supports out of core
-   * operation, meaning that only a subset of the necessary partitions are
-   * loaded into memory at any one time.
-   *
-   * See the documentation for that function in detail/ivf/qv.h
-   * for more details.
-   *
-   * @tparam Q Type of query vectors. Must meet requirements of
-   * `feature_vector_array`
-   * @param query_vectors Array of (uncompressed) vectors to query.
-   * @param k_nn Number of nearest neighbors to return.
-   * @param k_factor Specifies the multipler on k_nn for the initial IVF_PQ
-   * query, after which re-ranking is run.
-   * @param nprobe Number of centroids to search.
-   *
-   * @return A tuple containing a matrix of nearest neighbors and a matrix
-   * of the corresponding distances.
-   */
-  template <feature_vector_array Q>
-  auto query_finite_ram(
-      const Q& query_vectors,
-      size_t k_nn,
-      size_t nprobe,
-      size_t upper_bound = 0,
-      float k_factor = 1.f) {
-    if (!group_) {
-      throw std::runtime_error(
-          "[ivf_pq_index@query_finite_ram] Query with finite RAM can only be "
-          "run if you're loading the index by URI. Please open it by URI and "
-          "try again. If you just wrote the index, open it up again by URI.");
-    }
-    if (k_factor < 1.f) {
-      throw std::runtime_error("k_factor must be >= 1");
-    }
-    if (::num_vectors(flat_ivf_centroids_) < nprobe) {
-      nprobe = ::num_vectors(flat_ivf_centroids_);
-    }
-    auto&& [active_partitions, active_queries, partitioned_pq_vectors] =
-        read_index_finite(query_vectors, nprobe, upper_bound);
-    auto query_to_pq_centroid_distance_tables =
-        std::move(*generate_query_to_pq_centroid_distance_tables<
-                  Q,
-                  ColMajorMatrix<float>>(query_vectors));
-    size_t k_initial = static_cast<size_t>(k_nn * k_factor);
-    auto&& [initial_distances, initial_ids, initial_indices] =
-        detail::ivf::query_finite_ram(
-            *partitioned_pq_vectors,
-            query_to_pq_centroid_distance_tables,
-            active_queries,
-            k_initial,
-            upper_bound,
-            num_threads_,
-            make_pq_distance_query_to_pq_centroid_distance_tables<
-                std::span<float>,
-                decltype(pq_storage_type{}[0])>());
-    return rerank(
-        std::move(initial_distances),
-        std::move(initial_ids),
-        std::move(initial_indices),
-        query_vectors,
-        k_initial,
-        k_nn);
-  }
-
   auto rerank(
       ColMajorMatrix<float>&& initial_distances,
       ColMajorMatrix<id_type>&& initial_ids,
@@ -1428,6 +1356,14 @@ class ivf_pq_index {
     return dimensions_;
   }
 
+  uint64_t num_vectors() const {
+    return num_vectors_;
+  }
+
+  size_t upper_bound() const {
+    return upper_bound_;
+  }
+
   auto num_partitions() const {
     if (num_partitions_ != ::num_vectors(flat_ivf_centroids_)) {
       throw std::runtime_error(
@@ -1500,7 +1436,7 @@ class ivf_pq_index {
     double total_distance = 0.0;
     double total_normalizer = 0.0;
 
-    for (size_t i = 0; i < num_vectors(feature_vectors); ++i) {
+    for (size_t i = 0; i < ::num_vectors(feature_vectors); ++i) {
       if (debug) {
         std::cout << "-------------------" << std::endl;
       }
@@ -1588,8 +1524,8 @@ class ivf_pq_index {
       }
     }
 
-    for (size_t i = 0; i < num_vectors(feature_vectors); ++i) {
-      for (size_t j = i + 1; j < num_vectors(feature_vectors); ++j) {
+    for (size_t i = 0; i < ::num_vectors(feature_vectors); ++i) {
+      for (size_t j = i + 1; j < ::num_vectors(feature_vectors); ++j) {
         auto real_distance =
             l2_distance(feature_vectors[i], feature_vectors[j]);
         total_normalizer += real_distance;
@@ -1616,8 +1552,8 @@ class ivf_pq_index {
 
     score_type diff_max = 0.0;
     score_type vec_max = 0.0;
-    for (size_t i = 0; i < num_vectors(feature_vectors); ++i) {
-      for (size_t j = i + 1; j < num_vectors(feature_vectors); ++j) {
+    for (size_t i = 0; i < ::num_vectors(feature_vectors); ++i) {
+      for (size_t j = i + 1; j < ::num_vectors(feature_vectors); ++j) {
         auto real_distance =
             l2_distance(feature_vectors[i], feature_vectors[j]);
         total_normalizer += real_distance;
@@ -1669,6 +1605,9 @@ class ivf_pq_index {
     if (dimensions_ != rhs.dimensions_) {
       return false;
     }
+    if (num_vectors_ != rhs.num_vectors_) {
+      return false;
+    }
     if (num_partitions_ != rhs.num_partitions_) {
       return false;
     }
diff --git a/src/include/test/unit_api_ivf_pq_index.cc b/src/include/test/unit_api_ivf_pq_index.cc
index c9f05dbb4..6026e139a 100644
--- a/src/include/test/unit_api_ivf_pq_index.cc
+++ b/src/include/test/unit_api_ivf_pq_index.cc
@@ -212,46 +212,65 @@ TEST_CASE("create empty index and then train and query", "[api_ivf_pq_index]") {
     // loaded by URI.
     size_t top_k = 1;
     size_t nprobe = 1;
-    size_t upper_bound = 0;
     float k_factor = 2.f;
-    auto&& [scores_vector_array, ids_vector_array] = index.query(
-        QueryType::InfiniteRAM,
-        FeatureVectorArray(queries),
-        top_k,
-        nprobe,
-        upper_bound,
-        k_factor);
+    auto&& [scores, ids] =
+        index.query(FeatureVectorArray(queries), top_k, nprobe, k_factor);
     auto default_score = std::numeric_limits<float>::max();
     auto default_id = std::numeric_limits<uint32_t>::max();
     check_single_vector_equals(
-        scores_vector_array,
-        ids_vector_array,
+        scores,
+        ids,
         {default_score, default_score, default_score, default_score},
         {default_id, default_id, default_id, default_id});
   }
 
+  // Check IndexLoadStrategy.
   {
-    auto index = IndexIVFPQ(ctx, index_uri);
+    CHECK_THROWS(IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_OOC, 0));
+    CHECK_NOTHROW(IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_OOC, 10));
 
-    CHECK(index.feature_type_string() == feature_type);
-    CHECK(index.id_type_string() == id_type);
-    CHECK(index.partitioning_index_type_string() == partitioning_index_type);
+    CHECK_NOTHROW(IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_INDEX, 0));
+    CHECK_THROWS(IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_INDEX, 10));
+
+    CHECK_NOTHROW(IndexIVFPQ(
+        ctx, index_uri, IndexLoadStrategy::PQ_INDEX_AND_RERANKING_VECTORS, 0));
+    CHECK_THROWS(IndexIVFPQ(
+        ctx, index_uri, IndexLoadStrategy::PQ_INDEX_AND_RERANKING_VECTORS, 10));
+  }
+
+  // We can open, train, and query an infinite index.
+  {
+    std::unique_ptr<IndexIVFPQ> index;
+    SECTION("infinite") {
+      index = std::make_unique<IndexIVFPQ>(ctx, index_uri);
+    }
+    SECTION("finite") {
+      size_t upper_bound = 97;
+      index = std::make_unique<IndexIVFPQ>(
+          ctx, index_uri, IndexLoadStrategy::PQ_OOC, upper_bound);
+      CHECK(index->upper_bound() == upper_bound);
+    }
+
+    CHECK(index->feature_type_string() == feature_type);
+    CHECK(index->id_type_string() == id_type);
+    CHECK(index->partitioning_index_type_string() == partitioning_index_type);
 
     auto training = ColMajorMatrix<feature_type_type>{
         {{3, 1, 4}, {1, 5, 9}, {2, 6, 5}, {3, 5, 8}}};
     auto training_vector_array = FeatureVectorArray(training);
-    index.train(training_vector_array);
-    index.add(training_vector_array);
-    index.write_index(ctx, index_uri);
+    index->train(training_vector_array);
+    index->add(training_vector_array);
+    index->write_index(ctx, index_uri);
 
-    CHECK(index.feature_type_string() == feature_type);
-    CHECK(index.id_type_string() == id_type);
-    CHECK(index.partitioning_index_type_string() == partitioning_index_type);
+    CHECK(index->feature_type_string() == feature_type);
+    CHECK(index->id_type_string() == id_type);
+    CHECK(index->partitioning_index_type_string() == partitioning_index_type);
 
-    auto&& [scores_vector_array, ids_vector_array] =
-        index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1);
-    check_single_vector_equals(
-        scores_vector_array, ids_vector_array, {0, 0, 0, 0}, {0, 1, 2, 3});
+    size_t top_k = 1;
+    size_t nprobe = 1;
+    auto&& [scores, ids] =
+        index->query(FeatureVectorArray(queries), top_k, nprobe);
+    check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {0, 1, 2, 3});
   }
 }
 
@@ -327,44 +346,34 @@ TEST_CASE(
     auto queries = ColMajorMatrix<feature_type_type>{
         {{8, 6, 7}, {5, 3, 0}, {9, 5, 0}, {2, 7, 3}}};
 
-    auto&& [scores_vector_array, ids_vector_array] =
-        index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1);
-    check_single_vector_equals(
-        scores_vector_array, ids_vector_array, {0, 0, 0, 0}, {10, 11, 12, 13});
+    auto&& [scores, ids] = index.query(FeatureVectorArray(queries), 1, 1);
+    check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {10, 11, 12, 13});
   }
 
   {
-    auto index = IndexIVFPQ(ctx, index_uri);
     size_t top_k = 1;
     size_t nprobe = 1;
 
+    std::unique_ptr<IndexIVFPQ> index;
+    SECTION("infinite") {
+      index = std::make_unique<IndexIVFPQ>(ctx, index_uri);
+    }
+    SECTION("finite") {
+      size_t upper_bound = GENERATE(3, 4, 5, 100);
+      index = std::make_unique<IndexIVFPQ>(
+          ctx, index_uri, IndexLoadStrategy::PQ_OOC, upper_bound);
+      CHECK(index->upper_bound() == upper_bound);
+    }
+
+    CHECK(index->feature_type_string() == feature_type);
+    CHECK(index->id_type_string() == id_type);
+    CHECK(index->partitioning_index_type_string() == partitioning_index_type);
+
     auto queries = ColMajorMatrix<feature_type_type>{
         {{8, 6, 7}, {5, 3, 0}, {9, 5, 0}, {2, 7, 3}}};
-    for (auto upper_bound : {3, 4, 5, 100, 0}) {
-      auto&& [scores_vector_array, ids_vector_array] = index.query(
-          QueryType::FiniteRAM,
-          FeatureVectorArray(queries),
-          top_k,
-          nprobe,
-          upper_bound);
-      check_single_vector_equals(
-          scores_vector_array,
-          ids_vector_array,
-          {0, 0, 0, 0},
-          {10, 11, 12, 13});
-
-      auto&& [scores_vector_array_infinite, ids_vector_array_infinite] =
-          index.query(
-              QueryType::InfiniteRAM,
-              FeatureVectorArray(queries),
-              top_k,
-              nprobe);
-      check_single_vector_equals(
-          scores_vector_array_infinite,
-          ids_vector_array_infinite,
-          {0, 0, 0, 0},
-          {10, 11, 12, 13});
-    }
+    auto&& [scores, ids] =
+        index->query(FeatureVectorArray(queries), top_k, nprobe);
+    check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {10, 11, 12, 13});
   }
 }
 
@@ -424,19 +433,20 @@ TEST_CASE(
 
     auto query_set = FeatureVectorArray(ctx, siftsmall_query_uri);
     auto groundtruth_set = FeatureVectorArray(ctx, siftsmall_groundtruth_uri);
-    auto&& [scores, ids] =
-        index.query(QueryType::InfiniteRAM, query_set, k_nn, 5);
+    auto&& [scores, ids] = index.query(query_set, k_nn, 5);
     auto intersections = count_intersections(ids, groundtruth_set, k_nn);
     CHECK((intersections / static_cast<double>(num_vectors(ids) * k_nn)) > 0.7);
   }
 
   {
-    auto index = IndexIVFPQ(ctx, index_uri);
     float k_factor = 20.f;
-
     auto query_set = FeatureVectorArray(ctx, siftsmall_query_uri);
     auto groundtruth_set = FeatureVectorArray(ctx, siftsmall_groundtruth_uri);
 
+    auto index = IndexIVFPQ(ctx, index_uri);
+    auto index_finite =
+        IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_OOC, 450);
+
     for (auto [nprobe, expected_accuracy, expected_accuracy_with_reranking] :
          std::vector<std::tuple<int, float, float>>{
              {1, .4f, .45f},
@@ -444,15 +454,14 @@ TEST_CASE(
              {5, .7f, .7f},
              {10, .75f, .9f},
              {100, .8f, 1.f}}) {
-      auto&& [distances, ids] =
-          index.query(QueryType::InfiniteRAM, query_set, k_nn, nprobe);
+      auto&& [distances, ids] = index.query(query_set, k_nn, nprobe);
       auto intersections = count_intersections(ids, groundtruth_set, k_nn);
       CHECK(
           (intersections / static_cast<double>(num_vectors(ids) * k_nn)) >=
           expected_accuracy);
 
-      auto&& [distances_with_reranking, ids_with_reranking] = index.query(
-          QueryType::InfiniteRAM, query_set, k_nn, nprobe, 0, k_factor);
+      auto&& [distances_with_reranking, ids_with_reranking] =
+          index.query(query_set, k_nn, nprobe, k_factor);
       auto intersections_with_reranking =
           count_intersections(ids_with_reranking, groundtruth_set, k_nn);
       CHECK(
@@ -460,24 +469,16 @@ TEST_CASE(
            static_cast<double>(num_vectors(ids_with_reranking) * k_nn)) >=
           expected_accuracy_with_reranking);
 
-      for (auto upper_bound : {450, 1000, 0}) {
-        auto&& [distances_finite, ids_finite] = index.query(
-            QueryType::FiniteRAM, query_set, k_nn, nprobe, upper_bound);
-        CHECK(are_equal(ids_finite, ids));
-        CHECK(are_equal(distances_finite, distances));
-
-        auto&& [distances_finite_with_reranking, ids_finite_with_reranking] =
-            index.query(
-                QueryType::FiniteRAM,
-                query_set,
-                k_nn,
-                nprobe,
-                upper_bound,
-                k_factor);
-        CHECK(are_equal(ids_finite_with_reranking, ids_with_reranking));
-        CHECK(are_equal(
-            distances_finite_with_reranking, distances_with_reranking));
-      }
+      auto&& [distances_finite, ids_finite] =
+          index_finite.query(query_set, k_nn, nprobe);
+      CHECK(are_equal(ids_finite, ids));
+      CHECK(are_equal(distances_finite, distances));
+
+      auto&& [distances_finite_with_reranking, ids_finite_with_reranking] =
+          index_finite.query(query_set, k_nn, nprobe, k_factor);
+      CHECK(are_equal(ids_finite_with_reranking, ids_with_reranking));
+      CHECK(
+          are_equal(distances_finite_with_reranking, distances_with_reranking));
     }
   }
 }
@@ -546,7 +547,7 @@ TEST_CASE("build index and query", "[api_ivf_pq_index]") {
   a.train(training_set);
   a.add(training_set);
 
-  auto&& [s, t] = a.query(QueryType::InfiniteRAM, query_set, k_nn, 5);
+  auto&& [s, t] = a.query(query_set, k_nn, nprobe);
 
   auto intersections = count_intersections(t, groundtruth_set, k_nn);
   auto nt = num_vectors(t);
@@ -576,27 +577,32 @@ TEST_CASE("read index and query", "[api_ivf_pq_index]") {
   a.train(training_set);
   a.add(training_set);
   a.write_index(ctx, api_ivf_pq_index_uri);
-  auto b = IndexIVFPQ(ctx, api_ivf_pq_index_uri);
 
   auto query_set = FeatureVectorArray(ctx, siftsmall_query_uri);
   auto groundtruth_set = FeatureVectorArray(ctx, siftsmall_groundtruth_uri);
 
-  auto&& [scores_1, ids_1] =
-      a.query(QueryType::InfiniteRAM, query_set, k_nn, 5);
-  auto&& [scores_2, ids_2] =
-      b.query(QueryType::InfiniteRAM, query_set, k_nn, 5);
-  auto&& [scores_3, ids_3] =
-      b.query(QueryType::FiniteRAM, query_set, k_nn, 5, 500);
-  auto&& [scores_4, ids_4] =
-      b.query(QueryType::FiniteRAM, query_set, k_nn, 5, 0);
+  std::unique_ptr<IndexIVFPQ> b;
+  SECTION("infinite") {
+    b = std::make_unique<IndexIVFPQ>(ctx, api_ivf_pq_index_uri);
+  }
+  SECTION("finite") {
+    size_t upper_bound = GENERATE(500, 1000);
+    b = std::make_unique<IndexIVFPQ>(
+        ctx, api_ivf_pq_index_uri, IndexLoadStrategy::PQ_OOC, upper_bound);
+    CHECK(b->upper_bound() == upper_bound);
+  }
+
+  // Make sure the same query results are returned for two different indexes.
+  size_t nprobe = 5;
+  auto&& [scores_1, ids_1] = a.query(query_set, k_nn, nprobe);
+  auto&& [scores_2, ids_2] = b->query(query_set, k_nn, nprobe);
+
+  CHECK(are_equal(scores_1, scores_2));
+  CHECK(are_equal(ids_1, ids_2));
 
   auto intersections_1 = count_intersections(ids_1, groundtruth_set, k_nn);
   auto intersections_2 = count_intersections(ids_2, groundtruth_set, k_nn);
-  auto intersections_3 = count_intersections(ids_3, groundtruth_set, k_nn);
-  auto intersections_4 = count_intersections(ids_4, groundtruth_set, k_nn);
   CHECK(num_vectors(ids_1) == num_vectors(ids_2));
-  CHECK(num_vectors(ids_1) == num_vectors(ids_3));
-  CHECK(num_vectors(ids_1) == num_vectors(ids_4));
   auto recall =
       intersections_1 / static_cast<double>(num_vectors(ids_1) * k_nn);
   CHECK(recall > 0.7);
@@ -696,12 +702,11 @@ TEST_CASE("clear history with an open index", "[api_ivf_pq_index]") {
   index.add(training_vector_array);
   index.write_index(ctx, index_uri, TemporalPolicy(TimeTravel, 99));
 
-  auto&& [scores_vector_array, ids_vector_array] =
-      index.query(QueryType::InfiniteRAM, training_vector_array, 1, 1);
+  auto&& [scores, ids] = index.query(training_vector_array, 1, 1);
 
   auto second_index = IndexIVFPQ(ctx, index_uri);
-  auto&& [scores_vector_array_finite, ids_vector_array_finite] =
-      second_index.query(QueryType::FiniteRAM, training_vector_array, 1, 1);
+  auto&& [scores_finite, ids_finite] =
+      second_index.query(training_vector_array, 1, 1);
 
   // Here we check that we can clear_history() even with a index in memory. This
   // makes sure that every Array which IndexIVFPQ opens has been closed,
@@ -819,21 +824,20 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") {
     CHECK(index.id_type_string() == id_type);
     CHECK(index.partitioning_index_type_string() == partitioning_index_type);
 
+    size_t top_k = 1;
+    size_t nprobe = 1;
+
     auto queries = ColMajorMatrix<feature_type_type>{
         {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}}};
-    auto&& [scores_vector_array, ids_vector_array] = index.query(
-        QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 0);
-    check_single_vector_equals(
-        scores_vector_array, ids_vector_array, {0, 0, 0, 0}, {1, 2, 3, 4});
+    auto&& [scores, ids] =
+        index.query(FeatureVectorArray(queries), top_k, nprobe);
+    check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {1, 2, 3, 4});
 
-    auto&& [scores_vector_array_with_reranking, ids_vector_array_with_reranking] =
-        index.query(
-            QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 2.f);
+    float k_factor = 2.f;
+    auto&& [scores_with_reranking, ids_with_reranking] =
+        index.query(FeatureVectorArray(queries), top_k, nprobe, k_factor);
     check_single_vector_equals(
-        scores_vector_array_with_reranking,
-        ids_vector_array_with_reranking,
-        {0, 0, 0, 0},
-        {1, 2, 3, 4});
+        scores_with_reranking, ids_with_reranking, {0, 0, 0, 0}, {1, 2, 3, 4});
 
     auto typed_index = ivf_pq_index<
         feature_type_type,
@@ -861,21 +865,23 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") {
     // Check that we can do finite and infinite queries and then train + write
     // the index.
     {
+      size_t top_k = 1;
+      size_t nprobe = 1;
       auto queries = ColMajorMatrix<feature_type_type>{
           {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}}};
-      auto&& [scores_vector_array, ids_vector_array] = index.query(
-          QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1);
-      check_single_vector_equals(
-          scores_vector_array, ids_vector_array, {0, 0, 0, 0}, {1, 2, 3, 4});
-
-      auto&& [scores_vector_array_finite, ids_vector_array_finite] =
-          index.query(
-              QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1, 4);
-      check_single_vector_equals(
-          scores_vector_array_finite,
-          ids_vector_array_finite,
-          {0, 0, 0, 0},
-          {1, 2, 3, 4});
+      auto&& [scores, ids] =
+          index.query(FeatureVectorArray(queries), top_k, nprobe);
+      check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {1, 2, 3, 4});
+
+      {
+        size_t upper_bound = 5;
+        auto index_finite =
+            IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_OOC, upper_bound);
+        auto&& [scores_finite, ids_finite] =
+            index_finite.query(FeatureVectorArray(queries), top_k, nprobe);
+        check_single_vector_equals(
+            scores_finite, ids_finite, {0, 0, 0, 0}, {1, 2, 3, 4});
+      }
     }
 
     CHECK(index.temporal_policy().timestamp_start() == 0);
@@ -906,15 +912,14 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") {
     CHECK(index.id_type_string() == id_type);
     CHECK(index.partitioning_index_type_string() == partitioning_index_type);
 
+    size_t top_k = 1;
+    size_t nprobe = 1;
     auto queries = ColMajorMatrix<feature_type_type>{
         {{11, 11, 11}, {22, 22, 22}, {33, 33, 33}, {44, 44, 44}, {55, 55, 55}}};
-    auto&& [scores_vector_array, ids_vector_array] =
-        index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1);
+    auto&& [scores, ids] =
+        index.query(FeatureVectorArray(queries), top_k, nprobe);
     check_single_vector_equals(
-        scores_vector_array,
-        ids_vector_array,
-        {0, 0, 0, 0, 0},
-        {11, 22, 33, 44, 55});
+        scores, ids, {0, 0, 0, 0, 0}, {11, 22, 33, 44, 55});
 
     auto typed_index = ivf_pq_index<
         feature_type_type,
@@ -946,10 +951,15 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") {
         std::vector<uint64_t>{99, 100}.begin()));
   }
 
-  // Load it at timestamp 99 and make sure we can query it correctly.
-  {
+  // Load it at timestamp 99 and make sure we can query it correctly. Do this
+  // with both a finite and infinite index.
+  for (auto upper_bound : std::vector<size_t>{0, 4}) {
     auto temporal_policy = TemporalPolicy{TimeTravel, 99};
-    auto index = IndexIVFPQ(ctx, index_uri, temporal_policy);
+    auto load_strategy = upper_bound == 0 ? IndexLoadStrategy::PQ_INDEX :
+                                            IndexLoadStrategy::PQ_OOC;
+    auto index =
+        IndexIVFPQ(ctx, index_uri, load_strategy, upper_bound, temporal_policy);
+    CHECK(index.upper_bound() == upper_bound);
 
     CHECK(index.temporal_policy().timestamp_end() == 99);
     CHECK(index.feature_type_string() == feature_type);
@@ -959,41 +969,26 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") {
     CHECK(index.convergence_tolerance() == convergence_tolerance);
     CHECK(index.reassign_ratio() == reassign_ratio);
 
+    size_t top_k = 1;
+    size_t nprobe = 1;
     auto queries = ColMajorMatrix<feature_type_type>{
         {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}}};
-    auto&& [scores_vector_array_finite, ids_vector_array_finite] =
-        index.query(QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1);
-    check_single_vector_equals(
-        scores_vector_array_finite,
-        ids_vector_array_finite,
-        {0, 0, 0, 0},
-        {1, 2, 3, 4});
-    auto&& [scores_vector_array_finite_with_reranking, ids_vector_array_finite_with_reranking] =
-        index.query(
-            QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 2.f);
-    check_single_vector_equals(
-        scores_vector_array_finite_with_reranking,
-        ids_vector_array_finite_with_reranking,
-        {0, 0, 0, 0},
-        {1, 2, 3, 4});
 
-    auto&& [scores_vector_array, ids_vector_array] =
-        index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1);
-    check_single_vector_equals(
-        scores_vector_array, ids_vector_array, {0, 0, 0, 0}, {1, 2, 3, 4});
-    auto&& [scores_vector_array_with_reranking, ids_vector_array_with_reranking] =
-        index.query(
-            QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 2.f);
+    auto&& [scores, ids] =
+        index.query(FeatureVectorArray(queries), top_k, nprobe);
+    check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {1, 2, 3, 4});
+
+    float k_factor = 2.f;
+    auto&& [scores_with_reranking, ids_with_reranking] =
+        index.query(FeatureVectorArray(queries), top_k, nprobe, k_factor);
     check_single_vector_equals(
-        scores_vector_array_with_reranking,
-        ids_vector_array_with_reranking,
-        {0, 0, 0, 0},
-        {1, 2, 3, 4});
+        scores_with_reranking, ids_with_reranking, {0, 0, 0, 0}, {1, 2, 3, 4});
 
     auto typed_index = ivf_pq_index<
         feature_type_type,
         id_type_type,
-        partitioning_index_type_type>(ctx, index_uri, temporal_policy);
+        partitioning_index_type_type>(
+        ctx, index_uri, load_strategy, upper_bound, temporal_policy);
     CHECK(typed_index.group().get_dimensions() == dimensions);
     CHECK(typed_index.group().get_temp_size() == 0);
     CHECK(typed_index.group().get_history_index() == 0);
@@ -1022,9 +1017,14 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") {
 
   // Load it at timestamp 5 (before ingestion) and make sure we can query and be
   // returned fill values.
-  {
+  for (auto upper_bound : std::vector<size_t>{0, 4}) {
     auto temporal_policy = TemporalPolicy{TimeTravel, 0};
-    auto index = IndexIVFPQ(ctx, index_uri, temporal_policy);
+    auto load_strategy = upper_bound == 0 ? IndexLoadStrategy::PQ_INDEX :
+                                            IndexLoadStrategy::PQ_OOC;
+
+    auto index =
+        IndexIVFPQ(ctx, index_uri, load_strategy, upper_bound, temporal_policy);
+    CHECK(index.upper_bound() == upper_bound);
 
     CHECK(index.temporal_policy().timestamp_start() == 0);
     CHECK(index.temporal_policy().timestamp_end() == 0);
@@ -1035,43 +1035,30 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") {
     CHECK(index.convergence_tolerance() == convergence_tolerance);
     CHECK(index.reassign_ratio() == reassign_ratio);
 
+    size_t top_k = 1;
+    size_t nprobe = 1;
+    float k_factor = 1.9f;
     auto queries = ColMajorMatrix<feature_type_type>{{{1, 1, 1}}};
-    auto&& [scores_vector_array, ids_vector_array] =
-        index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1);
-    check_single_vector_equals(
-        scores_vector_array,
-        ids_vector_array,
-        {std::numeric_limits<float>::max()},
-        {std::numeric_limits<uint32_t>::max()});
-    auto&& [scores_vector_array_with_reranking, ids_vector_array_with_reranking] =
-        index.query(
-            QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 1.9f);
-    check_single_vector_equals(
-        scores_vector_array_with_reranking,
-        ids_vector_array_with_reranking,
-        {std::numeric_limits<float>::max()},
-        {std::numeric_limits<uint32_t>::max()});
-
-    auto&& [scores_vector_array_finite, ids_vector_array_finite] =
-        index.query(QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1, 4);
+    auto&& [scores, ids] =
+        index.query(FeatureVectorArray(queries), top_k, nprobe);
     check_single_vector_equals(
-        scores_vector_array_finite,
-        ids_vector_array_finite,
+        scores,
+        ids,
         {std::numeric_limits<float>::max()},
         {std::numeric_limits<uint32_t>::max()});
-    auto&& [scores_vector_array_finite_with_reranking, ids_vector_array_finite_with_reranking] =
-        index.query(
-            QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 4, 1.9f);
+    auto&& [scores_with_reranking, ids_with_reranking] =
+        index.query(FeatureVectorArray(queries), top_k, nprobe, k_factor);
     check_single_vector_equals(
-        scores_vector_array_finite_with_reranking,
-        ids_vector_array_finite_with_reranking,
+        scores_with_reranking,
+        ids_with_reranking,
         {std::numeric_limits<float>::max()},
         {std::numeric_limits<uint32_t>::max()});
 
     auto typed_index = ivf_pq_index<
         feature_type_type,
         id_type_type,
-        partitioning_index_type_type>(ctx, index_uri, temporal_policy);
+        partitioning_index_type_type>(
+        ctx, index_uri, load_strategy, upper_bound, temporal_policy);
     CHECK(typed_index.group().get_dimensions() == dimensions);
     CHECK(typed_index.group().get_temp_size() == 0);
     CHECK(typed_index.group().get_history_index() == 0);
@@ -1106,17 +1093,24 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") {
 
   // Clear history for <= 99 and then load at 99, then make sure we cannot
   // query.
-  {
-    IndexIVFPQ::clear_history(ctx, index_uri, 99);
-
+  IndexIVFPQ::clear_history(ctx, index_uri, 99);
+  for (auto upper_bound : std::vector<size_t>{0, 3}) {
     auto temporal_policy = TemporalPolicy{TimeTravel, 99};
-    auto index = IndexIVFPQ(ctx, index_uri, temporal_policy);
+    auto load_strategy = upper_bound == 0 ? IndexLoadStrategy::PQ_INDEX :
+                                            IndexLoadStrategy::PQ_OOC;
+
+    auto index =
+        IndexIVFPQ(ctx, index_uri, load_strategy, upper_bound, temporal_policy);
+    CHECK(index.upper_bound() == upper_bound);
 
     CHECK(index.temporal_policy().timestamp_end() == 99);
     CHECK(index.feature_type_string() == feature_type);
     CHECK(index.id_type_string() == id_type);
     CHECK(index.partitioning_index_type_string() == partitioning_index_type);
 
+    size_t top_k = 1;
+    size_t nprobe = 1;
+    float k_factor = 17.3f;
     auto queries = ColMajorMatrix<feature_type_type>{
         {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}}};
 
@@ -1127,39 +1121,22 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") {
     auto expected_ids =
         std::vector<uint32_t>{default_id, default_id, default_id, default_id};
 
-    auto&& [scores_vector_array_finite, ids_vector_array_finite] =
-        index.query(QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1, 0);
-    check_single_vector_equals(
-        scores_vector_array_finite,
-        ids_vector_array_finite,
-        expected_scores,
-        expected_ids);
-    auto&& [scores_vector_array_finite_with_reranking, ids_vector_array_finite_with_reranking] =
-        index.query(
-            QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 20.f);
-    check_single_vector_equals(
-        scores_vector_array_finite_with_reranking,
-        ids_vector_array_finite_with_reranking,
-        expected_scores,
-        expected_ids);
-
-    auto&& [scores_vector_array, ids_vector_array] =
-        index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1);
-    check_single_vector_equals(
-        scores_vector_array, ids_vector_array, expected_scores, expected_ids);
-    auto&& [scores_vector_array_with_reranking, ids_vector_array_with_reranking] =
-        index.query(
-            QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 20.f);
+    auto&& [scores, ids] =
+        index.query(FeatureVectorArray(queries), top_k, nprobe);
+    check_single_vector_equals(scores, ids, expected_scores, expected_ids);
+    auto&& [scores_with_reranking, ids_with_reranking] =
+        index.query(FeatureVectorArray(queries), top_k, nprobe, k_factor);
     check_single_vector_equals(
-        scores_vector_array_with_reranking,
-        ids_vector_array_with_reranking,
+        scores_with_reranking,
+        ids_with_reranking,
         expected_scores,
         expected_ids);
 
     auto typed_index = ivf_pq_index<
         feature_type_type,
         id_type_type,
-        partitioning_index_type_type>(ctx, index_uri, temporal_policy);
+        partitioning_index_type_type>(
+        ctx, index_uri, load_strategy, upper_bound, temporal_policy);
     CHECK(typed_index.group().get_dimensions() == dimensions);
     CHECK(typed_index.group().get_temp_size() == 0);
     CHECK(typed_index.group().get_history_index() == 0);
diff --git a/src/include/test/unit_ivf_pq_index.cc b/src/include/test/unit_ivf_pq_index.cc
index 2571d1808..32768e04d 100644
--- a/src/include/test/unit_ivf_pq_index.cc
+++ b/src/include/test/unit_ivf_pq_index.cc
@@ -319,13 +319,16 @@ TEST_CASE("ivf_index write and read", "[ivf_pq_index]") {
   std::iota(begin(ids), end(ids), 0);
   auto idx = ivf_pq_index<float, uint32_t, uint32_t>(
       nlist, num_subspaces, max_iterations);
+  CHECK(idx.num_vectors() == 0);
   idx.train_ivf(training_set, kmeans_init::kmeanspp);
   idx.add(training_set, ids);
+  CHECK(idx.num_vectors() == ::num_vectors(training_set));
   idx.write_index(ctx, ivf_index_uri);
+  CHECK(idx.num_vectors() == ::num_vectors(training_set));
 
   // Load it from URI.
   auto idx2 = ivf_pq_index<float, uint32_t, uint32_t>(ctx, ivf_index_uri);
-  idx2.read_index_infinite();
+  CHECK(idx2.num_vectors() == ::num_vectors(training_set));
 
   // Check that the two indexes are the same.
   CHECK(idx.compare_cached_metadata(idx2));
@@ -353,6 +356,7 @@ TEST_CASE(
       siftsmall_indices_type>(20, 16, 50);
   pq_idx.train_ivf(training_set);
   pq_idx.add(training_set, ids);
+  CHECK(pq_idx.num_vectors() == ::num_vectors(training_set));
 
   SECTION("pq_encoding") {
     auto avg_error = pq_idx.verify_pq_encoding(training_set);
@@ -471,7 +475,7 @@ TEMPLATE_TEST_CASE(
 }
 #endif
 
-TEST_CASE("build index and query in place", "[ivf_pq_index]") {
+TEST_CASE("build index and infinite query in place", "[ivf_pq_index]") {
   tiledb::Context ctx;
   // size_t nlist = GENERATE(1, 100);
   size_t nlist = 20;
@@ -494,17 +498,10 @@ TEST_CASE("build index and query in place", "[ivf_pq_index]") {
   auto top_k_ivf_scores = ColMajorMatrix<float>();
   auto top_k_ivf = ColMajorMatrix<siftsmall_ids_type>();
 
-  SECTION("infinite") {
-    std::tie(top_k_ivf_scores, top_k_ivf) =
-        idx.query_infinite_ram(query_set, k_nn, nprobe);
-  }
+  std::tie(top_k_ivf_scores, top_k_ivf) = idx.query(query_set, k_nn, nprobe);
 
-  SECTION("finite") {
-    std::tie(top_k_ivf_scores, top_k_ivf) =
-        idx.query_finite_ram(query_set, k_nn, nprobe);
-  }
-
-  // NOTE: Can be used to debug the results.debug_slice(top_k_ivf, "top_k_ivf");
+  // NOTE: Can be used to debug the results:
+  // debug_slice(top_k_ivf, "top_k_ivf");
   // debug_slice(top_k_ivf_scores, "top_k_ivf_scores");
   // debug_slice(groundtruth_set, "groundtruth_set");
 
@@ -531,11 +528,13 @@ TEST_CASE("ivf_pq_index write and read", "[ivf_pq_index]") {
   uint64_t write_timestamp = 1000;
   idx.write_index(
       ctx, ivf_pq_index_uri, TemporalPolicy(TimeTravel, write_timestamp));
+  CHECK(idx.num_vectors() == ::num_vectors(training_set));
 
   {
     // Load the index and check metadata.
     auto idx2 = ivf_pq_index<siftsmall_feature_type, siftsmall_ids_type>(
         ctx, ivf_pq_index_uri);
+    CHECK(idx2.num_vectors() == ::num_vectors(training_set));
     CHECK(idx2.group().get_dimensions() == sift_dimensions);
     CHECK(idx2.group().get_temp_size() == 0);
 
@@ -563,6 +562,7 @@ TEST_CASE("ivf_pq_index write and read", "[ivf_pq_index]") {
     auto idx2 = ivf_pq_index<siftsmall_feature_type, siftsmall_ids_type>(
         ctx, ivf_pq_index_uri);
 
+    CHECK(idx2.num_vectors() == 0);
     CHECK(idx2.group().get_dimensions() == sift_dimensions);
     CHECK(idx2.group().get_temp_size() == 0);
 
@@ -591,15 +591,15 @@ TEST_CASE("query empty index", "[ivf_pq_index]") {
   {
     auto data =
         ColMajorMatrixWithIds<siftsmall_feature_type>(dimensions, num_vectors);
-    debug_matrix_with_ids(data, "data");
     index.train(data, data.raveled_ids());
     index.add(data, data.raveled_ids());
+    CHECK(index.num_vectors() == num_vectors);
   }
 
   // We can query an empty index.
   {
     size_t k_nn = 1;
-    auto&& [scores, ids] = index.query_infinite_ram(queries, k_nn, nlist);
+    auto&& [scores, ids] = index.query(queries, k_nn, nlist);
     CHECK(_cpo::num_vectors(scores) == _cpo::num_vectors(queries));
     CHECK(_cpo::num_vectors(ids) == _cpo::num_vectors(queries));
     CHECK(_cpo::dimensions(scores) == k_nn);
@@ -620,10 +620,12 @@ TEST_CASE("query empty index", "[ivf_pq_index]") {
 
   // We can load and query an empty index.
   {
-    auto index2 = ivf_pq_index<siftsmall_feature_type, siftsmall_ids_type>(
-        ctx, ivf_index_uri);
+    auto index_infinite =
+        ivf_pq_index<siftsmall_feature_type, siftsmall_ids_type>(
+            ctx, ivf_index_uri);
+    CHECK(index_infinite.num_vectors() == num_vectors);
     size_t k_nn = 1;
-    auto&& [scores, ids] = index2.query_infinite_ram(queries, k_nn, nlist);
+    auto&& [scores, ids] = index_infinite.query(queries, k_nn, nlist);
     CHECK(_cpo::num_vectors(scores) == _cpo::num_vectors(queries));
     CHECK(_cpo::num_vectors(ids) == _cpo::num_vectors(queries));
     CHECK(_cpo::dimensions(scores) == k_nn);
@@ -632,10 +634,13 @@ TEST_CASE("query empty index", "[ivf_pq_index]") {
     CHECK(ids(0, 0) == std::numeric_limits<uint64_t>::max());
   }
   {
-    auto index2 = ivf_pq_index<siftsmall_feature_type, siftsmall_ids_type>(
-        ctx, ivf_index_uri);
+    size_t upper_bound = 11;
+    auto index_finite =
+        ivf_pq_index<siftsmall_feature_type, siftsmall_ids_type>(
+            ctx, ivf_index_uri, IndexLoadStrategy::PQ_OOC, upper_bound);
+    CHECK(index_finite.num_vectors() == num_vectors);
     size_t k_nn = 1;
-    auto&& [scores, ids] = index2.query_finite_ram(queries, k_nn, nlist, 9);
+    auto&& [scores, ids] = index_finite.query(queries, k_nn, nlist, 9);
     CHECK(_cpo::num_vectors(scores) == _cpo::num_vectors(queries));
     CHECK(_cpo::num_vectors(ids) == _cpo::num_vectors(queries));
     CHECK(_cpo::dimensions(scores) == k_nn);
@@ -670,6 +675,7 @@ TEST_CASE("query simple", "[ivf_pq_index]") {
   auto ivf_index_uri =
       (std::filesystem::temp_directory_path() / "ivf_index").string();
 
+  CHECK(index.num_vectors() == 0);
   CHECK(index.nlist() == nlist);
 
   // We can train, add, query, and then write the index.
@@ -680,13 +686,15 @@ TEST_CASE("query simple", "[ivf_pq_index]") {
     index.train(training, training.raveled_ids());
     index.add(training, training.raveled_ids());
 
+    CHECK(index.num_vectors() == ::num_vectors(training));
+
     size_t k_nn = 1;
     size_t nprobe = nlist;
     for (int i = 1; i <= 4; ++i) {
       auto value = static_cast<feature_type>(i);
       auto queries =
           ColMajorMatrix<feature_type>{{{value, value, value, value}}};
-      auto&& [scores, ids] = index.query_infinite_ram(queries, k_nn, nprobe);
+      auto&& [scores, ids] = index.query(queries, k_nn, nprobe);
       CHECK(scores(0, 0) == 0);
       CHECK(ids(0, 0) == i * 11);
     }
@@ -699,7 +707,20 @@ TEST_CASE("query simple", "[ivf_pq_index]") {
 
   // We can load and query the index.
   {
-    auto index2 = ivf_pq_index<feature_type, id_type>(ctx, ivf_index_uri);
+    std::unique_ptr<ivf_pq_index<feature_type, id_type>> index2;
+    SECTION("infinite") {
+      index2 = std::make_unique<ivf_pq_index<feature_type, id_type>>(
+          ctx, ivf_index_uri);
+      CHECK(index2->upper_bound() == 0);
+    }
+    SECTION("finite") {
+      size_t upper_bound = 97;
+      index2 = std::make_unique<ivf_pq_index<feature_type, id_type>>(
+          ctx, ivf_index_uri, IndexLoadStrategy::PQ_OOC, upper_bound);
+      CHECK(index2->upper_bound() == upper_bound);
+    }
+    CHECK(index2->num_vectors() == 4);
+
     size_t k_nn = 1;
     size_t nprobe = nlist;
     for (int i = 1; i <= 4; ++i) {
@@ -707,10 +728,10 @@ TEST_CASE("query simple", "[ivf_pq_index]") {
       auto queries =
           ColMajorMatrix<feature_type>{{{value, value, value, value}}};
       auto&& [scores_from_finite, ids_from_finite] =
-          index2.query_finite_ram(queries, k_nn, nprobe, 5);
+          index2->query(queries, k_nn, nprobe, 5);
       CHECK(scores_from_finite(0, 0) == 0);
       CHECK(ids_from_finite(0, 0) == i * 11);
-      auto&& [scores, ids] = index2.query_infinite_ram(queries, k_nn, nprobe);
+      auto&& [scores, ids] = index2->query(queries, k_nn, nprobe);
       CHECK(scores(0, 0) == 0);
       CHECK(ids(0, 0) == i * 11);
     }
@@ -730,7 +751,7 @@ TEST_CASE("k_factor", "[ivf_pq_index]") {
   float reassign_ratio = 0.09f;
 
   size_t nprobe = nlist;
-  size_t k_nn = 20;
+  size_t k_nn = 40;
   float k_factor = 2.f;
   size_t upper_bound = 350;
 
@@ -747,11 +768,12 @@ TEST_CASE("k_factor", "[ivf_pq_index]") {
       DistanceMetric::L2);
   auto ivf_index_uri =
       (std::filesystem::temp_directory_path() / "ivf_index").string();
-
+  CHECK(index.num_vectors() == 0);
   CHECK(index.nlist() == nlist);
 
   // We can train, add, query, and then write the index.
   std::vector<id_type> ids(num_vectors);
+  size_t num_equal_no_reranking = 0;
   {
     std::vector<std::vector<feature_type>> vectors;
     for (int i = 1; i <= num_vectors; ++i) {
@@ -766,22 +788,26 @@ TEST_CASE("k_factor", "[ivf_pq_index]") {
     index.train(training, training.raveled_ids());
     index.add(training, training.raveled_ids());
 
+    CHECK(index.num_vectors() == ::num_vectors(training));
+
     auto queries = ColMajorMatrix<feature_type>{{{1, 1, 1, 1}}};
     {
       auto&& [scores_reranking, ids_reranking] =
-          index.query_infinite_ram(queries, k_nn, nprobe, k_factor);
+          index.query(queries, k_nn, nprobe, k_factor);
       CHECK(
           k_nn == check_single_vector_num_equal<uint32_t>(ids_reranking, ids));
       CHECK(scores_reranking(0, 0) == 0);
 
       auto&& [scores_no_reranking, ids_no_reranking] =
-          index.query_infinite_ram(queries, k_nn, nprobe, 1.f);
-      auto num_equal_no_reranking =
+          index.query(queries, k_nn, nprobe, 1.f);
+      num_equal_no_reranking =
           check_single_vector_num_equal(ids_no_reranking, ids);
       CHECK(num_equal_no_reranking != k_nn);
-      CHECK(num_equal_no_reranking > 5);
+      CHECK(num_equal_no_reranking >= 5);
     }
 
+    CHECK(index.num_vectors() == ::num_vectors(training));
+
     if (vfs.is_dir(ivf_index_uri)) {
       vfs.remove_dir(ivf_index_uri);
     }
@@ -790,39 +816,46 @@ TEST_CASE("k_factor", "[ivf_pq_index]") {
 
   // We can open the index by URI and query.
   {
-    auto index2 = ivf_pq_index<feature_type, id_type>(ctx, ivf_index_uri);
     auto queries = ColMajorMatrix<feature_type>{{{1, 1, 1, 1}}};
 
-    // query_infinite_ram.
+    // infinite.
     {
+      auto index_infinite =
+          ivf_pq_index<feature_type, id_type>(ctx, ivf_index_uri);
+      CHECK(index_infinite.num_vectors() == num_vectors);
+      CHECK(index_infinite.upper_bound() == 0);
       auto&& [scores_reranking, ids_reranking] =
-          index2.query_infinite_ram(queries, k_nn, nprobe, k_factor);
+          index_infinite.query(queries, k_nn, nprobe, k_factor);
       CHECK(
           k_nn == check_single_vector_num_equal<uint32_t>(ids_reranking, ids));
       CHECK(scores_reranking(0, 0) == 0);
 
       auto&& [scores_no_reranking, ids_no_reranking] =
-          index2.query_infinite_ram(queries, k_nn, nprobe, 1.f);
-      auto num_equal_no_reranking =
-          check_single_vector_num_equal(ids_no_reranking, ids);
-      CHECK(num_equal_no_reranking != k_nn);
-      CHECK(num_equal_no_reranking > 2);
+          index_infinite.query(queries, k_nn, nprobe, 1.f);
+      CHECK(
+          num_equal_no_reranking ==
+          check_single_vector_num_equal(ids_no_reranking, ids));
     }
 
-    // query_finite_ram.
+    // finite.
     {
+      size_t upper_bound = 300;
+      auto index_finite = ivf_pq_index<feature_type, id_type>(
+          ctx, ivf_index_uri, IndexLoadStrategy::PQ_OOC, upper_bound);
+      CHECK(index_finite.num_vectors() == num_vectors);
+      CHECK(index_finite.upper_bound() == upper_bound);
+
       auto&& [scores_reranking, ids_reranking] =
-          index2.query_finite_ram(queries, k_nn, nprobe, upper_bound, k_factor);
+          index_finite.query(queries, k_nn, nprobe, k_factor);
       CHECK(
           k_nn == check_single_vector_num_equal<uint32_t>(ids_reranking, ids));
       CHECK(scores_reranking(0, 0) == 0);
 
       auto&& [scores_no_reranking, ids_no_reranking] =
-          index2.query_finite_ram(queries, k_nn, nprobe, upper_bound, 1.f);
-      auto num_equal_no_reranking =
-          check_single_vector_num_equal(ids_no_reranking, ids);
-      CHECK(num_equal_no_reranking != k_nn);
-      CHECK(num_equal_no_reranking > 5);
+          index_finite.query(queries, k_nn, nprobe, 1.f);
+      CHECK(
+          num_equal_no_reranking ==
+          check_single_vector_num_equal(ids_no_reranking, ids));
     }
   }
 }
@@ -875,29 +908,37 @@ TEST_CASE("ivf_pq_index query index written twice", "[ivf_pq_index]") {
 
   // Load the index and query.
   {
-    auto index = ivf_pq_index<
+    std::unique_ptr<ivf_pq_index<
         feature_type_type,
         id_type_type,
-        partitioning_index_type_type>(ctx, index_uri);
+        partitioning_index_type_type>>
+        index;
+    SECTION("infinite") {
+      index = std::make_unique<ivf_pq_index<
+          feature_type_type,
+          id_type_type,
+          partitioning_index_type_type>>(ctx, index_uri);
+      CHECK(index->upper_bound() == 0);
+    }
+    SECTION("finite") {
+      size_t upper_bound = 5;
+      index = std::make_unique<ivf_pq_index<
+          feature_type_type,
+          id_type_type,
+          partitioning_index_type_type>>(
+          ctx, index_uri, IndexLoadStrategy::PQ_OOC, upper_bound);
+      CHECK(index->upper_bound() == upper_bound);
+    }
     auto queries = ColMajorMatrix<feature_type_type>{
         {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}}};
 
-    auto&& [scores_from_finite, ids_from_finite] =
-        index.query_finite_ram(queries, 1, n_list, 5);
-    CHECK(std::equal(
-        scores_from_finite.data(),
-        scores_from_finite.data() + 4,
-        std::vector<float>{0, 0, 0, 0}.begin()));
-    CHECK(std::equal(
-        ids_from_finite.data(),
-        ids_from_finite.data() + 4,
-        std::vector<uint32_t>{1, 2, 3, 4}.begin()));
-
-    auto&& [scores, ids] = index.query_infinite_ram(queries, 1, n_list);
+    auto&& [scores, ids] = index->query(queries, 1, n_list, 5);
     CHECK(std::equal(
         scores.data(),
         scores.data() + 4,
         std::vector<float>{0, 0, 0, 0}.begin()));
+    CHECK(std::equal(
+        ids.data(), ids.data() + 4, std::vector<uint32_t>{1, 2, 3, 4}.begin()));
   }
 }