From 35acf351d8fcbddddb58476cdc7de3012a13cef4 Mon Sep 17 00:00:00 2001 From: Paris Morgan Date: Mon, 16 Sep 2024 16:04:07 +0200 Subject: [PATCH] Update IVF_PQ to set memory_budget in constructor, support preload feature_vectors and metadata only modes (#518) --- apis/python/src/tiledb/vector_search/index.py | 5 + .../src/tiledb/vector_search/ivf_pq_index.py | 49 ++- .../python/src/tiledb/vector_search/module.cc | 9 + .../vector_search/type_erased_module.cc | 66 ++- apis/python/test/local-benchmarks.py | 24 +- apis/python/test/test_cloud.py | 1 + apis/python/test/test_type_erased_module.py | 48 +- src/include/api/ivf_pq_index.h | 66 +-- src/include/index/index_defs.h | 10 + src/include/index/ivf_pq_index.h | 363 +++++++--------- src/include/test/unit_api_ivf_pq_index.cc | 409 +++++++++--------- src/include/test/unit_ivf_pq_index.cc | 159 ++++--- 12 files changed, 620 insertions(+), 589 deletions(-) diff --git a/apis/python/src/tiledb/vector_search/index.py b/apis/python/src/tiledb/vector_search/index.py index 4a2d3c014..e99765b0b 100644 --- a/apis/python/src/tiledb/vector_search/index.py +++ b/apis/python/src/tiledb/vector_search/index.py @@ -192,6 +192,7 @@ def _query_with_driver( def query_udf(index_type, index_open_kwargs, query_kwargs): from tiledb.vector_search.flat_index import FlatIndex from tiledb.vector_search.ivf_flat_index import IVFFlatIndex + from tiledb.vector_search.ivf_pq_index import IVFPQIndex from tiledb.vector_search.vamana_index import VamanaIndex # Open index @@ -199,8 +200,12 @@ def query_udf(index_type, index_open_kwargs, query_kwargs): index = FlatIndex(**index_open_kwargs) elif index_type == "IVF_FLAT": index = IVFFlatIndex(**index_open_kwargs) + elif index_type == "IVF_PQ": + index = IVFPQIndex(**index_open_kwargs) elif index_type == "VAMANA": index = VamanaIndex(**index_open_kwargs) + else: + raise ValueError(f"Unsupported index_type: {index_type}") # Query index return index.query(**query_kwargs) diff --git a/apis/python/src/tiledb/vector_search/ivf_pq_index.py b/apis/python/src/tiledb/vector_search/ivf_pq_index.py index cbcd4d48e..78a04f7e6 100644 --- a/apis/python/src/tiledb/vector_search/ivf_pq_index.py +++ b/apis/python/src/tiledb/vector_search/ivf_pq_index.py @@ -37,6 +37,12 @@ class IVFPQIndex(index.Index): If not provided, all index data are loaded in main memory. Otherwise, no index data are loaded in main memory and this memory budget is applied during queries. + preload_k_factor_vectors: bool + When using `k_factor` in a query, we first query for `k_factor * k` pq-encoded vectors, + and then do a re-ranking step using the original input vectors for the top `k` vectors. + If `True`, we will load all the input vectors in main memory. This can only be used with + `memory_budget` set to `-1`, and is useful when the input vectors are small enough to fit in + memory and you want to speed up re-ranking. open_for_remote_query_execution: bool If `True`, do not load any index data in main memory locally, and instead load index data in the TileDB Cloud taskgraph created when a non-`None` `driver_mode` is passed to `query()`. If `False`, load index data in main memory locally. Note that you can still use a taskgraph for query execution, you'll just end up loading the data both on your local machine and in the cloud taskgraph. @@ -48,15 +54,26 @@ def __init__( config: Optional[Mapping[str, Any]] = None, timestamp=None, memory_budget: int = -1, + preload_k_factor_vectors: bool = False, open_for_remote_query_execution: bool = False, group: tiledb.Group = None, **kwargs, ): + if preload_k_factor_vectors and memory_budget != -1: + raise ValueError( + "preload_k_factor_vectors can only be used with memory_budget set to -1." + ) + if preload_k_factor_vectors and open_for_remote_query_execution: + raise ValueError( + "preload_k_factor_vectors can only be used with open_for_remote_query_execution set to False." + ) + self.index_open_kwargs = { "uri": uri, "config": config, "timestamp": timestamp, "memory_budget": memory_budget, + "preload_k_factor_vectors": preload_k_factor_vectors, } self.index_open_kwargs.update(kwargs) self.index_type = INDEX_TYPE @@ -67,8 +84,21 @@ def __init__( open_for_remote_query_execution=open_for_remote_query_execution, group=group, ) - # TODO(SC-48710): Add support for `open_for_remote_query_execution`. We don't leave `self.index`` as `None` because we need to be able to call index.dimensions(). - self.index = vspy.IndexIVFPQ(self.ctx, uri, to_temporal_policy(timestamp)) + strategy = ( + vspy.IndexLoadStrategy.PQ_INDEX_AND_RERANKING_VECTORS + if preload_k_factor_vectors + else vspy.IndexLoadStrategy.PQ_OOC + if open_for_remote_query_execution + or (memory_budget != -1 and memory_budget != 0) + else vspy.IndexLoadStrategy.PQ_INDEX + ) + self.index = vspy.IndexIVFPQ( + self.ctx, + uri, + strategy, + 0 if memory_budget == -1 else memory_budget, + to_temporal_policy(timestamp), + ) self.db_uri = self.group[ storage_formats[self.storage_version]["PARTS_ARRAY_NAME"] ].uri @@ -127,16 +157,9 @@ def query_internal( if not queries.flags.f_contiguous: queries = queries.copy(order="F") queries_feature_vector_array = vspy.FeatureVectorArray(queries) - - if self.memory_budget == -1: - distances, ids = self.index.query_infinite_ram( - queries_feature_vector_array, k, nprobe, k_factor - ) - else: - distances, ids = self.index.query_finite_ram( - queries_feature_vector_array, k, nprobe, self.memory_budget, k_factor - ) - + distances, ids = self.index.query( + queries_feature_vector_array, k=k, nprobe=nprobe, k_factor=k_factor + ) return np.array(distances, copy=False), np.array(ids, copy=False) @@ -203,7 +226,7 @@ def create( id_type=np.dtype(np.uint64).name, partitioning_index_type=np.dtype(np.uint64).name, dimensions=dimensions, - n_list=partitions if (partitions is not None and partitions is not -1) else 0, + n_list=partitions if (partitions is not None and partitions != -1) else 0, num_subspaces=num_subspaces, distance_metric=int(distance_metric), ) diff --git a/apis/python/src/tiledb/vector_search/module.cc b/apis/python/src/tiledb/vector_search/module.cc index f32b045e5..496c047c4 100644 --- a/apis/python/src/tiledb/vector_search/module.cc +++ b/apis/python/src/tiledb/vector_search/module.cc @@ -18,6 +18,7 @@ #include "detail/linalg/tdb_matrix.h" #include "detail/linalg/tdb_partitioned_matrix.h" #include "detail/time/temporal_policy.h" +#include "index/index_defs.h" #include "utils/seeder.h" namespace py = pybind11; @@ -1096,6 +1097,14 @@ PYBIND11_MODULE(_tiledbvspy, m) { .value("L2", DistanceMetric::L2) .export_values(); + py::enum_(m, "IndexLoadStrategy") + .value("PQ_OOC", IndexLoadStrategy::PQ_OOC) + .value("PQ_INDEX", IndexLoadStrategy::PQ_INDEX) + .value( + "PQ_INDEX_AND_RERANKING_VECTORS", + IndexLoadStrategy::PQ_INDEX_AND_RERANKING_VECTORS) + .export_values(); + /* === Module inits === */ init_kmeans(m); diff --git a/apis/python/src/tiledb/vector_search/type_erased_module.cc b/apis/python/src/tiledb/vector_search/type_erased_module.cc index 0beeaf84e..031235a01 100644 --- a/apis/python/src/tiledb/vector_search/type_erased_module.cc +++ b/apis/python/src/tiledb/vector_search/type_erased_module.cc @@ -368,10 +368,8 @@ void init_type_erased_module(py::module_& m) { .def("dimensions", &IndexFlatL2::dimensions) .def( "query", - [](IndexFlatL2& index, - const FeatureVectorArray& vectors, - size_t top_k) { - auto r = index.query(vectors, top_k); + [](IndexFlatL2& index, const FeatureVectorArray& vectors, size_t k) { + auto r = index.query(vectors, k); return make_python_pair(std::move(r)); }); @@ -422,13 +420,13 @@ void init_type_erased_module(py::module_& m) { "query", [](IndexVamana& index, const FeatureVectorArray& vectors, - size_t top_k, + size_t k, uint32_t l_search) { - auto r = index.query(vectors, top_k, l_search); + auto r = index.query(vectors, k, l_search); return make_python_pair(std::move(r)); }, py::arg("vectors"), - py::arg("top_k"), + py::arg("k"), py::arg("l_search")) .def( "write_index", @@ -467,12 +465,21 @@ void init_type_erased_module(py::module_& m) { [](IndexIVFPQ& instance, const tiledb::Context& ctx, const std::string& group_uri, + IndexLoadStrategy index_load_strategy, + size_t memory_budget, std::optional temporal_policy) { - new (&instance) IndexIVFPQ(ctx, group_uri, temporal_policy); + new (&instance) IndexIVFPQ( + ctx, + group_uri, + index_load_strategy, + memory_budget, + temporal_policy); }, py::keep_alive<1, 2>(), // IndexIVFPQ should keep ctx alive. py::arg("ctx"), py::arg("group_uri"), + py::arg("index_load_strategy") = IndexLoadStrategy::PQ_INDEX, + py::arg("memory_budget") = 0, py::arg("temporal_policy") = std::nullopt) .def( "__init__", @@ -494,41 +501,18 @@ void init_type_erased_module(py::module_& m) { }, py::arg("vectors")) .def( - "query_infinite_ram", - [](IndexIVFPQ& index, - const FeatureVectorArray& vectors, - size_t top_k, - size_t nprobe, - float k_factor) { - auto r = index.query( - QueryType::InfiniteRAM, vectors, top_k, nprobe, 0, k_factor); - return make_python_pair(std::move(r)); - }, - py::arg("vectors"), - py::arg("top_k"), - py::arg("nprobe"), - py::arg("k_factor") = 1.f) - .def( - "query_finite_ram", + "query", [](IndexIVFPQ& index, const FeatureVectorArray& vectors, - size_t top_k, + size_t k, size_t nprobe, - size_t memory_budget, float k_factor) { - auto r = index.query( - QueryType::FiniteRAM, - vectors, - top_k, - nprobe, - memory_budget, - k_factor); + auto r = index.query(vectors, k, nprobe, k_factor); return make_python_pair(std::move(r)); }, py::arg("vectors"), - py::arg("top_k"), + py::arg("k"), py::arg("nprobe"), - py::arg("memory_budget"), py::arg("k_factor") = 1.f) .def( "write_index", @@ -603,24 +587,24 @@ void init_type_erased_module(py::module_& m) { "query_infinite_ram", [](IndexIVFFlat& index, const FeatureVectorArray& query, - size_t top_k, + size_t k, size_t nprobe) { - auto r = index.query_infinite_ram(query, top_k, nprobe); + auto r = index.query_infinite_ram(query, k, nprobe); return make_python_pair(std::move(r)); - }) // , py::arg("vectors"), py::arg("top_k") = 1, py::arg("nprobe") + }) // , py::arg("vectors"), py::arg("k") = 1, py::arg("nprobe") // = 10) .def( "query_finite_ram", [](IndexIVFFlat& index, const FeatureVectorArray& query, - size_t top_k, + size_t k, size_t nprobe, size_t upper_bound) { - auto r = index.query_finite_ram(query, top_k, nprobe, upper_bound); + auto r = index.query_finite_ram(query, k, nprobe, upper_bound); return make_python_pair(std::move(r)); }, py::arg("vectors"), - py::arg("top_k") = 1, + py::arg("k") = 1, py::arg("nprobe") = 10, py::arg("upper_bound") = 0) .def("feature_type_string", &IndexIVFFlat::feature_type_string) diff --git a/apis/python/test/local-benchmarks.py b/apis/python/test/local-benchmarks.py index 048a35e61..0c2628306 100644 --- a/apis/python/test/local-benchmarks.py +++ b/apis/python/test/local-benchmarks.py @@ -22,7 +22,10 @@ from tiledb.vector_search.index import Index from tiledb.vector_search.ingestion import TrainingSamplingPolicy from tiledb.vector_search.ingestion import ingest +from tiledb.vector_search.ivf_flat_index import IVFFlatIndex +from tiledb.vector_search.ivf_pq_index import IVFPQIndex from tiledb.vector_search.utils import load_fvecs +from tiledb.vector_search.vamana_index import VamanaIndex class RemoteURIType(Enum): @@ -252,7 +255,7 @@ def save_charts(self): plt.xlabel("Average Query Accuracy") plt.ylabel("Time (seconds)") plt.title(f"Ingestion Time vs Average Query Accuracy {sift_string()}") - for idx, timer in self.timers: + for idx, timer in enumerate(self.timers): timer.add_data_to_ingestion_time_vs_average_query_accuracy( markers[idx % len(markers)] ) @@ -265,7 +268,7 @@ def save_charts(self): plt.xlabel("Accuracy") plt.ylabel("Time (seconds)") plt.title(f"Query Time vs Accuracy {sift_string()}") - for idx, timer in self.timers: + for idx, timer in enumerate(self.timers): timer.add_data_to_query_time_vs_accuracy(markers[idx % len(markers)]) plt.legend() plt.savefig(os.path.join(RESULTS_DIR, "query_time_vs_accuracy.png")) @@ -295,6 +298,7 @@ def download_and_extract(url, download_path, extract_path): def get_uri(tag): + global config index_name = f"index_{tag.replace('=', '_')}" index_uri = "" if REMOTE_URI_TYPE == RemoteURIType.LOCAL: @@ -346,7 +350,7 @@ def benchmark_ivf_flat(): index_uri = get_uri(tag) timer.start(tag, TimerMode.INGESTION) - index = ingest( + ingest( index_type=index_type, index_uri=index_uri, source_uri=SIFT_BASE_PATH, @@ -356,6 +360,10 @@ def benchmark_ivf_flat(): ) ingest_time = timer.stop(tag, TimerMode.INGESTION) + # The index returned by ingest() automatically has memory_budget=1000000 set. Open + # a fresh index so it's clear what config is being used. + index = IVFFlatIndex(index_uri, config) + for nprobe in [1, 2, 3, 4, 5, 10, 20]: timer.start(tag, TimerMode.QUERY) _, result = index.query(queries, k=k, nprobe=nprobe) @@ -386,7 +394,7 @@ def benchmark_vamana(): index_uri = get_uri(tag) timer.start(tag, TimerMode.INGESTION) - index = ingest( + ingest( index_type=index_type, index_uri=index_uri, source_uri=SIFT_BASE_PATH, @@ -397,6 +405,8 @@ def benchmark_vamana(): ) ingest_time = timer.stop(tag, TimerMode.INGESTION) + index = VamanaIndex(index_uri, config) + for l_search in [k, k + 50, k + 100, k + 200, k + 400]: timer.start(tag, TimerMode.QUERY) _, result = index.query(queries, k=k, l_search=l_search) @@ -429,7 +439,7 @@ def benchmark_ivf_pq(): index_uri = get_uri(tag) timer.start(tag, TimerMode.INGESTION) - index = ingest( + ingest( index_type=index_type, index_uri=index_uri, source_uri=SIFT_BASE_PATH, @@ -440,6 +450,10 @@ def benchmark_ivf_pq(): ) ingest_time = timer.stop(tag, TimerMode.INGESTION) + # The index returned by ingest() automatically has memory_budget=1000000 set. Open + # a fresh index so it's clear what config is being used. + index = IVFPQIndex(index_uri, config) + for nprobe in [5, 10, 20, 40, 60]: timer.start(tag, TimerMode.QUERY) _, result = index.query( diff --git a/apis/python/test/test_cloud.py b/apis/python/test/test_cloud.py index 0a7e06416..099756602 100644 --- a/apis/python/test/test_cloud.py +++ b/apis/python/test/test_cloud.py @@ -63,6 +63,7 @@ def run_cloud_test(self, index_uri, index_type, index_class): input_vectors_per_work_item=5000, config=tiledb.cloud.Config().dict(), mode=Mode.BATCH, + verbose=True, ) tiledb_index_uri = groups.info(index_uri).tiledb_uri diff --git a/apis/python/test/test_type_erased_module.py b/apis/python/test/test_type_erased_module.py index 207364a78..f4e858185 100644 --- a/apis/python/test/test_type_erased_module.py +++ b/apis/python/test/test_type_erased_module.py @@ -499,19 +499,27 @@ def test_construct_IndexIVFPQ_with_empty_vector(tmp_path): a.train(training_set) a.add(training_set) - _, ids = a.query_infinite_ram(query_set, k_nn, nprobe) - assert recall(ids, groundtruth_set, k_nn) >= 0.87 + _, ids = a.query(query_set, k_nn, nprobe) + accuracy = recall(ids, groundtruth_set, k_nn) + assert accuracy >= 0.87 index_uri = os.path.join(tmp_path, "array") a.write_index(ctx, index_uri) - b = vspy.IndexIVFPQ(ctx, index_uri) + b_infinite = vspy.IndexIVFPQ(ctx, index_uri) + _, ids_infinite = b_infinite.query(query_set, k_nn, nprobe) + accuracy_infinite = recall(ids_infinite, groundtruth_set, k_nn) + assert accuracy == accuracy_infinite - _, ids = b.query_infinite_ram(query_set, k_nn, nprobe) - assert recall(ids, groundtruth_set, k_nn) >= 0.87 - - _, ids = b.query_finite_ram(query_set, k_nn, nprobe, 500) - assert recall(ids, groundtruth_set, k_nn) >= 0.87 + b_finite = vspy.IndexIVFPQ( + ctx, + index_uri, + index_load_strategy=vspy.IndexLoadStrategy.PQ_OOC, + memory_budget=1000, + ) + _, ids_finite = b_finite.query(query_set, k_nn, nprobe) + accuracy_finite = recall(ids_finite, groundtruth_set, k_nn) + assert accuracy == accuracy_finite def test_inplace_build_query_IndexIVFPQ(tmp_path): @@ -535,19 +543,27 @@ def test_inplace_build_query_IndexIVFPQ(tmp_path): a.train(training_set) a.add(training_set) - _, ids = a.query_infinite_ram(query_set, k_nn, nprobe) - assert recall(ids, groundtruth_set, k_nn) >= 0.87 + _, ids = a.query(query_set, k_nn, nprobe) + accuracy = recall(ids, groundtruth_set, k_nn) + assert accuracy >= 0.87 index_uri = os.path.join(tmp_path, "array") a.write_index(ctx, index_uri) - b = vspy.IndexIVFPQ(ctx, index_uri) + b_infinite = vspy.IndexIVFPQ(ctx, index_uri) + _, ids_infinite = b_infinite.query(query_set, k_nn, nprobe) + accuracy_infinite = recall(ids_infinite, groundtruth_set, k_nn) + assert accuracy == accuracy_infinite - _, ids = b.query_infinite_ram(query_set, k_nn, nprobe) - assert recall(ids, groundtruth_set, k_nn) >= 0.87 - - _, ids = b.query_finite_ram(query_set, k_nn, nprobe, 500) - assert recall(ids, groundtruth_set, k_nn) >= 0.87 + b_finite = vspy.IndexIVFPQ( + ctx, + index_uri, + index_load_strategy=vspy.IndexLoadStrategy.PQ_OOC, + memory_budget=999, + ) + _, ids_finite = b_finite.query(query_set, k_nn, nprobe) + accuracy_finite = recall(ids_finite, groundtruth_set, k_nn) + assert accuracy == accuracy_finite def test_construct_IndexIVFFlat(): diff --git a/src/include/api/ivf_pq_index.h b/src/include/api/ivf_pq_index.h index 5703a61b0..20959c798 100644 --- a/src/include/api/ivf_pq_index.h +++ b/src/include/api/ivf_pq_index.h @@ -144,6 +144,8 @@ class IndexIVFPQ { IndexIVFPQ( const tiledb::Context& ctx, const URI& group_uri, + IndexLoadStrategy index_load_strategy = IndexLoadStrategy::PQ_INDEX, + size_t upper_bound = 0, std::optional temporal_policy = std::nullopt) { read_types( ctx, @@ -157,13 +159,15 @@ class IndexIVFPQ { if (uri_dispatch_table.find(type) == uri_dispatch_table.end()) { throw std::runtime_error("Unsupported datatype combination"); } - index_ = uri_dispatch_table.at(type)(ctx, group_uri, temporal_policy); + index_ = uri_dispatch_table.at(type)( + ctx, group_uri, index_load_strategy, upper_bound, temporal_policy); n_list_ = index_->nlist(); num_subspaces_ = index_->num_subspaces(); max_iterations_ = index_->max_iterations(); convergence_tolerance_ = index_->convergence_tolerance(); reassign_ratio_ = index_->reassign_ratio(); distance_metric_ = index_->distance_metric(); + upper_bound_ = index_->upper_bound(); if (dimensions_ != 0 && dimensions_ != index_->dimensions()) { throw std::runtime_error( @@ -244,17 +248,14 @@ class IndexIVFPQ { } [[nodiscard]] auto query( - QueryType queryType, const QueryVectorArray& vectors, size_t top_k, size_t nprobe, - size_t upper_bound = 0, float k_factor = 1.f) { if (!index_) { throw std::runtime_error("Cannot query() because there is no index."); } - return index_->query( - queryType, vectors, top_k, nprobe, upper_bound, k_factor); + return index_->query(vectors, top_k, nprobe, k_factor); } void write_index( @@ -304,6 +305,10 @@ class IndexIVFPQ { return dimensions_; } + constexpr size_t upper_bound() const { + return upper_bound_; + } + constexpr auto n_list() const { return n_list_; } @@ -396,11 +401,9 @@ class IndexIVFPQ { [[nodiscard]] virtual std::tuple query( - QueryType queryType, const QueryVectorArray& vectors, size_t top_k, size_t nprobe, - size_t upper_bound, float k_factor) = 0; virtual void write_index( @@ -410,6 +413,7 @@ class IndexIVFPQ { const std::string& storage_version) = 0; [[nodiscard]] virtual uint64_t dimensions() const = 0; + [[nodiscard]] virtual size_t upper_bound() const = 0; [[nodiscard]] virtual TemporalPolicy temporal_policy() const = 0; [[nodiscard]] virtual uint64_t nlist() const = 0; [[nodiscard]] virtual uint32_t num_subspaces() const = 0; @@ -450,8 +454,15 @@ class IndexIVFPQ { index_impl( const tiledb::Context& ctx, const URI& index_uri, + IndexLoadStrategy index_load_strategy, + size_t upper_bound, std::optional temporal_policy) - : impl_index_(ctx, index_uri, temporal_policy) { + : impl_index_( + ctx, + index_uri, + index_load_strategy, + upper_bound, + temporal_policy) { } void train(const FeatureVectorArray& training_set) override { @@ -507,11 +518,9 @@ class IndexIVFPQ { * @todo Make sure the extents of the returned arrays are used correctly. */ [[nodiscard]] std::tuple query( - QueryType queryType, const QueryVectorArray& vectors, size_t top_k, size_t nprobe, - size_t upper_bound, float k_factor) override { // @todo using index_type = size_t; auto dtype = vectors.feature_type(); @@ -523,8 +532,7 @@ class IndexIVFPQ { (float*)vectors.data(), extents(vectors)[0], extents(vectors)[1]}; // @todo ?? - auto [s, t] = impl_index_.query( - queryType, qspan, top_k, nprobe, upper_bound, k_factor); + auto [s, t] = impl_index_.query(qspan, top_k, nprobe, k_factor); auto x = FeatureVectorArray{std::move(s)}; auto y = FeatureVectorArray{std::move(t)}; return {std::move(x), std::move(y)}; @@ -534,8 +542,7 @@ class IndexIVFPQ { (uint8_t*)vectors.data(), extents(vectors)[0], extents(vectors)[1]}; // @todo ?? - auto [s, t] = impl_index_.query( - queryType, qspan, top_k, nprobe, upper_bound, k_factor); + auto [s, t] = impl_index_.query(qspan, top_k, nprobe, k_factor); auto x = FeatureVectorArray{std::move(s)}; auto y = FeatureVectorArray{std::move(t)}; return {std::move(x), std::move(y)}; @@ -557,6 +564,10 @@ class IndexIVFPQ { return ::dimensions(impl_index_); } + size_t upper_bound() const override { + return impl_index_.upper_bound(); + } + TemporalPolicy temporal_policy() const override { return impl_index_.temporal_policy(); } @@ -597,7 +608,7 @@ class IndexIVFPQ { using table_type = std::map, constructor_function>; static const table_type dispatch_table; - using uri_constructor_function = std::function(const tiledb::Context&, const std::string&, std::optional)>; + using uri_constructor_function = std::function(const tiledb::Context&, const std::string&, IndexLoadStrategy, size_t, std::optional)>; using uri_table_type = std::map, uri_constructor_function>; static const uri_table_type uri_dispatch_table; @@ -607,6 +618,7 @@ class IndexIVFPQ { // clang-format on uint64_t dimensions_{0}; + size_t upper_bound_{0}; size_t n_list_{0}; uint32_t num_subspaces_{16}; uint32_t max_iterations_{2}; @@ -636,18 +648,18 @@ const IndexIVFPQ::table_type IndexIVFPQ::dispatch_table = { }; const IndexIVFPQ::uri_table_type IndexIVFPQ::uri_dispatch_table = { - {{TILEDB_INT8, TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, - {{TILEDB_UINT8, TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, - {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, - {{TILEDB_INT8, TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, - {{TILEDB_UINT8, TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, - {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, - {{TILEDB_INT8, TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, - {{TILEDB_UINT8, TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, - {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, - {{TILEDB_INT8, TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, - {{TILEDB_UINT8, TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, - {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, temporal_policy); }}, + {{TILEDB_INT8, TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, + {{TILEDB_UINT8, TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, + {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, + {{TILEDB_INT8, TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, + {{TILEDB_UINT8, TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, + {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, + {{TILEDB_INT8, TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, + {{TILEDB_UINT8, TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, + {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT32}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, + {{TILEDB_INT8, TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, + {{TILEDB_UINT8, TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, + {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT64}, [](const tiledb::Context& ctx, const std::string& uri, IndexLoadStrategy index_load_strategy, size_t upper_bound, std::optional temporal_policy) { return std::make_unique>>(ctx, uri, index_load_strategy, upper_bound, temporal_policy); }}, }; const IndexIVFPQ::clear_history_table_type IndexIVFPQ::clear_history_dispatch_table = { diff --git a/src/include/index/index_defs.h b/src/include/index/index_defs.h index 59affe3b9..81706b8c7 100644 --- a/src/include/index/index_defs.h +++ b/src/include/index/index_defs.h @@ -57,6 +57,16 @@ enum class IndexKind { FlatL2, IVFFlat, Vamana, IVFPQ }; enum class QueryType { FiniteRAM, InfiniteRAM }; +enum class IndexLoadStrategy { + // Will load the index as an OOC index. + PQ_OOC, + // Load the PQ index data as well as metadata. + PQ_INDEX, + // Load everything DEFAULT does, but also preload feature vectors for + // reranking. + PQ_INDEX_AND_RERANKING_VECTORS, +}; + /****************************************************************************** * Static info for arrays associated with an index group ******************************************************************************/ diff --git a/src/include/index/ivf_pq_index.h b/src/include/index/ivf_pq_index.h index 8d3378161..c5e09cc3f 100644 --- a/src/include/index/ivf_pq_index.h +++ b/src/include/index/ivf_pq_index.h @@ -108,6 +108,7 @@ #include "detail/ivf/partition.h" #include "detail/ivf/qv.h" #include "detail/linalg/tdb_matrix_multi_range.h" +#include "detail/linalg/tdb_matrix_with_ids.h" #include #include @@ -189,7 +190,9 @@ class ivf_pq_index { /**************************************************************************** * Index group information ****************************************************************************/ + size_t upper_bound_{0}; TemporalPolicy temporal_policy_; + IndexLoadStrategy index_load_strategy_{IndexLoadStrategy::PQ_INDEX}; std::unique_ptr> group_; /**************************************************************************** @@ -198,6 +201,7 @@ class ivf_pq_index { // Cached information about the partitioned vectors in the index uint64_t dimensions_{0}; + uint64_t num_vectors_{0}; uint64_t num_partitions_{0}; // Cached information about the pq encoding @@ -350,16 +354,29 @@ class ivf_pq_index { ivf_pq_index( const tiledb::Context& ctx, const std::string& uri, + IndexLoadStrategy index_load_strategy = IndexLoadStrategy::PQ_INDEX, + size_t upper_bound = 0, std::optional temporal_policy = std::nullopt) - : temporal_policy_{temporal_policy.has_value() ? *temporal_policy : TemporalPolicy()} + : upper_bound_{upper_bound} + , temporal_policy_{temporal_policy.has_value() ? *temporal_policy : TemporalPolicy()} + , index_load_strategy_{index_load_strategy} , group_{std::make_unique>( ctx, uri, TILEDB_READ, temporal_policy_)} { + if (upper_bound != 0 && index_load_strategy_ != IndexLoadStrategy::PQ_OOC) { + throw std::runtime_error( + "With upper_bound > 0 you must use IndexLoadStrategy::PQ_OOC."); + } + if (upper_bound == 0 && index_load_strategy_ == IndexLoadStrategy::PQ_OOC) { + throw std::runtime_error( + "With IndexLoadStrategy::PQ_OOC you must have an upper_bound > 0."); + } /** * Read the centroids. How the partitioned_pq_vectors_ are read in will be * determined by the type of query we are doing. But they will be read * in at this same timestamp. */ dimensions_ = group_->get_dimensions(); + num_vectors_ = group_->get_base_size(); num_partitions_ = group_->get_num_partitions(); num_subspaces_ = group_->get_num_subspaces(); sub_dimensions_ = dimensions_ / num_subspaces_; @@ -385,6 +402,54 @@ class ivf_pq_index { std::nullopt, num_clusters_, temporal_policy_); + + if (upper_bound == 0) { + // Read the the complete index arrays into ("infinite") memory. This will + // read the centroids, indices, partitioned_ids, and and the complete set + // of partitioned_pq_vectors, along with metadata from a group_uri. Load + // all partitions for infinite query Note that the constructor will move + // the infinite_parts vector + auto infinite_parts = + std::vector(::num_vectors(flat_ivf_centroids_)); + std::iota(begin(infinite_parts), end(infinite_parts), 0); + partitioned_pq_vectors_ = std::make_unique( + group_->cached_ctx(), + group_->pq_ivf_vectors_uri(), + group_->pq_ivf_indices_uri(), + group_->get_num_partitions() + 1, + group_->pq_ivf_ids_uri(), + infinite_parts, + 0, + temporal_policy_); + + partitioned_pq_vectors_->load(); + + if (::num_vectors(*partitioned_pq_vectors_) != + size(partitioned_pq_vectors_->ids())) { + throw std::runtime_error( + "[ivf_flat_index@ivf_pq_index] " + "::num_vectors(*partitioned_pq_vectors_) != " + "size(partitioned_pq_vectors_->ids())"); + } + if (size(partitioned_pq_vectors_->indices()) != + ::num_vectors(flat_ivf_centroids_) + 1) { + throw std::runtime_error( + "[ivf_flat_index@ivf_pq_index] " + "size(partitioned_pq_vectors_->indices()) != " + "::num_vectors(flat_ivf_centroids_) + 1"); + } + } + if (index_load_strategy_ == + IndexLoadStrategy::PQ_INDEX_AND_RERANKING_VECTORS) { + feature_vectors_ = tdbColMajorPreLoadMatrixWithIds( + group_->cached_ctx(), + group_->feature_vectors_uri(), + group_->ids_uri(), + dimensions_, + num_vectors_, + 0, + temporal_policy_); + } } /**************************************************************************** @@ -608,7 +673,7 @@ class ivf_pq_index { const V& training_set, kmeans_init init = kmeans_init::random) { dimensions_ = ::dimensions(training_set); if (num_partitions_ == 0) { - num_partitions_ = std::sqrt(num_vectors(training_set)); + num_partitions_ = std::sqrt(::num_vectors(training_set)); } flat_ivf_centroids_ = @@ -678,6 +743,8 @@ class ivf_pq_index { const Array& training_set, const Vector& training_set_ids, Distance distance = Distance{}) { + num_vectors_ = ::num_vectors(training_set); + // 1. Fill in cluster_centroids_. // cluster_centroids_ holds the num_clusters_ (256) centroids for each // subspace. @@ -774,9 +841,9 @@ class ivf_pq_index { class Distance = uncached_sub_sum_of_squares_distance> auto pq_encode(const U& training_set, Distance distance = Distance{}) const { auto pq_vectors = - std::make_unique(num_subspaces_, num_vectors(training_set)); + std::make_unique(num_subspaces_, ::num_vectors(training_set)); auto& pqv = *pq_vectors; - for (size_t i = 0; i < num_vectors(training_set); ++i) { + for (size_t i = 0; i < ::num_vectors(training_set); ++i) { pq_encode_one(training_set[i], pqv[i], distance); } return pq_vectors; @@ -810,10 +877,10 @@ class ivf_pq_index { // vector, the distance from the second subspace to each of the pq // centroids. auto pq_vectors = std::make_unique( - num_subspaces_ * num_clusters_, num_vectors(query_vectors)); + num_subspaces_ * num_clusters_, ::num_vectors(query_vectors)); auto& pqv = *pq_vectors; auto local_distance = Distance{}; - for (size_t i = 0; i < num_vectors(query_vectors); ++i) { + for (size_t i = 0; i < ::num_vectors(query_vectors); ++i) { auto sub_begin = 0; auto sub_id = 0; for (size_t subspace = 0; subspace < num_subspaces_; ++subspace) { @@ -834,105 +901,6 @@ class ivf_pq_index { return pq_vectors; } - /***************************************************************************** - * Methods for reading and reading the index from a group. - *****************************************************************************/ - - /** - * @brief Read the the complete index arrays into ("infinite") memory. - * This will read the centroids, indices, partitioned_ids, and - * and the complete set of partitioned_pq_vectors, along with metadata - * from a group_uri. - */ - auto read_index_infinite() { - if (!group_) { - if (!partitioned_pq_vectors_) { - throw std::runtime_error( - "[ivf_pq_index@read_index_infinite] Neither partitioned_pq_vectors " - "nor the group have been initialized"); - } - // If we have created an empty index and then try to query it, - // partitioned_pq_vectors_ will be empty so we will try to read here, but - // we won't have a group_. Just return and leave partitioned_pq_vectors_ - // empty. - return; - } - - // Load all partitions for infinite query - // Note that the constructor will move the infinite_parts vector - auto infinite_parts = - std::vector(::num_vectors(flat_ivf_centroids_)); - std::iota(begin(infinite_parts), end(infinite_parts), 0); - partitioned_pq_vectors_ = std::make_unique( - group_->cached_ctx(), - group_->pq_ivf_vectors_uri(), - group_->pq_ivf_indices_uri(), - group_->get_num_partitions() + 1, - group_->pq_ivf_ids_uri(), - infinite_parts, - 0, - temporal_policy_); - - partitioned_pq_vectors_->load(); - - if (::num_vectors(*partitioned_pq_vectors_) != - size(partitioned_pq_vectors_->ids())) { - throw std::runtime_error( - "[ivf_flat_index@read_index_infinite] " - "::num_vectors(*partitioned_pq_vectors_) != " - "size(partitioned_pq_vectors_->ids())"); - } - if (size(partitioned_pq_vectors_->indices()) != - ::num_vectors(flat_ivf_centroids_) + 1) { - throw std::runtime_error( - "[ivf_flat_index@read_index_infinite] " - "size(partitioned_pq_vectors_->indices()) != " - "::num_vectors(flat_ivf_centroids_) + 1"); - } - } - - /** - * @brief Open the index from the arrays contained in the group_uri. - * The "finite" queries only load as much data (ids and vectors) as are - * necessary for a given query -- so we can't load any data until we - * know what the query is. So, here we would have read the centroids and - * indices into memory, when creating the index but would not have read - * the partitioned_ids or partitioned_pq_vectors. - * - * @param group_uri - * @return bool indicating success or failure of read - */ - template - auto read_index_finite( - const Q& query_vectors, size_t nprobe, size_t upper_bound) { - if (!group_) { - throw std::runtime_error( - "[ivf_pq_index@read_index_finite] group_ is not initialized. This " - "happens if you do not load an index by URI. Please close the index " - "and re-open it by URI."); - } - - auto&& [active_partitions, active_queries] = - detail::ivf::partition_ivf_flat_index( - flat_ivf_centroids_, query_vectors, nprobe, num_threads_); - auto partitioned_pq_vectors = std::make_unique( - group_->cached_ctx(), - group_->pq_ivf_vectors_uri(), - group_->pq_ivf_indices_uri(), - group_->get_num_partitions() + 1, - group_->pq_ivf_ids_uri(), - active_partitions, - upper_bound, - temporal_policy_); - - // NB: We don't load the partitioned_pq_vectors here. We will load them - // when we do the query. - return std::make_tuple( - std::move(active_partitions), - std::move(active_queries), - std::move(partitioned_pq_vectors)); - } - /** * @brief Write the index to storage. This would typically be done after a * set of input vectors has been read and a new group is created. Or after @@ -1140,48 +1108,25 @@ class ivf_pq_index { * ****************************************************************************/ - template - auto query( - QueryType queryType, - const Q& query_vectors, - size_t k_nn, - size_t nprobe, - size_t upper_bound = 0, - float k_factor = 1.f) { - switch (queryType) { - case QueryType::InfiniteRAM: - return query_infinite_ram(query_vectors, k_nn, nprobe, k_factor); - case QueryType::FiniteRAM: - return query_finite_ram( - query_vectors, k_nn, nprobe, upper_bound, k_factor); - default: - throw std::runtime_error("Invalid query type"); - } - } - /** * @brief Perform a query on the index, returning the nearest neighbors * and distances. The function returns a matrix containing k_nn nearest * neighbors for each given query and a matrix containing the distances * corresponding to each returned neighbor. * - * This function searches for the nearest neighbors using "infinite RAM", - * that is, it loads the entire IVF index into memory and then applies the - * query. - * * @tparam Q Type of query vectors. * @param query_vectors Array of (uncompressed) vectors to query. * @param k_nn Number of nearest neighbors to return. + * @param nprobe Number of centroids to search. * @param k_factor Specifies the multiplier on k_nn for the initial IVF_PQ * query, after which re-ranking is run. - * @param nprobe Number of centroids to search. * * @return A tuple containing a matrix of nearest neighbors and a matrix * of the corresponding distances. * */ template - auto query_infinite_ram( + auto query( const Q& query_vectors, size_t k_nn, size_t nprobe, @@ -1192,13 +1137,68 @@ class ivf_pq_index { if (::num_vectors(flat_ivf_centroids_) < nprobe) { nprobe = ::num_vectors(flat_ivf_centroids_); } - if (!partitioned_pq_vectors_ || - partitioned_pq_vectors_->num_vectors() == 0 || - partitioned_pq_vectors_->num_vectors() != - partitioned_pq_vectors_->total_num_vectors()) { - read_index_infinite(); + + if (upper_bound_ > 0) { + // Searches for the nearest neighbors using "finite RAM", + // that is, it only loads that portion of the IVF index into memory that + // is necessary for the given query. In addition, it supports out of core + // operation, meaning that only a subset of the necessary partitions are + // loaded into memory at any one time. + if (!group_) { + throw std::runtime_error( + "[ivf_pq_index@read_index_finite] group_ is not initialized. This " + "happens if you do not load an index by URI. Please close the " + "index " + "and re-open it by URI."); + } + + // Open the index from the arrays contained in the group_uri. + // The "finite" queries only load as much data (ids and vectors) as are + // necessary for a given query -- so we can't load any data until we + // know what the query is. So, here we would have read the centroids and + // indices into memory, when creating the index but would not have read + // the partitioned_ids or partitioned_pq_vectors. + auto&& [active_partitions, active_queries] = + detail::ivf::partition_ivf_flat_index( + flat_ivf_centroids_, query_vectors, nprobe, num_threads_); + auto partitioned_pq_vectors = std::make_unique( + group_->cached_ctx(), + group_->pq_ivf_vectors_uri(), + group_->pq_ivf_indices_uri(), + group_->get_num_partitions() + 1, + group_->pq_ivf_ids_uri(), + active_partitions, + upper_bound_, + temporal_policy_); + + auto query_to_pq_centroid_distance_tables = + std::move(*generate_query_to_pq_centroid_distance_tables< + Q, + ColMajorMatrix>(query_vectors)); + size_t k_initial = static_cast(k_nn * k_factor); + auto&& [initial_distances, initial_ids, initial_indices] = + detail::ivf::query_finite_ram( + *partitioned_pq_vectors, + query_to_pq_centroid_distance_tables, + active_queries, + k_initial, + upper_bound_, + num_threads_, + make_pq_distance_query_to_pq_centroid_distance_tables< + std::span, + decltype(pq_storage_type{}[0])>()); + return rerank( + std::move(initial_distances), + std::move(initial_ids), + std::move(initial_indices), + query_vectors, + k_initial, + k_nn); } + // This function searches for the nearest neighbors using "infinite RAM". We + // have already loaded the partitioned_pq_vectors_ into memory in the + // constructor, so we can just run the query. auto&& [active_partitions, active_queries] = detail::ivf::partition_ivf_flat_index( flat_ivf_centroids_, query_vectors, nprobe, num_threads_); @@ -1230,78 +1230,6 @@ class ivf_pq_index { k_nn); } - /** - * @brief Perform a query on the index, returning the nearest neighbors - * and distances. The function returns a matrix containing k_nn nearest - * neighbors for each given query and a matrix containing the distances - * corresponding to each returned neighbor. - * - * This function searches for the nearest neighbors using "finite RAM", - * that is, it only loads that portion of the IVF index into memory that - * is necessary for the given query. In addition, it supports out of core - * operation, meaning that only a subset of the necessary partitions are - * loaded into memory at any one time. - * - * See the documentation for that function in detail/ivf/qv.h - * for more details. - * - * @tparam Q Type of query vectors. Must meet requirements of - * `feature_vector_array` - * @param query_vectors Array of (uncompressed) vectors to query. - * @param k_nn Number of nearest neighbors to return. - * @param k_factor Specifies the multipler on k_nn for the initial IVF_PQ - * query, after which re-ranking is run. - * @param nprobe Number of centroids to search. - * - * @return A tuple containing a matrix of nearest neighbors and a matrix - * of the corresponding distances. - */ - template - auto query_finite_ram( - const Q& query_vectors, - size_t k_nn, - size_t nprobe, - size_t upper_bound = 0, - float k_factor = 1.f) { - if (!group_) { - throw std::runtime_error( - "[ivf_pq_index@query_finite_ram] Query with finite RAM can only be " - "run if you're loading the index by URI. Please open it by URI and " - "try again. If you just wrote the index, open it up again by URI."); - } - if (k_factor < 1.f) { - throw std::runtime_error("k_factor must be >= 1"); - } - if (::num_vectors(flat_ivf_centroids_) < nprobe) { - nprobe = ::num_vectors(flat_ivf_centroids_); - } - auto&& [active_partitions, active_queries, partitioned_pq_vectors] = - read_index_finite(query_vectors, nprobe, upper_bound); - auto query_to_pq_centroid_distance_tables = - std::move(*generate_query_to_pq_centroid_distance_tables< - Q, - ColMajorMatrix>(query_vectors)); - size_t k_initial = static_cast(k_nn * k_factor); - auto&& [initial_distances, initial_ids, initial_indices] = - detail::ivf::query_finite_ram( - *partitioned_pq_vectors, - query_to_pq_centroid_distance_tables, - active_queries, - k_initial, - upper_bound, - num_threads_, - make_pq_distance_query_to_pq_centroid_distance_tables< - std::span, - decltype(pq_storage_type{}[0])>()); - return rerank( - std::move(initial_distances), - std::move(initial_ids), - std::move(initial_indices), - query_vectors, - k_initial, - k_nn); - } - auto rerank( ColMajorMatrix&& initial_distances, ColMajorMatrix&& initial_ids, @@ -1428,6 +1356,14 @@ class ivf_pq_index { return dimensions_; } + uint64_t num_vectors() const { + return num_vectors_; + } + + size_t upper_bound() const { + return upper_bound_; + } + auto num_partitions() const { if (num_partitions_ != ::num_vectors(flat_ivf_centroids_)) { throw std::runtime_error( @@ -1500,7 +1436,7 @@ class ivf_pq_index { double total_distance = 0.0; double total_normalizer = 0.0; - for (size_t i = 0; i < num_vectors(feature_vectors); ++i) { + for (size_t i = 0; i < ::num_vectors(feature_vectors); ++i) { if (debug) { std::cout << "-------------------" << std::endl; } @@ -1588,8 +1524,8 @@ class ivf_pq_index { } } - for (size_t i = 0; i < num_vectors(feature_vectors); ++i) { - for (size_t j = i + 1; j < num_vectors(feature_vectors); ++j) { + for (size_t i = 0; i < ::num_vectors(feature_vectors); ++i) { + for (size_t j = i + 1; j < ::num_vectors(feature_vectors); ++j) { auto real_distance = l2_distance(feature_vectors[i], feature_vectors[j]); total_normalizer += real_distance; @@ -1616,8 +1552,8 @@ class ivf_pq_index { score_type diff_max = 0.0; score_type vec_max = 0.0; - for (size_t i = 0; i < num_vectors(feature_vectors); ++i) { - for (size_t j = i + 1; j < num_vectors(feature_vectors); ++j) { + for (size_t i = 0; i < ::num_vectors(feature_vectors); ++i) { + for (size_t j = i + 1; j < ::num_vectors(feature_vectors); ++j) { auto real_distance = l2_distance(feature_vectors[i], feature_vectors[j]); total_normalizer += real_distance; @@ -1669,6 +1605,9 @@ class ivf_pq_index { if (dimensions_ != rhs.dimensions_) { return false; } + if (num_vectors_ != rhs.num_vectors_) { + return false; + } if (num_partitions_ != rhs.num_partitions_) { return false; } diff --git a/src/include/test/unit_api_ivf_pq_index.cc b/src/include/test/unit_api_ivf_pq_index.cc index c9f05dbb4..6026e139a 100644 --- a/src/include/test/unit_api_ivf_pq_index.cc +++ b/src/include/test/unit_api_ivf_pq_index.cc @@ -212,46 +212,65 @@ TEST_CASE("create empty index and then train and query", "[api_ivf_pq_index]") { // loaded by URI. size_t top_k = 1; size_t nprobe = 1; - size_t upper_bound = 0; float k_factor = 2.f; - auto&& [scores_vector_array, ids_vector_array] = index.query( - QueryType::InfiniteRAM, - FeatureVectorArray(queries), - top_k, - nprobe, - upper_bound, - k_factor); + auto&& [scores, ids] = + index.query(FeatureVectorArray(queries), top_k, nprobe, k_factor); auto default_score = std::numeric_limits::max(); auto default_id = std::numeric_limits::max(); check_single_vector_equals( - scores_vector_array, - ids_vector_array, + scores, + ids, {default_score, default_score, default_score, default_score}, {default_id, default_id, default_id, default_id}); } + // Check IndexLoadStrategy. { - auto index = IndexIVFPQ(ctx, index_uri); + CHECK_THROWS(IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_OOC, 0)); + CHECK_NOTHROW(IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_OOC, 10)); - CHECK(index.feature_type_string() == feature_type); - CHECK(index.id_type_string() == id_type); - CHECK(index.partitioning_index_type_string() == partitioning_index_type); + CHECK_NOTHROW(IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_INDEX, 0)); + CHECK_THROWS(IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_INDEX, 10)); + + CHECK_NOTHROW(IndexIVFPQ( + ctx, index_uri, IndexLoadStrategy::PQ_INDEX_AND_RERANKING_VECTORS, 0)); + CHECK_THROWS(IndexIVFPQ( + ctx, index_uri, IndexLoadStrategy::PQ_INDEX_AND_RERANKING_VECTORS, 10)); + } + + // We can open, train, and query an infinite index. + { + std::unique_ptr index; + SECTION("infinite") { + index = std::make_unique(ctx, index_uri); + } + SECTION("finite") { + size_t upper_bound = 97; + index = std::make_unique( + ctx, index_uri, IndexLoadStrategy::PQ_OOC, upper_bound); + CHECK(index->upper_bound() == upper_bound); + } + + CHECK(index->feature_type_string() == feature_type); + CHECK(index->id_type_string() == id_type); + CHECK(index->partitioning_index_type_string() == partitioning_index_type); auto training = ColMajorMatrix{ {{3, 1, 4}, {1, 5, 9}, {2, 6, 5}, {3, 5, 8}}}; auto training_vector_array = FeatureVectorArray(training); - index.train(training_vector_array); - index.add(training_vector_array); - index.write_index(ctx, index_uri); + index->train(training_vector_array); + index->add(training_vector_array); + index->write_index(ctx, index_uri); - CHECK(index.feature_type_string() == feature_type); - CHECK(index.id_type_string() == id_type); - CHECK(index.partitioning_index_type_string() == partitioning_index_type); + CHECK(index->feature_type_string() == feature_type); + CHECK(index->id_type_string() == id_type); + CHECK(index->partitioning_index_type_string() == partitioning_index_type); - auto&& [scores_vector_array, ids_vector_array] = - index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1); - check_single_vector_equals( - scores_vector_array, ids_vector_array, {0, 0, 0, 0}, {0, 1, 2, 3}); + size_t top_k = 1; + size_t nprobe = 1; + auto&& [scores, ids] = + index->query(FeatureVectorArray(queries), top_k, nprobe); + check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {0, 1, 2, 3}); } } @@ -327,44 +346,34 @@ TEST_CASE( auto queries = ColMajorMatrix{ {{8, 6, 7}, {5, 3, 0}, {9, 5, 0}, {2, 7, 3}}}; - auto&& [scores_vector_array, ids_vector_array] = - index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1); - check_single_vector_equals( - scores_vector_array, ids_vector_array, {0, 0, 0, 0}, {10, 11, 12, 13}); + auto&& [scores, ids] = index.query(FeatureVectorArray(queries), 1, 1); + check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {10, 11, 12, 13}); } { - auto index = IndexIVFPQ(ctx, index_uri); size_t top_k = 1; size_t nprobe = 1; + std::unique_ptr index; + SECTION("infinite") { + index = std::make_unique(ctx, index_uri); + } + SECTION("finite") { + size_t upper_bound = GENERATE(3, 4, 5, 100); + index = std::make_unique( + ctx, index_uri, IndexLoadStrategy::PQ_OOC, upper_bound); + CHECK(index->upper_bound() == upper_bound); + } + + CHECK(index->feature_type_string() == feature_type); + CHECK(index->id_type_string() == id_type); + CHECK(index->partitioning_index_type_string() == partitioning_index_type); + auto queries = ColMajorMatrix{ {{8, 6, 7}, {5, 3, 0}, {9, 5, 0}, {2, 7, 3}}}; - for (auto upper_bound : {3, 4, 5, 100, 0}) { - auto&& [scores_vector_array, ids_vector_array] = index.query( - QueryType::FiniteRAM, - FeatureVectorArray(queries), - top_k, - nprobe, - upper_bound); - check_single_vector_equals( - scores_vector_array, - ids_vector_array, - {0, 0, 0, 0}, - {10, 11, 12, 13}); - - auto&& [scores_vector_array_infinite, ids_vector_array_infinite] = - index.query( - QueryType::InfiniteRAM, - FeatureVectorArray(queries), - top_k, - nprobe); - check_single_vector_equals( - scores_vector_array_infinite, - ids_vector_array_infinite, - {0, 0, 0, 0}, - {10, 11, 12, 13}); - } + auto&& [scores, ids] = + index->query(FeatureVectorArray(queries), top_k, nprobe); + check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {10, 11, 12, 13}); } } @@ -424,19 +433,20 @@ TEST_CASE( auto query_set = FeatureVectorArray(ctx, siftsmall_query_uri); auto groundtruth_set = FeatureVectorArray(ctx, siftsmall_groundtruth_uri); - auto&& [scores, ids] = - index.query(QueryType::InfiniteRAM, query_set, k_nn, 5); + auto&& [scores, ids] = index.query(query_set, k_nn, 5); auto intersections = count_intersections(ids, groundtruth_set, k_nn); CHECK((intersections / static_cast(num_vectors(ids) * k_nn)) > 0.7); } { - auto index = IndexIVFPQ(ctx, index_uri); float k_factor = 20.f; - auto query_set = FeatureVectorArray(ctx, siftsmall_query_uri); auto groundtruth_set = FeatureVectorArray(ctx, siftsmall_groundtruth_uri); + auto index = IndexIVFPQ(ctx, index_uri); + auto index_finite = + IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_OOC, 450); + for (auto [nprobe, expected_accuracy, expected_accuracy_with_reranking] : std::vector>{ {1, .4f, .45f}, @@ -444,15 +454,14 @@ TEST_CASE( {5, .7f, .7f}, {10, .75f, .9f}, {100, .8f, 1.f}}) { - auto&& [distances, ids] = - index.query(QueryType::InfiniteRAM, query_set, k_nn, nprobe); + auto&& [distances, ids] = index.query(query_set, k_nn, nprobe); auto intersections = count_intersections(ids, groundtruth_set, k_nn); CHECK( (intersections / static_cast(num_vectors(ids) * k_nn)) >= expected_accuracy); - auto&& [distances_with_reranking, ids_with_reranking] = index.query( - QueryType::InfiniteRAM, query_set, k_nn, nprobe, 0, k_factor); + auto&& [distances_with_reranking, ids_with_reranking] = + index.query(query_set, k_nn, nprobe, k_factor); auto intersections_with_reranking = count_intersections(ids_with_reranking, groundtruth_set, k_nn); CHECK( @@ -460,24 +469,16 @@ TEST_CASE( static_cast(num_vectors(ids_with_reranking) * k_nn)) >= expected_accuracy_with_reranking); - for (auto upper_bound : {450, 1000, 0}) { - auto&& [distances_finite, ids_finite] = index.query( - QueryType::FiniteRAM, query_set, k_nn, nprobe, upper_bound); - CHECK(are_equal(ids_finite, ids)); - CHECK(are_equal(distances_finite, distances)); - - auto&& [distances_finite_with_reranking, ids_finite_with_reranking] = - index.query( - QueryType::FiniteRAM, - query_set, - k_nn, - nprobe, - upper_bound, - k_factor); - CHECK(are_equal(ids_finite_with_reranking, ids_with_reranking)); - CHECK(are_equal( - distances_finite_with_reranking, distances_with_reranking)); - } + auto&& [distances_finite, ids_finite] = + index_finite.query(query_set, k_nn, nprobe); + CHECK(are_equal(ids_finite, ids)); + CHECK(are_equal(distances_finite, distances)); + + auto&& [distances_finite_with_reranking, ids_finite_with_reranking] = + index_finite.query(query_set, k_nn, nprobe, k_factor); + CHECK(are_equal(ids_finite_with_reranking, ids_with_reranking)); + CHECK( + are_equal(distances_finite_with_reranking, distances_with_reranking)); } } } @@ -546,7 +547,7 @@ TEST_CASE("build index and query", "[api_ivf_pq_index]") { a.train(training_set); a.add(training_set); - auto&& [s, t] = a.query(QueryType::InfiniteRAM, query_set, k_nn, 5); + auto&& [s, t] = a.query(query_set, k_nn, nprobe); auto intersections = count_intersections(t, groundtruth_set, k_nn); auto nt = num_vectors(t); @@ -576,27 +577,32 @@ TEST_CASE("read index and query", "[api_ivf_pq_index]") { a.train(training_set); a.add(training_set); a.write_index(ctx, api_ivf_pq_index_uri); - auto b = IndexIVFPQ(ctx, api_ivf_pq_index_uri); auto query_set = FeatureVectorArray(ctx, siftsmall_query_uri); auto groundtruth_set = FeatureVectorArray(ctx, siftsmall_groundtruth_uri); - auto&& [scores_1, ids_1] = - a.query(QueryType::InfiniteRAM, query_set, k_nn, 5); - auto&& [scores_2, ids_2] = - b.query(QueryType::InfiniteRAM, query_set, k_nn, 5); - auto&& [scores_3, ids_3] = - b.query(QueryType::FiniteRAM, query_set, k_nn, 5, 500); - auto&& [scores_4, ids_4] = - b.query(QueryType::FiniteRAM, query_set, k_nn, 5, 0); + std::unique_ptr b; + SECTION("infinite") { + b = std::make_unique(ctx, api_ivf_pq_index_uri); + } + SECTION("finite") { + size_t upper_bound = GENERATE(500, 1000); + b = std::make_unique( + ctx, api_ivf_pq_index_uri, IndexLoadStrategy::PQ_OOC, upper_bound); + CHECK(b->upper_bound() == upper_bound); + } + + // Make sure the same query results are returned for two different indexes. + size_t nprobe = 5; + auto&& [scores_1, ids_1] = a.query(query_set, k_nn, nprobe); + auto&& [scores_2, ids_2] = b->query(query_set, k_nn, nprobe); + + CHECK(are_equal(scores_1, scores_2)); + CHECK(are_equal(ids_1, ids_2)); auto intersections_1 = count_intersections(ids_1, groundtruth_set, k_nn); auto intersections_2 = count_intersections(ids_2, groundtruth_set, k_nn); - auto intersections_3 = count_intersections(ids_3, groundtruth_set, k_nn); - auto intersections_4 = count_intersections(ids_4, groundtruth_set, k_nn); CHECK(num_vectors(ids_1) == num_vectors(ids_2)); - CHECK(num_vectors(ids_1) == num_vectors(ids_3)); - CHECK(num_vectors(ids_1) == num_vectors(ids_4)); auto recall = intersections_1 / static_cast(num_vectors(ids_1) * k_nn); CHECK(recall > 0.7); @@ -696,12 +702,11 @@ TEST_CASE("clear history with an open index", "[api_ivf_pq_index]") { index.add(training_vector_array); index.write_index(ctx, index_uri, TemporalPolicy(TimeTravel, 99)); - auto&& [scores_vector_array, ids_vector_array] = - index.query(QueryType::InfiniteRAM, training_vector_array, 1, 1); + auto&& [scores, ids] = index.query(training_vector_array, 1, 1); auto second_index = IndexIVFPQ(ctx, index_uri); - auto&& [scores_vector_array_finite, ids_vector_array_finite] = - second_index.query(QueryType::FiniteRAM, training_vector_array, 1, 1); + auto&& [scores_finite, ids_finite] = + second_index.query(training_vector_array, 1, 1); // Here we check that we can clear_history() even with a index in memory. This // makes sure that every Array which IndexIVFPQ opens has been closed, @@ -819,21 +824,20 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { CHECK(index.id_type_string() == id_type); CHECK(index.partitioning_index_type_string() == partitioning_index_type); + size_t top_k = 1; + size_t nprobe = 1; + auto queries = ColMajorMatrix{ {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}}}; - auto&& [scores_vector_array, ids_vector_array] = index.query( - QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 0); - check_single_vector_equals( - scores_vector_array, ids_vector_array, {0, 0, 0, 0}, {1, 2, 3, 4}); + auto&& [scores, ids] = + index.query(FeatureVectorArray(queries), top_k, nprobe); + check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {1, 2, 3, 4}); - auto&& [scores_vector_array_with_reranking, ids_vector_array_with_reranking] = - index.query( - QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 2.f); + float k_factor = 2.f; + auto&& [scores_with_reranking, ids_with_reranking] = + index.query(FeatureVectorArray(queries), top_k, nprobe, k_factor); check_single_vector_equals( - scores_vector_array_with_reranking, - ids_vector_array_with_reranking, - {0, 0, 0, 0}, - {1, 2, 3, 4}); + scores_with_reranking, ids_with_reranking, {0, 0, 0, 0}, {1, 2, 3, 4}); auto typed_index = ivf_pq_index< feature_type_type, @@ -861,21 +865,23 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { // Check that we can do finite and infinite queries and then train + write // the index. { + size_t top_k = 1; + size_t nprobe = 1; auto queries = ColMajorMatrix{ {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}}}; - auto&& [scores_vector_array, ids_vector_array] = index.query( - QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1); - check_single_vector_equals( - scores_vector_array, ids_vector_array, {0, 0, 0, 0}, {1, 2, 3, 4}); - - auto&& [scores_vector_array_finite, ids_vector_array_finite] = - index.query( - QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1, 4); - check_single_vector_equals( - scores_vector_array_finite, - ids_vector_array_finite, - {0, 0, 0, 0}, - {1, 2, 3, 4}); + auto&& [scores, ids] = + index.query(FeatureVectorArray(queries), top_k, nprobe); + check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {1, 2, 3, 4}); + + { + size_t upper_bound = 5; + auto index_finite = + IndexIVFPQ(ctx, index_uri, IndexLoadStrategy::PQ_OOC, upper_bound); + auto&& [scores_finite, ids_finite] = + index_finite.query(FeatureVectorArray(queries), top_k, nprobe); + check_single_vector_equals( + scores_finite, ids_finite, {0, 0, 0, 0}, {1, 2, 3, 4}); + } } CHECK(index.temporal_policy().timestamp_start() == 0); @@ -906,15 +912,14 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { CHECK(index.id_type_string() == id_type); CHECK(index.partitioning_index_type_string() == partitioning_index_type); + size_t top_k = 1; + size_t nprobe = 1; auto queries = ColMajorMatrix{ {{11, 11, 11}, {22, 22, 22}, {33, 33, 33}, {44, 44, 44}, {55, 55, 55}}}; - auto&& [scores_vector_array, ids_vector_array] = - index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1); + auto&& [scores, ids] = + index.query(FeatureVectorArray(queries), top_k, nprobe); check_single_vector_equals( - scores_vector_array, - ids_vector_array, - {0, 0, 0, 0, 0}, - {11, 22, 33, 44, 55}); + scores, ids, {0, 0, 0, 0, 0}, {11, 22, 33, 44, 55}); auto typed_index = ivf_pq_index< feature_type_type, @@ -946,10 +951,15 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { std::vector{99, 100}.begin())); } - // Load it at timestamp 99 and make sure we can query it correctly. - { + // Load it at timestamp 99 and make sure we can query it correctly. Do this + // with both a finite and infinite index. + for (auto upper_bound : std::vector{0, 4}) { auto temporal_policy = TemporalPolicy{TimeTravel, 99}; - auto index = IndexIVFPQ(ctx, index_uri, temporal_policy); + auto load_strategy = upper_bound == 0 ? IndexLoadStrategy::PQ_INDEX : + IndexLoadStrategy::PQ_OOC; + auto index = + IndexIVFPQ(ctx, index_uri, load_strategy, upper_bound, temporal_policy); + CHECK(index.upper_bound() == upper_bound); CHECK(index.temporal_policy().timestamp_end() == 99); CHECK(index.feature_type_string() == feature_type); @@ -959,41 +969,26 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { CHECK(index.convergence_tolerance() == convergence_tolerance); CHECK(index.reassign_ratio() == reassign_ratio); + size_t top_k = 1; + size_t nprobe = 1; auto queries = ColMajorMatrix{ {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}}}; - auto&& [scores_vector_array_finite, ids_vector_array_finite] = - index.query(QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1); - check_single_vector_equals( - scores_vector_array_finite, - ids_vector_array_finite, - {0, 0, 0, 0}, - {1, 2, 3, 4}); - auto&& [scores_vector_array_finite_with_reranking, ids_vector_array_finite_with_reranking] = - index.query( - QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 2.f); - check_single_vector_equals( - scores_vector_array_finite_with_reranking, - ids_vector_array_finite_with_reranking, - {0, 0, 0, 0}, - {1, 2, 3, 4}); - auto&& [scores_vector_array, ids_vector_array] = - index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1); - check_single_vector_equals( - scores_vector_array, ids_vector_array, {0, 0, 0, 0}, {1, 2, 3, 4}); - auto&& [scores_vector_array_with_reranking, ids_vector_array_with_reranking] = - index.query( - QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 2.f); + auto&& [scores, ids] = + index.query(FeatureVectorArray(queries), top_k, nprobe); + check_single_vector_equals(scores, ids, {0, 0, 0, 0}, {1, 2, 3, 4}); + + float k_factor = 2.f; + auto&& [scores_with_reranking, ids_with_reranking] = + index.query(FeatureVectorArray(queries), top_k, nprobe, k_factor); check_single_vector_equals( - scores_vector_array_with_reranking, - ids_vector_array_with_reranking, - {0, 0, 0, 0}, - {1, 2, 3, 4}); + scores_with_reranking, ids_with_reranking, {0, 0, 0, 0}, {1, 2, 3, 4}); auto typed_index = ivf_pq_index< feature_type_type, id_type_type, - partitioning_index_type_type>(ctx, index_uri, temporal_policy); + partitioning_index_type_type>( + ctx, index_uri, load_strategy, upper_bound, temporal_policy); CHECK(typed_index.group().get_dimensions() == dimensions); CHECK(typed_index.group().get_temp_size() == 0); CHECK(typed_index.group().get_history_index() == 0); @@ -1022,9 +1017,14 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { // Load it at timestamp 5 (before ingestion) and make sure we can query and be // returned fill values. - { + for (auto upper_bound : std::vector{0, 4}) { auto temporal_policy = TemporalPolicy{TimeTravel, 0}; - auto index = IndexIVFPQ(ctx, index_uri, temporal_policy); + auto load_strategy = upper_bound == 0 ? IndexLoadStrategy::PQ_INDEX : + IndexLoadStrategy::PQ_OOC; + + auto index = + IndexIVFPQ(ctx, index_uri, load_strategy, upper_bound, temporal_policy); + CHECK(index.upper_bound() == upper_bound); CHECK(index.temporal_policy().timestamp_start() == 0); CHECK(index.temporal_policy().timestamp_end() == 0); @@ -1035,43 +1035,30 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { CHECK(index.convergence_tolerance() == convergence_tolerance); CHECK(index.reassign_ratio() == reassign_ratio); + size_t top_k = 1; + size_t nprobe = 1; + float k_factor = 1.9f; auto queries = ColMajorMatrix{{{1, 1, 1}}}; - auto&& [scores_vector_array, ids_vector_array] = - index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1); - check_single_vector_equals( - scores_vector_array, - ids_vector_array, - {std::numeric_limits::max()}, - {std::numeric_limits::max()}); - auto&& [scores_vector_array_with_reranking, ids_vector_array_with_reranking] = - index.query( - QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 1.9f); - check_single_vector_equals( - scores_vector_array_with_reranking, - ids_vector_array_with_reranking, - {std::numeric_limits::max()}, - {std::numeric_limits::max()}); - - auto&& [scores_vector_array_finite, ids_vector_array_finite] = - index.query(QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1, 4); + auto&& [scores, ids] = + index.query(FeatureVectorArray(queries), top_k, nprobe); check_single_vector_equals( - scores_vector_array_finite, - ids_vector_array_finite, + scores, + ids, {std::numeric_limits::max()}, {std::numeric_limits::max()}); - auto&& [scores_vector_array_finite_with_reranking, ids_vector_array_finite_with_reranking] = - index.query( - QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 4, 1.9f); + auto&& [scores_with_reranking, ids_with_reranking] = + index.query(FeatureVectorArray(queries), top_k, nprobe, k_factor); check_single_vector_equals( - scores_vector_array_finite_with_reranking, - ids_vector_array_finite_with_reranking, + scores_with_reranking, + ids_with_reranking, {std::numeric_limits::max()}, {std::numeric_limits::max()}); auto typed_index = ivf_pq_index< feature_type_type, id_type_type, - partitioning_index_type_type>(ctx, index_uri, temporal_policy); + partitioning_index_type_type>( + ctx, index_uri, load_strategy, upper_bound, temporal_policy); CHECK(typed_index.group().get_dimensions() == dimensions); CHECK(typed_index.group().get_temp_size() == 0); CHECK(typed_index.group().get_history_index() == 0); @@ -1106,17 +1093,24 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { // Clear history for <= 99 and then load at 99, then make sure we cannot // query. - { - IndexIVFPQ::clear_history(ctx, index_uri, 99); - + IndexIVFPQ::clear_history(ctx, index_uri, 99); + for (auto upper_bound : std::vector{0, 3}) { auto temporal_policy = TemporalPolicy{TimeTravel, 99}; - auto index = IndexIVFPQ(ctx, index_uri, temporal_policy); + auto load_strategy = upper_bound == 0 ? IndexLoadStrategy::PQ_INDEX : + IndexLoadStrategy::PQ_OOC; + + auto index = + IndexIVFPQ(ctx, index_uri, load_strategy, upper_bound, temporal_policy); + CHECK(index.upper_bound() == upper_bound); CHECK(index.temporal_policy().timestamp_end() == 99); CHECK(index.feature_type_string() == feature_type); CHECK(index.id_type_string() == id_type); CHECK(index.partitioning_index_type_string() == partitioning_index_type); + size_t top_k = 1; + size_t nprobe = 1; + float k_factor = 17.3f; auto queries = ColMajorMatrix{ {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}}}; @@ -1127,39 +1121,22 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { auto expected_ids = std::vector{default_id, default_id, default_id, default_id}; - auto&& [scores_vector_array_finite, ids_vector_array_finite] = - index.query(QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1, 0); - check_single_vector_equals( - scores_vector_array_finite, - ids_vector_array_finite, - expected_scores, - expected_ids); - auto&& [scores_vector_array_finite_with_reranking, ids_vector_array_finite_with_reranking] = - index.query( - QueryType::FiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 20.f); - check_single_vector_equals( - scores_vector_array_finite_with_reranking, - ids_vector_array_finite_with_reranking, - expected_scores, - expected_ids); - - auto&& [scores_vector_array, ids_vector_array] = - index.query(QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1); - check_single_vector_equals( - scores_vector_array, ids_vector_array, expected_scores, expected_ids); - auto&& [scores_vector_array_with_reranking, ids_vector_array_with_reranking] = - index.query( - QueryType::InfiniteRAM, FeatureVectorArray(queries), 1, 1, 0, 20.f); + auto&& [scores, ids] = + index.query(FeatureVectorArray(queries), top_k, nprobe); + check_single_vector_equals(scores, ids, expected_scores, expected_ids); + auto&& [scores_with_reranking, ids_with_reranking] = + index.query(FeatureVectorArray(queries), top_k, nprobe, k_factor); check_single_vector_equals( - scores_vector_array_with_reranking, - ids_vector_array_with_reranking, + scores_with_reranking, + ids_with_reranking, expected_scores, expected_ids); auto typed_index = ivf_pq_index< feature_type_type, id_type_type, - partitioning_index_type_type>(ctx, index_uri, temporal_policy); + partitioning_index_type_type>( + ctx, index_uri, load_strategy, upper_bound, temporal_policy); CHECK(typed_index.group().get_dimensions() == dimensions); CHECK(typed_index.group().get_temp_size() == 0); CHECK(typed_index.group().get_history_index() == 0); diff --git a/src/include/test/unit_ivf_pq_index.cc b/src/include/test/unit_ivf_pq_index.cc index 2571d1808..32768e04d 100644 --- a/src/include/test/unit_ivf_pq_index.cc +++ b/src/include/test/unit_ivf_pq_index.cc @@ -319,13 +319,16 @@ TEST_CASE("ivf_index write and read", "[ivf_pq_index]") { std::iota(begin(ids), end(ids), 0); auto idx = ivf_pq_index( nlist, num_subspaces, max_iterations); + CHECK(idx.num_vectors() == 0); idx.train_ivf(training_set, kmeans_init::kmeanspp); idx.add(training_set, ids); + CHECK(idx.num_vectors() == ::num_vectors(training_set)); idx.write_index(ctx, ivf_index_uri); + CHECK(idx.num_vectors() == ::num_vectors(training_set)); // Load it from URI. auto idx2 = ivf_pq_index(ctx, ivf_index_uri); - idx2.read_index_infinite(); + CHECK(idx2.num_vectors() == ::num_vectors(training_set)); // Check that the two indexes are the same. CHECK(idx.compare_cached_metadata(idx2)); @@ -353,6 +356,7 @@ TEST_CASE( siftsmall_indices_type>(20, 16, 50); pq_idx.train_ivf(training_set); pq_idx.add(training_set, ids); + CHECK(pq_idx.num_vectors() == ::num_vectors(training_set)); SECTION("pq_encoding") { auto avg_error = pq_idx.verify_pq_encoding(training_set); @@ -471,7 +475,7 @@ TEMPLATE_TEST_CASE( } #endif -TEST_CASE("build index and query in place", "[ivf_pq_index]") { +TEST_CASE("build index and infinite query in place", "[ivf_pq_index]") { tiledb::Context ctx; // size_t nlist = GENERATE(1, 100); size_t nlist = 20; @@ -494,17 +498,10 @@ TEST_CASE("build index and query in place", "[ivf_pq_index]") { auto top_k_ivf_scores = ColMajorMatrix(); auto top_k_ivf = ColMajorMatrix(); - SECTION("infinite") { - std::tie(top_k_ivf_scores, top_k_ivf) = - idx.query_infinite_ram(query_set, k_nn, nprobe); - } + std::tie(top_k_ivf_scores, top_k_ivf) = idx.query(query_set, k_nn, nprobe); - SECTION("finite") { - std::tie(top_k_ivf_scores, top_k_ivf) = - idx.query_finite_ram(query_set, k_nn, nprobe); - } - - // NOTE: Can be used to debug the results.debug_slice(top_k_ivf, "top_k_ivf"); + // NOTE: Can be used to debug the results: + // debug_slice(top_k_ivf, "top_k_ivf"); // debug_slice(top_k_ivf_scores, "top_k_ivf_scores"); // debug_slice(groundtruth_set, "groundtruth_set"); @@ -531,11 +528,13 @@ TEST_CASE("ivf_pq_index write and read", "[ivf_pq_index]") { uint64_t write_timestamp = 1000; idx.write_index( ctx, ivf_pq_index_uri, TemporalPolicy(TimeTravel, write_timestamp)); + CHECK(idx.num_vectors() == ::num_vectors(training_set)); { // Load the index and check metadata. auto idx2 = ivf_pq_index( ctx, ivf_pq_index_uri); + CHECK(idx2.num_vectors() == ::num_vectors(training_set)); CHECK(idx2.group().get_dimensions() == sift_dimensions); CHECK(idx2.group().get_temp_size() == 0); @@ -563,6 +562,7 @@ TEST_CASE("ivf_pq_index write and read", "[ivf_pq_index]") { auto idx2 = ivf_pq_index( ctx, ivf_pq_index_uri); + CHECK(idx2.num_vectors() == 0); CHECK(idx2.group().get_dimensions() == sift_dimensions); CHECK(idx2.group().get_temp_size() == 0); @@ -591,15 +591,15 @@ TEST_CASE("query empty index", "[ivf_pq_index]") { { auto data = ColMajorMatrixWithIds(dimensions, num_vectors); - debug_matrix_with_ids(data, "data"); index.train(data, data.raveled_ids()); index.add(data, data.raveled_ids()); + CHECK(index.num_vectors() == num_vectors); } // We can query an empty index. { size_t k_nn = 1; - auto&& [scores, ids] = index.query_infinite_ram(queries, k_nn, nlist); + auto&& [scores, ids] = index.query(queries, k_nn, nlist); CHECK(_cpo::num_vectors(scores) == _cpo::num_vectors(queries)); CHECK(_cpo::num_vectors(ids) == _cpo::num_vectors(queries)); CHECK(_cpo::dimensions(scores) == k_nn); @@ -620,10 +620,12 @@ TEST_CASE("query empty index", "[ivf_pq_index]") { // We can load and query an empty index. { - auto index2 = ivf_pq_index( - ctx, ivf_index_uri); + auto index_infinite = + ivf_pq_index( + ctx, ivf_index_uri); + CHECK(index_infinite.num_vectors() == num_vectors); size_t k_nn = 1; - auto&& [scores, ids] = index2.query_infinite_ram(queries, k_nn, nlist); + auto&& [scores, ids] = index_infinite.query(queries, k_nn, nlist); CHECK(_cpo::num_vectors(scores) == _cpo::num_vectors(queries)); CHECK(_cpo::num_vectors(ids) == _cpo::num_vectors(queries)); CHECK(_cpo::dimensions(scores) == k_nn); @@ -632,10 +634,13 @@ TEST_CASE("query empty index", "[ivf_pq_index]") { CHECK(ids(0, 0) == std::numeric_limits::max()); } { - auto index2 = ivf_pq_index( - ctx, ivf_index_uri); + size_t upper_bound = 11; + auto index_finite = + ivf_pq_index( + ctx, ivf_index_uri, IndexLoadStrategy::PQ_OOC, upper_bound); + CHECK(index_finite.num_vectors() == num_vectors); size_t k_nn = 1; - auto&& [scores, ids] = index2.query_finite_ram(queries, k_nn, nlist, 9); + auto&& [scores, ids] = index_finite.query(queries, k_nn, nlist, 9); CHECK(_cpo::num_vectors(scores) == _cpo::num_vectors(queries)); CHECK(_cpo::num_vectors(ids) == _cpo::num_vectors(queries)); CHECK(_cpo::dimensions(scores) == k_nn); @@ -670,6 +675,7 @@ TEST_CASE("query simple", "[ivf_pq_index]") { auto ivf_index_uri = (std::filesystem::temp_directory_path() / "ivf_index").string(); + CHECK(index.num_vectors() == 0); CHECK(index.nlist() == nlist); // We can train, add, query, and then write the index. @@ -680,13 +686,15 @@ TEST_CASE("query simple", "[ivf_pq_index]") { index.train(training, training.raveled_ids()); index.add(training, training.raveled_ids()); + CHECK(index.num_vectors() == ::num_vectors(training)); + size_t k_nn = 1; size_t nprobe = nlist; for (int i = 1; i <= 4; ++i) { auto value = static_cast(i); auto queries = ColMajorMatrix{{{value, value, value, value}}}; - auto&& [scores, ids] = index.query_infinite_ram(queries, k_nn, nprobe); + auto&& [scores, ids] = index.query(queries, k_nn, nprobe); CHECK(scores(0, 0) == 0); CHECK(ids(0, 0) == i * 11); } @@ -699,7 +707,20 @@ TEST_CASE("query simple", "[ivf_pq_index]") { // We can load and query the index. { - auto index2 = ivf_pq_index(ctx, ivf_index_uri); + std::unique_ptr> index2; + SECTION("infinite") { + index2 = std::make_unique>( + ctx, ivf_index_uri); + CHECK(index2->upper_bound() == 0); + } + SECTION("finite") { + size_t upper_bound = 97; + index2 = std::make_unique>( + ctx, ivf_index_uri, IndexLoadStrategy::PQ_OOC, upper_bound); + CHECK(index2->upper_bound() == upper_bound); + } + CHECK(index2->num_vectors() == 4); + size_t k_nn = 1; size_t nprobe = nlist; for (int i = 1; i <= 4; ++i) { @@ -707,10 +728,10 @@ TEST_CASE("query simple", "[ivf_pq_index]") { auto queries = ColMajorMatrix{{{value, value, value, value}}}; auto&& [scores_from_finite, ids_from_finite] = - index2.query_finite_ram(queries, k_nn, nprobe, 5); + index2->query(queries, k_nn, nprobe, 5); CHECK(scores_from_finite(0, 0) == 0); CHECK(ids_from_finite(0, 0) == i * 11); - auto&& [scores, ids] = index2.query_infinite_ram(queries, k_nn, nprobe); + auto&& [scores, ids] = index2->query(queries, k_nn, nprobe); CHECK(scores(0, 0) == 0); CHECK(ids(0, 0) == i * 11); } @@ -730,7 +751,7 @@ TEST_CASE("k_factor", "[ivf_pq_index]") { float reassign_ratio = 0.09f; size_t nprobe = nlist; - size_t k_nn = 20; + size_t k_nn = 40; float k_factor = 2.f; size_t upper_bound = 350; @@ -747,11 +768,12 @@ TEST_CASE("k_factor", "[ivf_pq_index]") { DistanceMetric::L2); auto ivf_index_uri = (std::filesystem::temp_directory_path() / "ivf_index").string(); - + CHECK(index.num_vectors() == 0); CHECK(index.nlist() == nlist); // We can train, add, query, and then write the index. std::vector ids(num_vectors); + size_t num_equal_no_reranking = 0; { std::vector> vectors; for (int i = 1; i <= num_vectors; ++i) { @@ -766,22 +788,26 @@ TEST_CASE("k_factor", "[ivf_pq_index]") { index.train(training, training.raveled_ids()); index.add(training, training.raveled_ids()); + CHECK(index.num_vectors() == ::num_vectors(training)); + auto queries = ColMajorMatrix{{{1, 1, 1, 1}}}; { auto&& [scores_reranking, ids_reranking] = - index.query_infinite_ram(queries, k_nn, nprobe, k_factor); + index.query(queries, k_nn, nprobe, k_factor); CHECK( k_nn == check_single_vector_num_equal(ids_reranking, ids)); CHECK(scores_reranking(0, 0) == 0); auto&& [scores_no_reranking, ids_no_reranking] = - index.query_infinite_ram(queries, k_nn, nprobe, 1.f); - auto num_equal_no_reranking = + index.query(queries, k_nn, nprobe, 1.f); + num_equal_no_reranking = check_single_vector_num_equal(ids_no_reranking, ids); CHECK(num_equal_no_reranking != k_nn); - CHECK(num_equal_no_reranking > 5); + CHECK(num_equal_no_reranking >= 5); } + CHECK(index.num_vectors() == ::num_vectors(training)); + if (vfs.is_dir(ivf_index_uri)) { vfs.remove_dir(ivf_index_uri); } @@ -790,39 +816,46 @@ TEST_CASE("k_factor", "[ivf_pq_index]") { // We can open the index by URI and query. { - auto index2 = ivf_pq_index(ctx, ivf_index_uri); auto queries = ColMajorMatrix{{{1, 1, 1, 1}}}; - // query_infinite_ram. + // infinite. { + auto index_infinite = + ivf_pq_index(ctx, ivf_index_uri); + CHECK(index_infinite.num_vectors() == num_vectors); + CHECK(index_infinite.upper_bound() == 0); auto&& [scores_reranking, ids_reranking] = - index2.query_infinite_ram(queries, k_nn, nprobe, k_factor); + index_infinite.query(queries, k_nn, nprobe, k_factor); CHECK( k_nn == check_single_vector_num_equal(ids_reranking, ids)); CHECK(scores_reranking(0, 0) == 0); auto&& [scores_no_reranking, ids_no_reranking] = - index2.query_infinite_ram(queries, k_nn, nprobe, 1.f); - auto num_equal_no_reranking = - check_single_vector_num_equal(ids_no_reranking, ids); - CHECK(num_equal_no_reranking != k_nn); - CHECK(num_equal_no_reranking > 2); + index_infinite.query(queries, k_nn, nprobe, 1.f); + CHECK( + num_equal_no_reranking == + check_single_vector_num_equal(ids_no_reranking, ids)); } - // query_finite_ram. + // finite. { + size_t upper_bound = 300; + auto index_finite = ivf_pq_index( + ctx, ivf_index_uri, IndexLoadStrategy::PQ_OOC, upper_bound); + CHECK(index_finite.num_vectors() == num_vectors); + CHECK(index_finite.upper_bound() == upper_bound); + auto&& [scores_reranking, ids_reranking] = - index2.query_finite_ram(queries, k_nn, nprobe, upper_bound, k_factor); + index_finite.query(queries, k_nn, nprobe, k_factor); CHECK( k_nn == check_single_vector_num_equal(ids_reranking, ids)); CHECK(scores_reranking(0, 0) == 0); auto&& [scores_no_reranking, ids_no_reranking] = - index2.query_finite_ram(queries, k_nn, nprobe, upper_bound, 1.f); - auto num_equal_no_reranking = - check_single_vector_num_equal(ids_no_reranking, ids); - CHECK(num_equal_no_reranking != k_nn); - CHECK(num_equal_no_reranking > 5); + index_finite.query(queries, k_nn, nprobe, 1.f); + CHECK( + num_equal_no_reranking == + check_single_vector_num_equal(ids_no_reranking, ids)); } } } @@ -875,29 +908,37 @@ TEST_CASE("ivf_pq_index query index written twice", "[ivf_pq_index]") { // Load the index and query. { - auto index = ivf_pq_index< + std::unique_ptr(ctx, index_uri); + partitioning_index_type_type>> + index; + SECTION("infinite") { + index = std::make_unique>(ctx, index_uri); + CHECK(index->upper_bound() == 0); + } + SECTION("finite") { + size_t upper_bound = 5; + index = std::make_unique>( + ctx, index_uri, IndexLoadStrategy::PQ_OOC, upper_bound); + CHECK(index->upper_bound() == upper_bound); + } auto queries = ColMajorMatrix{ {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}}}; - auto&& [scores_from_finite, ids_from_finite] = - index.query_finite_ram(queries, 1, n_list, 5); - CHECK(std::equal( - scores_from_finite.data(), - scores_from_finite.data() + 4, - std::vector{0, 0, 0, 0}.begin())); - CHECK(std::equal( - ids_from_finite.data(), - ids_from_finite.data() + 4, - std::vector{1, 2, 3, 4}.begin())); - - auto&& [scores, ids] = index.query_infinite_ram(queries, 1, n_list); + auto&& [scores, ids] = index->query(queries, 1, n_list, 5); CHECK(std::equal( scores.data(), scores.data() + 4, std::vector{0, 0, 0, 0}.begin())); + CHECK(std::equal( + ids.data(), ids.data() + 4, std::vector{1, 2, 3, 4}.begin())); } }