Merge branch 'branch-24.12' into improved_multi_cta_algo

rapidsai · Dec 4, 2024 · e1a71e9 · e1a71e9
2 parents 3965c6f + fbbca05
commit e1a71e9
Show file tree

Hide file tree

Showing 32 changed files with 2,418 additions and 426 deletions.
diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
@@ -24,7 +24,6 @@ dependencies:
 - gcc_linux-aarch64=11.*
 - glog>=0.6.0
 - h5py>=3.8.0
-- hnswlib=0.6.2
 - libcublas-dev=11.11.3.6
 - libcublas=11.11.3.6
 - libcurand-dev=10.3.0.86

diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
@@ -24,7 +24,6 @@ dependencies:
 - gcc_linux-64=11.*
 - glog>=0.6.0
 - h5py>=3.8.0
-- hnswlib=0.6.2
 - libcublas-dev=11.11.3.6
 - libcublas=11.11.3.6
 - libcurand-dev=10.3.0.86

diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
@@ -25,7 +25,6 @@ dependencies:
 - gcc_linux-aarch64=11.*
 - glog>=0.6.0
 - h5py>=3.8.0
-- hnswlib=0.6.2
 - libcublas-dev
 - libcurand-dev
 - libcusolver-dev

diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
@@ -25,7 +25,6 @@ dependencies:
 - gcc_linux-64=11.*
 - glog>=0.6.0
 - h5py>=3.8.0
-- hnswlib=0.6.2
 - libcublas-dev
 - libcurand-dev
 - libcusolver-dev

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -577,6 +577,7 @@ if(BUILD_SHARED_LIBS)
 
   if(BUILD_CAGRA_HNSWLIB)
     target_link_libraries(cuvs_objs PRIVATE hnswlib::hnswlib)
+    target_compile_definitions(cuvs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB)
     target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB)
   endif()
 

diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
@@ -225,9 +225,7 @@ if(CUVS_ANN_BENCH_USE_CUVS_CAGRA)
 endif()
 
 if(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB)
-  ConfigureAnnBench(
-    NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs hnswlib::hnswlib
-  )
+  ConfigureAnnBench(NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs)
 endif()
 
 if(CUVS_ANN_BENCH_USE_CUVS_MG)

diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
@@ -24,12 +24,35 @@
 
 namespace cuvs::bench {
 
+template <typename T, typename IdxT>  // Copies the optional HNSW build keys found in `conf` into param.hnsw_index_params.
+void parse_build_param(const nlohmann::json& conf,
+                       typename cuvs::bench::cuvs_cagra_hnswlib<T, IdxT>::build_param& param)
+{
+  if (conf.contains("hierarchy")) {  // optional key: when absent, hierarchy is left as it was
+    if (conf.at("hierarchy") == "none") {
+      param.hnsw_index_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::NONE;
+    } else if (conf.at("hierarchy") == "cpu") {
+      param.hnsw_index_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::CPU;
+    } else {  // only "none" and "cpu" are accepted; any other value is reported through THROW
+      THROW("Invalid value for hierarchy: %s", conf.at("hierarchy").get<std::string>().c_str());
+    }
+  }
+  if (conf.contains("ef_construction")) {  // optional key: field is only overwritten when present
+    param.hnsw_index_params.ef_construction = conf.at("ef_construction");
+  }
+  if (conf.contains("num_threads")) {  // optional key: field is only overwritten when present
+    param.hnsw_index_params.num_threads = conf.at("num_threads");
+  }
+}
+
 template <typename T, typename IdxT>
 void parse_search_param(const nlohmann::json& conf,
                         typename cuvs::bench::cuvs_cagra_hnswlib<T, IdxT>::search_param& param)
 {
-  param.ef = conf.at("ef");
-  if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
+  param.hnsw_search_param.ef = conf.at("ef");  // "ef" is read without a contains() guard, so the key is required
+  if (conf.contains("num_threads")) {  // optional key; this change renames it from "numThreads"
+    param.hnsw_search_param.num_threads = conf.at("num_threads");
+  }
 }
 
 template <typename T>
@@ -43,9 +66,10 @@ auto create_algo(const std::string& algo_name,
 
   if constexpr (std::is_same_v<T, float> or std::is_same_v<T, std::uint8_t>) {
     if (algo_name == "raft_cagra_hnswlib" || algo_name == "cuvs_cagra_hnswlib") {
-      typename cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>::build_param param;
-      parse_build_param<T, uint32_t>(conf, param);
-      a = std::make_unique<cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>>(metric, dim, param);
+      typename cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>::build_param bparam;
+      ::parse_build_param<T, uint32_t>(conf, bparam.cagra_build_param);
+      parse_build_param<T, uint32_t>(conf, bparam);
+      a = std::make_unique<cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>>(metric, dim, bparam);
     }
   }
 

diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
@@ -15,8 +15,8 @@
  */
 #pragma once
 
-#include "../hnswlib/hnswlib_wrapper.h"
 #include "cuvs_cagra_wrapper.h"
+#include <cuvs/neighbors/hnsw.hpp>
 
 #include <memory>
 
@@ -26,14 +26,20 @@ template <typename T, typename IdxT>
 class cuvs_cagra_hnswlib : public algo<T>, public algo_gpu {
  public:
   using search_param_base = typename algo<T>::search_param;
-  using build_param       = typename cuvs_cagra<T, IdxT>::build_param;
-  using search_param      = typename hnsw_lib<T>::search_param;
+
+  struct build_param {
+    typename cuvs_cagra<T, IdxT>::build_param cagra_build_param;
+    cuvs::neighbors::hnsw::index_params hnsw_index_params;
+  };
+
+  struct search_param : public search_param_base {
+    cuvs::neighbors::hnsw::search_params hnsw_search_param;
+  };
 
   cuvs_cagra_hnswlib(Metric metric, int dim, const build_param& param, int concurrent_searches = 1)
     : algo<T>(metric, dim),
-      cagra_build_{metric, dim, param, concurrent_searches},
-      // hnsw_lib param values don't matter since we don't build with hnsw_lib
-      hnswlib_search_{metric, dim, typename hnsw_lib<T>::build_param{50, 100}}
+      build_param_{param},
+      cagra_build_{metric, dim, param.cagra_build_param, concurrent_searches}
   {
   }
 
@@ -69,40 +75,67 @@ class cuvs_cagra_hnswlib : public algo<T>, public algo_gpu {
   }
 
  private:
+  raft::resources handle_{};
+  build_param build_param_;
+  search_param search_param_;
   cuvs_cagra<T, IdxT> cagra_build_;
-  hnsw_lib<T> hnswlib_search_;
+  std::shared_ptr<cuvs::neighbors::hnsw::index<T>> hnsw_index_;
 };
 
 template <typename T, typename IdxT>
 void cuvs_cagra_hnswlib<T, IdxT>::build(const T* dataset, size_t nrow)
 {
   cagra_build_.build(dataset, nrow);
+  auto* cagra_index      = cagra_build_.get_index();  // non-owning pointer to the index held by cagra_build_
+  auto host_dataset_view = raft::make_host_matrix_view<const T, int64_t>(dataset, nrow, this->dim_);  // nrow x dim_ view; assumes `dataset` is host memory -- TODO confirm
+  auto opt_dataset_view =
+    std::optional<raft::host_matrix_view<const T, int64_t>>(std::move(host_dataset_view));
+  hnsw_index_ = cuvs::neighbors::hnsw::from_cagra(  // derives the HNSW index from the CAGRA index and stores it for save()/search()
+    handle_, build_param_.hnsw_index_params, *cagra_index, opt_dataset_view);
 }
 
 template <typename T, typename IdxT>
 void cuvs_cagra_hnswlib<T, IdxT>::set_search_param(const search_param_base& param_)
 {
-  hnswlib_search_.set_search_param(param_);
+  search_param_ = dynamic_cast<const search_param&>(param_);  // stores a copy; a reference dynamic_cast throws std::bad_cast if param_ is not this class's search_param
 }
 
 template <typename T, typename IdxT>
 void cuvs_cagra_hnswlib<T, IdxT>::save(const std::string& file) const
 {
-  cagra_build_.save_to_hnswlib(file);
+  cuvs::neighbors::hnsw::serialize(handle_, file, *(hnsw_index_.get()));  // hnsw_index_ is dereferenced unchecked: build() or load() must have set it first
 }
 
 template <typename T, typename IdxT>
 void cuvs_cagra_hnswlib<T, IdxT>::load(const std::string& file)
 {
-  hnswlib_search_.load(file);
-  hnswlib_search_.set_base_layer_only();
+  cuvs::neighbors::hnsw::index<T>* idx = nullptr;  // out-parameter handed to deserialize() below
+  cuvs::neighbors::hnsw::deserialize(handle_,
+                                     build_param_.hnsw_index_params,
+                                     file,
+                                     this->dim_,
+                                     parse_metric_type(this->metric_),
+                                     &idx);
+  hnsw_index_ = std::shared_ptr<cuvs::neighbors::hnsw::index<T>>(idx);  // the shared_ptr takes ownership of the raw pointer
 }
 
 template <typename T, typename IdxT>
 void cuvs_cagra_hnswlib<T, IdxT>::search(
   const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
 {
-  hnswlib_search_.search(queries, batch_size, k, neighbors, distances);
+  // Only Latency mode is supported for now
+  auto queries_view =
+    raft::make_host_matrix_view<const T, int64_t>(queries, batch_size, this->dim_);  // batch_size x dim_ view over the caller's buffer
+  auto neighbors_view = raft::make_host_matrix_view<uint64_t, int64_t>(
+    reinterpret_cast<uint64_t*>(neighbors), batch_size, k);  // NOTE(review): assumes algo_base::index_type is layout-compatible with uint64_t -- confirm
+  auto distances_view = raft::make_host_matrix_view<float, int64_t>(distances, batch_size, k);  // batch_size x k view over the caller's buffer
+
+  cuvs::neighbors::hnsw::search(handle_,  // hnsw_index_ is dereferenced unchecked: build() or load() must have set it first
+                                search_param_.hnsw_search_param,
+                                *(hnsw_index_.get()),
+                                queries_view,
+                                neighbors_view,
+                                distances_view);
 }
 
 }  // namespace cuvs::bench
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
@@ -154,6 +154,8 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
   void save_to_hnswlib(const std::string& file) const;
   std::unique_ptr<algo<T>> copy() override;
 
+  auto get_index() const -> const cuvs::neighbors::cagra::index<T, IdxT>* { return index_.get(); }  // read-only, non-owning access to index_
+
  private:
   // handle_ must go first to make sure it dies last and all memory allocated in pool
   configured_raft_resources handle_{};

diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp
@@ -33,15 +33,15 @@ void parse_build_param(const nlohmann::json& conf,
 {
   param.ef_construction = conf.at("efConstruction");
   param.m               = conf.at("M");
-  if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
+  if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); }
 }
 
 template <typename T>
 void parse_search_param(const nlohmann::json& conf,
                         typename cuvs::bench::hnsw_lib<T>::search_param& param)
 {
   param.ef = conf.at("ef");
-  if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
+  if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); }  // optional key; this change renames it from "numThreads"
 }
 
 template <typename T, template <typename> class Algo>

diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake
@@ -22,8 +22,12 @@ endif()
 # Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with
 # clang)
 if(CMAKE_COMPILER_IS_GNUCXX)
-  list(APPEND CUVS_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
-  list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
+  list(APPEND CUVS_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations
+       -Wno-reorder
+  )
+  list(APPEND CUVS_CUDA_FLAGS
+       -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations,-Wno-reorder
+  )
 
   # set warnings as errors
   if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0)