diff --git a/build.sh b/build.sh index ea69f8d0d..eb360ff32 100755 --- a/build.sh +++ b/build.sh @@ -40,7 +40,7 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=" "$" + cuvs PUBLIC "$" + "$" ) if(NOT BUILD_CPU_ONLY) - # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. - target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass raft::Thrust) + # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target. + target_link_libraries(cuvs PUBLIC raft::raft) endif() -target_compile_features(raft INTERFACE cxx_std_17 $) -target_compile_options( - raft INTERFACE $<$:--expt-extended-lambda - --expt-relaxed-constexpr> -) - -set(RAFT_CUSOLVER_DEPENDENCY CUDA::cusolver${_ctk_static_suffix}) -set(RAFT_CUBLAS_DEPENDENCY CUDA::cublas${_ctk_static_suffix}) -set(RAFT_CURAND_DEPENDENCY CUDA::curand${_ctk_static_suffix}) -set(RAFT_CUSPARSE_DEPENDENCY CUDA::cusparse${_ctk_static_suffix}) - -set(RAFT_CTK_MATH_DEPENDENCIES ${RAFT_CUBLAS_DEPENDENCY} ${RAFT_CUSOLVER_DEPENDENCY} - ${RAFT_CUSPARSE_DEPENDENCY} ${RAFT_CURAND_DEPENDENCY} -) - # Endian detection include(TestBigEndian) test_big_endian(BIG_ENDIAN) if(BIG_ENDIAN) - target_compile_definitions(raft INTERFACE RAFT_SYSTEM_LITTLE_ENDIAN=0) + target_compile_definitions(cuvs PRIVATE CUVS_SYSTEM_LITTLE_ENDIAN=0) else() - target_compile_definitions(raft INTERFACE RAFT_SYSTEM_LITTLE_ENDIAN=1) + target_compile_definitions(cuvs PRIVATE CUVS_SYSTEM_LITTLE_ENDIAN=1) endif() -if(RAFT_COMPILE_LIBRARY) - file( - WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld" - [=[ +file( + WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld" + [=[ SECTIONS { .nvFatBinSegment : { *(.nvFatBinSegment) } .nv_fatbin : { *(.nv_fatbin) } } ]=] - ) -endif() +) # ################################################################################################## -# * NVTX support in raft ----------------------------------------------------- +# * NVTX support in cuvs 
----------------------------------------------------- -if(RAFT_NVTX) +if(CUVS_NVTX) # This enables NVTX within the project with no option to disable it downstream. - target_link_libraries(raft INTERFACE CUDA::nvToolsExt) - target_compile_definitions(raft INTERFACE NVTX_ENABLED) + target_link_libraries(cuvs PUBLIC CUDA::nvToolsExt) + target_compile_definitions(cuvs PUBLIC NVTX_ENABLED) else() # Allow enable NVTX downstream if not set here. This creates a new option at build/install time, # which is set by default to OFF, but can be enabled in the dependent project. get_property( nvtx_option_help_string - CACHE RAFT_NVTX + CACHE CUVS_NVTX PROPERTY HELPSTRING ) string( CONCAT nvtx_export_string - "option(RAFT_NVTX \"" + "option(CUVS_NVTX \"" ${nvtx_option_help_string} "\" OFF)" [=[ -target_link_libraries(raft::raft INTERFACE $<$:CUDA::nvToolsExt>) -target_compile_definitions(raft::raft INTERFACE $<$:NVTX_ENABLED>) +target_link_libraries(cuvs::cuvs INTERFACE $<$:CUDA::nvToolsExt>) +target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENABLED>) ]=] ) endif() -# ################################################################################################## -# * raft_compiled ------------------------------------------------------------ -add_library(raft_compiled INTERFACE) - -if(TARGET raft_compiled AND (NOT TARGET raft::compiled)) - add_library(raft::compiled ALIAS raft_compiled) -endif() - -set_target_properties(raft_compiled PROPERTIES EXPORT_NAME compiled) - -if(RAFT_COMPILE_LIBRARY) - add_library( - raft_objs OBJECT - src/core/logger.cpp - src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu - 
src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_rbf.cu - src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu - src/distance/distance.cu - src/distance/fused_l2_nn.cu - src/linalg/detail/coalesced_reduction.cu - 
src/matrix/detail/select_k_double_int64_t.cu - src/matrix/detail/select_k_double_uint32_t.cu - src/matrix/detail/select_k_float_int64_t.cu - src/matrix/detail/select_k_float_uint32_t.cu - src/matrix/detail/select_k_float_int32.cu - src/matrix/detail/select_k_half_int64_t.cu - src/matrix/detail/select_k_half_uint32_t.cu - src/neighbors/ball_cover.cu - src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu - src/neighbors/brute_force_knn_int64_t_float_int64_t.cu - src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu - src/neighbors/brute_force_knn_int_float_int.cu - src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu - src/neighbors/brute_force_knn_index_float.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu - 
src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu - src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu - src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu - src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu - src/neighbors/detail/ivf_flat_search.cu - src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu - src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu - src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu - src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu - src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu - src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu - src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu - src/neighbors/detail/refine_host_float_float.cpp - src/neighbors/detail/refine_host_int8_t_float.cpp - src/neighbors/detail/refine_host_uint8_t_float.cpp - src/neighbors/detail/selection_faiss_int32_t_float.cu - src/neighbors/detail/selection_faiss_int_double.cu - src/neighbors/detail/selection_faiss_long_float.cu - src/neighbors/detail/selection_faiss_size_t_double.cu - src/neighbors/detail/selection_faiss_size_t_float.cu - src/neighbors/detail/selection_faiss_uint32_t_float.cu - src/neighbors/detail/selection_faiss_int64_t_double.cu - src/neighbors/detail/selection_faiss_int64_t_half.cu - src/neighbors/detail/selection_faiss_uint32_t_double.cu - src/neighbors/detail/selection_faiss_uint32_t_half.cu - src/neighbors/ivf_flat_build_float_int64_t.cu - src/neighbors/ivf_flat_build_int8_t_int64_t.cu - 
src/neighbors/ivf_flat_build_uint8_t_int64_t.cu - src/neighbors/ivf_flat_extend_float_int64_t.cu - src/neighbors/ivf_flat_extend_int8_t_int64_t.cu - src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu - src/neighbors/ivf_flat_search_float_int64_t.cu - src/neighbors/ivf_flat_search_int8_t_int64_t.cu - src/neighbors/ivf_flat_search_uint8_t_int64_t.cu - src/neighbors/ivfpq_build_float_int64_t.cu - src/neighbors/ivfpq_build_int8_t_int64_t.cu - src/neighbors/ivfpq_build_uint8_t_int64_t.cu - src/neighbors/ivfpq_extend_float_int64_t.cu - src/neighbors/ivfpq_extend_int8_t_int64_t.cu - src/neighbors/ivfpq_extend_uint8_t_int64_t.cu - src/neighbors/ivfpq_search_float_int64_t.cu - src/neighbors/ivfpq_search_int8_t_int64_t.cu - src/neighbors/ivfpq_search_uint8_t_int64_t.cu - src/neighbors/refine_float_float.cu - src/neighbors/refine_int8_t_float.cu - src/neighbors/refine_uint8_t_float.cu - src/raft_runtime/cluster/cluster_cost.cuh - src/raft_runtime/cluster/cluster_cost_double.cu - src/raft_runtime/cluster/cluster_cost_float.cu - src/raft_runtime/cluster/kmeans_fit_double.cu - src/raft_runtime/cluster/kmeans_fit_float.cu - src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu - src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu - src/raft_runtime/cluster/update_centroids.cuh - src/raft_runtime/cluster/update_centroids_double.cu - src/raft_runtime/cluster/update_centroids_float.cu - src/raft_runtime/distance/fused_l2_min_arg.cu - src/raft_runtime/distance/pairwise_distance.cu - src/raft_runtime/matrix/select_k_float_int64_t.cu - src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu - src/raft_runtime/neighbors/cagra_build.cu - src/raft_runtime/neighbors/cagra_search.cu - src/raft_runtime/neighbors/cagra_serialize.cu - src/raft_runtime/neighbors/ivf_flat_build.cu - src/raft_runtime/neighbors/ivf_flat_search.cu - src/raft_runtime/neighbors/ivf_flat_serialize.cu - src/raft_runtime/neighbors/ivfpq_build.cu - src/raft_runtime/neighbors/ivfpq_deserialize.cu - 
src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu - src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu - src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu - src/raft_runtime/neighbors/ivfpq_serialize.cu - src/raft_runtime/neighbors/refine_d_int64_t_float.cu - src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu - src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu - src/raft_runtime/neighbors/refine_h_int64_t_float.cu - src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu - src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu - src/raft_runtime/random/rmat_rectangular_generator_int64_double.cu - src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu - src/raft_runtime/random/rmat_rectangular_generator_int_double.cu - src/raft_runtime/random/rmat_rectangular_generator_int_float.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu - src/spatial/knn/detail/fused_l2_knn_int32_t_float.cu - src/spatial/knn/detail/fused_l2_knn_int64_t_float.cu - src/spatial/knn/detail/fused_l2_knn_uint32_t_float.cu - src/util/memory_pool.cpp - ) - set_target_properties( - raft_objs - PROPERTIES CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED 
ON - POSITION_INDEPENDENT_CODE ON - ) - - target_compile_definitions(raft_objs PRIVATE "RAFT_EXPLICIT_INSTANTIATE_ONLY") - target_compile_options( - raft_objs PRIVATE "$<$:${RAFT_CXX_FLAGS}>" - "$<$:${RAFT_CUDA_FLAGS}>" - ) - - add_library(raft_lib SHARED $) - add_library(raft_lib_static STATIC $) - - set_target_properties( - raft_lib raft_lib_static - PROPERTIES OUTPUT_NAME raft - BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" - INTERFACE_POSITION_INDEPENDENT_CODE ON - ) - - foreach(target raft_lib raft_lib_static raft_objs) - target_link_libraries( - ${target} - PUBLIC raft::raft - ${RAFT_CTK_MATH_DEPENDENCIES} # TODO: Once `raft::resources` is used everywhere, this - # will just be cublas - $ - ) - - # So consumers know when using libraft.so/libraft.a - target_compile_definitions(${target} PUBLIC "RAFT_COMPILED") - # ensure CUDA symbols aren't relocated to the middle of the debug build binaries - target_link_options(${target} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") - endforeach() -endif() - -if(TARGET raft_lib AND (NOT TARGET raft::raft_lib)) - add_library(raft::raft_lib ALIAS raft_lib) -endif() - -target_link_libraries(raft_compiled INTERFACE raft::raft $) - -# ################################################################################################## -# * raft_compiled_static---------------------------------------------------------------------------- - -add_library(raft_compiled_static INTERFACE) - -if(TARGET raft_compiled_static AND (NOT TARGET raft::compiled_static)) - add_library(raft::compiled_static ALIAS raft_compiled_static) -endif() -set_target_properties(raft_compiled_static PROPERTIES EXPORT_NAME compiled_static) - -if(TARGET raft_lib_static AND (NOT TARGET raft::raft_lib_static)) - add_library(raft::raft_lib_static ALIAS raft_lib_static) -endif() - -target_link_libraries( - raft_compiled_static INTERFACE raft::raft $ +set_target_properties( + cuvs + PROPERTIES CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + 
CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON ) -# ################################################################################################## -# * raft_distributed ------------------------------------------------------------------------------- -add_library(raft_distributed INTERFACE) - -if(TARGET raft_distributed AND (NOT TARGET raft::distributed)) - add_library(raft::distributed ALIAS raft_distributed) -endif() - -set_target_properties(raft_distributed PROPERTIES EXPORT_NAME distributed) - -rapids_find_generate_module( - NCCL - HEADER_NAMES nccl.h - LIBRARY_NAMES nccl - BUILD_EXPORT_SET raft-distributed-exports - INSTALL_EXPORT_SET raft-distributed-exports +target_compile_options( + cuvs PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" ) - -rapids_export_package(BUILD ucx raft-distributed-exports) -rapids_export_package(INSTALL ucx raft-distributed-exports) -rapids_export_package(BUILD NCCL raft-distributed-exports) -rapids_export_package(INSTALL NCCL raft-distributed-exports) - -target_link_libraries(raft_distributed INTERFACE ucx::ucp NCCL::NCCL) +# ensure CUDA symbols aren't relocated to the middle of the debug build binaries +target_link_options(cuvs PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") # ################################################################################################## # * install targets----------------------------------------------------------- @@ -554,168 +439,45 @@ include(GNUInstallDirs) include(CPack) install( - TARGETS raft + TARGETS cuvs DESTINATION ${lib_dir} - COMPONENT raft - EXPORT raft-exports + COMPONENT cuvs + EXPORT cuvs-exports ) install( - TARGETS raft_compiled raft_compiled_static - DESTINATION ${lib_dir} - COMPONENT raft - EXPORT raft-compiled-exports -) - -if(TARGET raft_lib) - install( - TARGETS raft_lib - DESTINATION ${lib_dir} - COMPONENT compiled - EXPORT raft-compiled-lib-exports - ) - install( - TARGETS raft_lib_static - DESTINATION ${lib_dir} - COMPONENT compiled-static - 
EXPORT raft-compiled-static-lib-exports - ) - install( - DIRECTORY include/raft_runtime - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} - COMPONENT compiled - ) -endif() - -install( - TARGETS raft_distributed - DESTINATION ${lib_dir} - COMPONENT distributed - EXPORT raft-distributed-exports -) - -install( - DIRECTORY include/raft - COMPONENT raft + DIRECTORY include/cuvs + COMPONENT cuvs DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) -# Temporary install of raft.hpp while the file is removed install( - FILES include/raft.hpp - COMPONENT raft - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/raft -) - -install( - FILES ${CMAKE_CURRENT_BINARY_DIR}/include/raft/version_config.hpp - COMPONENT raft - DESTINATION include/raft -) - -# ################################################################################################## -# * install export ----------------------------------------------------------- -set(doc_string - [=[ -Provide targets for the RAFT: Reusable Accelerated Functions and Tools - -RAFT contains fundamental widely-used algorithms and primitives -for data science and machine learning. 
- -Optional Components: - - compiled - - compiled_static - - distributed - -Imported Targets: - - raft::raft - - raft::compiled brought in by the `compiled` optional component - - raft::compiled_static brought in by the `compiled_static` optional component - - raft::distributed brought in by the `distributed` optional component - -]=] -) - -set(code_string ${nvtx_export_string}) - -string( - APPEND - code_string - [=[ -if(NOT TARGET raft::Thrust) - thrust_create_target(raft::Thrust FROM_OPTIONS) -endif() -]=] -) - -string( - APPEND - code_string - [=[ -if(compiled IN_LIST raft_FIND_COMPONENTS) - enable_language(CUDA) -endif() -]=] -) -set(raft_components compiled distributed) -set(raft_export_sets raft-compiled-exports raft-distributed-exports) -if(TARGET raft_lib) - list(APPEND raft_components compiled compiled-static) - list(APPEND raft_export_sets raft-compiled-lib-exports raft-compiled-static-lib-exports) -endif() - -string( - APPEND - code_string - [=[ - option(RAFT_ENABLE_CUSOLVER_DEPENDENCY "Enable cusolver dependency" ON) - option(RAFT_ENABLE_CUBLAS_DEPENDENCY "Enable cublas dependency" ON) - option(RAFT_ENABLE_CURAND_DEPENDENCY "Enable curand dependency" ON) - option(RAFT_ENABLE_CUSPARSE_DEPENDENCY "Enable cusparse dependency" ON) - -mark_as_advanced(RAFT_ENABLE_CUSOLVER_DEPENDENCY) -mark_as_advanced(RAFT_ENABLE_CUBLAS_DEPENDENCY) -mark_as_advanced(RAFT_ENABLE_CURAND_DEPENDENCY) -mark_as_advanced(RAFT_ENABLE_CUSPARSE_DEPENDENCY) - -target_link_libraries(raft::raft INTERFACE - $<$:${RAFT_CUSOLVER_DEPENDENCY}> - $<$:${RAFT_CUBLAS_DEPENDENCY}> - $<$:${RAFT_CUSPARSE_DEPENDENCY}> - $<$:${RAFT_CURAND_DEPENDENCY}> -) -]=] + FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuvs/version_config.hpp + COMPONENT cuvs + DESTINATION include/cuvs ) # Use `rapids_export` for 22.04 as it will have COMPONENT support rapids_export( - INSTALL raft - EXPORT_SET raft-exports - COMPONENTS ${raft_components} - COMPONENTS_EXPORT_SET ${raft_export_sets} - GLOBAL_TARGETS raft compiled 
distributed - NAMESPACE raft:: - DOCUMENTATION doc_string - FINAL_CODE_BLOCK code_string + INSTALL cuvs + EXPORT_SET cuvs-exports + GLOBAL_TARGETS cuvs + NAMESPACE cuvs:: ) # ################################################################################################## # * build export ------------------------------------------------------------- rapids_export( - BUILD raft - EXPORT_SET raft-exports - COMPONENTS ${raft_components} - COMPONENTS_EXPORT_SET ${raft_export_sets} - GLOBAL_TARGETS raft compiled distributed - DOCUMENTATION doc_string - NAMESPACE raft:: - FINAL_CODE_BLOCK code_string + BUILD cuvs + EXPORT_SET cuvs-exports + GLOBAL_TARGETS cuvs + NAMESPACE cuvs:: ) # ################################################################################################## # * shared test/bench headers ------------------------------------------------ -if(BUILD_TESTS OR BUILD_PRIMS_BENCH) +if(BUILD_TESTS OR BUILD_MICRO_BENCH) include(internal/CMakeLists.txt) endif() @@ -729,8 +491,8 @@ endif() # ################################################################################################## # * build benchmark executable ----------------------------------------------- -if(BUILD_PRIMS_BENCH) - include(bench/prims/CMakeLists.txt) +if(BUILD_MICRO_BENCH) + include(bench/micro/CMakeLists.txt) endif() # ################################################################################################## diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 5919de07e..d1f4d281d 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -15,26 +15,22 @@ # ################################################################################################## # * benchmark options ------------------------------------------------------------------------------ -option(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT "Include faiss' ivf flat 
algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT - "Include faiss' cpu brute-force knn algorithm in benchmark" ON -) -option(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT "Include faiss' cpu brute-force algorithm in benchmark" ON) - -option(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algorithm in benchmark" - ON -) -option(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) -option(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB "Include raft's CAGRA in benchmark" ON) -option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_SINGLE_EXE - "Make a single executable with benchmark as shared library modules" OFF +option(CUVS_BENCH_USE_FAISS_GPU_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON) +option(CUVS_BENCH_USE_FAISS_GPU_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) +option(CUVS_BENCH_USE_FAISS_GPU_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON) +option(CUVS_BENCH_USE_FAISS_CPU_FLAT "Include faiss' cpu brute-force knn algorithm in benchmark" ON) +option(CUVS_BENCH_USE_FAISS_CPU_FLAT "Include faiss' cpu brute-force algorithm in benchmark" ON) + +option(CUVS_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algorithm in benchmark" ON) +option(CUVS_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm in benchmark" ON) +option(CUVS_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in benchmark" ON) +option(CUVS_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in 
benchmark" ON) +option(CUVS_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) +option(CUVS_BENCH_USE_RAFT_CAGRA_HNSWLIB "Include raft's CAGRA in benchmark" ON) +option(CUVS_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) +option(CUVS_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON) +option(CUVS_BENCH_SINGLE_EXE "Make a single executable with benchmark as shared library modules" + OFF ) # ################################################################################################## @@ -48,67 +44,67 @@ if(BUILD_CPU_ONLY) include(cmake/thirdparty/get_fmt.cmake) include(cmake/thirdparty/get_spdlog.cmake) - set(RAFT_FAISS_ENABLE_GPU OFF) - set(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT OFF) - set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT OFF) - set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ OFF) - set(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OFF) - set(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OFF) - set(RAFT_ANN_BENCH_USE_RAFT_CAGRA OFF) - set(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB OFF) - set(RAFT_ANN_BENCH_USE_GGNN OFF) + set(CUVS_FAISS_ENABLE_GPU OFF) + set(CUVS_BENCH_USE_FAISS_GPU_FLAT OFF) + set(CUVS_BENCH_USE_FAISS_GPU_IVF_FLAT OFF) + set(CUVS_BENCH_USE_FAISS_GPU_IVF_PQ OFF) + set(CUVS_BENCH_USE_RAFT_IVF_FLAT OFF) + set(CUVS_BENCH_USE_RAFT_IVF_PQ OFF) + set(CUVS_BENCH_USE_RAFT_CAGRA OFF) + set(CUVS_BENCH_USE_RAFT_CAGRA_HNSWLIB OFF) + set(CUVS_BENCH_USE_GGNN OFF) else() # Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled. 
# https://github.com/rapidsai/raft/issues/1627 if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0) - set(RAFT_FAISS_ENABLE_GPU OFF) - set(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT OFF) - set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT OFF) - set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ OFF) - set(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT OFF) - set(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ OFF) - set(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT OFF) + set(CUVS_FAISS_ENABLE_GPU OFF) + set(CUVS_BENCH_USE_FAISS_GPU_FLAT OFF) + set(CUVS_BENCH_USE_FAISS_GPU_IVF_FLAT OFF) + set(CUVS_BENCH_USE_FAISS_GPU_IVF_PQ OFF) + set(CUVS_BENCH_USE_FAISS_CPU_FLAT OFF) + set(CUVS_BENCH_USE_FAISS_CPU_IVF_PQ OFF) + set(CUVS_BENCH_USE_FAISS_CPU_IVF_FLAT OFF) else() - set(RAFT_FAISS_ENABLE_GPU ON) + set(CUVS_FAISS_ENABLE_GPU ON) endif() endif() -set(RAFT_ANN_BENCH_USE_FAISS OFF) -if(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT - OR RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ - OR RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT - OR RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT - OR RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ - OR RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT +set(CUVS_BENCH_USE_FAISS OFF) +if(CUVS_BENCH_USE_FAISS_GPU_FLAT + OR CUVS_BENCH_USE_FAISS_GPU_IVF_PQ + OR CUVS_BENCH_USE_FAISS_GPU_IVF_FLAT + OR CUVS_BENCH_USE_FAISS_CPU_FLAT + OR CUVS_BENCH_USE_FAISS_CPU_IVF_PQ + OR CUVS_BENCH_USE_FAISS_CPU_IVF_FLAT ) - set(RAFT_ANN_BENCH_USE_FAISS ON) + set(CUVS_BENCH_USE_FAISS ON) set(RAFT_USE_FAISS_STATIC ON) endif() -set(RAFT_ANN_BENCH_USE_RAFT OFF) -if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ - OR RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE - OR RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT - OR RAFT_ANN_BENCH_USE_RAFT_CAGRA - OR RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB +set(CUVS_BENCH_USE_RAFT OFF) +if(CUVS_BENCH_USE_RAFT_IVF_PQ + OR CUVS_BENCH_USE_RAFT_BRUTE_FORCE + OR CUVS_BENCH_USE_RAFT_IVF_FLAT + OR CUVS_BENCH_USE_RAFT_CAGRA + OR CUVS_BENCH_USE_RAFT_CAGRA_HNSWLIB ) - set(RAFT_ANN_BENCH_USE_RAFT ON) + set(CUVS_BENCH_USE_RAFT ON) endif() # 
################################################################################################## # * Fetch requirements ------------------------------------------------------------- -if(RAFT_ANN_BENCH_USE_HNSWLIB OR RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +if(CUVS_BENCH_USE_HNSWLIB OR CUVS_BENCH_USE_RAFT_CAGRA_HNSWLIB) include(cmake/thirdparty/get_hnswlib.cmake) endif() include(cmake/thirdparty/get_nlohmann_json.cmake) -if(RAFT_ANN_BENCH_USE_GGNN) +if(CUVS_BENCH_USE_GGNN) include(cmake/thirdparty/get_ggnn.cmake) endif() -if(RAFT_ANN_BENCH_USE_FAISS) +if(CUVS_BENCH_USE_FAISS) # We need to ensure that faiss has all the conda information. So we currently use the very ugly # hammer of `link_libraries` to ensure that all targets in this directory and the faiss directory # will have the conda includes/link dirs @@ -132,16 +128,16 @@ function(ConfigureAnnBench) ConfigureAnnBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) - set(BENCH_NAME ${ConfigureAnnBench_NAME}_ANN_BENCH) + set(BENCH_NAME ${ConfigureAnnBench_NAME}_CUVS_BENCH) - if(RAFT_ANN_BENCH_SINGLE_EXE) + if(CUVS_BENCH_SINGLE_EXE) add_library(${BENCH_NAME} SHARED ${ConfigureAnnBench_PATH}) string(TOLOWER ${BENCH_NAME} BENCH_LIB_NAME) set_target_properties(${BENCH_NAME} PROPERTIES OUTPUT_NAME ${BENCH_LIB_NAME}) - add_dependencies(${BENCH_NAME} ANN_BENCH) + add_dependencies(${BENCH_NAME} CUVS_BENCH) else() add_executable(${BENCH_NAME} ${ConfigureAnnBench_PATH}) - target_compile_definitions(${BENCH_NAME} PRIVATE ANN_BENCH_BUILD_MAIN) + target_compile_definitions(${BENCH_NAME} PRIVATE CUVS_BENCH_BUILD_MAIN) target_link_libraries(${BENCH_NAME} PRIVATE benchmark::benchmark) endif() @@ -180,11 +176,10 @@ function(ConfigureAnnBench) "$<$:${RAFT_CUDA_FLAGS}>" ) - if(RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME}) + if(CUVS_BENCH_USE_${ConfigureAnnBench_NAME}) target_compile_definitions( ${BENCH_NAME} - PUBLIC - RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME}=RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME} + PUBLIC 
CUVS_BENCH_USE_${ConfigureAnnBench_NAME}=CUVS_BENCH_USE_${ConfigureAnnBench_NAME} ) endif() @@ -204,56 +199,46 @@ endfunction() # ################################################################################################## # * Configure tests------------------------------------------------------------- -if(RAFT_ANN_BENCH_USE_HNSWLIB) +if(CUVS_BENCH_USE_HNSWLIB) ConfigureAnnBench( NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/_deps/hnswlib-src/hnswlib CXXFLAGS "${HNSW_CXX_FLAGS}" ) endif() -if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) +if(CUVS_BENCH_USE_RAFT_IVF_PQ) ConfigureAnnBench( - NAME - RAFT_IVF_PQ - PATH - bench/ann/src/raft/raft_benchmark.cu - $<$:bench/ann/src/raft/raft_ivf_pq.cu> - LINKS - raft::compiled + NAME RAFT_IVF_PQ PATH bench/ann/src/raft/raft_benchmark.cu + $<$:bench/ann/src/raft/raft_ivf_pq.cu> LINKS raft::compiled ) endif() -if(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT) +if(CUVS_BENCH_USE_RAFT_IVF_FLAT) ConfigureAnnBench( NAME RAFT_IVF_FLAT PATH bench/ann/src/raft/raft_benchmark.cu - $<$:bench/ann/src/raft/raft_ivf_flat.cu> + $<$:bench/ann/src/raft/raft_ivf_flat.cu> LINKS raft::compiled ) endif() -if(RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE) +if(CUVS_BENCH_USE_RAFT_BRUTE_FORCE) ConfigureAnnBench( NAME RAFT_BRUTE_FORCE PATH bench/ann/src/raft/raft_benchmark.cu LINKS raft::compiled ) endif() -if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) +if(CUVS_BENCH_USE_RAFT_CAGRA) ConfigureAnnBench( - NAME - RAFT_CAGRA - PATH - bench/ann/src/raft/raft_benchmark.cu - $<$:bench/ann/src/raft/raft_cagra.cu> - LINKS - raft::compiled + NAME RAFT_CAGRA PATH bench/ann/src/raft/raft_benchmark.cu + $<$:bench/ann/src/raft/raft_cagra.cu> LINKS raft::compiled ) endif() -if(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +if(CUVS_BENCH_USE_RAFT_CAGRA_HNSWLIB) ConfigureAnnBench( NAME RAFT_CAGRA_HNSWLIB @@ -275,48 +260,48 @@ endif() message("RAFT_FAISS_TARGETS: ${RAFT_FAISS_TARGETS}") message("CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}") 
-if(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT) +if(CUVS_BENCH_USE_FAISS_CPU_FLAT) ConfigureAnnBench( NAME FAISS_CPU_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS ${RAFT_FAISS_TARGETS} ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT) +if(CUVS_BENCH_USE_FAISS_CPU_IVF_FLAT) ConfigureAnnBench( NAME FAISS_CPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS ${RAFT_FAISS_TARGETS} ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ) +if(CUVS_BENCH_USE_FAISS_CPU_IVF_PQ) ConfigureAnnBench( NAME FAISS_CPU_IVF_PQ PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS ${RAFT_FAISS_TARGETS} ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT) +if(CUVS_BENCH_USE_FAISS_GPU_IVF_FLAT) ConfigureAnnBench( NAME FAISS_GPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_gpu_benchmark.cu LINKS ${RAFT_FAISS_TARGETS} ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ) +if(CUVS_BENCH_USE_FAISS_GPU_IVF_PQ) ConfigureAnnBench( NAME FAISS_GPU_IVF_PQ PATH bench/ann/src/faiss/faiss_gpu_benchmark.cu LINKS ${RAFT_FAISS_TARGETS} ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT) +if(CUVS_BENCH_USE_FAISS_GPU_FLAT) ConfigureAnnBench( NAME FAISS_GPU_FLAT PATH bench/ann/src/faiss/faiss_gpu_benchmark.cu LINKS ${RAFT_FAISS_TARGETS} ) endif() -if(RAFT_ANN_BENCH_USE_GGNN) +if(CUVS_BENCH_USE_GGNN) include(cmake/thirdparty/get_glog.cmake) ConfigureAnnBench( NAME GGNN PATH bench/ann/src/ggnn/ggnn_benchmark.cu INCLUDES @@ -325,11 +310,12 @@ if(RAFT_ANN_BENCH_USE_GGNN) endif() # ################################################################################################## -# * Dynamically-loading ANN_BENCH executable ------------------------------------------------------- -if(RAFT_ANN_BENCH_SINGLE_EXE) - add_executable(ANN_BENCH bench/ann/src/common/benchmark.cpp) +# * Dynamically-loading CUVS_BENCH executable +# ------------------------------------------------------- +if(CUVS_BENCH_SINGLE_EXE) + add_executable(CUVS_BENCH bench/ann/src/common/benchmark.cpp) - # Build and link static version of 
the GBench to keep ANN_BENCH self-contained. + # Build and link static version of the GBench to keep CUVS_BENCH self-contained. get_target_property(TMP_PROP benchmark::benchmark SOURCES) add_library(benchmark_static STATIC ${TMP_PROP}) get_target_property(TMP_PROP benchmark::benchmark INCLUDE_DIRECTORIES) @@ -337,14 +323,14 @@ if(RAFT_ANN_BENCH_SINGLE_EXE) get_target_property(TMP_PROP benchmark::benchmark LINK_LIBRARIES) target_link_libraries(benchmark_static PUBLIC ${TMP_PROP}) - target_include_directories(ANN_BENCH PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + target_include_directories(CUVS_BENCH PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) target_link_libraries( - ANN_BENCH PRIVATE nlohmann_json::nlohmann_json benchmark_static dl -static-libgcc - -static-libstdc++ CUDA::nvtx3 + CUVS_BENCH PRIVATE nlohmann_json::nlohmann_json benchmark_static dl -static-libgcc + -static-libstdc++ CUDA::nvtx3 ) set_target_properties( - ANN_BENCH + CUVS_BENCH PROPERTIES # set target compile options CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON @@ -358,21 +344,21 @@ if(RAFT_ANN_BENCH_SINGLE_EXE) # Disable NVTX when the nvtx3 headers are missing set(_CMAKE_REQUIRED_INCLUDES_ORIG ${CMAKE_REQUIRED_INCLUDES}) - get_target_property(CMAKE_REQUIRED_INCLUDES ANN_BENCH INCLUDE_DIRECTORIES) + get_target_property(CMAKE_REQUIRED_INCLUDES CUVS_BENCH INCLUDE_DIRECTORIES) CHECK_INCLUDE_FILE_CXX(nvtx3/nvToolsExt.h NVTX3_HEADERS_FOUND) set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_ORIG}) target_compile_definitions( - ANN_BENCH + CUVS_BENCH PRIVATE - $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH} + $<$:CUVS_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH} "> - $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> + $<$:CUVS_BENCH_NVTX3_HEADERS_FOUND> ) - target_link_options(ANN_BENCH PRIVATE -export-dynamic) + target_link_options(CUVS_BENCH PRIVATE 
-export-dynamic) install( - TARGETS ANN_BENCH + TARGETS CUVS_BENCH COMPONENT ann_bench DESTINATION bin/ann EXCLUDE_FROM_ALL diff --git a/cpp/bench/ann/src/common/cuda_stub.hpp b/cpp/bench/ann/src/common/cuda_stub.hpp index 6e3b63cd3..d4d02d5f3 100644 --- a/cpp/bench/ann/src/common/cuda_stub.hpp +++ b/cpp/bench/ann/src/common/cuda_stub.hpp @@ -19,10 +19,10 @@ The content of this header is governed by two preprocessor definitions: - BUILD_CPU_ONLY - whether none of the CUDA functions are used. - - ANN_BENCH_LINK_CUDART - dynamically link against this string if defined. + - CUVS_BENCH_LINK_CUDART - dynamically link against this string if defined. ______________________________________________________________________________ -|BUILD_CPU_ONLY | ANN_BENCH_LINK_CUDART | cudart | cuda_runtime_api.h | +|BUILD_CPU_ONLY | CUVS_BENCH_LINK_CUDART | cudart | cuda_runtime_api.h | | | | found | needed | included | |---------|-----------------------|-----------|---------|--------------------| | ON | | false | false | NO | @@ -34,7 +34,7 @@ ______________________________________________________________________________ #ifndef BUILD_CPU_ONLY #include -#ifdef ANN_BENCH_LINK_CUDART +#ifdef CUVS_BENCH_LINK_CUDART #include #include #endif @@ -49,11 +49,11 @@ struct cuda_lib_handle { void* handle{nullptr}; explicit cuda_lib_handle() { -#ifdef ANN_BENCH_LINK_CUDART +#ifdef CUVS_BENCH_LINK_CUDART constexpr int kFlags = RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE; // The full name of the linked cudart library 'cudart.so.MAJOR.MINOR.PATCH' - char libname[] = ANN_BENCH_LINK_CUDART; // NOLINT - handle = dlopen(ANN_BENCH_LINK_CUDART, kFlags); + char libname[] = CUVS_BENCH_LINK_CUDART; // NOLINT + handle = dlopen(CUVS_BENCH_LINK_CUDART, kFlags); if (handle != nullptr) { return; } // try strip the PATCH auto p = strrchr(libname, '.'); @@ -78,7 +78,7 @@ struct cuda_lib_handle { } ~cuda_lib_handle() noexcept { -#ifdef ANN_BENCH_LINK_CUDART +#ifdef CUVS_BENCH_LINK_CUDART if (handle != 
nullptr) { dlclose(handle); } #endif } @@ -86,7 +86,7 @@ struct cuda_lib_handle { template auto sym(const char* name) -> Symbol { -#ifdef ANN_BENCH_LINK_CUDART +#ifdef CUVS_BENCH_LINK_CUDART return reinterpret_cast(dlsym(handle, name)); #else return nullptr; @@ -108,7 +108,7 @@ struct cuda_lib_handle { { #if defined(BUILD_CPU_ONLY) return false; -#elif defined(ANN_BENCH_LINK_CUDART) +#elif defined(CUVS_BENCH_LINK_CUDART) return handle != nullptr; #else return true; @@ -118,7 +118,7 @@ struct cuda_lib_handle { static inline cuda_lib_handle cudart{}; -#ifdef ANN_BENCH_LINK_CUDART +#ifdef CUVS_BENCH_LINK_CUDART namespace stub { [[gnu::weak, gnu::noinline]] cudaError_t cudaMemcpy(void* dst, diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index e9e4a9ad2..544aa789a 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -18,7 +18,7 @@ #include "ann_types.hpp" #include "cuda_stub.hpp" // cuda-related utils -#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND +#ifdef CUVS_BENCH_NVTX3_HEADERS_FOUND #include #endif @@ -182,7 +182,7 @@ inline auto cuda_info() } struct nvtx_case { -#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND +#ifdef CUVS_BENCH_NVTX3_HEADERS_FOUND private: std::string case_name_; std::array iter_name_{0}; @@ -194,7 +194,7 @@ struct nvtx_case { public: struct nvtx_lap { -#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND +#ifdef CUVS_BENCH_NVTX3_HEADERS_FOUND private: nvtxDomainHandle_t domain_; @@ -208,7 +208,7 @@ struct nvtx_case { #endif }; -#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND +#ifdef CUVS_BENCH_NVTX3_HEADERS_FOUND explicit nvtx_case(std::string case_name) : case_name_(std::move(case_name)), domain_(nvtxDomainCreateA("ANN benchmark")) { @@ -237,7 +237,7 @@ struct nvtx_case { [[nodiscard]] auto lap() -> nvtx_case::nvtx_lap { -#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND +#ifdef CUVS_BENCH_NVTX3_HEADERS_FOUND auto i = iteration_++; uint32_t c = (i % 5); uint32_t r = 150 + c * 20; diff --git 
a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp index 97d1bbf30..82122e8fc 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp +++ b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp @@ -156,7 +156,7 @@ REGISTER_ALGO_INSTANCE(float); REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); -#ifdef ANN_BENCH_BUILD_MAIN +#ifdef CUVS_BENCH_BUILD_MAIN #include "../common/benchmark.hpp" int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } #endif diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu index 8b04ba198..77ad2c102 100644 --- a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu +++ b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu @@ -156,7 +156,7 @@ REGISTER_ALGO_INSTANCE(float); REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); -#ifdef ANN_BENCH_BUILD_MAIN +#ifdef CUVS_BENCH_BUILD_MAIN #include "../common/benchmark.hpp" int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } #endif diff --git a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu index 3b2e97062..01fd1359c 100644 --- a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu +++ b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu @@ -121,7 +121,7 @@ REGISTER_ALGO_INSTANCE(float); REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); -#ifdef ANN_BENCH_BUILD_MAIN +#ifdef CUVS_BENCH_BUILD_MAIN #include "../common/benchmark.hpp" int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } #endif diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp index 1af19a22c..8e87c64d6 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp +++ b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp @@ -114,7 +114,7 @@ REGISTER_ALGO_INSTANCE(float); REGISTER_ALGO_INSTANCE(std::int8_t); 
REGISTER_ALGO_INSTANCE(std::uint8_t); -#ifdef ANN_BENCH_BUILD_MAIN +#ifdef CUVS_BENCH_BUILD_MAIN #include "../common/benchmark.hpp" int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } #endif diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index 1eb0e53cc..a5d1a75e3 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -20,34 +20,34 @@ #include #undef WARP_SIZE -#ifdef RAFT_ANN_BENCH_USE_RAFT_BFKNN +#ifdef CUVS_BENCH_USE_RAFT_BFKNN #include "raft_wrapper.h" #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT +#ifdef CUVS_BENCH_USE_RAFT_IVF_FLAT #include "raft_ivf_flat_wrapper.h" extern template class raft::bench::ann::RaftIvfFlatGpu; extern template class raft::bench::ann::RaftIvfFlatGpu; extern template class raft::bench::ann::RaftIvfFlatGpu; #endif -#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || \ - defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +#if defined(CUVS_BENCH_USE_RAFT_IVF_PQ) || defined(CUVS_BENCH_USE_RAFT_CAGRA) || \ + defined(CUVS_BENCH_USE_RAFT_CAGRA_HNSWLIB) #include "raft_ivf_pq_wrapper.h" #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ +#ifdef CUVS_BENCH_USE_RAFT_IVF_PQ extern template class raft::bench::ann::RaftIvfPQ; extern template class raft::bench::ann::RaftIvfPQ; extern template class raft::bench::ann::RaftIvfPQ; #endif -#if defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +#if defined(CUVS_BENCH_USE_RAFT_CAGRA) || defined(CUVS_BENCH_USE_RAFT_CAGRA_HNSWLIB) #include "raft_cagra_wrapper.h" #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA +#ifdef CUVS_BENCH_USE_RAFT_CAGRA extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT +#ifdef 
CUVS_BENCH_USE_RAFT_IVF_FLAT template void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::RaftIvfFlatGpu::BuildParam& param) @@ -65,8 +65,8 @@ void parse_search_param(const nlohmann::json& conf, } #endif -#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || \ - defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +#if defined(CUVS_BENCH_USE_RAFT_IVF_PQ) || defined(CUVS_BENCH_USE_RAFT_CAGRA) || \ + defined(CUVS_BENCH_USE_RAFT_CAGRA_HNSWLIB) template void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::RaftIvfPQ::BuildParam& param) @@ -132,7 +132,7 @@ void parse_search_param(const nlohmann::json& conf, } #endif -#if defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +#if defined(CUVS_BENCH_USE_RAFT_CAGRA) || defined(CUVS_BENCH_USE_RAFT_CAGRA_HNSWLIB) template void parse_build_param(const nlohmann::json& conf, raft::neighbors::experimental::nn_descent::index_params& param) diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index f8c65a2d6..5489e7a86 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -47,28 +47,28 @@ std::unique_ptr> create_algo(const std::string& algo, std::unique_ptr> ann; if constexpr (std::is_same_v) { -#ifdef RAFT_ANN_BENCH_USE_RAFT_BFKNN +#ifdef CUVS_BENCH_USE_RAFT_BFKNN if (algo == "raft_bfknn") { ann = std::make_unique>(metric, dim); } #endif } if constexpr (std::is_same_v) {} -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT +#ifdef CUVS_BENCH_USE_RAFT_IVF_FLAT if (algo == "raft_ivf_flat") { typename raft::bench::ann::RaftIvfFlatGpu::BuildParam param; parse_build_param(conf, param); ann = std::make_unique>(metric, dim, param); } #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ +#ifdef CUVS_BENCH_USE_RAFT_IVF_PQ if (algo == "raft_ivf_pq") { typename raft::bench::ann::RaftIvfPQ::BuildParam param; parse_build_param(conf, param); ann = 
std::make_unique>(metric, dim, param); } #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA +#ifdef CUVS_BENCH_USE_RAFT_CAGRA if (algo == "raft_cagra") { typename raft::bench::ann::RaftCagra::BuildParam param; parse_build_param(conf, param); @@ -85,13 +85,13 @@ template std::unique_ptr::AnnSearchParam> create_search_param( const std::string& algo, const nlohmann::json& conf) { -#ifdef RAFT_ANN_BENCH_USE_RAFT_BFKNN +#ifdef CUVS_BENCH_USE_RAFT_BFKNN if (algo == "raft_brute_force") { auto param = std::make_unique::AnnSearchParam>(); return param; } #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT +#ifdef CUVS_BENCH_USE_RAFT_IVF_FLAT if (algo == "raft_ivf_flat") { auto param = std::make_unique::SearchParam>(); @@ -99,14 +99,14 @@ std::unique_ptr::AnnSearchParam> create_search return param; } #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ +#ifdef CUVS_BENCH_USE_RAFT_IVF_PQ if (algo == "raft_ivf_pq") { auto param = std::make_unique::SearchParam>(); parse_search_param(conf, *param); return param; } #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA +#ifdef CUVS_BENCH_USE_RAFT_CAGRA if (algo == "raft_cagra") { auto param = std::make_unique::SearchParam>(); parse_search_param(conf, *param); @@ -124,7 +124,7 @@ REGISTER_ALGO_INSTANCE(float); REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); -#ifdef ANN_BENCH_BUILD_MAIN +#ifdef CUVS_BENCH_BUILD_MAIN #include "../common/benchmark.hpp" int main(int argc, char** argv) { diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu index ce6fa255b..7b5ff0eba 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu @@ -79,7 +79,7 @@ REGISTER_ALGO_INSTANCE(float); REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); -#ifdef ANN_BENCH_BUILD_MAIN +#ifdef CUVS_BENCH_BUILD_MAIN #include "../common/benchmark.hpp" int main(int argc, char** argv) { diff --git a/cpp/bench/micro/CMakeLists.txt 
b/cpp/bench/micro/CMakeLists.txt new file mode 100644 index 000000000..ca22baaf1 --- /dev/null +++ b/cpp/bench/micro/CMakeLists.txt @@ -0,0 +1,121 @@ +# ============================================================================= +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# ################################################################################################## +# * compiler function ----------------------------------------------------------------------------- + +function(ConfigureBench) + + set(options OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY) + set(oneValueArgs NAME) + set(multiValueArgs PATH TARGETS CONFIGURATIONS) + + cmake_parse_arguments(ConfigureBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(BENCH_NAME ${ConfigureBench_NAME}) + + add_executable(${BENCH_NAME} ${ConfigureBench_PATH}) + + target_link_libraries( + ${BENCH_NAME} + PRIVATE raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} benchmark::benchmark Threads::Threads + $ $ + ) + + set_target_properties( + ${BENCH_NAME} + PROPERTIES # set target compile options + INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + ) + + target_compile_options( + ${BENCH_NAME} PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + 
"$<$:${CUVS_CUDA_FLAGS}>" + ) + + if(ConfigureTest_EXPLICIT_INSTANTIATE_ONLY) + target_compile_definitions(${BENCH_NAME} PRIVATE "CUVS_EXPLICIT_INSTANTIATE_ONLY") + endif() + + target_include_directories( + ${BENCH_NAME} PUBLIC "$" + ) + + install( + TARGETS ${BENCH_NAME} + COMPONENT testing + DESTINATION bin/gbench/micro/libcuvs + EXCLUDE_FROM_ALL + ) + +endfunction() + +if(BUILD_PRIMS_BENCH) + ConfigureBench( + NAME CLUSTER_BENCH PATH bench/micro/cluster/kmeans_balanced.cu bench/micro/cluster/kmeans.cu + bench/micro/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureBench( + NAME TUNE_DISTANCE PATH bench/micro/distance/tune_pairwise/kernel.cu + bench/micro/distance/tune_pairwise/bench.cu bench/micro/main.cpp + ) + + ConfigureBench( + NAME + DISTANCE_BENCH + PATH + bench/micro/distance/distance_cosine.cu + bench/micro/distance/distance_exp_l2.cu + bench/micro/distance/distance_l1.cu + bench/micro/distance/distance_unexp_l2.cu + bench/micro/distance/fused_l2_nn.cu + bench/micro/distance/masked_nn.cu + bench/micro/distance/kernels.cu + bench/micro/main.cpp + OPTIONAL + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureBench( + NAME + NEIGHBORS_BENCH + PATH + bench/micro/neighbors/knn/brute_force_float_int64_t.cu + bench/micro/neighbors/knn/brute_force_float_uint32_t.cu + bench/micro/neighbors/knn/cagra_float_uint32_t.cu + bench/micro/neighbors/knn/ivf_flat_filter_float_int64_t.cu + bench/micro/neighbors/knn/ivf_flat_float_int64_t.cu + bench/micro/neighbors/knn/ivf_flat_int8_t_int64_t.cu + bench/micro/neighbors/knn/ivf_flat_uint8_t_int64_t.cu + bench/micro/neighbors/knn/ivf_pq_float_int64_t.cu + bench/micro/neighbors/knn/ivf_pq_filter_float_int64_t.cu + bench/micro/neighbors/knn/ivf_pq_int8_t_int64_t.cu + bench/micro/neighbors/knn/ivf_pq_uint8_t_int64_t.cu + bench/micro/neighbors/refine_float_int64_t.cu + bench/micro/neighbors/refine_uint8_t_int64_t.cu + bench/micro/main.cpp + OPTIONAL + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + +endif() diff --git 
a/cpp/bench/prims/cluster/kmeans.cu b/cpp/bench/micro/cluster/kmeans.cu similarity index 100% rename from cpp/bench/prims/cluster/kmeans.cu rename to cpp/bench/micro/cluster/kmeans.cu diff --git a/cpp/bench/prims/cluster/kmeans_balanced.cu b/cpp/bench/micro/cluster/kmeans_balanced.cu similarity index 100% rename from cpp/bench/prims/cluster/kmeans_balanced.cu rename to cpp/bench/micro/cluster/kmeans_balanced.cu diff --git a/cpp/bench/prims/common/benchmark.hpp b/cpp/bench/micro/common/benchmark.hpp similarity index 100% rename from cpp/bench/prims/common/benchmark.hpp rename to cpp/bench/micro/common/benchmark.hpp diff --git a/cpp/bench/prims/distance/distance_common.cuh b/cpp/bench/micro/distance/distance_common.cuh similarity index 100% rename from cpp/bench/prims/distance/distance_common.cuh rename to cpp/bench/micro/distance/distance_common.cuh diff --git a/cpp/bench/prims/distance/distance_cosine.cu b/cpp/bench/micro/distance/distance_cosine.cu similarity index 100% rename from cpp/bench/prims/distance/distance_cosine.cu rename to cpp/bench/micro/distance/distance_cosine.cu diff --git a/cpp/bench/prims/distance/distance_exp_l2.cu b/cpp/bench/micro/distance/distance_exp_l2.cu similarity index 100% rename from cpp/bench/prims/distance/distance_exp_l2.cu rename to cpp/bench/micro/distance/distance_exp_l2.cu diff --git a/cpp/bench/prims/distance/distance_l1.cu b/cpp/bench/micro/distance/distance_l1.cu similarity index 100% rename from cpp/bench/prims/distance/distance_l1.cu rename to cpp/bench/micro/distance/distance_l1.cu diff --git a/cpp/bench/prims/distance/distance_unexp_l2.cu b/cpp/bench/micro/distance/distance_unexp_l2.cu similarity index 100% rename from cpp/bench/prims/distance/distance_unexp_l2.cu rename to cpp/bench/micro/distance/distance_unexp_l2.cu diff --git a/cpp/bench/prims/distance/fused_l2_nn.cu b/cpp/bench/micro/distance/fused_l2_nn.cu similarity index 100% rename from cpp/bench/prims/distance/fused_l2_nn.cu rename to 
cpp/bench/micro/distance/fused_l2_nn.cu diff --git a/cpp/bench/prims/distance/kernels.cu b/cpp/bench/micro/distance/kernels.cu similarity index 100% rename from cpp/bench/prims/distance/kernels.cu rename to cpp/bench/micro/distance/kernels.cu diff --git a/cpp/bench/prims/distance/masked_nn.cu b/cpp/bench/micro/distance/masked_nn.cu similarity index 100% rename from cpp/bench/prims/distance/masked_nn.cu rename to cpp/bench/micro/distance/masked_nn.cu diff --git a/cpp/bench/prims/distance/tune_pairwise/bench.cu b/cpp/bench/micro/distance/tune_pairwise/bench.cu similarity index 100% rename from cpp/bench/prims/distance/tune_pairwise/bench.cu rename to cpp/bench/micro/distance/tune_pairwise/bench.cu diff --git a/cpp/bench/prims/distance/tune_pairwise/kernel.cu b/cpp/bench/micro/distance/tune_pairwise/kernel.cu similarity index 100% rename from cpp/bench/prims/distance/tune_pairwise/kernel.cu rename to cpp/bench/micro/distance/tune_pairwise/kernel.cu diff --git a/cpp/bench/prims/distance/tune_pairwise/kernel.cuh b/cpp/bench/micro/distance/tune_pairwise/kernel.cuh similarity index 100% rename from cpp/bench/prims/distance/tune_pairwise/kernel.cuh rename to cpp/bench/micro/distance/tune_pairwise/kernel.cuh diff --git a/cpp/bench/prims/main.cpp b/cpp/bench/micro/main.cpp similarity index 100% rename from cpp/bench/prims/main.cpp rename to cpp/bench/micro/main.cpp diff --git a/cpp/bench/prims/matrix/main.cpp b/cpp/bench/micro/matrix/main.cpp similarity index 100% rename from cpp/bench/prims/matrix/main.cpp rename to cpp/bench/micro/matrix/main.cpp diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/micro/matrix/select_k.cu similarity index 100% rename from cpp/bench/prims/matrix/select_k.cu rename to cpp/bench/micro/matrix/select_k.cu diff --git a/cpp/bench/prims/neighbors/cagra_bench.cuh b/cpp/bench/micro/neighbors/cagra_bench.cuh similarity index 100% rename from cpp/bench/prims/neighbors/cagra_bench.cuh rename to cpp/bench/micro/neighbors/cagra_bench.cuh diff 
--git a/cpp/bench/prims/neighbors/knn.cuh b/cpp/bench/micro/neighbors/knn.cuh similarity index 100% rename from cpp/bench/prims/neighbors/knn.cuh rename to cpp/bench/micro/neighbors/knn.cuh diff --git a/cpp/bench/prims/neighbors/knn/brute_force_float_int64_t.cu b/cpp/bench/micro/neighbors/knn/brute_force_float_int64_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/knn/brute_force_float_int64_t.cu rename to cpp/bench/micro/neighbors/knn/brute_force_float_int64_t.cu diff --git a/cpp/bench/prims/neighbors/knn/brute_force_float_uint32_t.cu b/cpp/bench/micro/neighbors/knn/brute_force_float_uint32_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/knn/brute_force_float_uint32_t.cu rename to cpp/bench/micro/neighbors/knn/brute_force_float_uint32_t.cu diff --git a/cpp/bench/prims/neighbors/knn/cagra_float_uint32_t.cu b/cpp/bench/micro/neighbors/knn/cagra_float_uint32_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/knn/cagra_float_uint32_t.cu rename to cpp/bench/micro/neighbors/knn/cagra_float_uint32_t.cu diff --git a/cpp/bench/prims/neighbors/knn/ivf_flat_filter_float_int64_t.cu b/cpp/bench/micro/neighbors/knn/ivf_flat_filter_float_int64_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/knn/ivf_flat_filter_float_int64_t.cu rename to cpp/bench/micro/neighbors/knn/ivf_flat_filter_float_int64_t.cu diff --git a/cpp/bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu b/cpp/bench/micro/neighbors/knn/ivf_flat_float_int64_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu rename to cpp/bench/micro/neighbors/knn/ivf_flat_float_int64_t.cu diff --git a/cpp/bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu b/cpp/bench/micro/neighbors/knn/ivf_flat_int8_t_int64_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu rename to cpp/bench/micro/neighbors/knn/ivf_flat_int8_t_int64_t.cu diff --git 
a/cpp/bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu b/cpp/bench/micro/neighbors/knn/ivf_flat_uint8_t_int64_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu rename to cpp/bench/micro/neighbors/knn/ivf_flat_uint8_t_int64_t.cu diff --git a/cpp/bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu b/cpp/bench/micro/neighbors/knn/ivf_pq_filter_float_int64_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu rename to cpp/bench/micro/neighbors/knn/ivf_pq_filter_float_int64_t.cu diff --git a/cpp/bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu b/cpp/bench/micro/neighbors/knn/ivf_pq_float_int64_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu rename to cpp/bench/micro/neighbors/knn/ivf_pq_float_int64_t.cu diff --git a/cpp/bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu b/cpp/bench/micro/neighbors/knn/ivf_pq_int8_t_int64_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu rename to cpp/bench/micro/neighbors/knn/ivf_pq_int8_t_int64_t.cu diff --git a/cpp/bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu b/cpp/bench/micro/neighbors/knn/ivf_pq_uint8_t_int64_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu rename to cpp/bench/micro/neighbors/knn/ivf_pq_uint8_t_int64_t.cu diff --git a/cpp/bench/prims/neighbors/refine.cuh b/cpp/bench/micro/neighbors/refine.cuh similarity index 100% rename from cpp/bench/prims/neighbors/refine.cuh rename to cpp/bench/micro/neighbors/refine.cuh diff --git a/cpp/bench/prims/neighbors/refine_float_int64_t.cu b/cpp/bench/micro/neighbors/refine_float_int64_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/refine_float_int64_t.cu rename to cpp/bench/micro/neighbors/refine_float_int64_t.cu diff --git a/cpp/bench/prims/neighbors/refine_uint8_t_int64_t.cu 
b/cpp/bench/micro/neighbors/refine_uint8_t_int64_t.cu similarity index 100% rename from cpp/bench/prims/neighbors/refine_uint8_t_int64_t.cu rename to cpp/bench/micro/neighbors/refine_uint8_t_int64_t.cu diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt deleted file mode 100644 index fe58453d0..000000000 --- a/cpp/bench/prims/CMakeLists.txt +++ /dev/null @@ -1,166 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. 
-# ============================================================================= - -# ################################################################################################## -# * compiler function ----------------------------------------------------------------------------- - -function(ConfigureBench) - - set(options OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY) - set(oneValueArgs NAME) - set(multiValueArgs PATH TARGETS CONFIGURATIONS) - - cmake_parse_arguments(ConfigureBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - set(BENCH_NAME ${ConfigureBench_NAME}) - - add_executable(${BENCH_NAME} ${ConfigureBench_PATH}) - - target_link_libraries( - ${BENCH_NAME} - PRIVATE raft::raft - raft_internal - $<$:raft::compiled> - ${RAFT_CTK_MATH_DEPENDENCIES} - benchmark::benchmark - Threads::Threads - $ - $ - ) - - set_target_properties( - ${BENCH_NAME} - PROPERTIES # set target compile options - INSTALL_RPATH "\$ORIGIN/../../../lib" - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - ) - - target_compile_options( - ${BENCH_NAME} PRIVATE "$<$:${RAFT_CXX_FLAGS}>" - "$<$:${RAFT_CUDA_FLAGS}>" - ) - - if(ConfigureTest_EXPLICIT_INSTANTIATE_ONLY) - target_compile_definitions(${BENCH_NAME} PRIVATE "RAFT_EXPLICIT_INSTANTIATE_ONLY") - endif() - - target_include_directories( - ${BENCH_NAME} PUBLIC "$" - ) - - install( - TARGETS ${BENCH_NAME} - COMPONENT testing - DESTINATION bin/gbench/prims/libraft - EXCLUDE_FROM_ALL - ) - -endfunction() - -if(BUILD_PRIMS_BENCH) - ConfigureBench( - NAME CORE_BENCH PATH bench/prims/core/bitset.cu bench/prims/core/copy.cu bench/prims/main.cpp - ) - - ConfigureBench( - NAME CLUSTER_BENCH PATH bench/prims/cluster/kmeans_balanced.cu bench/prims/cluster/kmeans.cu - bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureBench( - NAME TUNE_DISTANCE PATH bench/prims/distance/tune_pairwise/kernel.cu - 
bench/prims/distance/tune_pairwise/bench.cu bench/prims/main.cpp - ) - - ConfigureBench( - NAME - DISTANCE_BENCH - PATH - bench/prims/distance/distance_cosine.cu - bench/prims/distance/distance_exp_l2.cu - bench/prims/distance/distance_l1.cu - bench/prims/distance/distance_unexp_l2.cu - bench/prims/distance/fused_l2_nn.cu - bench/prims/distance/masked_nn.cu - bench/prims/distance/kernels.cu - bench/prims/main.cpp - OPTIONAL - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureBench( - NAME - LINALG_BENCH - PATH - bench/prims/linalg/add.cu - bench/prims/linalg/map_then_reduce.cu - bench/prims/linalg/matrix_vector_op.cu - bench/prims/linalg/norm.cu - bench/prims/linalg/normalize.cu - bench/prims/linalg/reduce_cols_by_key.cu - bench/prims/linalg/reduce_rows_by_key.cu - bench/prims/linalg/reduce.cu - bench/prims/main.cpp - ) - - ConfigureBench( - NAME - MATRIX_BENCH - PATH - bench/prims/matrix/argmin.cu - bench/prims/matrix/gather.cu - bench/prims/matrix/select_k.cu - bench/prims/matrix/main.cpp - OPTIONAL - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureBench( - NAME RANDOM_BENCH PATH bench/prims/random/make_blobs.cu bench/prims/random/permute.cu - bench/prims/random/rng.cu bench/prims/main.cpp - ) - - ConfigureBench(NAME SPARSE_BENCH PATH bench/prims/sparse/convert_csr.cu bench/prims/main.cpp) - - ConfigureBench( - NAME - NEIGHBORS_BENCH - PATH - bench/prims/neighbors/knn/brute_force_float_int64_t.cu - bench/prims/neighbors/knn/brute_force_float_uint32_t.cu - bench/prims/neighbors/knn/cagra_float_uint32_t.cu - bench/prims/neighbors/knn/ivf_flat_filter_float_int64_t.cu - bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu - bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu - bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu - bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu - bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu - bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu - bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu - 
bench/prims/neighbors/refine_float_int64_t.cu - bench/prims/neighbors/refine_uint8_t_int64_t.cu - bench/prims/main.cpp - OPTIONAL - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - -endif() diff --git a/cpp/bench/prims/core/bitset.cu b/cpp/bench/prims/core/bitset.cu deleted file mode 100644 index ce3136bcd..000000000 --- a/cpp/bench/prims/core/bitset.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -namespace raft::bench::core { - -struct bitset_inputs { - uint32_t bitset_len; - uint32_t mask_len; - uint32_t query_len; -}; // struct bitset_inputs - -template -struct bitset_bench : public fixture { - bitset_bench(const bitset_inputs& p) - : params(p), - mask{raft::make_device_vector(res, p.mask_len)}, - queries{raft::make_device_vector(res, p.query_len)}, - outputs{raft::make_device_vector(res, p.query_len)} - { - raft::random::RngState state{42}; - raft::random::uniformInt(res, state, mask.view(), index_t{0}, index_t{p.bitset_len}); - } - - void run_benchmark(::benchmark::State& state) override - { - loop_on_state(state, [this]() { - auto my_bitset = raft::core::bitset( - this->res, raft::make_const_mdspan(mask.view()), params.bitset_len); - my_bitset.test(this->res, raft::make_const_mdspan(queries.view()), outputs.view()); - }); - } - - private: - raft::resources res; - bitset_inputs params; - raft::device_vector mask, queries; - 
raft::device_vector outputs; -}; // struct bitset - -const std::vector bitset_input_vecs{ - {256 * 1024 * 1024, 64 * 1024 * 1024, 256 * 1024 * 1024}, // Standard Bench - {256 * 1024 * 1024, 64 * 1024 * 1024, 1024 * 1024 * 1024}, // Extra queries - {128 * 1024 * 1024, 1024 * 1024 * 1024, 256 * 1024 * 1024}, // Extra mask to test atomics impact -}; - -using Uint8_32 = bitset_bench; -using Uint16_64 = bitset_bench; -using Uint32_32 = bitset_bench; -using Uint32_64 = bitset_bench; - -RAFT_BENCH_REGISTER(Uint8_32, "", bitset_input_vecs); -RAFT_BENCH_REGISTER(Uint16_64, "", bitset_input_vecs); -RAFT_BENCH_REGISTER(Uint32_32, "", bitset_input_vecs); -RAFT_BENCH_REGISTER(Uint32_64, "", bitset_input_vecs); - -} // namespace raft::bench::core diff --git a/cpp/bench/prims/core/copy.cu b/cpp/bench/prims/core/copy.cu deleted file mode 100644 index 31ee83b92..000000000 --- a/cpp/bench/prims/core/copy.cu +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::core { - -template -auto constexpr const default_dims = []() { - auto dims = std::array{}; - std::fill(dims.begin(), dims.end(), 2); - return dims; -}(); - -template -auto constexpr const default_dims = std::array{3000000}; - -template -auto constexpr const default_dims = std::array{1000, 3000}; - -template -auto constexpr const default_dims = std::array{20, 300, 500}; - -template > -struct bench_array_type; - -template -struct bench_array_type> { - template - auto static constexpr const extent_type = raft::dynamic_extent; - - using type = - std::conditional_t...>, LayoutPolicy>, - device_mdarray...>, LayoutPolicy>>; -}; - -template -struct params { - std::array dims = default_dims; - using src_array_type = - typename bench_array_type::type; - using dst_array_type = - typename bench_array_type::type; -}; - -template -struct CopyBench : public fixture { - using params_type = - params; - using src_array_type = typename params_type::src_array_type; - using dst_array_type = typename params_type::dst_array_type; - explicit CopyBench(const params_type& ps) - : fixture{true}, - res_{}, - params_{ps}, - src_{ - res_, - typename src_array_type::mapping_type{ - std::apply([](auto... exts) { return make_extents(exts...); }, ps.dims)}, - typename src_array_type::container_policy_type{}, - }, - dst_{ - res_, - typename dst_array_type::mapping_type{ - std::apply([](auto... 
exts) { return make_extents(exts...); }, ps.dims)}, - typename dst_array_type::container_policy_type{}, - } - { - res_.get_cublas_handle(); // initialize cublas handle - auto src_data = std::vector(src_.size()); - std::iota(src_data.begin(), src_data.end(), SrcT{}); - raft::copy(src_.data_handle(), src_data.data(), src_.size(), res_.get_stream()); - } - - void run_benchmark(::benchmark::State& state) override - { - loop_on_state(state, [this]() { raft::copy(res_, dst_.view(), src_.view()); }); - } - - private: - raft::device_resources res_; - params_type params_; - src_array_type src_; - dst_array_type dst_; -}; - -template -auto static const inputs = std::vector{ParamsT{}}; - -#define COPY_REGISTER(BenchT) \ - RAFT_BENCH_REGISTER(BenchT, "BenchT", inputs) - -using copy_bench_device_device_1d_same_dtype_same_layout = CopyBench; -using copy_bench_device_device_1d_same_dtype_diff_layout = CopyBench; -using copy_bench_device_device_1d_diff_dtype_diff_layout = CopyBench; -using copy_bench_device_device_2d_same_dtype_diff_layout = CopyBench; -using copy_bench_device_device_2d_same_dtype_diff_layout_cublas = CopyBench; -using copy_bench_device_device_3d_diff_dtype_diff_layout = CopyBench; -using copy_bench_device_device_3d_diff_dtype_same_layout = CopyBench; - -using copy_bench_host_host_1d_same_dtype_same_layout = CopyBench; -using copy_bench_host_host_1d_same_dtype_diff_layout = CopyBench; -using copy_bench_host_host_1d_diff_dtype_diff_layout = CopyBench; -using copy_bench_host_host_2d_same_dtype_diff_layout = CopyBench; -using copy_bench_host_host_2d_same_dtype_diff_layout_float_float = CopyBench; -using copy_bench_host_host_3d_diff_dtype_same_layout = CopyBench; -using copy_bench_host_host_3d_diff_dtype_diff_layout = CopyBench; - -using copy_bench_device_host_1d_same_dtype_same_layout = CopyBench; -using copy_bench_device_host_1d_same_dtype_diff_layout = CopyBench; -using copy_bench_device_host_1d_diff_dtype_diff_layout = CopyBench; -using 
copy_bench_device_host_2d_same_dtype_diff_layout = CopyBench; -using copy_bench_device_host_2d_same_dtype_diff_layout_cublas = CopyBench; -using copy_bench_device_host_3d_diff_dtype_same_layout = CopyBench; -using copy_bench_device_host_3d_diff_dtype_diff_layout = CopyBench; - -using copy_bench_host_device_1d_same_dtype_same_layout = CopyBench; -using copy_bench_host_device_1d_same_dtype_diff_layout = CopyBench; -using copy_bench_host_device_1d_diff_dtype_diff_layout = CopyBench; -using copy_bench_host_device_2d_same_dtype_diff_layout = CopyBench; -using copy_bench_host_device_2d_same_dtype_diff_layout_cublas = CopyBench; -using copy_bench_host_device_3d_diff_dtype_diff_layout = CopyBench; -using copy_bench_host_device_3d_diff_dtype_same_layout = CopyBench; - -// COPY_REGISTER(copy_bench_same_dtype_1d_host_host); -COPY_REGISTER(copy_bench_device_device_1d_same_dtype_same_layout); -COPY_REGISTER(copy_bench_device_device_1d_same_dtype_diff_layout); -COPY_REGISTER(copy_bench_device_device_1d_diff_dtype_diff_layout); -COPY_REGISTER(copy_bench_device_device_2d_same_dtype_diff_layout); -COPY_REGISTER(copy_bench_device_device_2d_same_dtype_diff_layout_cublas); -COPY_REGISTER(copy_bench_device_device_3d_diff_dtype_same_layout); -COPY_REGISTER(copy_bench_device_device_3d_diff_dtype_diff_layout); - -COPY_REGISTER(copy_bench_host_host_1d_same_dtype_same_layout); -COPY_REGISTER(copy_bench_host_host_1d_same_dtype_diff_layout); -COPY_REGISTER(copy_bench_host_host_1d_diff_dtype_diff_layout); -COPY_REGISTER(copy_bench_host_host_2d_same_dtype_diff_layout); -COPY_REGISTER(copy_bench_host_host_2d_same_dtype_diff_layout_float_float); -COPY_REGISTER(copy_bench_host_host_3d_diff_dtype_same_layout); -COPY_REGISTER(copy_bench_host_host_3d_diff_dtype_diff_layout); - -COPY_REGISTER(copy_bench_device_host_1d_same_dtype_same_layout); -COPY_REGISTER(copy_bench_device_host_1d_same_dtype_diff_layout); -COPY_REGISTER(copy_bench_device_host_1d_diff_dtype_diff_layout); 
-COPY_REGISTER(copy_bench_device_host_2d_same_dtype_diff_layout); -COPY_REGISTER(copy_bench_device_host_2d_same_dtype_diff_layout_cublas); -COPY_REGISTER(copy_bench_device_host_3d_diff_dtype_same_layout); -COPY_REGISTER(copy_bench_device_host_3d_diff_dtype_diff_layout); - -COPY_REGISTER(copy_bench_host_device_1d_same_dtype_same_layout); -COPY_REGISTER(copy_bench_host_device_1d_same_dtype_diff_layout); -COPY_REGISTER(copy_bench_host_device_1d_diff_dtype_diff_layout); -COPY_REGISTER(copy_bench_host_device_2d_same_dtype_diff_layout); -COPY_REGISTER(copy_bench_host_device_2d_same_dtype_diff_layout_cublas); -COPY_REGISTER(copy_bench_host_device_3d_diff_dtype_same_layout); -COPY_REGISTER(copy_bench_host_device_3d_diff_dtype_diff_layout); - -} // namespace raft::bench::core diff --git a/cpp/bench/prims/linalg/add.cu b/cpp/bench/prims/linalg/add.cu deleted file mode 100644 index 456214ad7..000000000 --- a/cpp/bench/prims/linalg/add.cu +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -namespace raft::bench::linalg { - -struct add_inputs { - int len; -}; // struct add_inputs - -template -struct add : public fixture { - add(const add_inputs& p) : params(p), ptr0(p.len, stream), ptr1(p.len, stream) {} - - void run_benchmark(::benchmark::State& state) override - { - loop_on_state(state, [this]() { - raft::linalg::add(ptr0.data(), ptr0.data(), ptr1.data(), params.len, stream); - }); - } - - private: - add_inputs params; - rmm::device_uvector ptr0, ptr1; -}; // struct add - -const std::vector add_input_vecs{ - {256 * 1024 * 1024}, {256 * 1024 * 1024 + 2}, {256 * 1024 * 1024 + 1} - -}; - -RAFT_BENCH_REGISTER(add, "", add_input_vecs); -RAFT_BENCH_REGISTER(add, "", add_input_vecs); - -} // namespace raft::bench::linalg diff --git a/cpp/bench/prims/linalg/map_then_reduce.cu b/cpp/bench/prims/linalg/map_then_reduce.cu deleted file mode 100644 index 84aebd85b..000000000 --- a/cpp/bench/prims/linalg/map_then_reduce.cu +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -namespace raft::bench::linalg { - -struct map_then_reduce_inputs { - int len; -}; - -template -struct Identity { - HDI Type operator()(Type a) { return a; } -}; - -template -struct map_then_reduce : public fixture { - map_then_reduce(const map_then_reduce_inputs& p) : params(p), in(p.len, stream), out(1, stream) {} - - void run_benchmark(::benchmark::State& state) override - { - loop_on_state(state, [this]() { - raft::linalg::mapThenSumReduce(out.data(), params.len, Identity(), stream, in.data()); - }); - } - - private: - map_then_reduce_inputs params; - rmm::device_uvector out, in; -}; // struct MapThenReduce - -const std::vector map_then_reduce_input_vecs{ - {1024 * 1024}, - {32 * 1024 * 1024}, - {1024 * 1024 * 1024}, - {1024 * 1024 + 2}, - {32 * 1024 * 1024 + 2}, - {1024 * 1024 * 1024 + 2}, - {1024 * 1024 + 1}, - {32 * 1024 * 1024 + 1}, - {1024 * 1024 * 1024 + 1}, - -}; - -RAFT_BENCH_REGISTER(map_then_reduce, "", map_then_reduce_input_vecs); -RAFT_BENCH_REGISTER(map_then_reduce, "", map_then_reduce_input_vecs); - -} // namespace raft::bench::linalg diff --git a/cpp/bench/prims/linalg/matrix_vector_op.cu b/cpp/bench/prims/linalg/matrix_vector_op.cu deleted file mode 100644 index d1fbaee79..000000000 --- a/cpp/bench/prims/linalg/matrix_vector_op.cu +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -namespace raft::bench::linalg { - -template -struct mat_vec_op_inputs { - IdxT rows, cols; - bool rowMajor, bcastAlongRows; - IdxT inAlignOffset, outAlignOffset; -}; // struct mat_vec_op_inputs - -template -inline auto operator<<(std::ostream& os, const mat_vec_op_inputs& p) -> std::ostream& -{ - os << p.rows << "#" << p.cols << "#" << p.rowMajor << "#" << p.bcastAlongRows << "#" - << p.inAlignOffset << "#" << p.outAlignOffset; - return os; -} - -template -struct mat_vec_op : public fixture { - mat_vec_op(const mat_vec_op_inputs& p) - : params(p), - out(p.rows * p.cols + params.outAlignOffset, stream), - in(p.rows * p.cols + params.inAlignOffset, stream), - vec1(p.bcastAlongRows ? p.cols : p.rows, stream), - vec2(p.bcastAlongRows ? p.cols : p.rows, stream) - { - } - - void run_benchmark(::benchmark::State& state) override - { - std::ostringstream label_stream; - label_stream << params; - state.SetLabel(label_stream.str()); - - loop_on_state(state, [this]() { - if constexpr (OpT::useTwoVectors) { - raft::linalg::matrixVectorOp(out.data() + params.outAlignOffset, - in.data() + params.inAlignOffset, - vec1.data(), - vec2.data(), - params.cols, - params.rows, - params.rowMajor, - params.bcastAlongRows, - OpT{}, - stream); - } else { - raft::linalg::matrixVectorOp(out.data() + params.outAlignOffset, - in.data() + params.inAlignOffset, - vec1.data(), - params.cols, - params.rows, - params.rowMajor, - params.bcastAlongRows, - OpT{}, - stream); - } - }); - } - - private: - mat_vec_op_inputs params; - rmm::device_uvector out, in, vec1, vec2; -}; // struct MatVecOp - -template -std::vector> get_mv_inputs() -{ - std::vector> out; - - // Scalability benchmark with round dimensions - std::vector rows = {1000, 100000, 1000000}; - std::vector cols = {8, 64, 256, 1024}; - for (bool rowMajor : {true, false}) { - for (bool alongRows : {true, false}) { - for (IdxT rows_ : rows) { - for (IdxT cols_ : cols) { - out.push_back({rows_, cols_, 
rowMajor, alongRows, 0, 0}); - } - } - } - } - - // Odd dimensions, misalignment - std::vector> rowcols = { - {44739207, 7}, - {44739207, 15}, - {44739207, 16}, - {44739207, 17}, - {2611236, 256}, - {2611236, 257}, - {2611236, 263}, - }; - for (bool rowMajor : {true, false}) { - for (bool alongRows : {true, false}) { - for (auto rc : rowcols) { - for (IdxT inAlignOffset : {0, 1}) { - for (IdxT outAlignOffset : {0, 1}) { - out.push_back({std::get<0>(rc), - std::get<1>(rc), - rowMajor, - alongRows, - inAlignOffset, - outAlignOffset}); - } - } - } - } - } - return out; -} - -const std::vector> mv_input_i32 = get_mv_inputs(); -const std::vector> mv_input_i64 = get_mv_inputs(); - -template -struct Add1Vec { - static constexpr bool useTwoVectors = false; - HDI T operator()(T a, T b) const { return a + b; }; -}; -template -struct Add2Vec { - static constexpr bool useTwoVectors = true; - HDI T operator()(T a, T b, T c) const { return a + b + c; }; -}; - -RAFT_BENCH_REGISTER((mat_vec_op, float, int>), "", mv_input_i32); -RAFT_BENCH_REGISTER((mat_vec_op, double, int>), "", mv_input_i32); -RAFT_BENCH_REGISTER((mat_vec_op, float, int>), "", mv_input_i32); -RAFT_BENCH_REGISTER((mat_vec_op, double, int>), "", mv_input_i32); -RAFT_BENCH_REGISTER((mat_vec_op, float, int64_t>), "", mv_input_i64); -RAFT_BENCH_REGISTER((mat_vec_op, double, int64_t>), "", mv_input_i64); -RAFT_BENCH_REGISTER((mat_vec_op, float, int64_t>), "", mv_input_i64); -RAFT_BENCH_REGISTER((mat_vec_op, double, int64_t>), "", mv_input_i64); - -} // namespace raft::bench::linalg diff --git a/cpp/bench/prims/linalg/norm.cu b/cpp/bench/prims/linalg/norm.cu deleted file mode 100644 index 1db23e4ca..000000000 --- a/cpp/bench/prims/linalg/norm.cu +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include - -#include - -namespace raft::bench::linalg { - -template -struct norm_input { - IdxT rows, cols; -}; - -template -inline auto operator<<(std::ostream& os, const norm_input& p) -> std::ostream& -{ - os << p.rows << "#" << p.cols; - return os; -} - -template -struct rowNorm : public fixture { - rowNorm(const norm_input& p) : params(p), in(p.rows * p.cols, stream), dots(p.rows, stream) - { - raft::random::RngState rng{1234}; - raft::random::uniform(handle, rng, in.data(), p.rows * p.cols, (T)-10.0, (T)10.0); - } - - void run_benchmark(::benchmark::State& state) override - { - std::ostringstream label_stream; - label_stream << params; - state.SetLabel(label_stream.str()); - - loop_on_state(state, [this]() { - auto input_view = raft::make_device_matrix_view( - in.data(), params.rows, params.cols); - auto output_view = - raft::make_device_vector_view(dots.data(), params.rows); - raft::linalg::norm(handle, - input_view, - output_view, - raft::linalg::L2Norm, - raft::linalg::Apply::ALONG_ROWS, - raft::sqrt_op()); - }); - } - - private: - norm_input params; - rmm::device_uvector in, dots; -}; // struct rowNorm - -const std::vector> norm_inputs_i32 = - raft::util::itertools::product>({10, 100, 1000, 10000, 100000}, - {16, 32, 64, 128, 256, 512, 1024}); -const std::vector> norm_inputs_i64 = - raft::util::itertools::product>({10, 100, 1000, 10000, 100000}, - {16, 32, 64, 128, 256, 512, 1024}); - -RAFT_BENCH_REGISTER((rowNorm), "", norm_inputs_i32); -RAFT_BENCH_REGISTER((rowNorm), "", 
norm_inputs_i32); -RAFT_BENCH_REGISTER((rowNorm), "", norm_inputs_i64); -RAFT_BENCH_REGISTER((rowNorm), "", norm_inputs_i64); - -} // namespace raft::bench::linalg diff --git a/cpp/bench/prims/linalg/normalize.cu b/cpp/bench/prims/linalg/normalize.cu deleted file mode 100644 index 91319e774..000000000 --- a/cpp/bench/prims/linalg/normalize.cu +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -#include - -namespace raft::bench::linalg { - -template -struct normalize_input { - IdxT rows, cols; -}; - -template -inline auto operator<<(std::ostream& os, const normalize_input& p) -> std::ostream& -{ - os << p.rows << "#" << p.cols; - return os; -} - -template -struct rowNormalize : public fixture { - rowNormalize(const normalize_input& p) - : params(p), in(p.rows * p.cols, stream), out(p.rows * p.cols, stream) - { - raft::random::RngState rng{1234}; - raft::random::uniform(handle, rng, in.data(), p.rows * p.cols, (T)-10.0, (T)10.0); - } - - void run_benchmark(::benchmark::State& state) override - { - std::ostringstream label_stream; - label_stream << params; - state.SetLabel(label_stream.str()); - - loop_on_state(state, [this]() { - auto input_view = raft::make_device_matrix_view( - in.data(), params.rows, params.cols); - auto output_view = raft::make_device_matrix_view( - out.data(), params.rows, params.cols); - 
raft::linalg::row_normalize(handle, input_view, output_view, raft::linalg::L2Norm); - }); - } - - private: - normalize_input params; - rmm::device_uvector in, out; -}; // struct rowNormalize - -const std::vector> normalize_inputs_i32 = - raft::util::itertools::product>( - {10, 100, 1000, 10000, 100000}, {8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}); -const std::vector> normalize_inputs_i64 = - raft::util::itertools::product>( - {10, 100, 1000, 10000, 100000}, {8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}); - -RAFT_BENCH_REGISTER((rowNormalize), "", normalize_inputs_i32); -RAFT_BENCH_REGISTER((rowNormalize), "", normalize_inputs_i32); -RAFT_BENCH_REGISTER((rowNormalize), "", normalize_inputs_i64); -RAFT_BENCH_REGISTER((rowNormalize), "", normalize_inputs_i64); - -} // namespace raft::bench::linalg diff --git a/cpp/bench/prims/linalg/reduce.cu b/cpp/bench/prims/linalg/reduce.cu deleted file mode 100644 index cf41c5916..000000000 --- a/cpp/bench/prims/linalg/reduce.cu +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include - -namespace raft::bench::linalg { - -struct input_size { - int rows, cols; - bool along_rows; -}; - -template -struct reduce : public fixture { - reduce(bool along_rows, const input_size& p) - : input_size(p), along_rows(along_rows), in(p.rows * p.cols, stream), out(p.rows, stream) - { - } - - void run_benchmark(::benchmark::State& state) override - { - loop_on_state(state, [this]() { - raft::linalg::reduce( - out.data(), in.data(), input_size.cols, input_size.rows, T(0.f), true, along_rows, stream); - }); - } - - private: - bool along_rows; - input_size input_size; - rmm::device_uvector in, out; -}; // struct reduce - -const std::vector kInputSizes{{8 * 1024, 1024}, - {1024, 8 * 1024}, - {8 * 1024, 8 * 1024}, - {32 * 1024, 1024}, - {1024, 32 * 1024}, - {32 * 1024, 32 * 1024}}; - -const std::vector kAlongRows{false, true}; - -RAFT_BENCH_REGISTER(reduce, "", kAlongRows, kInputSizes); -RAFT_BENCH_REGISTER(reduce, "", kAlongRows, kInputSizes); - -} // namespace raft::bench::linalg diff --git a/cpp/bench/prims/linalg/reduce_cols_by_key.cu b/cpp/bench/prims/linalg/reduce_cols_by_key.cu deleted file mode 100644 index 1b584e80c..000000000 --- a/cpp/bench/prims/linalg/reduce_cols_by_key.cu +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -#include - -namespace raft::bench::linalg { - -template -struct rcbk_params { - IdxT rows, cols; - IdxT keys; -}; - -template -inline auto operator<<(std::ostream& os, const rcbk_params& p) -> std::ostream& -{ - os << p.rows << "#" << p.cols << "#" << p.keys; - return os; -} - -template -struct reduce_cols_by_key : public fixture { - reduce_cols_by_key(const rcbk_params& p) - : params(p), in(p.rows * p.cols, stream), out(p.rows * p.keys, stream), keys(p.cols, stream) - { - raft::random::RngState rng{42}; - raft::random::uniformInt(handle, rng, keys.data(), p.cols, (KeyT)0, (KeyT)p.keys); - } - - void run_benchmark(::benchmark::State& state) override - { - std::ostringstream label_stream; - label_stream << params; - state.SetLabel(label_stream.str()); - - loop_on_state(state, [this]() { - raft::linalg::reduce_cols_by_key( - in.data(), keys.data(), out.data(), params.rows, params.cols, params.keys, stream, false); - }); - } - - protected: - rcbk_params params; - rmm::device_uvector in, out; - rmm::device_uvector keys; -}; // struct reduce_cols_by_key - -const std::vector> rcbk_inputs_i32 = - raft::util::itertools::product>( - {1, 10, 100, 1000}, {1000, 10000, 100000}, {8, 32, 128, 512, 2048}); -const std::vector> rcbk_inputs_i64 = - raft::util::itertools::product>( - {1, 10, 100, 1000}, {1000, 10000, 100000}, {8, 32, 128, 512, 2048}); - -RAFT_BENCH_REGISTER((reduce_cols_by_key), "", rcbk_inputs_i32); -RAFT_BENCH_REGISTER((reduce_cols_by_key), "", rcbk_inputs_i32); -RAFT_BENCH_REGISTER((reduce_cols_by_key), "", rcbk_inputs_i64); -RAFT_BENCH_REGISTER((reduce_cols_by_key), "", rcbk_inputs_i64); - -} // namespace raft::bench::linalg diff --git a/cpp/bench/prims/linalg/reduce_rows_by_key.cu b/cpp/bench/prims/linalg/reduce_rows_by_key.cu deleted file mode 100644 index b68cefc27..000000000 --- a/cpp/bench/prims/linalg/reduce_rows_by_key.cu +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -#include - -namespace raft::bench::linalg { - -struct rrbk_params { - int64_t rows, cols; - int64_t keys; -}; - -template -struct reduce_rows_by_key : public fixture { - reduce_rows_by_key(const rrbk_params& p) - : params(p), - in(p.rows * p.cols, stream), - out(p.keys * p.cols, stream), - keys(p.rows, stream), - workspace(p.rows, stream) - { - raft::random::RngState rng{42}; - raft::random::uniformInt(handle, rng, keys.data(), p.rows, (KeyT)0, (KeyT)p.keys); - } - - void run_benchmark(::benchmark::State& state) override - { - loop_on_state(state, [this]() { - raft::linalg::reduce_rows_by_key(in.data(), - params.cols, - keys.data(), - workspace.data(), - params.rows, - params.cols, - params.keys, - out.data(), - stream, - false); - }); - } - - protected: - rrbk_params params; - rmm::device_uvector in, out; - rmm::device_uvector keys; - rmm::device_uvector workspace; -}; // struct reduce_rows_by_key - -const std::vector kInputSizes{ - {10000, 128, 64}, - {100000, 128, 64}, - {1000000, 128, 64}, - {10000000, 128, 64}, - {10000, 128, 256}, - {100000, 128, 256}, - {1000000, 128, 256}, - {10000000, 128, 256}, - {10000, 128, 1024}, - {100000, 128, 1024}, - {1000000, 128, 1024}, - {10000000, 128, 1024}, - {10000, 128, 4096}, - {100000, 128, 4096}, - {1000000, 128, 4096}, - {10000000, 128, 4096}, -}; - -RAFT_BENCH_REGISTER((reduce_rows_by_key), "", kInputSizes); 
-RAFT_BENCH_REGISTER((reduce_rows_by_key), "", kInputSizes); - -} // namespace raft::bench::linalg diff --git a/cpp/bench/prims/matrix/argmin.cu b/cpp/bench/prims/matrix/argmin.cu deleted file mode 100644 index afee81aa0..000000000 --- a/cpp/bench/prims/matrix/argmin.cu +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -#include - -namespace raft::bench::matrix { - -template -struct ArgminParams { - IdxT rows, cols; -}; - -template -struct Argmin : public fixture { - Argmin(const ArgminParams& p) : params(p), matrix(this->handle), indices(this->handle) {} - - void allocate_data(const ::benchmark::State& state) override - { - matrix = raft::make_device_matrix(handle, params.rows, params.cols); - indices = raft::make_device_vector(handle, params.rows); - - raft::random::RngState rng{1234}; - raft::random::uniform( - handle, rng, matrix.data_handle(), params.rows * params.cols, T(-1), T(1)); - resource::sync_stream(handle, stream); - } - - void run_benchmark(::benchmark::State& state) override - { - loop_on_state(state, [this]() { - raft::matrix::argmin(handle, raft::make_const_mdspan(matrix.view()), indices.view()); - }); - } - - private: - ArgminParams params; - raft::device_matrix matrix; - raft::device_vector indices; -}; // struct Argmin - -const std::vector> argmin_inputs_i64 = - 
raft::util::itertools::product>({1000, 10000, 100000, 1000000, 10000000}, - {64, 128, 256, 512, 1024}); - -RAFT_BENCH_REGISTER((Argmin), "", argmin_inputs_i64); -RAFT_BENCH_REGISTER((Argmin), "", argmin_inputs_i64); - -} // namespace raft::bench::matrix diff --git a/cpp/bench/prims/matrix/gather.cu b/cpp/bench/prims/matrix/gather.cu deleted file mode 100644 index 00a145ffa..000000000 --- a/cpp/bench/prims/matrix/gather.cu +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -#include - -namespace raft::bench::matrix { - -template -struct GatherParams { - IdxT rows, cols, map_length; -}; - -template -inline auto operator<<(std::ostream& os, const GatherParams& p) -> std::ostream& -{ - os << p.rows << "#" << p.cols << "#" << p.map_length; - return os; -} - -template -struct Gather : public fixture { - Gather(const GatherParams& p) - : params(p), matrix(this->handle), map(this->handle), out(this->handle), stencil(this->handle) - { - } - - void allocate_data(const ::benchmark::State& state) override - { - matrix = raft::make_device_matrix(handle, params.rows, params.cols); - map = raft::make_device_vector(handle, params.map_length); - out = raft::make_device_matrix(handle, params.map_length, params.cols); - stencil = raft::make_device_vector(handle, Conditional ? 
params.map_length : IdxT(0)); - - raft::random::RngState rng{1234}; - raft::random::uniform( - handle, rng, matrix.data_handle(), params.rows * params.cols, T(-1), T(1)); - raft::random::uniformInt( - handle, rng, map.data_handle(), params.map_length, (MapT)0, (MapT)params.rows); - if constexpr (Conditional) { - raft::random::uniform(handle, rng, stencil.data_handle(), params.map_length, T(-1), T(1)); - } - resource::sync_stream(handle, stream); - } - - void run_benchmark(::benchmark::State& state) override - { - std::ostringstream label_stream; - label_stream << params; - state.SetLabel(label_stream.str()); - - loop_on_state(state, [this]() { - auto matrix_const_view = raft::make_const_mdspan(matrix.view()); - auto map_const_view = raft::make_const_mdspan(map.view()); - if constexpr (Conditional) { - auto stencil_const_view = raft::make_const_mdspan(stencil.view()); - auto pred_op = raft::plug_const_op(T(0.0), raft::greater_op()); - raft::matrix::gather_if( - handle, matrix_const_view, out.view(), map_const_view, stencil_const_view, pred_op); - } else { - raft::matrix::gather(handle, matrix_const_view, map_const_view, out.view()); - } - }); - } - - private: - GatherParams params; - raft::device_matrix matrix, out; - raft::device_vector stencil; - raft::device_vector map; -}; // struct Gather - -template -using GatherIf = Gather; - -const std::vector> gather_inputs_i64 = - raft::util::itertools::product>( - {1000000}, {10, 20, 50, 100, 200, 500}, {1000, 10000, 100000, 1000000}); - -RAFT_BENCH_REGISTER((Gather), "", gather_inputs_i64); -RAFT_BENCH_REGISTER((Gather), "", gather_inputs_i64); -RAFT_BENCH_REGISTER((GatherIf), "", gather_inputs_i64); -RAFT_BENCH_REGISTER((GatherIf), "", gather_inputs_i64); -} // namespace raft::bench::matrix diff --git a/cpp/bench/prims/random/make_blobs.cu b/cpp/bench/prims/random/make_blobs.cu deleted file mode 100644 index f43d914cf..000000000 --- a/cpp/bench/prims/random/make_blobs.cu +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 
(c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -namespace raft::bench::random { -struct make_blobs_inputs { - int rows, cols, clusters; - bool row_major; -}; // struct make_blobs_inputs - -inline auto operator<<(std::ostream& os, const make_blobs_inputs& p) -> std::ostream& -{ - os << p.rows << "#" << p.cols << "#" << p.clusters << "#" << p.row_major; - return os; -} - -template -struct make_blobs : public fixture { - make_blobs(const make_blobs_inputs& p) - : params(p), data(p.rows * p.cols, stream), labels(p.rows, stream) - { - } - - void run_benchmark(::benchmark::State& state) override - { - std::ostringstream label_stream; - label_stream << params; - state.SetLabel(label_stream.str()); - - loop_on_state(state, [this]() { - raft::random::make_blobs(data.data(), - labels.data(), - params.rows, - params.cols, - params.clusters, - this->stream, - params.row_major); - }); - } - - private: - make_blobs_inputs params; - rmm::device_uvector data; - rmm::device_uvector labels; -}; // struct MakeBlobs - -static std::vector get_make_blobs_input_vecs() -{ - std::vector out; - make_blobs_inputs p; - for (auto rows : std::vector{100000, 1000000}) { - for (auto cols : std::vector{10, 100}) { - for (auto clusters : std::vector{2, 10, 100}) { - p.rows = rows; - p.cols = cols; - p.clusters = clusters; - p.row_major = true; - out.push_back(p); - p.row_major = false; - out.push_back(p); - 
} - } - } - return out; -} - -RAFT_BENCH_REGISTER(make_blobs, "", get_make_blobs_input_vecs()); -RAFT_BENCH_REGISTER(make_blobs, "", get_make_blobs_input_vecs()); - -} // namespace raft::bench::random diff --git a/cpp/bench/prims/random/permute.cu b/cpp/bench/prims/random/permute.cu deleted file mode 100644 index 829cf4272..000000000 --- a/cpp/bench/prims/random/permute.cu +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -#include - -namespace raft::bench::random { - -struct permute_inputs { - int rows, cols; - bool needPerms, needShuffle, rowMajor; -}; // struct permute_inputs - -template -struct permute : public fixture { - permute(const permute_inputs& p) - : params(p), - perms(p.needPerms ? 
p.rows : 0, stream), - out(p.rows * p.cols, stream), - in(p.rows * p.cols, stream) - { - raft::random::RngState r(123456ULL); - uniform(handle, r, in.data(), p.rows, T(-1.0), T(1.0)); - } - - void run_benchmark(::benchmark::State& state) override - { - raft::random::RngState r(123456ULL); - loop_on_state(state, [this, &r]() { - raft::random::permute( - perms.data(), out.data(), in.data(), params.cols, params.rows, params.rowMajor, stream); - }); - } - - private: - raft::device_resources handle; - permute_inputs params; - rmm::device_uvector out, in; - rmm::device_uvector perms; -}; // struct permute - -const std::vector permute_input_vecs = { - {32 * 1024, 128, true, true, true}, - {1024 * 1024, 128, true, true, true}, - {32 * 1024, 128 + 2, true, true, true}, - {1024 * 1024, 128 + 2, true, true, true}, - {32 * 1024, 128 + 1, true, true, true}, - {1024 * 1024, 128 + 1, true, true, true}, - - {32 * 1024, 128, true, true, false}, - {1024 * 1024, 128, true, true, false}, - {32 * 1024, 128 + 2, true, true, false}, - {1024 * 1024, 128 + 2, true, true, false}, - {32 * 1024, 128 + 1, true, true, false}, - {1024 * 1024, 128 + 1, true, true, false}, - -}; - -RAFT_BENCH_REGISTER(permute, "", permute_input_vecs); -RAFT_BENCH_REGISTER(permute, "", permute_input_vecs); - -} // namespace raft::bench::random diff --git a/cpp/bench/prims/random/rng.cu b/cpp/bench/prims/random/rng.cu deleted file mode 100644 index d15c9441d..000000000 --- a/cpp/bench/prims/random/rng.cu +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -#include - -namespace raft::bench::random { - -enum RandomType { - RNG_Normal, - RNG_LogNormal, - RNG_Uniform, - RNG_Gumbel, - RNG_Logistic, - RNG_Exp, - RNG_Rayleigh, - RNG_Laplace, - RNG_Fill -}; // enum RandomType - -template -struct rng_inputs { - int len; - RandomType type; - raft::random::GeneratorType gtype; - T start, end; -}; // struct rng_inputs - -template -struct rng : public fixture { - rng(const rng_inputs& p) : params(p), ptr(p.len, stream) {} - - void run_benchmark(::benchmark::State& state) override - { - raft::random::RngState r(123456ULL, params.gtype); - loop_on_state(state, [this, &r]() { - switch (params.type) { - case RNG_Normal: normal(handle, r, ptr.data(), params.len, params.start, params.end); break; - case RNG_LogNormal: - lognormal(handle, r, ptr.data(), params.len, params.start, params.end); - break; - case RNG_Uniform: - uniform(handle, r, ptr.data(), params.len, params.start, params.end); - break; - case RNG_Gumbel: gumbel(handle, r, ptr.data(), params.len, params.start, params.end); break; - case RNG_Logistic: - logistic(handle, r, ptr.data(), params.len, params.start, params.end); - break; - case RNG_Exp: exponential(handle, r, ptr.data(), params.len, params.start); break; - case RNG_Rayleigh: rayleigh(handle, r, ptr.data(), params.len, params.start); break; - case RNG_Laplace: - laplace(handle, r, ptr.data(), params.len, params.start, params.end); - break; - case RNG_Fill: fill(handle, r, ptr.data(), params.len, params.start); break; - }; - }); - } - - private: - rng_inputs 
params; - rmm::device_uvector ptr; -}; // struct RngBench - -template -static std::vector> get_rng_input_vecs() -{ - using namespace raft::random; - return { - {1024 * 1024, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, - {32 * 1024 * 1024, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, - {1024 * 1024 * 1024, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, - {1024 * 1024 + 2, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, - {32 * 1024 * 1024 + 2, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, - {1024 * 1024 * 1024 + 2, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, - {1024 * 1024 + 1, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, - {32 * 1024 * 1024 + 1, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, - {1024 * 1024 * 1024 + 1, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, - - {1024 * 1024, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, - {32 * 1024 * 1024, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, - {1024 * 1024 * 1024, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, - {1024 * 1024 + 2, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, - {32 * 1024 * 1024 + 2, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, - {1024 * 1024 * 1024 + 2, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, - {1024 * 1024 + 1, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, - {32 * 1024 * 1024 + 1, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, - {1024 * 1024 * 1024 + 1, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, - - {1024 * 1024, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, - {32 * 1024 * 1024, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, - {1024 * 1024 * 1024, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, - {1024 * 1024 + 2, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, - {32 * 1024 * 1024 + 2, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, - {1024 * 1024 * 1024 + 2, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, - {1024 * 1024 + 1, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, - {32 * 1024 * 1024 + 1, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, - {1024 * 1024 * 1024 + 1, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, - }; -} - -RAFT_BENCH_REGISTER(rng, "", get_rng_input_vecs()); -RAFT_BENCH_REGISTER(rng, "", get_rng_input_vecs()); - -} // namespace raft::bench::random 
diff --git a/cpp/bench/prims/sparse/convert_csr.cu b/cpp/bench/prims/sparse/convert_csr.cu deleted file mode 100644 index 634c749a5..000000000 --- a/cpp/bench/prims/sparse/convert_csr.cu +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include -#include - -namespace raft::bench::sparse { - -template -struct bench_param { - index_t num_cols; - index_t num_rows; - index_t divisor; -}; - -template -RAFT_KERNEL init_adj_kernel(bool* adj, index_t num_rows, index_t num_cols, index_t divisor) -{ - index_t r = blockDim.y * blockIdx.y + threadIdx.y; - index_t c = blockDim.x * blockIdx.x + threadIdx.x; - - for (; r < num_rows; r += gridDim.y * blockDim.y) { - for (; c < num_cols; c += gridDim.x * blockDim.x) { - adj[r * num_cols + c] = c % divisor == 0; - } - } -} - -template -void init_adj(bool* adj, index_t num_rows, index_t num_cols, index_t divisor, cudaStream_t stream) -{ - // adj matrix: element a_ij is set to one if j is divisible by divisor. 
- dim3 block(32, 32); - const index_t max_y_grid_dim = 65535; - dim3 grid(num_cols / 32 + 1, (int)min(num_rows / 32 + 1, max_y_grid_dim)); - init_adj_kernel<<>>(adj, num_rows, num_cols, divisor); - RAFT_CHECK_CUDA(stream); -} - -template -struct bench_base : public fixture { - bench_base(const bench_param& p) - : params(p), - handle(stream), - adj(p.num_rows * p.num_cols, stream), - row_ind(p.num_rows, stream), - row_ind_host(p.num_rows), - row_counters(p.num_rows, stream), - // col_ind is over-dimensioned because nnz is unknown at this point - col_ind(p.num_rows * p.num_cols, stream) - { - init_adj(adj.data(), p.num_rows, p.num_cols, p.divisor, stream); - - std::vector row_ind_host(p.num_rows); - for (size_t i = 0; i < row_ind_host.size(); ++i) { - size_t nnz_per_row = raft::ceildiv(p.num_cols, p.divisor); - row_ind_host[i] = nnz_per_row * i; - } - raft::update_device(row_ind.data(), row_ind_host.data(), row_ind.size(), stream); - } - - void run_benchmark(::benchmark::State& state) override - { - loop_on_state(state, [this]() { - raft::sparse::convert::adj_to_csr(handle, - adj.data(), - row_ind.data(), - params.num_rows, - params.num_cols, - row_counters.data(), - col_ind.data()); - }); - - // Estimate bandwidth: - index_t num_entries = params.num_rows * params.num_cols; - index_t bytes_read = num_entries * sizeof(bool); - index_t bytes_write = num_entries / params.divisor * sizeof(index_t); - - state.counters["BW"] = benchmark::Counter(bytes_read + bytes_write, - benchmark::Counter::kIsIterationInvariantRate, - benchmark::Counter::OneK::kIs1024); - state.counters["BW read"] = benchmark::Counter( - bytes_read, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024); - state.counters["BW write"] = benchmark::Counter(bytes_write, - benchmark::Counter::kIsIterationInvariantRate, - benchmark::Counter::OneK::kIs1024); - - state.counters["Fraction nz"] = benchmark::Counter(100.0 / ((double)params.divisor)); - state.counters["Columns"] = 
benchmark::Counter(params.num_cols); - state.counters["Rows"] = benchmark::Counter(params.num_rows); - } - - protected: - raft::device_resources handle; - bench_param params; - rmm::device_uvector adj; - rmm::device_uvector row_ind; - std::vector row_ind_host; - rmm::device_uvector row_counters; - rmm::device_uvector col_ind; -}; // struct bench_base - -const int64_t num_cols = 1 << 30; - -const std::vector> bench_params = { - {num_cols, 1, 8}, - {num_cols >> 3, 1 << 3, 8}, - {num_cols >> 6, 1 << 6, 8}, - - {num_cols, 1, 64}, - {num_cols >> 3, 1 << 3, 64}, - {num_cols >> 6, 1 << 6, 64}, - - {num_cols, 1, 2048}, - {num_cols >> 3, 1 << 3, 2048}, - {num_cols >> 6, 1 << 6, 2048}, -}; - -RAFT_BENCH_REGISTER(bench_base, "", bench_params); - -} // namespace raft::bench::sparse diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake new file mode 100644 index 000000000..6128b5c43 --- /dev/null +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -0,0 +1,63 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
# Use RAPIDS_VERSION from cmake/thirdparty/fetch_rapids.cmake
set(RAFT_VERSION "${RAPIDS_VERSION}")
set(RAFT_FORK "rapidsai")
set(RAFT_PINNED_TAG "branch-${RAPIDS_VERSION}")

# find_and_configure_raft(
#   VERSION <version> FORK <github-org> PINNED_TAG <git-ref>
#   COMPILE_LIBRARY <bool> ENABLE_NVTX <bool> ENABLE_MNMG_DEPENDENCIES <bool>)
#
# Resolves the RAFT dependency through rapids_cpm_find().
#
# * VERSION                  - version passed to rapids_cpm_find.
# * FORK / PINNED_TAG        - select https://github.com/<FORK>/raft.git at <PINNED_TAG>.
# * COMPILE_LIBRARY          - requests the `compiled` component and is forwarded to the RAFT build
#                              as RAFT_COMPILE_LIBRARY.
# * ENABLE_NVTX              - forwarded to the RAFT build as RAFT_NVTX.
# * ENABLE_MNMG_DEPENDENCIES - requests the `distributed` component.
function(find_and_configure_raft)
  # Initialise every keyword list locally so that same-named variables in the including scope
  # cannot leak into the argument parser.
  set(options "")
  set(oneValueArgs VERSION FORK PINNED_TAG COMPILE_LIBRARY ENABLE_NVTX ENABLE_MNMG_DEPENDENCIES)
  set(multiValueArgs "")
  cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # The component list is accumulated as a space-separated string, exactly as before.
  set(RAFT_COMPONENTS "")
  if(PKG_COMPILE_LIBRARY)
    string(APPEND RAFT_COMPONENTS " compiled")
  endif()

  if(PKG_ENABLE_MNMG_DEPENDENCIES)
    string(APPEND RAFT_COMPONENTS " distributed")
  endif()

  #-----------------------------------------------------
  # Invoke CPM find_package()
  #-----------------------------------------------------
  # NOTE(review): the export set name `raft-template-exports` looks like a carry-over from the
  # RAFT project template -- confirm it matches the export set this project actually installs.
  rapids_cpm_find(raft ${PKG_VERSION}
    GLOBAL_TARGETS      raft::raft
    BUILD_EXPORT_SET    raft-template-exports
    INSTALL_EXPORT_SET  raft-template-exports
    COMPONENTS          ${RAFT_COMPONENTS}
    CPM_ARGS
      GIT_REPOSITORY  https://github.com/${PKG_FORK}/raft.git
      GIT_TAG         ${PKG_PINNED_TAG}
      SOURCE_SUBDIR   cpp
      OPTIONS
        "BUILD_TESTS OFF"
        "BUILD_PRIMS_BENCH OFF"
        "BUILD_ANN_BENCH OFF"
        # Parsed keyword values live under the PKG_ prefix; a bare ${ENABLE_NVTX} is not set by
        # cmake_parse_arguments and would drop the caller's choice.
        "RAFT_NVTX ${PKG_ENABLE_NVTX}"
        "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_LIBRARY}"
  )
endfunction()

# Change pinned tag here to test a commit in CI
# To use a different RAFT locally, set the CMake variable
# CPM_raft_SOURCE=/path/to/local/raft
find_and_configure_raft(VERSION                  ${RAFT_VERSION}.00
                        FORK                     ${RAFT_FORK}
                        PINNED_TAG               ${RAFT_PINNED_TAG}
                        COMPILE_LIBRARY          ON
                        ENABLE_MNMG_DEPENDENCIES OFF
                        ENABLE_NVTX              OFF
)
This # could be handy for archiving the generated documentation or if some version @@ -879,27 +879,7 @@ RECURSIVE = YES # Note that relative paths are relative to the directory from which doxygen is # run. -EXCLUDE = ../include/raft/sparse/linalg/symmetrize.hpp \ - ../include/raft/cache \ - ../include/raft/common \ - ../include/raft/lap \ - ../include/raft/sparse/selection \ - ../include/raft/sparse/csr.hpp \ - ../include/raft/linalg/lanczos.cuh \ - ../include/raft/linalg/lanczos.hpp \ - ../include/raft/util/cuda_utils.cuh \ - ../include/raft/util/cudart_utils.hpp \ - ../include/raft/util/device_atomics.cuh \ - ../include/raft/util/device_utils.cuh \ - ../include/raft/core/error.hpp \ - ../include/raft/core/handle.hpp \ - ../include/raft/util/integer_utils.hpp \ - ../include/raft/util/pow2_utils.cuh \ - ../include/raft/util/vectorized.cuh \ - ../include/raft/raft.hpp \ - ../include/raft/core/cudart_utils.hpp \ - ../include/raft/matrix/math.cuh \ - ../include/raft/matrix/matrix.cuh +EXCLUDE = ../include/cuvs/sparse/selection # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded diff --git a/cpp/doxygen/main_page.md b/cpp/doxygen/main_page.md index ff0c7820c..d002df001 100644 --- a/cpp/doxygen/main_page.md +++ b/cpp/doxygen/main_page.md @@ -1,22 +1,3 @@ -# libraft +# libcuvs -RAFT contains fundamental widely-used algorithms and primitives for data science, graph and machine learning. The algorithms are CUDA-accelerated and form building-blocks for rapidly composing analytics. - -By taking a primitives-based approach to algorithm development, RAFT -- accelerates algorithm construction time, -- reduces the maintenance burden by maximizing reuse across projects, and -- centralizes core reusable computations, allowing future optimizations to benefit all algorithms that use them. 
- -While not exhaustive, the following general categories help summarize the accelerated functions in RAFT: - -##### -| Category | Examples | -| --- | --- | -| **Data Formats** | sparse & dense, conversions, data generation | -| **Dense Linear Algebra** | matrix arithmetic, norms, factorization, least squares, svd & eigenvalue problems | -| **Spatial** | pairwise distances, nearest neighbors, neighborhood graph construction | -| **Sparse Operations** | linear algebra, eigenvalue problems, slicing, symmetrization, labeling | -| **Basic Clustering** | spectral clustering, hierarchical clustering, k-means | -| **Solvers** | combinatorial optimization, iterative solvers | -| **Statistics** | sampling, moments and summary statistics, metrics | -| **Distributed Tools** | multi-node multi-gpu infrastructure | +cuVS is a library for vector search on the GPU \ No newline at end of file diff --git a/cpp/src/distance/distance.cu b/cpp/src/distance/distance.cu index 8c9460831..0a35a335e 100644 --- a/cpp/src/distance/distance.cu +++ b/cpp/src/distance/distance.cu @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include // rbf_fin_op -#include +#include // rbf_fin_op +#include /* * Hierarchy of instantiations: diff --git a/cpp/src/matrix/detail/select_k_double_int64_t.cu b/cpp/src/matrix/detail/select_k_double_int64_t.cu index c75a5b526..c3949149f 100644 --- a/cpp/src/matrix/detail/select_k_double_int64_t.cu +++ b/cpp/src/matrix/detail/select_k_double_int64_t.cu @@ -16,7 +16,7 @@ #include -#define instantiate_raft_matrix_detail_select_k(T, IdxT) \ +#define instantiate_cuvs_matrix_detail_select_k(T, IdxT) \ template void raft::matrix::detail::select_k(raft::resources const& handle, \ const T* in_val, \ const IdxT* in_idx, \ @@ -29,6 +29,6 @@ rmm::mr::device_memory_resource* mr, \ bool sorted) -instantiate_raft_matrix_detail_select_k(double, int64_t); +instantiate_cuvs_matrix_detail_select_k(double, int64_t); -#undef instantiate_raft_matrix_detail_select_k +#undef instantiate_cuvs_matrix_detail_select_k diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 6c03da8d7..1e01d59b2 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -25,40 +25,38 @@ function(ConfigureTest) set(oneValueArgs NAME GPUS PERCENT) set(multiValueArgs PATH TARGETS CONFIGURATIONS) - cmake_parse_arguments(_RAFT_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - if(NOT DEFINED _RAFT_TEST_GPUS AND NOT DEFINED _RAFT_TEST_PERCENT) - set(_RAFT_TEST_GPUS 1) - set(_RAFT_TEST_PERCENT 30) + cmake_parse_arguments(_CUVS_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + if(NOT DEFINED _CUVS_TEST_GPUS AND NOT DEFINED _CUVS_TEST_PERCENT) + set(_CUVS_TEST_GPUS 1) + set(_CUVS_TEST_PERCENT 30) endif() - if(NOT DEFINED _RAFT_TEST_GPUS) - set(_RAFT_TEST_GPUS 1) + if(NOT DEFINED _CUVS_TEST_GPUS) + set(_CUVS_TEST_GPUS 1) endif() - if(NOT DEFINED _RAFT_TEST_PERCENT) - set(_RAFT_TEST_PERCENT 100) + if(NOT DEFINED _CUVS_TEST_PERCENT) + set(_CUVS_TEST_PERCENT 100) endif() - if(_RAFT_TEST_NOCUDA) - set(TEST_NAME "${_RAFT_TEST_NAME}_NOCUDA") + 
if(_CUVS_TEST_NOCUDA) + set(TEST_NAME "${_CUVS_TEST_NAME}_NOCUDA") else() - set(TEST_NAME ${_RAFT_TEST_NAME}) + set(TEST_NAME ${_CUVS_TEST_NAME}) endif() - add_executable(${TEST_NAME} ${_RAFT_TEST_PATH}) + add_executable(${TEST_NAME} ${_CUVS_TEST_PATH}) target_link_libraries( ${TEST_NAME} - PRIVATE raft - raft_internal - $<$:raft::compiled> + PRIVATE raft::raft GTest::gtest GTest::gtest_main Threads::Threads - ${RAFT_CTK_MATH_DEPENDENCIES} + ${CUVS_CTK_MATH_DEPENDENCIES} $ $ ) set_target_properties( ${TEST_NAME} - PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" INSTALL_RPATH "\$ORIGIN/../../../lib" CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON @@ -66,23 +64,23 @@ function(ConfigureTest) CUDA_STANDARD_REQUIRED ON ) target_compile_options( - ${TEST_NAME} PRIVATE "$<$:${RAFT_CXX_FLAGS}>" - "$<$:${RAFT_CUDA_FLAGS}>" + ${TEST_NAME} PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" ) - if(_RAFT_TEST_EXPLICIT_INSTANTIATE_ONLY) - target_compile_definitions(${TEST_NAME} PRIVATE "RAFT_EXPLICIT_INSTANTIATE_ONLY") + if(_CUVS_TEST_EXPLICIT_INSTANTIATE_ONLY) + target_compile_definitions(${TEST_NAME} PRIVATE "CUVS_EXPLICIT_INSTANTIATE_ONLY") endif() - if(_RAFT_TEST_NOCUDA) - target_compile_definitions(${TEST_NAME} PRIVATE "RAFT_DISABLE_CUDA") + if(_CUVS_TEST_NOCUDA) + target_compile_definitions(${TEST_NAME} PRIVATE "CUVS_DISABLE_CUDA") endif() - target_include_directories(${TEST_NAME} PUBLIC "$") + target_include_directories(${TEST_NAME} PUBLIC "$") rapids_test_add( NAME ${TEST_NAME} COMMAND ${TEST_NAME} - GPUS ${_RAFT_TEST_GPUS} - PERCENT ${_RAFT_TEST_PERCENT} + GPUS ${_CUVS_TEST_GPUS} + PERCENT ${_CUVS_TEST_PERCENT} INSTALL_COMPONENT_SET testing ) endfunction() @@ -108,43 +106,6 @@ if(BUILD_TESTS) EXPLICIT_INSTANTIATE_ONLY ) - ConfigureTest( - NAME - CORE_TEST - PATH - test/core/bitset.cu - test/core/device_resources_manager.cpp - test/core/device_setter.cpp - test/core/logger.cpp - test/core/math_device.cu - test/core/math_host.cpp - 
test/core/operators_device.cu - test/core/operators_host.cpp - test/core/handle.cpp - test/core/interruptible.cu - test/core/nvtx.cpp - test/core/mdarray.cu - test/core/mdspan_copy.cpp - test/core/mdspan_copy.cu - test/core/mdspan_utils.cu - test/core/numpy_serializer.cu - test/core/memory_type.cpp - test/core/sparse_matrix.cu - test/core/sparse_matrix.cpp - test/core/span.cpp - test/core/span.cu - test/core/stream_view.cpp - test/core/temporary_device_buffer.cu - test/test.cpp - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME CORE_TEST PATH test/core/stream_view.cpp test/core/mdspan_copy.cpp LIB - EXPLICIT_INSTANTIATE_ONLY NOCUDA - ) - ConfigureTest( NAME DISTANCE_TEST @@ -174,168 +135,6 @@ if(BUILD_TESTS) EXPLICIT_INSTANTIATE_ONLY ) - list( - APPEND - EXT_HEADER_TEST_SOURCES - test/ext_headers/raft_neighbors_brute_force.cu - test/ext_headers/raft_distance_distance.cu - test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu - test/ext_headers/raft_matrix_detail_select_k.cu - test/ext_headers/raft_neighbors_ball_cover.cu - test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu - test/ext_headers/raft_distance_fused_l2_nn.cu - test/ext_headers/raft_neighbors_ivf_pq.cu - test/ext_headers/raft_util_memory_pool.cpp - test/ext_headers/raft_neighbors_ivf_flat.cu - test/ext_headers/raft_core_logger.cpp - test/ext_headers/raft_neighbors_refine.cu - test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu - test/ext_headers/raft_neighbors_detail_selection_faiss.cu - test/ext_headers/raft_linalg_detail_coalesced_reduction.cu - test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu - test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu - test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu - ) - - # Test that the split headers compile in isolation with: - # - # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined - # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED 
defined - # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. - ConfigureTest( - NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB - EXPLICIT_INSTANTIATE_ONLY - ) - ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) - ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) - - ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) - - ConfigureTest( - NAME - LINALG_TEST - PATH - test/linalg/add.cu - test/linalg/axpy.cu - test/linalg/binary_op.cu - test/linalg/cholesky_r1.cu - test/linalg/coalesced_reduction.cu - test/linalg/divide.cu - test/linalg/dot.cu - test/linalg/eig.cu - test/linalg/eig_sel.cu - test/linalg/gemm_layout.cu - test/linalg/gemv.cu - test/linalg/map.cu - test/linalg/map_then_reduce.cu - test/linalg/matrix_vector.cu - test/linalg/matrix_vector_op.cu - test/linalg/mean_squared_error.cu - test/linalg/multiply.cu - test/linalg/norm.cu - test/linalg/normalize.cu - test/linalg/power.cu - test/linalg/randomized_svd.cu - test/linalg/reduce.cu - test/linalg/reduce_cols_by_key.cu - test/linalg/reduce_rows_by_key.cu - test/linalg/rsvd.cu - test/linalg/sqrt.cu - test/linalg/strided_reduction.cu - test/linalg/subtract.cu - test/linalg/svd.cu - test/linalg/ternary_op.cu - test/linalg/transpose.cu - test/linalg/unary_op.cu - ) - - ConfigureTest( - NAME - MATRIX_TEST - PATH - test/matrix/argmax.cu - test/matrix/argmin.cu - test/matrix/columnSort.cu - test/matrix/diagonal.cu - test/matrix/gather.cu - test/matrix/scatter.cu - test/matrix/eye.cu - test/matrix/linewise_op.cu - test/matrix/math.cu - test/matrix/matrix.cu - test/matrix/norm.cu - test/matrix/reverse.cu - test/matrix/slice.cu - test/matrix/triangular.cu - test/sparse/spectral_matrix.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) - - ConfigureTest( - NAME MATRIX_SELECT_LARGE_TEST PATH 
test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - RANDOM_TEST - PATH - test/random/make_blobs.cu - test/random/make_regression.cu - test/random/multi_variable_gaussian.cu - test/random/rng_pcg_host_api.cu - test/random/permute.cu - test/random/rng.cu - test/random/rng_discrete.cu - test/random/rng_int.cu - test/random/rmat_rectangular_generator.cu - test/random/sample_without_replacement.cu - ) - - ConfigureTest( - NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu - test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - SPARSE_TEST - PATH - test/sparse/add.cu - test/sparse/convert_coo.cu - test/sparse/convert_csr.cu - test/sparse/csr_row_slice.cu - test/sparse/csr_to_dense.cu - test/sparse/csr_transpose.cu - test/sparse/degree.cu - test/sparse/filter.cu - test/sparse/norm.cu - test/sparse/normalize.cu - test/sparse/reduce.cu - test/sparse/row_op.cu - test/sparse/sort.cu - test/sparse/spgemmi.cu - test/sparse/symmetrize.cu - ) - - ConfigureTest( - NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu - test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - SPARSE_NEIGHBORS_TEST - PATH - test/sparse/neighbors/cross_component_nn.cu - test/sparse/neighbors/brute_force.cu - test/sparse/neighbors/knn_graph.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - ConfigureTest( NAME NEIGHBORS_TEST @@ -421,9 +220,6 @@ if(BUILD_TESTS) NAME STATS_TEST PATH - test/stats/accuracy.cu - test/stats/adjusted_rand_index.cu - test/stats/completeness_score.cu test/stats/contingencyMatrix.cu test/stats/cov.cu test/stats/dispersion.cu @@ -432,38 +228,15 @@ if(BUILD_TESTS) test/stats/homogeneity_score.cu test/stats/information_criterion.cu test/stats/kl_divergence.cu - test/stats/mean.cu - test/stats/meanvar.cu - test/stats/mean_center.cu - test/stats/minmax.cu test/stats/mutual_info_score.cu 
test/stats/neighborhood_recall.cu test/stats/r2_score.cu test/stats/rand_index.cu - test/stats/regression_metrics.cu test/stats/silhouette_score.cu - test/stats/stddev.cu - test/stats/sum.cu test/stats/trustworthiness.cu - test/stats/weighted_mean.cu - test/stats/v_measure.cu LIB EXPLICIT_INSTANTIATE_ONLY ) - - ConfigureTest( - NAME - UTILS_TEST - PATH - test/core/seive.cu - test/util/bitonic_sort.cu - test/util/cudart_utils.cpp - test/util/device_atomics.cu - test/util/integer_utils.cpp - test/util/integer_utils.cu - test/util/pow2_utils.cu - test/util/reduction.cu - ) endif() # ################################################################################################## diff --git a/dependencies.yaml b/dependencies.yaml index 8be7a24dd..7d6c318d6 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -81,16 +81,16 @@ files: - test_python_common - test_cuvs - cupy - py_build_cuda_ann_bench: + py_build_cuvs_bench: output: pyproject - pyproject_dir: python/cuda-ann-bench + pyproject_dir: python/cuvs-bench extras: table: build-system includes: - build_wheels - py_run_cuda_ann_bench: + py_run_cuvs_bench: output: pyproject - pyproject_dir: python/cuda-ann-bench + pyproject_dir: python/cuvs-bench extras: table: project includes: diff --git a/python/cuda-ann-bench/LICENSE b/python/cuvs-bench/LICENSE similarity index 100% rename from python/cuda-ann-bench/LICENSE rename to python/cuvs-bench/LICENSE diff --git a/python/cuda-ann-bench/pyproject.toml b/python/cuvs-bench/pyproject.toml similarity index 97% rename from python/cuda-ann-bench/pyproject.toml rename to python/cuvs-bench/pyproject.toml index ad2306179..508a9f94c 100644 --- a/python/cuda-ann-bench/pyproject.toml +++ b/python/cuvs-bench/pyproject.toml @@ -10,7 +10,7 @@ requires = [ [project] name = "cuvs-ann-bench" version = "24.02.00" -description = "CUDA ANN benchmarks" +description = "cuVS benchmarks" authors = [ { name = "NVIDIA Corporation" }, ] diff --git 
a/python/cuda-ann-bench/src/cuda-ann-bench/__init__.py b/python/cuvs-bench/src/cuvs-bench/__init__.py similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/__init__.py rename to python/cuvs-bench/src/cuvs-bench/__init__.py diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/constraints/__init__.py b/python/cuvs-bench/src/cuvs-bench/constraints/__init__.py similarity index 87% rename from python/cuda-ann-bench/src/cuda-ann-bench/constraints/__init__.py rename to python/cuvs-bench/src/cuvs-bench/constraints/__init__.py index d827c3d1e..2de3ac98e 100644 --- a/python/cuda-ann-bench/src/cuda-ann-bench/constraints/__init__.py +++ b/python/cuvs-bench/src/cuvs-bench/constraints/__init__.py @@ -16,13 +16,13 @@ DTYPE_SIZES = {"float": 4, "half": 2, "fp8": 1} -def raft_ivf_pq_build_constraints(params, dims): +def cuvs_ivf_pq_build_constraints(params, dims): if "pq_dim" in params: return params["pq_dim"] <= dims return True -def raft_ivf_pq_search_constraints(params, build_params, k, batch_size): +def cuvs_ivf_pq_search_constraints(params, build_params, k, batch_size): ret = True if "internalDistanceDtype" in params and "smemLutDtype" in params: ret = ( @@ -35,7 +35,7 @@ def raft_ivf_pq_search_constraints(params, build_params, k, batch_size): return ret -def raft_cagra_search_constraints(params, build_params, k, batch_size): +def cuvs_cagra_search_constraints(params, build_params, k, batch_size): if "itopk" in params: return params["itopk"] >= k diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/data_export/__main__.py b/python/cuvs-bench/src/cuvs-bench/data_export/__main__.py similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/data_export/__main__.py rename to python/cuvs-bench/src/cuvs-bench/data_export/__main__.py diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/generate_groundtruth/__main__.py b/python/cuvs-bench/src/cuvs-bench/generate_groundtruth/__main__.py similarity index 99% rename from 
python/cuda-ann-bench/src/cuda-ann-bench/generate_groundtruth/__main__.py rename to python/cuvs-bench/src/cuvs-bench/generate_groundtruth/__main__.py index f4d97edea..1e3dc9859 100644 --- a/python/cuda-ann-bench/src/cuda-ann-bench/generate_groundtruth/__main__.py +++ b/python/cuvs-bench/src/cuvs-bench/generate_groundtruth/__main__.py @@ -22,9 +22,10 @@ import numpy as np import rmm from pylibraft.common import DeviceResources -from pylibraft.neighbors.brute_force import knn from rmm.allocators.cupy import rmm_cupy_allocator +from cuvs.neighbors.brute_force import knn + from .utils import memmap_bin_file, suffix_from_dtype, write_bin diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/generate_groundtruth/utils.py b/python/cuvs-bench/src/cuvs-bench/generate_groundtruth/utils.py similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/generate_groundtruth/utils.py rename to python/cuvs-bench/src/cuvs-bench/generate_groundtruth/utils.py diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/get_dataset/__main__.py b/python/cuvs-bench/src/cuvs-bench/get_dataset/__main__.py similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/get_dataset/__main__.py rename to python/cuvs-bench/src/cuvs-bench/get_dataset/__main__.py diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/get_dataset/fbin_to_f16bin.py b/python/cuvs-bench/src/cuvs-bench/get_dataset/fbin_to_f16bin.py similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/get_dataset/fbin_to_f16bin.py rename to python/cuvs-bench/src/cuvs-bench/get_dataset/fbin_to_f16bin.py diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/get_dataset/hdf5_to_fbin.py b/python/cuvs-bench/src/cuvs-bench/get_dataset/hdf5_to_fbin.py similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/get_dataset/hdf5_to_fbin.py rename to python/cuvs-bench/src/cuvs-bench/get_dataset/hdf5_to_fbin.py diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/plot/__main__.py 
b/python/cuvs-bench/src/cuvs-bench/plot/__main__.py similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/plot/__main__.py rename to python/cuvs-bench/src/cuvs-bench/plot/__main__.py diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/__main__.py b/python/cuvs-bench/src/cuvs-bench/run/__main__.py similarity index 99% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/__main__.py rename to python/cuvs-bench/src/cuvs-bench/run/__main__.py index 4611f3926..20d5010c1 100644 --- a/python/cuda-ann-bench/src/cuda-ann-bench/run/__main__.py +++ b/python/cuvs-bench/src/cuvs-bench/run/__main__.py @@ -325,7 +325,7 @@ def main(): parser.add_argument( "--algo-groups", help='add comma separated . to run. \ - Example usage: "--algo-groups=raft_cagra.large,hnswlib.large"', + Example usage: "--algo-groups=cuvs_cagra.large,hnswlib.large"', ) parser.add_argument( "-f", diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/algos.yaml b/python/cuvs-bench/src/cuvs-bench/run/algos.yaml similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/algos.yaml rename to python/cuvs-bench/src/cuvs-bench/run/algos.yaml diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/faiss_gpu_ivf_flat.yaml b/python/cuvs-bench/src/cuvs-bench/run/conf/algos/faiss_gpu_ivf_flat.yaml similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/faiss_gpu_ivf_flat.yaml rename to python/cuvs-bench/src/cuvs-bench/run/conf/algos/faiss_gpu_ivf_flat.yaml diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/faiss_gpu_ivf_pq.yaml b/python/cuvs-bench/src/cuvs-bench/run/conf/algos/faiss_gpu_ivf_pq.yaml similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/faiss_gpu_ivf_pq.yaml rename to python/cuvs-bench/src/cuvs-bench/run/conf/algos/faiss_gpu_ivf_pq.yaml diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/hnswlib.yaml 
b/python/cuvs-bench/src/cuvs-bench/run/conf/algos/hnswlib.yaml similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/hnswlib.yaml rename to python/cuvs-bench/src/cuvs-bench/run/conf/algos/hnswlib.yaml diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/raft_cagra.yaml b/python/cuvs-bench/src/cuvs-bench/run/conf/algos/raft_cagra.yaml similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/raft_cagra.yaml rename to python/cuvs-bench/src/cuvs-bench/run/conf/algos/raft_cagra.yaml diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml b/python/cuvs-bench/src/cuvs-bench/run/conf/algos/raft_cagra_hnswlib.yaml similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml rename to python/cuvs-bench/src/cuvs-bench/run/conf/algos/raft_cagra_hnswlib.yaml diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/raft_ivf_flat.yaml b/python/cuvs-bench/src/cuvs-bench/run/conf/algos/raft_ivf_flat.yaml similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/raft_ivf_flat.yaml rename to python/cuvs-bench/src/cuvs-bench/run/conf/algos/raft_ivf_flat.yaml diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/raft_ivf_pq.yaml b/python/cuvs-bench/src/cuvs-bench/run/conf/algos/raft_ivf_pq.yaml similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/algos/raft_ivf_pq.yaml rename to python/cuvs-bench/src/cuvs-bench/run/conf/algos/raft_ivf_pq.yaml diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/bigann-100M.json b/python/cuvs-bench/src/cuvs-bench/run/conf/bigann-100M.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/bigann-100M.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/bigann-100M.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/datasets.yaml 
b/python/cuvs-bench/src/cuvs-bench/run/conf/datasets.yaml similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/datasets.yaml rename to python/cuvs-bench/src/cuvs-bench/run/conf/datasets.yaml diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/deep-100M.json b/python/cuvs-bench/src/cuvs-bench/run/conf/deep-100M.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/deep-100M.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/deep-100M.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/deep-1B.json b/python/cuvs-bench/src/cuvs-bench/run/conf/deep-1B.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/deep-1B.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/deep-1B.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/deep-image-96-inner.json b/python/cuvs-bench/src/cuvs-bench/run/conf/deep-image-96-inner.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/deep-image-96-inner.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/deep-image-96-inner.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/fashion-mnist-784-euclidean.json b/python/cuvs-bench/src/cuvs-bench/run/conf/fashion-mnist-784-euclidean.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/fashion-mnist-784-euclidean.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/fashion-mnist-784-euclidean.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/gist-960-euclidean.json b/python/cuvs-bench/src/cuvs-bench/run/conf/gist-960-euclidean.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/gist-960-euclidean.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/gist-960-euclidean.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/glove-100-angular.json 
b/python/cuvs-bench/src/cuvs-bench/run/conf/glove-100-angular.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/glove-100-angular.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/glove-100-angular.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/glove-100-inner.json b/python/cuvs-bench/src/cuvs-bench/run/conf/glove-100-inner.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/glove-100-inner.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/glove-100-inner.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/glove-50-angular.json b/python/cuvs-bench/src/cuvs-bench/run/conf/glove-50-angular.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/glove-50-angular.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/glove-50-angular.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/glove-50-inner.json b/python/cuvs-bench/src/cuvs-bench/run/conf/glove-50-inner.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/glove-50-inner.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/glove-50-inner.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/lastfm-65-angular.json b/python/cuvs-bench/src/cuvs-bench/run/conf/lastfm-65-angular.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/lastfm-65-angular.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/lastfm-65-angular.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/mnist-784-euclidean.json b/python/cuvs-bench/src/cuvs-bench/run/conf/mnist-784-euclidean.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/mnist-784-euclidean.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/mnist-784-euclidean.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/nytimes-256-angular.json 
b/python/cuvs-bench/src/cuvs-bench/run/conf/nytimes-256-angular.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/nytimes-256-angular.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/nytimes-256-angular.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/nytimes-256-inner.json b/python/cuvs-bench/src/cuvs-bench/run/conf/nytimes-256-inner.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/nytimes-256-inner.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/nytimes-256-inner.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/sift-128-euclidean.json b/python/cuvs-bench/src/cuvs-bench/run/conf/sift-128-euclidean.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/sift-128-euclidean.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/sift-128-euclidean.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/wiki_all_10M.json b/python/cuvs-bench/src/cuvs-bench/run/conf/wiki_all_10M.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/wiki_all_10M.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/wiki_all_10M.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/wiki_all_1M.json b/python/cuvs-bench/src/cuvs-bench/run/conf/wiki_all_1M.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/wiki_all_1M.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/wiki_all_1M.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/run/conf/wiki_all_88M.json b/python/cuvs-bench/src/cuvs-bench/run/conf/wiki_all_88M.json similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/run/conf/wiki_all_88M.json rename to python/cuvs-bench/src/cuvs-bench/run/conf/wiki_all_88M.json diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/split_groundtruth/__main__.py 
b/python/cuvs-bench/src/cuvs-bench/split_groundtruth/__main__.py similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/split_groundtruth/__main__.py rename to python/cuvs-bench/src/cuvs-bench/split_groundtruth/__main__.py diff --git a/python/cuda-ann-bench/src/cuda-ann-bench/split_groundtruth/split_groundtruth.pl b/python/cuvs-bench/src/cuvs-bench/split_groundtruth/split_groundtruth.pl similarity index 100% rename from python/cuda-ann-bench/src/cuda-ann-bench/split_groundtruth/split_groundtruth.pl rename to python/cuvs-bench/src/cuvs-bench/split_groundtruth/split_groundtruth.pl