From 63f8d71cb6314b9424a0023a6ab4b6c31133f5ec Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Thu, 1 Feb 2024 07:45:24 -0800 Subject: [PATCH] CAGRA C-api compile fixes (#28) The cagra_c.h file was using in-class member initializers, which are only available in c++11 - and not in C. This resulted in compile failures when trying to build C libraries using CAGRA. Fix and add a basic C smoke test that would have caught this. Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/28 --- build.sh | 2 + cpp/include/cuvs/neighbors/cagra_c.h | 100 +++++++++++++++++++-------- cpp/src/neighbors/cagra_c.cpp | 63 ++++++++++++++--- cpp/test/CMakeLists.txt | 4 +- cpp/test/neighbors/ann_cagra_c.cu | 8 ++- cpp/test/neighbors/c_api.c | 31 +++++++++ 6 files changed, 169 insertions(+), 39 deletions(-) create mode 100644 cpp/test/neighbors/c_api.c diff --git a/build.sh b/build.sh index 81bcff971..6dd250c51 100755 --- a/build.sh +++ b/build.sh @@ -351,10 +351,12 @@ if (( ${NUMARGS} == 0 )) || hasArg libcuvs || hasArg docs || hasArg tests || has -DCMAKE_CUDA_ARCHITECTURES=${CUVS_CMAKE_CUDA_ARCHITECTURES} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DCUVS_COMPILE_LIBRARY=${COMPILE_LIBRARY} \ + -DBUILD_C_LIBRARY=${COMPILE_LIBRARY} \ -DCUVS_NVTX=${NVTX} \ -DCUDA_LOG_COMPILE_TIME=${LOG_COMPILE_TIME} \ -DDISABLE_DEPRECATION_WARNINGS=${DISABLE_DEPRECATION_WARNINGS} \ -DBUILD_TESTS=${BUILD_TESTS} \ + -DBUILD_C_TESTS=${BUILD_TESTS} \ -DBUILD_CPU_ONLY=${BUILD_CPU_ONLY} \ -DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL} \ ${CACHE_ARGS} \ diff --git a/cpp/include/cuvs/neighbors/cagra_c.h b/cpp/include/cuvs/neighbors/cagra_c.h index 105f4f1b9..59861b502 100644 --- a/cpp/include/cuvs/neighbors/cagra_c.h +++ b/cpp/include/cuvs/neighbors/cagra_c.h @@ -46,15 +46,33 @@ enum cagraGraphBuildAlgo { */ struct cagraIndexParams { /** Degree of input graph for pruning. 
*/ - size_t intermediate_graph_degree = 128; + size_t intermediate_graph_degree; /** Degree of output graph. */ - size_t graph_degree = 64; + size_t graph_degree; /** ANN algorithm to build knn graph. */ - cagraGraphBuildAlgo build_algo = IVF_PQ; + enum cagraGraphBuildAlgo build_algo; /** Number of Iterations to run if building with NN_DESCENT */ - size_t nn_descent_niter = 20; + size_t nn_descent_niter; }; +typedef struct cagraIndexParams* cuvsCagraIndexParams_t; + +/** + * @brief Allocate CAGRA Index params, and populate with default values + * + * @param[in] params cuvsCagraIndexParams_t to allocate + * @return cuvsError_t + */ +cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params); + +/** + * @brief De-allocate CAGRA Index params + * + * @param[in] params + * @return cuvsError_t + */ +cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t index); + /** * @brief Enum to denote algorithm used to search CAGRA Index * @@ -78,49 +96,67 @@ enum cagraHashMode { HASH, SMALL, AUTO_HASH }; * @brief Supplemental parameters to search CAGRA index * */ -typedef struct { +struct cagraSearchParams { /** Maximum number of queries to search at the same time (batch size). Auto select when 0.*/ - size_t max_queries = 0; + size_t max_queries; /** Number of intermediate search results retained during the search. * * This is the main knob to adjust trade off between accuracy and search speed. * Higher values improve the search accuracy. */ - size_t itopk_size = 64; + size_t itopk_size; /** Upper limit of search iterations. Auto select when 0.*/ - size_t max_iterations = 0; + size_t max_iterations; // In the following we list additional search parameters for fine tuning. // Reasonable default values are automatically chosen. /** Which search implementation to use. */ - cagraSearchAlgo algo = AUTO; + enum cagraSearchAlgo algo; /** Number of threads used to calculate a single distance. 4, 8, 16, or 32. 
*/ - size_t team_size = 0; + size_t team_size; /** Number of graph nodes to select as the starting point for the search in each iteration. aka * search width?*/ - size_t search_width = 1; + size_t search_width; /** Lower limit of search iterations. */ - size_t min_iterations = 0; + size_t min_iterations; /** Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. */ - size_t thread_block_size = 0; + size_t thread_block_size; /** Hashmap type. Auto selection when AUTO. */ - cagraHashMode hashmap_mode = AUTO_HASH; + enum cagraHashMode hashmap_mode; /** Lower limit of hashmap bit length. More than 8. */ - size_t hashmap_min_bitlen = 0; + size_t hashmap_min_bitlen; /** Upper limit of hashmap fill rate. More than 0.1, less than 0.9.*/ - float hashmap_max_fill_rate = 0.5; + float hashmap_max_fill_rate; /** Number of iterations of initial random seed node selection. 1 or more. */ - uint32_t num_random_samplings = 1; + uint32_t num_random_samplings; /** Bit mask used for initial random seed node selection. 
*/ - uint64_t rand_xor_mask = 0x128394; -} cagraSearchParams; + uint64_t rand_xor_mask; +}; + +typedef struct cagraSearchParams* cuvsCagraSearchParams_t; + +/** + * @brief Allocate CAGRA search params, and populate with default values + * + * @param[in] params cuvsCagraSearchParams_t to allocate + * @return cuvsError_t + */ +cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* params); + +/** + * @brief De-allocate CAGRA search params + * + * @param[in] params + * @return cuvsError_t + */ +cuvsError_t cuvsCagraSearchParamsDestroy(cuvsCagraSearchParams_t params); /** * @brief Struct to hold address of cuvs::neighbors::cagra::index and its active trained dtype @@ -168,6 +204,10 @@ cuvsError_t cagraIndexDestroy(cagraIndex_t index); * // Assume a populated `DLManagedTensor` type here * DLManagedTensor dataset; * + * // Create default index params + * cuvsCagraIndexParams_t params; + * cuvsError_t params_create_status = cuvsCagraIndexParamsCreate(&params); + * * // Create CAGRA index * cagraIndex_t index; * cuvsError_t index_create_status = cagraIndexCreate(&index); @@ -175,24 +215,25 @@ cuvsError_t cagraIndexDestroy(cagraIndex_t index); * // Build the CAGRA Index * cuvsError_t build_status = cagraBuild(res, params, &dataset, index); * - * // de-allocate `index` and `res` + * // de-allocate `params`, `index` and `res` + * cuvsError_t params_destroy_status = cuvsCagraIndexParamsDestroy(params); * cuvsError_t index_destroy_status = cagraIndexDestroy(index); * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); * @endcode * * @param[in] res cuvsResources_t opaque C handle - * @param[in] params cagraIndexParams used to build CAGRA index + * @param[in] params cuvsCagraIndexParams_t used to build CAGRA index * @param[in] dataset DLManagedTensor* training dataset * @param[out] index cagraIndex_t Newly built CAGRA index * @return cuvsError_t */ cuvsError_t cagraBuild(cuvsResources_t res, - cagraIndexParams params, + cuvsCagraIndexParams_t params, 
DLManagedTensor* dataset, cagraIndex_t index); /** - * @brief Build a CAGRA index with a `DLManagedTensor` which has underlying + * @brief Search a CAGRA index with a `DLManagedTensor` which has underlying * `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. * It is also important to note that the CAGRA Index must have been built * with the same type of `queries`, such that `index.dtype.code == @@ -214,24 +255,27 @@ cuvsError_t cagraBuild(cuvsResources_t res, * DLManagedTensor queries; * DLManagedTensor neighbors; * + * // Create default search params + * cuvsCagraSearchParams_t params; + * cuvsError_t params_create_status = cuvsCagraSearchParamsCreate(&params); + * * // Search the `index` built using `cagraBuild` - * cagraSearchParams params; * cuvsError_t search_status = cagraSearch(res, params, index, queries, neighbors, distances); * - * // de-allocate `index` and `res` - * cuvsError_t index_destroy_status = cagraIndexDestroy(index); + * // de-allocate `params` and `res` + * cuvsError_t params_destroy_status = cuvsCagraSearchParamsDestroy(params); * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); * @endcode * * @param[in] res cuvsResources_t opaque C handle - * @param[in] params cagraSearchParams used to search CAGRA index + * @param[in] params cuvsCagraSearchParams_t used to search CAGRA index * @param[in] index cagraIndex which has been returned by `cagraBuild` * @param[in] queries DLManagedTensor* queries dataset to search * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries * @param[out] distances DLManagedTensor* output `k` distances for queries */ cuvsError_t cagraSearch(cuvsResources_t res, - cagraSearchParams params, + cuvsCagraSearchParams_t params, cagraIndex_t index, DLManagedTensor* queries, DLManagedTensor* neighbors, diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index 1c9403efe..638c9a23d 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ 
-131,7 +131,7 @@ extern "C" cuvsError_t cagraIndexDestroy(cagraIndex_t index_c_ptr) } extern "C" cuvsError_t cagraBuild(cuvsResources_t res, - cagraIndexParams params, + cuvsCagraIndexParams_t params, DLManagedTensor* dataset_tensor, cagraIndex_t index) { @@ -139,13 +139,13 @@ extern "C" cuvsError_t cagraBuild(cuvsResources_t res, auto dataset = dataset_tensor->dl_tensor; if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { - index->addr = reinterpret_cast(_build(res, params, dataset_tensor)); + index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); index->dtype.code = kDLFloat; } else if (dataset.dtype.code == kDLInt && dataset.dtype.bits == 8) { - index->addr = reinterpret_cast(_build(res, params, dataset_tensor)); + index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); index->dtype.code = kDLInt; } else if (dataset.dtype.code == kDLUInt && dataset.dtype.bits == 8) { - index->addr = reinterpret_cast(_build(res, params, dataset_tensor)); + index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); index->dtype.code = kDLUInt; } else { RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", @@ -159,7 +159,7 @@ extern "C" cuvsError_t cagraBuild(cuvsResources_t res, } extern "C" cuvsError_t cagraSearch(cuvsResources_t res, - cagraSearchParams params, + cuvsCagraSearchParams_t params, cagraIndex_t index_c_ptr, DLManagedTensor* queries_tensor, DLManagedTensor* neighbors_tensor, @@ -186,11 +186,11 @@ extern "C" cuvsError_t cagraSearch(cuvsResources_t res, RAFT_EXPECTS(queries.dtype.code == index.dtype.code, "type mismatch between index and queries"); if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) { - _search(res, params, index, queries_tensor, neighbors_tensor, distances_tensor); + _search(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); } else if (queries.dtype.code == kDLInt && queries.dtype.bits == 8) { - _search(res, params, index, queries_tensor, 
neighbors_tensor, distances_tensor); + _search(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); } else if (queries.dtype.code == kDLUInt && queries.dtype.bits == 8) { - _search(res, params, index, queries_tensor, neighbors_tensor, distances_tensor); + _search(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); } else { RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d", queries.dtype.code, @@ -201,3 +201,50 @@ extern "C" cuvsError_t cagraSearch(cuvsResources_t res, return CUVS_ERROR; } } + +extern "C" cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params) +{ + try { + *params = new cagraIndexParams{.intermediate_graph_degree = 128, + .graph_degree = 64, + .build_algo = IVF_PQ, + .nn_descent_niter = 20}; + return CUVS_SUCCESS; + } catch (...) { + return CUVS_ERROR; + } +} + +extern "C" cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t params) +{ + try { + delete params; + return CUVS_SUCCESS; + } catch (...) { + return CUVS_ERROR; + } +} + +extern "C" cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* params) +{ + try { + *params = new cagraSearchParams{.itopk_size = 64, + .search_width = 1, + .hashmap_max_fill_rate = 0.5, + .num_random_samplings = 1, + .rand_xor_mask = 0x128394}; + return CUVS_SUCCESS; + } catch (...) { + return CUVS_ERROR; + } +} + +extern "C" cuvsError_t cuvsCagraSearchParamsDestroy(cuvsCagraSearchParams_t params) +{ + try { + delete params; + return CUVS_SUCCESS; + } catch (...) 
{ + return CUVS_ERROR; + } +} diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index e5880797a..609bc2d4e 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -117,6 +117,8 @@ if(BUILD_C_TESTS) enable_language(C) add_executable(cuvs_c_test test/core/c_api.c) - target_link_libraries(cuvs_c_test PUBLIC cuvs::c_api) + + add_executable(cuvs_c_neighbors_test test/neighbors/c_api.c) + target_link_libraries(cuvs_c_neighbors_test PUBLIC cuvs::c_api) endif() diff --git a/cpp/test/neighbors/ann_cagra_c.cu b/cpp/test/neighbors/ann_cagra_c.cu index 1a8dc920f..4870ac3b8 100644 --- a/cpp/test/neighbors/ann_cagra_c.cu +++ b/cpp/test/neighbors/ann_cagra_c.cu @@ -60,7 +60,8 @@ TEST(CagraC, BuildSearch) cagraIndexCreate(&index); // build index - cagraIndexParams build_params; + cuvsCagraIndexParams_t build_params; + cuvsCagraIndexParamsCreate(&build_params); cagraBuild(res, build_params, &dataset_tensor, index); // create queries DLTensor @@ -110,7 +111,8 @@ TEST(CagraC, BuildSearch) distances_tensor.dl_tensor.strides = nullptr; // search index - cagraSearchParams search_params; + cuvsCagraSearchParams_t search_params; + cuvsCagraSearchParamsCreate(&search_params); cagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor); // verify output @@ -124,6 +126,8 @@ TEST(CagraC, BuildSearch) cudaFree(distances_d); // de-allocate index and res + cuvsCagraSearchParamsDestroy(search_params); + cuvsCagraIndexParamsDestroy(build_params); cagraIndexDestroy(index); cuvsResourcesDestroy(res); } diff --git a/cpp/test/neighbors/c_api.c b/cpp/test/neighbors/c_api.c new file mode 100644 index 000000000..d4f5ad08e --- /dev/null +++ b/cpp/test/neighbors/c_api.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +int main() +{ + // simple smoke test to make sure that we can compile the cagra_c.h API + // using a c compiler. This isn't aiming to be a full test, just checking + // that the exposed C-API is valid C code and doesn't contain C++ features + cagraIndex_t index; + cagraIndexCreate(&index); + cagraIndexDestroy(index); + return 0; +}