Skip to content

Commit

Permalink
CAGRA C-api compile fixes (#28)
Browse files Browse the repository at this point in the history
The cagra_c.h file was using default (in-class) member initializers, which are a C++11 feature and are not valid C. This resulted in compile failures when trying to build C libraries using CAGRA.

Fix and add a basic C smoke test that would have caught this.

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #28
  • Loading branch information
benfred authored Feb 1, 2024
1 parent 728215b commit 63f8d71
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 39 deletions.
2 changes: 2 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -351,10 +351,12 @@ if (( ${NUMARGS} == 0 )) || hasArg libcuvs || hasArg docs || hasArg tests || has
-DCMAKE_CUDA_ARCHITECTURES=${CUVS_CMAKE_CUDA_ARCHITECTURES} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCUVS_COMPILE_LIBRARY=${COMPILE_LIBRARY} \
-DBUILD_C_LIBRARY=${COMPILE_LIBRARY} \
-DCUVS_NVTX=${NVTX} \
-DCUDA_LOG_COMPILE_TIME=${LOG_COMPILE_TIME} \
-DDISABLE_DEPRECATION_WARNINGS=${DISABLE_DEPRECATION_WARNINGS} \
-DBUILD_TESTS=${BUILD_TESTS} \
-DBUILD_C_TESTS=${BUILD_TESTS} \
-DBUILD_CPU_ONLY=${BUILD_CPU_ONLY} \
-DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL} \
${CACHE_ARGS} \
Expand Down
100 changes: 72 additions & 28 deletions cpp/include/cuvs/neighbors/cagra_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,33 @@ enum cagraGraphBuildAlgo {
*/
struct cagraIndexParams {
/** Degree of input graph for pruning. */
size_t intermediate_graph_degree = 128;
size_t intermediate_graph_degree;
/** Degree of output graph. */
size_t graph_degree = 64;
size_t graph_degree;
/** ANN algorithm to build knn graph. */
cagraGraphBuildAlgo build_algo = IVF_PQ;
enum cagraGraphBuildAlgo build_algo;
/** Number of Iterations to run if building with NN_DESCENT */
size_t nn_descent_niter = 20;
size_t nn_descent_niter;
};

typedef struct cagraIndexParams* cuvsCagraIndexParams_t;

/**
* @brief Allocate CAGRA Index params, and populate with default values
*
* The defaults are intermediate_graph_degree = 128, graph_degree = 64,
* build_algo = IVF_PQ and nn_descent_niter = 20. The returned handle owns a
* heap allocation and must be released with cuvsCagraIndexParamsDestroy.
*
* @param[out] params pointer to the cuvsCagraIndexParams_t handle that receives the new allocation
* @return cuvsError_t CUVS_SUCCESS on success, CUVS_ERROR if the allocation throws
*/
cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params);

/**
* @brief De-allocate CAGRA Index params
*
* @param[in] params cuvsCagraIndexParams_t handle previously allocated by cuvsCagraIndexParamsCreate
* @return cuvsError_t CUVS_SUCCESS on success, CUVS_ERROR otherwise
*/
cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t params);

/**
* @brief Enum to denote algorithm used to search CAGRA Index
*
Expand All @@ -78,49 +96,67 @@ enum cagraHashMode { HASH, SMALL, AUTO_HASH };
* @brief Supplemental parameters to search CAGRA index
*
*/
typedef struct {
struct cagraSearchParams {
/** Maximum number of queries to search at the same time (batch size). Auto select when 0.*/
size_t max_queries = 0;
size_t max_queries;

/** Number of intermediate search results retained during the search.
*
* This is the main knob to adjust trade off between accuracy and search speed.
* Higher values improve the search accuracy.
*/
size_t itopk_size = 64;
size_t itopk_size;

/** Upper limit of search iterations. Auto select when 0.*/
size_t max_iterations = 0;
size_t max_iterations;

// In the following we list additional search parameters for fine tuning.
// Reasonable default values are automatically chosen.

/** Which search implementation to use. */
cagraSearchAlgo algo = AUTO;
enum cagraSearchAlgo algo;

/** Number of threads used to calculate a single distance. 4, 8, 16, or 32. */
size_t team_size = 0;
size_t team_size;

/** Number of graph nodes to select as the starting point for the search in each iteration. aka
* search width?*/
size_t search_width = 1;
size_t search_width;
/** Lower limit of search iterations. */
size_t min_iterations = 0;
size_t min_iterations;

/** Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. */
size_t thread_block_size = 0;
size_t thread_block_size;
/** Hashmap type. Auto selection when AUTO. */
cagraHashMode hashmap_mode = AUTO_HASH;
enum cagraHashMode hashmap_mode;
/** Lower limit of hashmap bit length. More than 8. */
size_t hashmap_min_bitlen = 0;
size_t hashmap_min_bitlen;
/** Upper limit of hashmap fill rate. More than 0.1, less than 0.9.*/
float hashmap_max_fill_rate = 0.5;
float hashmap_max_fill_rate;

/** Number of iterations of initial random seed node selection. 1 or more. */
uint32_t num_random_samplings = 1;
uint32_t num_random_samplings;
/** Bit mask used for initial random seed node selection. */
uint64_t rand_xor_mask = 0x128394;
} cagraSearchParams;
uint64_t rand_xor_mask;
};

typedef struct cagraSearchParams* cuvsCagraSearchParams_t;

/**
* @brief Allocate CAGRA search params, and populate with default values
*
* The returned handle owns a heap allocation and must be released with
* cuvsCagraSearchParamsDestroy.
*
* @param[out] params pointer to the cuvsCagraSearchParams_t handle that receives the new allocation
* @return cuvsError_t CUVS_SUCCESS on success, CUVS_ERROR if the allocation throws
*/
cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* params);

/**
* @brief De-allocate CAGRA search params
*
* @param[in] params cuvsCagraSearchParams_t handle previously allocated by cuvsCagraSearchParamsCreate
* @return cuvsError_t CUVS_SUCCESS on success, CUVS_ERROR otherwise
*/
cuvsError_t cuvsCagraSearchParamsDestroy(cuvsCagraSearchParams_t params);

/**
* @brief Struct to hold address of cuvs::neighbors::cagra::index and its active trained dtype
Expand Down Expand Up @@ -168,31 +204,36 @@ cuvsError_t cagraIndexDestroy(cagraIndex_t index);
* // Assume a populated `DLManagedTensor` type here
* DLManagedTensor dataset;
*
* // Create default index params
* cuvsCagraIndexParams_t params;
* cuvsError_t params_create_status = cuvsCagraIndexParamsCreate(&params);
*
* // Create CAGRA index
* cagraIndex_t index;
* cuvsError_t index_create_status = cagraIndexCreate(&index);
*
* // Build the CAGRA Index
* cuvsError_t build_status = cagraBuild(res, params, &dataset, index);
*
* // de-allocate `index` and `res`
* // de-allocate `params`, `index` and `res`
* cuvsError_t params_destroy_status = cuvsCagraIndexParamsDestroy(params);
* cuvsError_t index_destroy_status = cagraIndexDestroy(index);
* cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
* @endcode
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] params cagraIndexParams used to build CAGRA index
* @param[in] params cuvsCagraIndexParams_t used to build CAGRA index
* @param[in] dataset DLManagedTensor* training dataset
* @param[out] index cagraIndex_t Newly built CAGRA index
* @return cuvsError_t
*/
cuvsError_t cagraBuild(cuvsResources_t res,
cagraIndexParams params,
cuvsCagraIndexParams_t params,
DLManagedTensor* dataset,
cagraIndex_t index);

/**
* @brief Build a CAGRA index with a `DLManagedTensor` which has underlying
* @brief Search a CAGRA index with a `DLManagedTensor` which has underlying
* `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`.
* It is also important to note that the CAGRA Index must have been built
* with the same type of `queries`, such that `index.dtype.code ==
Expand All @@ -214,24 +255,27 @@ cuvsError_t cagraBuild(cuvsResources_t res,
* DLManagedTensor queries;
* DLManagedTensor neighbors;
*
* // Create default search params
* cuvsCagraSearchParams_t params;
* cuvsError_t params_create_status = cuvsCagraSearchParamsCreate(&params);
*
* // Search the `index` built using `cagraBuild`
* cagraSearchParams params;
* cuvsError_t search_status = cagraSearch(res, params, index, queries, neighbors, distances);
*
* // de-allocate `index` and `res`
* cuvsError_t index_destroy_status = cagraIndexDestroy(index);
* // de-allocate `params` and `res`
* cuvsError_t params_destroy_status = cuvsCagraSearchParamsDestroy(params);
* cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
* @endcode
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] params cagraSearchParams used to search CAGRA index
* @param[in] params cuvsCagraSearchParams_t used to search CAGRA index
* @param[in] index cagraIndex which has been returned by `cagraBuild`
* @param[in] queries DLManagedTensor* queries dataset to search
* @param[out] neighbors DLManagedTensor* output `k` neighbors for queries
* @param[out] distances DLManagedTensor* output `k` distances for queries
*/
cuvsError_t cagraSearch(cuvsResources_t res,
cagraSearchParams params,
cuvsCagraSearchParams_t params,
cagraIndex_t index,
DLManagedTensor* queries,
DLManagedTensor* neighbors,
Expand Down
63 changes: 55 additions & 8 deletions cpp/src/neighbors/cagra_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,21 +131,21 @@ extern "C" cuvsError_t cagraIndexDestroy(cagraIndex_t index_c_ptr)
}

extern "C" cuvsError_t cagraBuild(cuvsResources_t res,
cagraIndexParams params,
cuvsCagraIndexParams_t params,
DLManagedTensor* dataset_tensor,
cagraIndex_t index)
{
try {
auto dataset = dataset_tensor->dl_tensor;

if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) {
index->addr = reinterpret_cast<uintptr_t>(_build<float>(res, params, dataset_tensor));
index->addr = reinterpret_cast<uintptr_t>(_build<float>(res, *params, dataset_tensor));
index->dtype.code = kDLFloat;
} else if (dataset.dtype.code == kDLInt && dataset.dtype.bits == 8) {
index->addr = reinterpret_cast<uintptr_t>(_build<int8_t>(res, params, dataset_tensor));
index->addr = reinterpret_cast<uintptr_t>(_build<int8_t>(res, *params, dataset_tensor));
index->dtype.code = kDLInt;
} else if (dataset.dtype.code == kDLUInt && dataset.dtype.bits == 8) {
index->addr = reinterpret_cast<uintptr_t>(_build<uint8_t>(res, params, dataset_tensor));
index->addr = reinterpret_cast<uintptr_t>(_build<uint8_t>(res, *params, dataset_tensor));
index->dtype.code = kDLUInt;
} else {
RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
Expand All @@ -159,7 +159,7 @@ extern "C" cuvsError_t cagraBuild(cuvsResources_t res,
}

extern "C" cuvsError_t cagraSearch(cuvsResources_t res,
cagraSearchParams params,
cuvsCagraSearchParams_t params,
cagraIndex_t index_c_ptr,
DLManagedTensor* queries_tensor,
DLManagedTensor* neighbors_tensor,
Expand All @@ -186,11 +186,11 @@ extern "C" cuvsError_t cagraSearch(cuvsResources_t res,
RAFT_EXPECTS(queries.dtype.code == index.dtype.code, "type mismatch between index and queries");

if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) {
_search<float>(res, params, index, queries_tensor, neighbors_tensor, distances_tensor);
_search<float>(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor);
} else if (queries.dtype.code == kDLInt && queries.dtype.bits == 8) {
_search<int8_t>(res, params, index, queries_tensor, neighbors_tensor, distances_tensor);
_search<int8_t>(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor);
} else if (queries.dtype.code == kDLUInt && queries.dtype.bits == 8) {
_search<uint8_t>(res, params, index, queries_tensor, neighbors_tensor, distances_tensor);
_search<uint8_t>(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor);
} else {
RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d",
queries.dtype.code,
Expand All @@ -201,3 +201,50 @@ extern "C" cuvsError_t cagraSearch(cuvsResources_t res,
return CUVS_ERROR;
}
}

/**
 * Allocate a cagraIndexParams object and populate it with the default build
 * settings (intermediate_graph_degree = 128, graph_degree = 64,
 * build_algo = IVF_PQ, nn_descent_niter = 20).
 *
 * @param[out] params receives the newly allocated handle; must not be null
 * @return CUVS_SUCCESS on success, CUVS_ERROR if `params` is null or the
 *         allocation throws
 */
extern "C" cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params)
{
  // Writing through a null output pointer is not an exception, so the
  // try/catch below could not turn it into an error code; reject it up front.
  if (params == nullptr) { return CUVS_ERROR; }

  try {
    *params = new cagraIndexParams{.intermediate_graph_degree = 128,
                                   .graph_degree              = 64,
                                   .build_algo                = IVF_PQ,
                                   .nn_descent_niter          = 20};
    return CUVS_SUCCESS;
  } catch (...) {
    return CUVS_ERROR;
  }
}

/**
 * Release a cagraIndexParams object through its handle.
 *
 * @param[in] params handle to delete
 * @return CUVS_SUCCESS when the delete completes, CUVS_ERROR if it throws
 */
extern "C" cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t params)
{
  cuvsError_t status = CUVS_SUCCESS;
  try {
    delete params;
  } catch (...) {
    // Exceptions must not cross the C boundary; report them as an error code.
    status = CUVS_ERROR;
  }
  return status;
}

/**
 * Allocate a cagraSearchParams object and populate it with the default search
 * settings that the header previously expressed as member initializers.
 *
 * @param[out] params receives the newly allocated handle; must not be null
 * @return CUVS_SUCCESS on success, CUVS_ERROR if `params` is null or the
 *         allocation throws
 */
extern "C" cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* params)
{
  // Writing through a null output pointer is not an exception, so the
  // try/catch below could not turn it into an error code; reject it up front.
  if (params == nullptr) { return CUVS_ERROR; }

  try {
    // Designators follow the member declaration order. Members that are not
    // named (max_queries, max_iterations, team_size, min_iterations,
    // thread_block_size, hashmap_min_bitlen) are zero-initialized, which is
    // their intended default. The two enum members are set explicitly:
    // leaving them out would zero-initialize them to the first enumerator
    // (HASH for cagraHashMode) instead of the AUTO / AUTO_HASH defaults.
    *params = new cagraSearchParams{.itopk_size            = 64,
                                    .algo                  = AUTO,
                                    .search_width          = 1,
                                    .hashmap_mode          = AUTO_HASH,
                                    .hashmap_max_fill_rate = 0.5,
                                    .num_random_samplings  = 1,
                                    .rand_xor_mask         = 0x128394};
    return CUVS_SUCCESS;
  } catch (...) {
    return CUVS_ERROR;
  }
}

/**
 * Release a cagraSearchParams object through its handle.
 *
 * @param[in] params handle to delete
 * @return CUVS_SUCCESS when the delete completes, CUVS_ERROR if it throws
 */
extern "C" cuvsError_t cuvsCagraSearchParamsDestroy(cuvsCagraSearchParams_t params)
{
  cuvsError_t status = CUVS_ERROR;
  try {
    delete params;
    status = CUVS_SUCCESS;
  } catch (...) {
    // Exceptions must not cross the C boundary; `status` already holds the error code.
  }
  return status;
}
4 changes: 3 additions & 1 deletion cpp/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ if(BUILD_C_TESTS)
enable_language(C)

add_executable(cuvs_c_test test/core/c_api.c)

target_link_libraries(cuvs_c_test PUBLIC cuvs::c_api)

add_executable(cuvs_c_neighbors_test test/neighbors/c_api.c)
target_link_libraries(cuvs_c_neighbors_test PUBLIC cuvs::c_api)
endif()
8 changes: 6 additions & 2 deletions cpp/test/neighbors/ann_cagra_c.cu
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ TEST(CagraC, BuildSearch)
cagraIndexCreate(&index);

// build index
cagraIndexParams build_params;
cuvsCagraIndexParams_t build_params;
cuvsCagraIndexParamsCreate(&build_params);
cagraBuild(res, build_params, &dataset_tensor, index);

// create queries DLTensor
Expand Down Expand Up @@ -110,7 +111,8 @@ TEST(CagraC, BuildSearch)
distances_tensor.dl_tensor.strides = nullptr;

// search index
cagraSearchParams search_params;
cuvsCagraSearchParams_t search_params;
cuvsCagraSearchParamsCreate(&search_params);
cagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor);

// verify output
Expand All @@ -124,6 +126,8 @@ TEST(CagraC, BuildSearch)
cudaFree(distances_d);

// de-allocate index and res
cuvsCagraSearchParamsDestroy(search_params);
cuvsCagraIndexParamsDestroy(build_params);
cagraIndexDestroy(index);
cuvsResourcesDestroy(res);
}
31 changes: 31 additions & 0 deletions cpp/test/neighbors/c_api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cuvs/core/c_api.h>
#include <cuvs/neighbors/cagra_c.h>
#include <stdio.h>
#include <stdlib.h>

int main()
{
// simple smoke test to make sure that we can compile the cagra_c.h API
// using a c compiler. This isn't aiming to be a full test, just checking
// that the exposed C-API is valid C code and doesn't contain C++ features
cagraIndex_t index;
cagraIndexCreate(&index);
cagraIndexDestroy(index);
return 0;
}

0 comments on commit 63f8d71

Please sign in to comment.