CAGRA C example and DlPack docs (#51)

Authors: - Divye Gala (https://github.com/divyegala) - Corey J. Nolet (https://github.com/cjnolet) - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Bradley Dice (https://github.com/bdice) - Ben Frederickson (https://github.com/benfred) Approvers: - Dante Gama Dessavre (https://github.com/dantegd) - Corey J. Nolet (https://github.com/cjnolet) URL: #51
rapidsai · Apr 2, 2024 · d62e1ce · d62e1ce
1 parent 1fa8bef
commit d62e1ce
Show file tree

Hide file tree

Showing 12 changed files with 372 additions and 81 deletions.
diff --git a/build.sh b/build.sh
@@ -419,7 +419,7 @@ fi
 # Initiate build for c++ examples (if needed)
 
 if hasArg examples; then
-    pushd ${REPODIR}/examples/cpp
+    pushd ${REPODIR}/examples
     ./build.sh
     popd
 fi
diff --git a/docs/source/interoperability.rst b/docs/source/interoperability.rst
@@ -4,6 +4,40 @@ Interoperability
 DLPack (C)
 ^^^^^^^^^^
 
+Approximate nearest neighbor (ANN) indexes provide an interface to build and search an index via a C API. `DLPack v0.8 <https://github.com/dmlc/dlpack/blob/main/README.md>`_, a tensor interface framework, is used as the standard to interact with our C API.
+
+Representing a tensor with DLPack is simple, as it is a POD struct that stores information about the tensor at runtime. At the moment, ``DLManagedTensor`` from DLPack v0.8 is compatible with our C API; however, we will soon upgrade to ``DLManagedTensorVersioned`` from DLPack v1.0, as it will help us maintain ABI and API compatibility.
+
+Here's an example of how to represent device memory using ``DLManagedTensor``:
+
+.. code-block:: c
+
+    #include <cuvs/core/c_api.h>
+    #include <dlpack/dlpack.h>
+
+    #include <cuda_runtime.h>
+
+    // Create a cuvsResources_t object; the cuVS allocation helpers take it as
+    // their first argument
+    cuvsResources_t res;
+    cuvsResourcesCreate(&res);
+
+    // Create data representation in host memory (2 rows x 1 column)
+    float dataset[2][1] = {{0.2}, {0.1}};
+    // copy data to device memory
+    float *dataset_dev;
+    cuvsRMMAlloc(res, (void**) &dataset_dev, sizeof(float) * 2 * 1);
+    cudaMemcpy(dataset_dev, dataset, sizeof(float) * 2 * 1, cudaMemcpyDefault);
+
+    // Use DLPack for representing the device copy of the data as a tensor.
+    // The tensor is tagged kDLCUDA, so `data` must be the device pointer,
+    // not the host array.
+    DLManagedTensor dataset_tensor;
+    dataset_tensor.dl_tensor.data               = dataset_dev;
+    dataset_tensor.dl_tensor.device.device_type = kDLCUDA;
+    dataset_tensor.dl_tensor.ndim               = 2;
+    dataset_tensor.dl_tensor.dtype.code         = kDLFloat;
+    dataset_tensor.dl_tensor.dtype.bits         = 32;
+    dataset_tensor.dl_tensor.dtype.lanes        = 1;
+    int64_t dataset_shape[2]                    = {2, 1};
+    dataset_tensor.dl_tensor.shape              = dataset_shape;
+    dataset_tensor.dl_tensor.strides            = NULL;
+
+    // free memory after use
+    cuvsRMMFree(res, dataset_dev, sizeof(float) * 2 * 1);
+    cuvsResourcesDestroy(res);
+
+Please refer to cuVS C API `documentation <c_api.rst>`_ to learn more.
+
 Multi-dimensional span (C++)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

diff --git a/examples/cpp/README.md → examples/README.md b/examples/cpp/README.md → examples/README.md
@@ -1,16 +1,16 @@
-# cuVS C++ Examples
+# cuVS C and C++ Examples
 
 This template project provides a drop-in sample to either start building a new application with, or using CUVS in an existing CMake project. 
 
 First, please refer to our [installation docs](https://docs.rapids.ai/api/cuvs/stable/build.html#cuda-gpu-requirements) for the minimum requirements to use cuVS.
 
 Once the minimum requirements are satisfied, this example template application can be built with the provided `build.sh` script. This is a bash script that calls the appropriate CMake commands, so you can look into it to see the typical CMake based build workflow.  
 
-This directory (`CUVS_SOURCE/examples/cpp`) can be copied directly in order to build a new application with CUVS.
+Either directory (`CUVS_SOURCE/examples/c` or `CUVS_SOURCE/examples/cpp`) can be copied directly in order to build a new application with cuVS.
 
 cuVS can be integrated into an existing CMake project by copying the contents in the `configure rapids-cmake` and `configure cuvs` sections of the provided `CMakeLists.txt` into your project, along with `cmake/thirdparty/get_cuvs.cmake`. 
 
-Make sure to link against the appropriate Cmake targets. Use `cuvs::cuvs` to utilize the shared library.
+Make sure to link against the appropriate CMake targets. Use `cuvs::c_api` and `cuvs::cuvs` to use the C and C++ shared libraries respectively.
 
 ```cmake
 target_link_libraries(your_app_target PRIVATE cuvs::cuvs)

diff --git a/examples/build.sh b/examples/build.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+# cuvs empty project template build script
+
+# Abort script on first error
+set -e
+
+# Number of parallel build jobs; defaults to the number of available processors
+PARALLEL_LEVEL=${PARALLEL_LEVEL:-$(nproc)}
+
+BUILD_TYPE=Release
+BUILD_DIR=build/
+
+# Set CUVS_REPO_REL to a cuVS checkout to build cuVS from source through CPM;
+# when left empty, a pre-built cuVS is looked up in LIB_BUILD_DIR instead
+CUVS_REPO_REL=""
+EXTRA_CMAKE_ARGS=""
+
+# Root of examples
+EXAMPLES_DIR=$(dirname "$(realpath "$0")")
+
+if [[ ${CUVS_REPO_REL} != "" ]]; then
+  CUVS_REPO_PATH="$(readlink -f "${CUVS_REPO_REL}")"
+  EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCPM_cuvs_SOURCE=${CUVS_REPO_PATH}"
+else
+  LIB_BUILD_DIR=${LIB_BUILD_DIR:-$(readlink -f "${EXAMPLES_DIR}/../cpp/build")}
+  EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -Dcuvs_ROOT=${LIB_BUILD_DIR}"
+fi
+
+if [ "$1" == "clean" ]; then
+  # Each example is configured into <example>/build under EXAMPLES_DIR, so
+  # remove those trees (plus a top-level build directory, if one exists)
+  # regardless of the directory this script was invoked from
+  rm -rf "${EXAMPLES_DIR}/build" "${EXAMPLES_DIR}"/*/build
+  exit 0
+fi
+
+################################################################################
+# Add individual cuVS example builds down below
+
+build_example() {
+  example_dir=${1}
+  example_dir="${EXAMPLES_DIR}/${example_dir}"
+  build_dir="${example_dir}/build"
+
+  # Configure
+  cmake -S ${example_dir} -B ${build_dir} \
+  -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+  -DCUVS_NVTX=OFF \
+  -DCMAKE_CUDA_ARCHITECTURES="native" \
+  -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+  ${EXTRA_CMAKE_ARGS}
+  # Build
+  cmake --build ${build_dir} -j${PARALLEL_LEVEL}
+}
+
+build_example c
+build_example cpp
diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt
@@ -0,0 +1,37 @@
+# =============================================================================
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+
+# ------------- configure rapids-cmake --------------#
+
+# Shared helper scripts live one level up, next to the other example projects.
+# Anchor the path on this file's directory instead of relying on how a bare
+# relative path is resolved.
+include("${CMAKE_CURRENT_LIST_DIR}/../cmake/thirdparty/fetch_rapids.cmake")
+include(rapids-cmake)
+include(rapids-cpm)
+include(rapids-export)
+include(rapids-find)
+
+# ------------- configure project --------------#
+
+project(test_cuvs_c LANGUAGES C CXX CUDA)
+
+# ------------- configure cuvs -----------------#
+
+rapids_cpm_init()
+# Read by get_cuvs.cmake, which forwards it to find_and_configure_cuvs() so the C API is requested
+set(BUILD_CUVS_C_LIBRARY ON)
+include("${CMAKE_CURRENT_LIST_DIR}/../cmake/thirdparty/get_dlpack.cmake")
+include("${CMAKE_CURRENT_LIST_DIR}/../cmake/thirdparty/get_cuvs.cmake")
+
+add_executable(CAGRA_C_EXAMPLE src/cagra_c_example.c)
+# An executable has no consumers, so its usage requirements are PRIVATE
+target_include_directories(CAGRA_C_EXAMPLE PRIVATE "$<BUILD_INTERFACE:${DLPACK_INCLUDE_DIR}>")
+target_link_libraries(CAGRA_C_EXAMPLE PRIVATE cuvs::c_api $<TARGET_NAME_IF_EXISTS:conda_env>)
diff --git a/examples/c/src/cagra_c_example.c b/examples/c/src/cagra_c_example.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cuvs/core/c_api.h>
+#include <cuvs/neighbors/cagra.h>
+
+#include <dlpack/dlpack.h>
+
+#include <cuda_runtime.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+float dataset[4][2] = {{0.74021935, 0.9209938},
+                       {0.03902049, 0.9689629},
+                       {0.92514056, 0.4463501},
+                       {0.6673192, 0.10993068}};
+float queries[4][2] = {{0.48216683, 0.0428398},
+                       {0.5084142, 0.6545497},
+                       {0.51260436, 0.2643005},
+                       {0.05198065, 0.5789965}};
+
+/*
+ * Build a CAGRA index over the global `dataset` array, search it with the
+ * global `queries` array, and print the neighbors and distances found for the
+ * first query. The dataset is passed to the build as a host (kDLCPU) tensor;
+ * the queries and both outputs are device (kDLCUDA) tensors.
+ */
+void cagra_build_search_simple() {
+
+  int64_t n_rows = 4;
+  int64_t n_cols = 2;
+  int64_t topk = 2;
+  int64_t n_queries = 4;
+
+  // Create a cuvsResources_t object
+  cuvsResources_t res;
+  cuvsResourcesCreate(&res);
+
+  // Use DLPack to represent `dataset` as a tensor.
+  // Zero-initialize so that every field not assigned below holds a defined
+  // value instead of indeterminate stack contents.
+  DLManagedTensor dataset_tensor = {0};
+  dataset_tensor.dl_tensor.data = dataset;
+  dataset_tensor.dl_tensor.device.device_type = kDLCPU;
+  dataset_tensor.dl_tensor.ndim = 2;
+  dataset_tensor.dl_tensor.dtype.code = kDLFloat;
+  dataset_tensor.dl_tensor.dtype.bits = 32;
+  dataset_tensor.dl_tensor.dtype.lanes = 1;
+  int64_t dataset_shape[2] = {n_rows, n_cols};
+  dataset_tensor.dl_tensor.shape = dataset_shape;
+  dataset_tensor.dl_tensor.strides = NULL;
+
+  // Build the CAGRA index
+  cuvsCagraIndexParams_t index_params;
+  cuvsCagraIndexParamsCreate(&index_params);
+
+  cuvsCagraIndex_t index;
+  cuvsCagraIndexCreate(&index);
+
+  cuvsCagraBuild(res, index_params, &dataset_tensor, index);
+
+  // Allocate memory for `queries`, `neighbors` and `distances` output
+  uint32_t *neighbors;
+  float *distances, *queries_d;
+  cuvsRMMAlloc(res, (void**) &queries_d, sizeof(float) * n_queries * n_cols);
+  cuvsRMMAlloc(res, (void**) &neighbors, sizeof(uint32_t) * n_queries * topk);
+  cuvsRMMAlloc(res, (void**) &distances, sizeof(float) * n_queries * topk);
+
+  // Copy the queries into the buffer allocated above; the size is derived from
+  // the same dimensions as the allocation so the two cannot drift apart
+  cudaMemcpy(queries_d, queries, sizeof(float) * n_queries * n_cols,
+             cudaMemcpyDefault);
+
+  // Use DLPack to represent `queries`, `neighbors` and `distances` as tensors
+  DLManagedTensor queries_tensor = {0};
+  queries_tensor.dl_tensor.data = queries_d;
+  queries_tensor.dl_tensor.device.device_type = kDLCUDA;
+  queries_tensor.dl_tensor.ndim = 2;
+  queries_tensor.dl_tensor.dtype.code = kDLFloat;
+  queries_tensor.dl_tensor.dtype.bits = 32;
+  queries_tensor.dl_tensor.dtype.lanes = 1;
+  int64_t queries_shape[2] = {n_queries, n_cols};
+  queries_tensor.dl_tensor.shape = queries_shape;
+  queries_tensor.dl_tensor.strides = NULL;
+
+  DLManagedTensor neighbors_tensor = {0};
+  neighbors_tensor.dl_tensor.data = neighbors;
+  neighbors_tensor.dl_tensor.device.device_type = kDLCUDA;
+  neighbors_tensor.dl_tensor.ndim = 2;
+  neighbors_tensor.dl_tensor.dtype.code = kDLUInt;
+  neighbors_tensor.dl_tensor.dtype.bits = 32;
+  neighbors_tensor.dl_tensor.dtype.lanes = 1;
+  int64_t neighbors_shape[2] = {n_queries, topk};
+  neighbors_tensor.dl_tensor.shape = neighbors_shape;
+  neighbors_tensor.dl_tensor.strides = NULL;
+
+  DLManagedTensor distances_tensor = {0};
+  distances_tensor.dl_tensor.data = distances;
+  distances_tensor.dl_tensor.device.device_type = kDLCUDA;
+  distances_tensor.dl_tensor.ndim = 2;
+  distances_tensor.dl_tensor.dtype.code = kDLFloat;
+  distances_tensor.dl_tensor.dtype.bits = 32;
+  distances_tensor.dl_tensor.dtype.lanes = 1;
+  int64_t distances_shape[2] = {n_queries, topk};
+  distances_tensor.dl_tensor.shape = distances_shape;
+  distances_tensor.dl_tensor.strides = NULL;
+
+  // Search the CAGRA index
+  cuvsCagraSearchParams_t search_params;
+  cuvsCagraSearchParamsCreate(&search_params);
+
+  cuvsCagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor,
+                  &distances_tensor);
+
+  // print results
+  uint32_t *neighbors_h =
+      (uint32_t *)malloc(sizeof(uint32_t) * n_queries * topk);
+  float *distances_h = (float *)malloc(sizeof(float) * n_queries * topk);
+  if (neighbors_h != NULL && distances_h != NULL) {
+    cudaMemcpy(neighbors_h, neighbors, sizeof(uint32_t) * n_queries * topk,
+               cudaMemcpyDefault);
+    cudaMemcpy(distances_h, distances, sizeof(float) * n_queries * topk,
+               cudaMemcpyDefault);
+    // Neighbor indices are unsigned 32-bit values: print them with %u
+    printf("Query 0 neighbor indices: =[%u, %u]\n",
+           (unsigned int)neighbors_h[0], (unsigned int)neighbors_h[1]);
+    printf("Query 0 neighbor distances: =[%f, %f]\n", distances_h[0],
+           distances_h[1]);
+  } else {
+    fprintf(stderr, "Failed to allocate host memory for the results\n");
+  }
+
+  // Free or destroy all allocations (free(NULL) is a no-op)
+  free(neighbors_h);
+  free(distances_h);
+
+  cuvsCagraSearchParamsDestroy(search_params);
+
+  cuvsRMMFree(res, distances, sizeof(float) * n_queries * topk);
+  cuvsRMMFree(res, neighbors, sizeof(uint32_t) * n_queries * topk);
+  cuvsRMMFree(res, queries_d, sizeof(float) * n_queries * n_cols);
+
+  cuvsCagraIndexDestroy(index);
+  cuvsCagraIndexParamsDestroy(index_params);
+  cuvsResourcesDestroy(res);
+}
+
+/* Program entry point: runs the simple CAGRA build-and-search example. */
+int main(void) {
+  cagra_build_search_simple();
+  return 0;
+}
diff --git a/...s/cpp/cmake/thirdparty/fetch_rapids.cmake → examples/cmake/thirdparty/fetch_rapids.cmake b/...s/cpp/cmake/thirdparty/fetch_rapids.cmake → examples/cmake/thirdparty/fetch_rapids.cmake
diff --git a/examples/cpp/cmake/thirdparty/get_cuvs.cmake → examples/cmake/thirdparty/get_cuvs.cmake b/examples/cpp/cmake/thirdparty/get_cuvs.cmake → examples/cmake/thirdparty/get_cuvs.cmake
@@ -17,22 +17,29 @@ set(CUVS_FORK "rapidsai")
 set(CUVS_PINNED_TAG "branch-${RAPIDS_VERSION}")
 
 function(find_and_configure_cuvs)
-    set(oneValueArgs VERSION FORK PINNED_TAG ENABLE_NVTX)
+    set(oneValueArgs VERSION FORK PINNED_TAG ENABLE_NVTX BUILD_CUVS_C_LIBRARY)
     cmake_parse_arguments(PKG "${options}" "${oneValueArgs}"
             "${multiValueArgs}" ${ARGN} )
 
+
+    set(CUVS_COMPONENTS "")
+    if(PKG_BUILD_CUVS_C_LIBRARY)
+        string(APPEND CUVS_COMPONENTS " c_api")
+    endif()
     #-----------------------------------------------------
     # Invoke CPM find_package()
     #-----------------------------------------------------
     rapids_cpm_find(cuvs ${PKG_VERSION}
             GLOBAL_TARGETS      cuvs::cuvs
-            BUILD_EXPORT_SET    cuvs-template-exports
-            INSTALL_EXPORT_SET  cuvs-template-exports
+            BUILD_EXPORT_SET    cuvs-examples-exports
+            INSTALL_EXPORT_SET  cuvs-examples-exports
+            COMPONENTS ${CUVS_COMPONENTS}
             CPM_ARGS
             GIT_REPOSITORY https://github.com/${PKG_FORK}/cuvs.git
             GIT_TAG        ${PKG_PINNED_TAG}
             SOURCE_SUBDIR  cpp
             OPTIONS
+            "BUILD_C_LIBRARY ${PKG_BUILD_CUVS_C_LIBRARY}"
             "BUILD_TESTS OFF"
             "CUVS_NVTX ${PKG_ENABLE_NVTX}"
             )
@@ -45,4 +52,5 @@ find_and_configure_cuvs(VERSION  ${CUVS_VERSION}.00
         FORK                     ${CUVS_FORK}
         PINNED_TAG               ${CUVS_PINNED_TAG}
         ENABLE_NVTX              OFF
+        BUILD_CUVS_C_LIBRARY     ${BUILD_CUVS_C_LIBRARY}
 )
diff --git a/examples/cmake/thirdparty/get_dlpack.cmake b/examples/cmake/thirdparty/get_dlpack.cmake
@@ -0,0 +1,41 @@
+# =============================================================================
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+# Locate dlpack, or download its sources through CPM, and publish the header location.
+#
+# VERSION: the dlpack version to request; the git tag fetched is `v<VERSION>`.
+#
+# If `dlpack_SOURCE_DIR` is defined after the lookup, DLPACK_INCLUDE_DIR is set in the caller's
+# scope to that directory's `include` folder.
+function(find_and_configure_dlpack VERSION)
+
+  # Generate a find module for DLPACK that is keyed on the dlpack.h header
+  include(${rapids-cmake-dir}/find/generate_module.cmake)
+  rapids_find_generate_module(DLPACK HEADER_NAMES dlpack.h)
+
+  # Sources are only downloaded (DOWNLOAD_ONLY), not added to the build
+  rapids_cpm_find(
+    dlpack ${VERSION}
+    GIT_REPOSITORY https://github.com/dmlc/dlpack.git
+    GIT_TAG v${VERSION}
+    GIT_SHALLOW TRUE
+    DOWNLOAD_ONLY TRUE
+    OPTIONS "BUILD_MOCK OFF"
+  )
+
+  if(NOT DEFINED dlpack_SOURCE_DIR)
+    # Nothing to publish here; find_package(DLPACK) will set the variable instead
+    return()
+  endif()
+
+  set(DLPACK_INCLUDE_DIR "${dlpack_SOURCE_DIR}/include" PARENT_SCOPE)
+endfunction()
+
+set(CUVS_MIN_VERSION_dlpack 0.8)
+
+find_and_configure_dlpack(${CUVS_MIN_VERSION_dlpack})