diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index d094873da9..c3042dc06b 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -46,3 +46,6 @@ find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}" sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}" done + +# The example of a downstream project +sed_runner "s/find_and_configure_kvikio(.*)/find_and_configure_kvikio(\"${NEXT_SHORT_TAG}\")/g" "cpp/examples/downstream/cmake/get_kvikio.cmake" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 675a77ea7b..b1ac294d7a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -44,13 +44,13 @@ rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) rapids_cpm_init() rapids_find_package( - CUDAToolkit REQUIRED + Threads REQUIRED BUILD_EXPORT_SET kvikio-exports INSTALL_EXPORT_SET kvikio-exports ) rapids_find_package( - Threads REQUIRED + CUDAToolkit BUILD_EXPORT_SET kvikio-exports INSTALL_EXPORT_SET kvikio-exports ) @@ -84,15 +84,24 @@ endif() add_library(kvikio INTERFACE) add_library(kvikio::kvikio ALIAS kvikio) -# Enable supported cuFile features in KvikIO -# -# While the code block below (in the value of final_code_string) sets this information when KvikIO -# is imported from a kvikio-config.cmake file, this code block is intended to be used by projects -# that include KvikIO's source directory in their own CMake build. +# We enable CUDA and cuFile both here and in the FINAL_CODE_BLOCK export block. While the code block +# below (in FINAL_CODE_BLOCK) sets this information when KvikIO is imported from a +# kvikio-config.cmake file, this code block is intended to be used by projects that include KvikIO's +# source directory in their own CMake build. 
# # Normally we would just set the below without using $, and without the # final_code_string, but in this case we want to conditionally set these things at import time, not # export time, since KvikIO is a header-only library that can adapt to different build environments. + +# Enable CUDA in KvikIO +if(CUDAToolkit_FOUND) + target_link_libraries(kvikio INTERFACE $) + target_compile_definitions(kvikio INTERFACE $) +else() + message(WARNING "Building KvikIO without CUDA") +endif() + +# Enable supported cuFile features in KvikIO if(cuFile_FOUND) target_link_libraries(kvikio INTERFACE $) target_compile_definitions(kvikio INTERFACE $) @@ -114,7 +123,6 @@ target_include_directories( ) target_link_libraries(kvikio INTERFACE Threads::Threads) -target_link_libraries(kvikio INTERFACE CUDA::toolkit) target_link_libraries(kvikio INTERFACE ${CMAKE_DL_LIBS}) target_compile_features(kvikio INTERFACE cxx_std_17) @@ -157,6 +165,14 @@ get_property(already_set_kvikio DIRECTORY PROPERTY kvikio_already_set_defines SE if(NOT already_set_kvikio) set_property(DIRECTORY PROPERTY kvikio_already_set_defines "ON") + find_package(CUDAToolkit QUIET) + if(CUDAToolkit_FOUND) + target_link_libraries(kvikio::kvikio INTERFACE CUDA::toolkit) + target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_CUDA_FOUND) + else() + message(WARNING "Building KvikIO without CUDA") + endif() + # Find cuFile and determine which features are supported find_package(cuFile QUIET) if(NOT cuFile_FOUND) @@ -203,7 +219,6 @@ rapids_export( FINAL_CODE_BLOCK final_code_string ) -# build export targets rapids_export( BUILD kvikio EXPORT_SET kvikio-exports diff --git a/cpp/examples/CMakeLists.txt b/cpp/examples/CMakeLists.txt index f06d0e0131..c12ddb2e52 100644 --- a/cpp/examples/CMakeLists.txt +++ b/cpp/examples/CMakeLists.txt @@ -13,20 +13,42 @@ # ============================================================================= set(TEST_INSTALL_PATH bin/tests/libkvikio) -set(TEST_NAME BASIC_IO_TEST) 
-add_executable(BASIC_IO_TEST basic_io.cpp) -set_target_properties(BASIC_IO_TEST PROPERTIES INSTALL_RPATH "\$ORIGIN/../../lib") -target_include_directories(BASIC_IO_TEST PRIVATE ../include ${cuFile_INCLUDE_DIRS}) -target_link_libraries(BASIC_IO_TEST PRIVATE kvikio CUDA::cudart) +if(CUDAToolkit_FOUND) + add_executable(BASIC_IO_TEST basic_io.cpp) + set_target_properties(BASIC_IO_TEST PROPERTIES INSTALL_RPATH "\$ORIGIN/../../lib") + target_include_directories(BASIC_IO_TEST PRIVATE ../include ${cuFile_INCLUDE_DIRS}) + target_link_libraries(BASIC_IO_TEST PRIVATE kvikio CUDA::cudart) + + if(CMAKE_COMPILER_IS_GNUCXX) + set(KVIKIO_CXX_FLAGS "-Wall;-Werror;-Wno-unknown-pragmas") + target_compile_options(BASIC_IO_TEST PRIVATE "$<$:${KVIKIO_CXX_FLAGS}>") + endif() + + install( + TARGETS BASIC_IO_TEST + COMPONENT testing + DESTINATION ${TEST_INSTALL_PATH} + EXCLUDE_FROM_ALL + ) +else() + message(STATUS "Cannot build the basic_io example when CUDA is not found") +endif() + +add_executable(BASIC_NO_CUDA_TEST basic_no_cuda.cpp) +set_target_properties(BASIC_NO_CUDA_TEST PROPERTIES INSTALL_RPATH "\$ORIGIN/../../lib") +target_include_directories(BASIC_NO_CUDA_TEST PRIVATE ../include) +target_link_libraries(BASIC_NO_CUDA_TEST PRIVATE kvikio) if(CMAKE_COMPILER_IS_GNUCXX) set(KVIKIO_CXX_FLAGS "-Wall;-Werror;-Wno-unknown-pragmas") - target_compile_options(BASIC_IO_TEST PRIVATE "$<$:${KVIKIO_CXX_FLAGS}>") + target_compile_options( + BASIC_NO_CUDA_TEST PRIVATE "$<$:${KVIKIO_CXX_FLAGS}>" + ) endif() install( - TARGETS ${TEST_NAME} + TARGETS BASIC_NO_CUDA_TEST COMPONENT testing DESTINATION ${TEST_INSTALL_PATH} EXCLUDE_FROM_ALL diff --git a/cpp/examples/basic_no_cuda.cpp b/cpp/examples/basic_no_cuda.cpp new file mode 100644 index 0000000000..700e3e8be9 --- /dev/null +++ b/cpp/examples/basic_no_cuda.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace std; + +void check(bool condition) +{ + if (!condition) { + std::cout << "Error" << std::endl; + exit(-1); + } +} + +constexpr int NELEM = 1024; // Number of elements used throughout the test +constexpr int SIZE = NELEM * sizeof(int); // Size of the memory allocations (in bytes) +constexpr int LARGE_SIZE = 8 * SIZE; // LARGE SIZE to test partial submit (in bytes) + +int main() +{ + cout << "KvikIO defaults: " << endl; + if (kvikio::defaults::compat_mode()) { + cout << " Compatibility mode: enabled" << endl; + } else { + kvikio::DriverInitializer manual_init_driver; + cout << " Compatibility mode: disabled" << endl; + kvikio::DriverProperties props; + cout << "DriverProperties: " << endl; + cout << " nvfs version: " << props.get_nvfs_major_version() << "." 
+ << props.get_nvfs_minor_version() << endl; + cout << " Allow compatibility mode: " << std::boolalpha << props.get_nvfs_allow_compat_mode() + << endl; + cout << " Pool mode - enabled: " << std::boolalpha << props.get_nvfs_poll_mode() + << ", threshold: " << props.get_nvfs_poll_thresh_size() << " kb" << endl; + cout << " Max pinned memory: " << props.get_max_pinned_memory_size() << " kb" << endl; + cout << " Max batch IO size: " << props.get_max_batch_io_size() << endl; + } + + std::vector a(SIZE); + std::iota(a.begin(), a.end(), 0); + std::vector b(SIZE); + std::vector c(SIZE); + check(kvikio::is_host_memory(a.data()) == true); + + { + kvikio::FileHandle file1("/tmp/test-file1", "w"); + kvikio::FileHandle file2("/tmp/test-file2", "w"); + std::future fut1 = file1.pwrite(a.data(), SIZE); + std::future fut2 = file2.pwrite(a.data(), SIZE); + size_t written = fut1.get() + fut2.get(); + check(written == SIZE * 2); + check(SIZE == file1.nbytes()); + check(SIZE == file2.nbytes()); + cout << "Write: " << written << endl; + } + { + kvikio::FileHandle file1("/tmp/test-file1", "r"); + kvikio::FileHandle file2("/tmp/test-file2", "r"); + std::future fut1 = file1.pread(b.data(), SIZE); + std::future fut2 = file2.pread(c.data(), SIZE); + size_t read = fut1.get() + fut2.get(); + check(read == SIZE * 2); + check(SIZE == file1.nbytes()); + check(SIZE == file2.nbytes()); + for (int i = 0; i < NELEM; ++i) { + check(a[i] == b[i]); + check(a[i] == c[i]); + } + cout << "Parallel POSIX read (" << kvikio::defaults::thread_pool_nthreads() + << " threads): " << read << endl; + } +} diff --git a/cpp/examples/downstream/cmake/get_kvikio.cmake b/cpp/examples/downstream/cmake/get_kvikio.cmake index 367a69f42b..8516bc332f 100644 --- a/cpp/examples/downstream/cmake/get_kvikio.cmake +++ b/cpp/examples/downstream/cmake/get_kvikio.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -30,4 +30,4 @@ function(find_and_configure_kvikio MIN_VERSION) endfunction() -find_and_configure_kvikio("22.10") +find_and_configure_kvikio("24.06") diff --git a/cpp/include/kvikio/shim/cuda.hpp b/cpp/include/kvikio/shim/cuda.hpp index e01df4643e..5d42bd0dcb 100644 --- a/cpp/include/kvikio/shim/cuda.hpp +++ b/cpp/include/kvikio/shim/cuda.hpp @@ -15,8 +15,7 @@ */ #pragma once -#include - +#include #include namespace kvikio { @@ -51,6 +50,7 @@ class cudaAPI { decltype(cuStreamDestroy)* StreamDestroy{nullptr}; private: +#ifdef KVIKIO_CUDA_FOUND cudaAPI() { void* lib = load_library("libcuda.so.1"); @@ -77,6 +77,9 @@ class cudaAPI { get_symbol(StreamCreate, lib, KVIKIO_STRINGIFY(cuStreamCreate)); get_symbol(StreamDestroy, lib, KVIKIO_STRINGIFY(cuStreamDestroy)); } +#else + cudaAPI() { throw std::runtime_error("KvikIO not compiled with CUDA support"); } +#endif public: cudaAPI(cudaAPI const&) = delete; @@ -89,4 +92,25 @@ class cudaAPI { } }; +/** + * @brief Check if the CUDA library is available + * + * Notice, this doesn't check if the runtime environment supports CUDA. + * + * @return The boolean answer + */ +#ifdef KVIKIO_CUDA_FOUND +inline bool is_cuda_available() +{ + try { + cudaAPI::instance(); + } catch (const std::runtime_error&) { + return false; + } + return true; +} +#else +constexpr bool is_cuda_available() { return false; } +#endif + } // namespace kvikio diff --git a/cpp/include/kvikio/shim/cuda_h_wrapper.hpp b/cpp/include/kvikio/shim/cuda_h_wrapper.hpp new file mode 100644 index 0000000000..0740c99f31 --- /dev/null +++ b/cpp/include/kvikio/shim/cuda_h_wrapper.hpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +/** + * In order to support compilation when `cuda.h` isn't available, we + * wrap all use of cuda in a `#ifdef KVIKIO_CUDA_FOUND` guard. + * + * The motivation here is to make KvikIO work in all circumstances so + * that libraries don't have to implement their own fallback solutions. + */ +#ifdef KVIKIO_CUDA_FOUND +#include +#else + +// If CUDA isn't defined, we define some of the data types here. +// Notice, this doesn't need to be ABI compatible with the CUDA definitions. 
+ +using CUresult = int; +using CUdeviceptr = unsigned long long; +using CUdevice = int; +using CUcontext = void*; +using CUstream = void*; + +#define CUDA_ERROR_STUB_LIBRARY 0 +#define CUDA_SUCCESS 0 +#define CUDA_ERROR_INVALID_VALUE 0 +#define CU_POINTER_ATTRIBUTE_CONTEXT 0 +#define CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL 0 +#define CU_POINTER_ATTRIBUTE_DEVICE_POINTER 0 +#define CU_MEMHOSTREGISTER_PORTABLE 0 +#define CU_STREAM_DEFAULT 0 + +CUresult cuInit(...); +CUresult cuMemHostAlloc(...); +CUresult cuMemFreeHost(...); +CUresult cuMemcpyHtoDAsync(...); +CUresult cuMemcpyDtoHAsync(...); +CUresult cuPointerGetAttribute(...); +CUresult cuPointerGetAttributes(...); +CUresult cuCtxPushCurrent(...); +CUresult cuCtxPopCurrent(...); +CUresult cuCtxGetCurrent(...); +CUresult cuMemGetAddressRange(...); +CUresult cuGetErrorName(...); +CUresult cuGetErrorString(...); +CUresult cuDeviceGet(...); +CUresult cuDevicePrimaryCtxRetain(...); +CUresult cuDevicePrimaryCtxRelease(...); +CUresult cuStreamCreate(...); +CUresult cuStreamDestroy(...); +CUresult cuStreamSynchronize(...); + +#endif diff --git a/cpp/include/kvikio/shim/cufile_h_wrapper.hpp b/cpp/include/kvikio/shim/cufile_h_wrapper.hpp index f512159602..f9bd7dc100 100644 --- a/cpp/include/kvikio/shim/cufile_h_wrapper.hpp +++ b/cpp/include/kvikio/shim/cufile_h_wrapper.hpp @@ -15,7 +15,6 @@ */ #pragma once -#include #include /** diff --git a/cpp/include/kvikio/utils.hpp b/cpp/include/kvikio/utils.hpp index 4348135d58..a136aad8c2 100644 --- a/cpp/include/kvikio/utils.hpp +++ b/cpp/include/kvikio/utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -61,6 +61,7 @@ inline constexpr std::size_t page_size = 4096; * @param ptr Memory pointer to query * @return The boolean answer */ +#ifdef KVIKIO_CUDA_FOUND inline bool is_host_memory(const void* ptr) { CUpointer_attribute attrs[1] = { @@ -80,6 +81,9 @@ inline bool is_host_memory(const void* ptr) // does it to support `cudaMemoryTypeUnregistered`. return memtype == 0 || memtype == CU_MEMORYTYPE_HOST; } +#else +constexpr bool is_host_memory(const void* ptr) { return true; } +#endif /** * @brief Return the device owning the pointer