diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index d094873da9..c3042dc06b 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -46,3 +46,6 @@ find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r
     sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}"
     sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
 done
+
+# Bump the KvikIO version literal requested by the downstream example project
+sed_runner "s/find_and_configure_kvikio(\"[0-9.]*\")/find_and_configure_kvikio(\"${NEXT_SHORT_TAG}\")/g" "cpp/examples/downstream/cmake/get_kvikio.cmake"
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 675a77ea7b..b1ac294d7a 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -44,13 +44,13 @@ rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH)
 rapids_cpm_init()
 
 rapids_find_package(
-  CUDAToolkit REQUIRED
+  Threads REQUIRED
   BUILD_EXPORT_SET kvikio-exports
   INSTALL_EXPORT_SET kvikio-exports
 )
 
 rapids_find_package(
-  Threads REQUIRED
+  CUDAToolkit
   BUILD_EXPORT_SET kvikio-exports
   INSTALL_EXPORT_SET kvikio-exports
 )
@@ -84,15 +84,24 @@ endif()
 add_library(kvikio INTERFACE)
 add_library(kvikio::kvikio ALIAS kvikio)
 
-# Enable supported cuFile features in KvikIO
-#
-# While the code block below (in the value of final_code_string) sets this information when KvikIO
-# is imported from a kvikio-config.cmake file, this code block is intended to be used by projects
-# that include KvikIO's source directory in their own CMake build.
+# We enable CUDA and cuFile both here and in the FINAL_CODE_BLOCK export block. While the code block
+# below (in FINAL_CODE_BLOCK) sets this information when KvikIO is imported from a
+# kvikio-config.cmake file, this code block is intended to be used by projects that include KvikIO's
+# source directory in their own CMake build.
 #
 # Normally we would just set the below without using $<BUILD_LOCAL_INTERFACE:...>, and without the
 # final_code_string, but in this case we want to conditionally set these things at import time, not
 # export time, since KvikIO is a header-only library that can adapt to different build environments.
+
+# Expose CUDA (link target and KVIKIO_CUDA_FOUND define) to in-tree consumers when it was found
+if(NOT CUDAToolkit_FOUND)
+  message(WARNING "Building KvikIO without CUDA")
+else()
+  target_compile_definitions(kvikio INTERFACE $<BUILD_LOCAL_INTERFACE:KVIKIO_CUDA_FOUND>)
+  target_link_libraries(kvikio INTERFACE $<BUILD_LOCAL_INTERFACE:CUDA::toolkit>)
+endif()
+
+# Enable supported cuFile features in KvikIO
 if(cuFile_FOUND)
   target_link_libraries(kvikio INTERFACE $<BUILD_LOCAL_INTERFACE:cufile::cuFile_interface>)
   target_compile_definitions(kvikio INTERFACE $<BUILD_LOCAL_INTERFACE:KVIKIO_CUFILE_FOUND>)
@@ -114,7 +123,6 @@ target_include_directories(
 )
 
 target_link_libraries(kvikio INTERFACE Threads::Threads)
-target_link_libraries(kvikio INTERFACE CUDA::toolkit)
 target_link_libraries(kvikio INTERFACE ${CMAKE_DL_LIBS})
 target_compile_features(kvikio INTERFACE cxx_std_17)
 
@@ -157,6 +165,14 @@ get_property(already_set_kvikio DIRECTORY PROPERTY kvikio_already_set_defines SE
 if(NOT already_set_kvikio)
   set_property(DIRECTORY PROPERTY kvikio_already_set_defines "ON")
 
+  find_package(CUDAToolkit QUIET)
+  if(CUDAToolkit_FOUND)
+    target_link_libraries(kvikio::kvikio INTERFACE CUDA::toolkit)
+    target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_CUDA_FOUND)
+  else()
+    message(WARNING "Building KvikIO without CUDA")
+  endif()
+
   # Find cuFile and determine which features are supported
   find_package(cuFile QUIET)
   if(NOT cuFile_FOUND)
@@ -203,7 +219,6 @@ rapids_export(
   FINAL_CODE_BLOCK final_code_string
 )
 
-# build export targets
 rapids_export(
   BUILD kvikio
   EXPORT_SET kvikio-exports
diff --git a/cpp/examples/CMakeLists.txt b/cpp/examples/CMakeLists.txt
index f06d0e0131..c12ddb2e52 100644
--- a/cpp/examples/CMakeLists.txt
+++ b/cpp/examples/CMakeLists.txt
@@ -13,20 +13,42 @@
 # =============================================================================
 
 set(TEST_INSTALL_PATH bin/tests/libkvikio)
-set(TEST_NAME BASIC_IO_TEST)
 
-add_executable(BASIC_IO_TEST basic_io.cpp)
-set_target_properties(BASIC_IO_TEST PROPERTIES INSTALL_RPATH "\$ORIGIN/../../lib")
-target_include_directories(BASIC_IO_TEST PRIVATE ../include ${cuFile_INCLUDE_DIRS})
-target_link_libraries(BASIC_IO_TEST PRIVATE kvikio CUDA::cudart)
+# BASIC_IO_TEST links against CUDA::cudart, so it is only defined when the CUDA toolkit was found.
+if(NOT CUDAToolkit_FOUND)
+  message(STATUS "Cannot build the basic_io example when CUDA is not found")
+else()
+  add_executable(BASIC_IO_TEST basic_io.cpp)
+  target_include_directories(BASIC_IO_TEST PRIVATE ../include ${cuFile_INCLUDE_DIRS})
+  target_link_libraries(BASIC_IO_TEST PRIVATE kvikio CUDA::cudart)
+  set_target_properties(BASIC_IO_TEST PROPERTIES INSTALL_RPATH "\$ORIGIN/../../lib")
+
+  if(CMAKE_COMPILER_IS_GNUCXX)
+    set(KVIKIO_CXX_FLAGS "-Wall;-Werror;-Wno-unknown-pragmas")
+    target_compile_options(BASIC_IO_TEST PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${KVIKIO_CXX_FLAGS}>")
+  endif()
+  install(
+    TARGETS BASIC_IO_TEST
+    COMPONENT testing
+    DESTINATION ${TEST_INSTALL_PATH}
+    EXCLUDE_FROM_ALL
+  )
+endif()
+
+add_executable(BASIC_NO_CUDA_TEST basic_no_cuda.cpp) # built whether or not CUDA was found
+target_link_libraries(BASIC_NO_CUDA_TEST PRIVATE kvikio)
+target_include_directories(BASIC_NO_CUDA_TEST PRIVATE ../include)
+set_property(TARGET BASIC_NO_CUDA_TEST PROPERTY INSTALL_RPATH "\$ORIGIN/../../lib")
 
 if(CMAKE_COMPILER_IS_GNUCXX)
   set(KVIKIO_CXX_FLAGS "-Wall;-Werror;-Wno-unknown-pragmas")
-  target_compile_options(BASIC_IO_TEST PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${KVIKIO_CXX_FLAGS}>")
+  target_compile_options(
+    BASIC_NO_CUDA_TEST PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${KVIKIO_CXX_FLAGS}>"
+  )
 endif()
 
 install(
-  TARGETS ${TEST_NAME}
+  TARGETS BASIC_NO_CUDA_TEST
   COMPONENT testing
   DESTINATION ${TEST_INSTALL_PATH}
   EXCLUDE_FROM_ALL
diff --git a/cpp/examples/basic_no_cuda.cpp b/cpp/examples/basic_no_cuda.cpp
new file mode 100644
index 0000000000..700e3e8be9
--- /dev/null
+++ b/cpp/examples/basic_no_cuda.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <chrono>
+#include <iostream>
+
+#include <kvikio/batch.hpp>
+#include <kvikio/buffer.hpp>
+#include <kvikio/defaults.hpp>
+#include <kvikio/driver.hpp>
+#include <kvikio/error.hpp>
+#include <kvikio/file_handle.hpp>
+
+using namespace std;
+
+// Print "Error" to stdout and terminate with exit(-1) when `condition` is false; otherwise no-op.
+void check(bool condition)
+{
+  if (condition) { return; }
+  std::cout << "Error" << std::endl;
+  exit(-1);
+}
+
+constexpr int NELEM      = 1024;                 // Number of int elements written per file
+constexpr int SIZE       = NELEM * sizeof(int);  // Size of NELEM ints (in bytes)
+constexpr int LARGE_SIZE = 8 * SIZE;             // Not referenced by this example (in bytes)
+
+int main()
+{
+  // Print the KvikIO settings; driver properties are only queried when compat mode is off.
+  cout << "KvikIO defaults: " << endl;
+  if (kvikio::defaults::compat_mode()) {
+    cout << "  Compatibility mode: enabled" << endl;
+  } else {
+    kvikio::DriverInitializer manual_init_driver;
+    cout << "  Compatibility mode: disabled" << endl;
+    kvikio::DriverProperties props;
+    cout << "DriverProperties: " << endl;
+    cout << "  nvfs version: " << props.get_nvfs_major_version() << "."
+         << props.get_nvfs_minor_version() << endl;
+    cout << "  Allow compatibility mode: " << std::boolalpha << props.get_nvfs_allow_compat_mode()
+         << endl;
+    cout << "  Poll mode - enabled: " << std::boolalpha << props.get_nvfs_poll_mode()
+         << ", threshold: " << props.get_nvfs_poll_thresh_size() << " kb" << endl;
+    cout << "  Max pinned memory: " << props.get_max_pinned_memory_size() << " kb" << endl;
+    cout << "  Max batch IO size: " << props.get_max_batch_io_size() << endl;
+  }
+
+  // Host buffers of NELEM ints each, i.e. exactly the SIZE bytes transferred per file.
+  std::vector<int> a(NELEM), b(NELEM), c(NELEM);
+  std::iota(a.begin(), a.end(), 0);
+  check(kvikio::is_host_memory(a.data()) == true);
+
+  // Write `a` to two files, issuing both writes before waiting on either future.
+  {
+    kvikio::FileHandle file1("/tmp/test-file1", "w");
+    kvikio::FileHandle file2("/tmp/test-file2", "w");
+    std::future<std::size_t> fut1 = file1.pwrite(a.data(), SIZE);
+    std::future<std::size_t> fut2 = file2.pwrite(a.data(), SIZE);
+    size_t written                = fut1.get() + fut2.get();
+    check(written == SIZE * 2);
+    check(SIZE == file1.nbytes());
+    check(SIZE == file2.nbytes());
+    cout << "Write: " << written << endl;
+  }
+  // Read both files back into `b` and `c` and check that they match what was written.
+  {
+    kvikio::FileHandle file1("/tmp/test-file1", "r");
+    kvikio::FileHandle file2("/tmp/test-file2", "r");
+    std::future<std::size_t> fut1 = file1.pread(b.data(), SIZE);
+    std::future<std::size_t> fut2 = file2.pread(c.data(), SIZE);
+    size_t read                   = fut1.get() + fut2.get();
+    check(read == SIZE * 2);
+    check(SIZE == file1.nbytes());
+    check(SIZE == file2.nbytes());
+    check(a == b);
+    check(a == c);
+    cout << "Parallel POSIX read (" << kvikio::defaults::thread_pool_nthreads()
+         << " threads): " << read << endl;
+  }
+}
diff --git a/cpp/examples/downstream/cmake/get_kvikio.cmake b/cpp/examples/downstream/cmake/get_kvikio.cmake
index 367a69f42b..8516bc332f 100644
--- a/cpp/examples/downstream/cmake/get_kvikio.cmake
+++ b/cpp/examples/downstream/cmake/get_kvikio.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -30,4 +30,4 @@ function(find_and_configure_kvikio MIN_VERSION)
 
 endfunction()
 
-find_and_configure_kvikio("22.10")
+find_and_configure_kvikio("24.06")
diff --git a/cpp/include/kvikio/shim/cuda.hpp b/cpp/include/kvikio/shim/cuda.hpp
index e01df4643e..5d42bd0dcb 100644
--- a/cpp/include/kvikio/shim/cuda.hpp
+++ b/cpp/include/kvikio/shim/cuda.hpp
@@ -15,8 +15,7 @@
  */
 #pragma once
 
-#include <cuda.h>
-
+#include <kvikio/shim/cuda_h_wrapper.hpp>
 #include <kvikio/shim/utils.hpp>
 
 namespace kvikio {
@@ -51,6 +50,7 @@ class cudaAPI {
   decltype(cuStreamDestroy)* StreamDestroy{nullptr};
 
  private:
+#ifdef KVIKIO_CUDA_FOUND
   cudaAPI()
   {
     void* lib = load_library("libcuda.so.1");
@@ -77,6 +77,9 @@ class cudaAPI {
     get_symbol(StreamCreate, lib, KVIKIO_STRINGIFY(cuStreamCreate));
     get_symbol(StreamDestroy, lib, KVIKIO_STRINGIFY(cuStreamDestroy));
   }
+#else
+  cudaAPI() { throw std::runtime_error("KvikIO not compiled with CUDA support"); }
+#endif
 
  public:
   cudaAPI(cudaAPI const&)        = delete;
@@ -89,4 +92,25 @@ class cudaAPI {
   }
 };
 
+/**
+ * @brief Check if the CUDA library is available
+ *
+ * True only if KvikIO is built with `KVIKIO_CUDA_FOUND` and `cudaAPI::instance()` doesn't throw
+ * `std::runtime_error`. Notice, this doesn't check if the runtime environment supports CUDA.
+ *
+ * @return The boolean answer
+ */
+#ifdef KVIKIO_CUDA_FOUND
+inline bool is_cuda_available()
+{
+  try {
+    cudaAPI::instance();
+    return true;
+  } catch (const std::runtime_error&) {
+    return false;
+  }
+}
+#else
+constexpr bool is_cuda_available() { return false; }
+#endif
+
 }  // namespace kvikio
diff --git a/cpp/include/kvikio/shim/cuda_h_wrapper.hpp b/cpp/include/kvikio/shim/cuda_h_wrapper.hpp
new file mode 100644
index 0000000000..0740c99f31
--- /dev/null
+++ b/cpp/include/kvikio/shim/cuda_h_wrapper.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+/**
+ * In order to support compilation when `cuda.h` isn't available, we
+ * wrap all use of cuda in a `#ifdef KVIKIO_CUDA_FOUND` guard.
+ *
+ * The motivation here is to make KvikIO work in all circumstances so
+ * that libraries don't have to implement their own fallback solutions.
+ */
+#ifdef KVIKIO_CUDA_FOUND
+#include <cuda.h>
+#else
+
+// If KVIKIO_CUDA_FOUND isn't defined, we define stand-ins for some of the `cuda.h` names here.
+// Notice, this doesn't need to be ABI compatible with the CUDA definitions.
+
+using CUresult    = int;
+using CUdeviceptr = unsigned long long;
+using CUdevice    = int;
+using CUcontext   = void*;
+using CUstream    = void*;
+// Placeholder constants: all of them are defined as 0, so their values cannot be told apart.
+#define CUDA_ERROR_STUB_LIBRARY             0
+#define CUDA_SUCCESS                        0
+#define CUDA_ERROR_INVALID_VALUE            0
+#define CU_POINTER_ATTRIBUTE_CONTEXT        0
+#define CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL 0
+#define CU_POINTER_ATTRIBUTE_DEVICE_POINTER 0
+#define CU_MEMHOSTREGISTER_PORTABLE         0
+#define CU_STREAM_DEFAULT                   0
+// Declarations only (no definitions here), e.g. for `decltype(cuStreamDestroy)` in shim/cuda.hpp.
+CUresult cuInit(...);
+CUresult cuMemHostAlloc(...);
+CUresult cuMemFreeHost(...);
+CUresult cuMemcpyHtoDAsync(...);
+CUresult cuMemcpyDtoHAsync(...);
+CUresult cuPointerGetAttribute(...);
+CUresult cuPointerGetAttributes(...);
+CUresult cuCtxPushCurrent(...);
+CUresult cuCtxPopCurrent(...);
+CUresult cuCtxGetCurrent(...);
+CUresult cuMemGetAddressRange(...);
+CUresult cuGetErrorName(...);
+CUresult cuGetErrorString(...);
+CUresult cuDeviceGet(...);
+CUresult cuDevicePrimaryCtxRetain(...);
+CUresult cuDevicePrimaryCtxRelease(...);
+CUresult cuStreamCreate(...);
+CUresult cuStreamDestroy(...);
+CUresult cuStreamSynchronize(...);
+
+#endif
diff --git a/cpp/include/kvikio/shim/cufile_h_wrapper.hpp b/cpp/include/kvikio/shim/cufile_h_wrapper.hpp
index f512159602..f9bd7dc100 100644
--- a/cpp/include/kvikio/shim/cufile_h_wrapper.hpp
+++ b/cpp/include/kvikio/shim/cufile_h_wrapper.hpp
@@ -15,7 +15,6 @@
  */
 #pragma once
 
-#include <cuda.h>
 #include <sys/types.h>
 
 /**
diff --git a/cpp/include/kvikio/utils.hpp b/cpp/include/kvikio/utils.hpp
index 4348135d58..a136aad8c2 100644
--- a/cpp/include/kvikio/utils.hpp
+++ b/cpp/include/kvikio/utils.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -61,6 +61,7 @@ inline constexpr std::size_t page_size = 4096;
  * @param ptr Memory pointer to query
  * @return The boolean answer
  */
+#ifdef KVIKIO_CUDA_FOUND
 inline bool is_host_memory(const void* ptr)
 {
   CUpointer_attribute attrs[1] = {
@@ -80,6 +81,9 @@ inline bool is_host_memory(const void* ptr)
   // does it to support `cudaMemoryTypeUnregistered`.
   return memtype == 0 || memtype == CU_MEMORYTYPE_HOST;
 }
+#else
+constexpr bool is_host_memory([[maybe_unused]] const void* ptr) { return true; }
+#endif
 
 /**
  * @brief Return the device owning the pointer