-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Expand and add more CUDA/HIP documentation
Document cuda_pool, cuda_scheduler, cuda_stream, cublas_handle, and cusolver_handle, and expose them together with the CUDA sender adaptors in the documentation. Add a high-level example of using CUDA functionality.
- Loading branch information
Showing
12 changed files
with
456 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// Copyright (c) 2023 ETH Zurich | ||
// | ||
// SPDX-License-Identifier: BSL-1.0 | ||
// Distributed under the Boost Software License, Version 1.0. (See accompanying | ||
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | ||
|
||
#include <pika/cuda.hpp> | ||
#include <pika/execution.hpp> | ||
#include <pika/init.hpp> | ||
|
||
#include <fmt/printf.h> | ||
|
||
#include <iostream> | ||
#include <utility> | ||
|
||
// Pick the vendor runtime and BLAS headers for the configured backend and | ||
// define backend-neutral aliases for the stream and BLAS handle types. | ||
#if defined(PIKA_HAVE_CUDA) | ||
# include <cublas_v2.h> | ||
# include <cuda_runtime.h> | ||
using stream_t = cudaStream_t; | ||
using blas_handle_t = cublasHandle_t; | ||
#elif defined(PIKA_HAVE_HIP) | ||
# include <hip/hip_runtime.h> | ||
# include <hipblas.h> | ||
using stream_t = hipStream_t; | ||
using blas_handle_t = hipblasHandle_t; | ||
#endif | ||
|
||
// Device kernel: each launched thread prints its thread index within its block | ||
// using device-side printf. Takes no arguments. | ||
__global__ void kernel() { printf("Hello from kernel! threadIdx.x: %d\n", threadIdx.x); } | ||
|
||
// Example entry point: starts the pika runtime, builds a sender chain that | ||
// moves onto a CUDA scheduler and exercises the CUDA sender adaptors, waits | ||
// for the chain to finish, then releases the device buffers and shuts the | ||
// runtime down. Always returns 0. | ||
int main(int argc, char* argv[]) | ||
{ | ||
    namespace cu = pika::cuda::experimental; | ||
    namespace ex = pika::execution::experimental; | ||
    namespace tt = pika::this_thread::experimental; | ||
|
||
    pika::start(argc, argv); | ||
    ex::thread_pool_scheduler cpu_sched{}; | ||
|
||
    // Create a pool of CUDA streams and cuBLAS/cuSOLVER handles | ||
    cu::cuda_pool pool{}; | ||
    // Then create a scheduler | ||
    cu::cuda_scheduler cuda_sched{pool}; | ||
|
||
    { | ||
        // Poll for completion of GPU work on the default thread pool for the | ||
        // lifetime of this scope; without polling the adaptors below cannot | ||
        // signal that their work has finished. | ||
        cu::enable_user_polling poll("default"); | ||
|
||
        // There is no error checking of CUDA/HIP calls below! | ||
        // NOTE(review): the allocation and free calls use the CUDA runtime | ||
        // names directly and are not switched for HIP builds. | ||
        constexpr std::size_t n = 2048; | ||
        double* a = nullptr; | ||
        double* b = nullptr; | ||
        double* c = nullptr; | ||
        cudaMalloc(&a, sizeof(double) * n * n); | ||
        cudaMalloc(&b, sizeof(double) * n * n); | ||
        cudaMalloc(&c, sizeof(double) * n * n); | ||
|
||
        auto s = ex::just(42) | ex::continues_on(cuda_sched) | | ||
|
||
            // Plain callable attached with the generic then adaptor. The lambda | ||
            // has no __device__ annotation, so it is host code rather than a | ||
            // kernel. NOTE(review): confirm which execution context pika uses | ||
            // for it after a CUDA scheduler. | ||
            ex::then([](int x) { printf("Hello from the GPU! x: %d\n", x); }) | | ||
|
||
            // Explicitly launch a CUDA kernel with a stream: one block of 32 | ||
            // threads, so that the printed threadIdx.x values differ. | ||
            // See https://github.com/eth-cscs/whip | ||
            cu::then_with_stream([](stream_t stream) { kernel<<<1, 32, 0, stream>>>(); }) | | ||
|
||
            // Explicitly run a continuation on the host, keeping the CUDA scheduler active. This | ||
            // will run after the kernel above has finished, but in an unspecified execution | ||
            // context. | ||
            cu::then_on_host([] { fmt::print("Hello from the CPU!\n"); }) | | ||
|
||
            // Launch a cuBLAS/cuSOLVER kernel | ||
            // NOTE(review): blas_gemm is not defined in this file, and pika's | ||
            // then_with_cublas may also require a pointer-mode argument; verify | ||
            // against the pika API before building. | ||
            cu::then_with_cublas([&](blas_handle_t handle) { blas_gemm(handle, a, b, c); }); | ||
|
||
        tt::sync_wait(std::move(s)); | ||
|
||
        // We know that all work has completed so we can safely free the memory | ||
        cudaFree(a); | ||
        cudaFree(b); | ||
        cudaFree(c); | ||
    } | ||
|
||
    pika::finalize(); | ||
    pika::stop(); | ||
|
||
    return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Copyright (c) 2024 ETH Zurich | ||
// | ||
// SPDX-License-Identifier: BSL-1.0 | ||
// Distributed under the Boost Software License, Version 1.0. (See accompanying | ||
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | ||
|
||
#include <pika/cuda.hpp> | ||
#include <pika/execution.hpp> | ||
#include <pika/init.hpp> | ||
|
||
#include <fmt/printf.h> | ||
|
||
#include <iostream> | ||
#include <utility> | ||
|
||
// TODO: Just use whip? | ||
// Pick the vendor runtime header for the configured backend and define a | ||
// backend-neutral alias for the stream type. | ||
#if defined(PIKA_HAVE_CUDA) | ||
# include <cuda_runtime.h> | ||
using stream_t = cudaStream_t; | ||
#elif defined(PIKA_HAVE_HIP) | ||
# include <hip/hip_runtime.h> | ||
using stream_t = hipStream_t; | ||
#endif | ||
|
||
// Device kernel: each launched thread prints its thread index within its block | ||
// using device-side printf. Takes no arguments. | ||
__global__ void kernel() { printf("Hello from kernel! threadIdx.x: %d\n", threadIdx.x); } | ||
|
||
// Example entry point: starts the pika runtime, launches `kernel` on a stream | ||
// obtained through a CUDA scheduler, waits for it to finish, and shuts the | ||
// runtime down. Always returns 0. | ||
int main(int argc, char* argv[]) | ||
{ | ||
    namespace cu = pika::cuda::experimental; | ||
    namespace ex = pika::execution::experimental; | ||
    namespace tt = pika::this_thread::experimental; | ||
|
||
    pika::start(argc, argv); | ||
    ex::thread_pool_scheduler cpu_sched{}; | ||
    cu::cuda_pool pool{}; | ||
    cu::cuda_scheduler cuda_sched{pool}; | ||
|
||
    { | ||
        // Poll for completion of GPU work on the default thread pool for the | ||
        // lifetime of this scope; without polling the adaptor below cannot | ||
        // signal that the kernel has finished. | ||
        cu::enable_user_polling poll("default"); | ||
|
||
        // There is no error checking of CUDA/HIP calls below! | ||
        // TODO: Pass some more interesting parameters through here | ||
        // The value sent by just(42) is not used by the stream callable. The | ||
        // kernel is launched as one block of 32 threads so that the printed | ||
        // threadIdx.x values differ. | ||
        auto s = ex::just(42) | ex::continues_on(cuda_sched) | | ||
            cu::then_with_stream([](stream_t stream) { kernel<<<1, 32, 0, stream>>>(); }); | ||
|
||
        tt::sync_wait(std::move(s)); | ||
    } | ||
|
||
    pika::finalize(); | ||
    pika::stop(); | ||
|
||
    return 0; | ||
} |
Oops, something went wrong.