Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pi_cuda example #8

Merged
merged 1 commit into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ target_link_libraries(raw_speed_cpu benchmark::benchmark)
include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
message(STATUS "CUDA FOUND")
message(STATUS "CUDA FOUND: building CUDA benchmarks")
enable_language(CUDA)
add_executable(raw_speed_cuda raw_speed_cuda.cu)
target_include_directories(raw_speed_cuda PRIVATE ${CMAKE_SOURCE_DIR}/include)
else()
message(STATUS "CUDA not available")
message(STATUS "skipping: CUDA benchmarks, CUDA not found")
endif()
14 changes: 14 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,17 @@
# Host-only examples: built unconditionally.
add_executable(basic_usage basic_usage.cpp)
add_executable(pi_openmp pi_openmp.cpp)


# CUDA examples are optional. check_language(CUDA) probes for a CUDA compiler
# without failing the configure step; the branch below tests
# CMAKE_CUDA_COMPILER to decide whether to build them.
include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
message(STATUS "CUDA FOUND: building CUDA examples")
# The language must be enabled before a target with .cu sources is declared.
enable_language(CUDA)
add_executable(pi_cuda pi_cuda.cu)
# Adds the top-level include/ directory to the target's header search path.
target_include_directories(pi_cuda PRIVATE ${CMAKE_SOURCE_DIR}/include)
# Request C++17 for the CUDA sources of this target.
set_property(TARGET pi_cuda PROPERTY CUDA_STANDARD 17)

else()
# No CUDA compiler found: skip the CUDA examples, leave the rest of the build intact.
message(STATUS "skipping: CUDA examples, CUDA not found")
endif()

61 changes: 61 additions & 0 deletions examples/pi_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,64 @@
// SOFTWARE. *
//********************************************************************************
// @HEADER

/**
* Compute Pi using the Monte Carlo method.
*
* For simplicity, we ignore usual error checking here.
*/

#include <curand_kernel.h>
#include <openrand/tyche.h>

#include <cmath>
#include <iostream>

// Problem size and launch geometry, all fixed at compile time.
constexpr int N = 100000000;                      // total random points drawn
constexpr int SAMPLES_PER_THREAD = 1000;          // points drawn by one thread
constexpr int NTHREADS = N / SAMPLES_PER_THREAD;  // threads needed to draw N points
constexpr int THREADS_PER_BLOCK = 256;            // CUDA block size

// Generator type used by this example; change the alias to try another one.
using RNG = openrand::Tyche;

/**
 * Monte Carlo kernel: every participating thread draws SAMPLES_PER_THREAD
 * points (x, y), counts how many satisfy x*x + y*y <= 1, and adds its count
 * to a single global accumulator.
 *
 * Launch layout: 1D grid of 1D blocks with at least NTHREADS threads in total.
 * Threads whose global index is >= NTHREADS exit without contributing, so the
 * grid may be rounded up to whole blocks.
 *
 * @param d_sum Device pointer to one int that must be zeroed before launch;
 *              on completion it holds the total number of hits.
 */
__global__ void monteCarloPi(int *d_sum) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;

  // The host rounds the thread count up to a whole number of blocks. Without
  // this guard the surplus threads would add extra samples while the host
  // still divides by N, biasing the estimate upwards.
  if (idx >= NTHREADS) return;

  // The thread index is the first constructor argument, giving each thread
  // its own generator instance.
  RNG rng(idx, 0);
  int localHits = 0;

  for (int i = 0; i < SAMPLES_PER_THREAD; i++) {
    // Generate random numbers in [0, 1]
    float x = rng.rand();
    float y = rng.rand();
    if (x * x + y * y <= 1.0f) localHits++;
  }

  // One atomic per thread: hits are accumulated locally first.
  atomicAdd(d_sum, localHits);
}

/**
 * Host driver: zeroes a device counter, launches monteCarloPi, copies the hit
 * count back and prints the resulting estimate 4 * hits / N.
 *
 * CUDA return codes are deliberately not checked, as stated in the header
 * comment of this example.
 *
 * @return 0 always.
 */
int main() {
  std::cout << "Number of samples: " << N << std::endl;
  std::cout << "Number of samples per thread: " << SAMPLES_PER_THREAD
            << std::endl;
  std::cout << "Number of threads: " << NTHREADS << std::endl;

  int *d_sum = nullptr;
  cudaMalloc(&d_sum, sizeof(int));
  cudaMemset(d_sum, 0, sizeof(int));

  // Ceil-divide so that at least NTHREADS threads are launched.
  const int nblocks = (NTHREADS + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
  // Execution configuration is <<<grid size, block size>>>: nblocks blocks of
  // THREADS_PER_BLOCK threads each (the two were previously swapped).
  monteCarloPi<<<nblocks, THREADS_PER_BLOCK>>>(d_sum);

  // Initialised so a failed copy cannot print an indeterminate value. The
  // blocking cudaMemcpy also waits for the kernel to finish.
  int h_sum = 0;
  cudaMemcpy(&h_sum, d_sum, sizeof(int), cudaMemcpyDeviceToHost);

  // Computed in double: the hit count (~7.8e7) is not exactly representable
  // in a float's 24-bit mantissa.
  const double pi = 4.0 * static_cast<double>(h_sum) / N;

  std::cout << "Approximated value of Pi: " << pi << std::endl;

  cudaFree(d_sum);

  return 0;
}
27 changes: 0 additions & 27 deletions examples/pi_kokkos.cpp

This file was deleted.