Skip to content

Commit

Permalink
Add pi_cuda example
Browse files Browse the repository at this point in the history
  • Loading branch information
Shihab-Shahriar committed Oct 17, 2023
1 parent 365f64a commit 4059ed2
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 29 deletions.
4 changes: 2 additions & 2 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ target_link_libraries(raw_speed_cpu benchmark::benchmark)
# Optional CUDA benchmark: only built when a CUDA compiler is detected.
# The branch below is selected by CMAKE_CUDA_COMPILER after check_language(CUDA).
include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
message(STATUS "CUDA FOUND")
message(STATUS "CUDA FOUND: building CUDA benchmarks")
# Enable the CUDA language, then build raw_speed_cuda from raw_speed_cuda.cu
# with the project's top-level include/ directory on its include path.
enable_language(CUDA)
add_executable(raw_speed_cuda raw_speed_cuda.cu)
target_include_directories(raw_speed_cuda PRIVATE ${CMAKE_SOURCE_DIR}/include)
else()
# No CUDA compiler: report it and define no CUDA benchmark target.
message(STATUS "CUDA not available")
message(STATUS "skipping: CUDA benchmarks, CUDA not found")
endif()
14 changes: 14 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,17 @@
# Host-only examples: always built.
add_executable(basic_usage basic_usage.cpp)
add_executable(pi_openmp pi_openmp.cpp)

# CUDA example: optional, built only when a CUDA compiler is detected.
include(CheckLanguage)
check_language(CUDA)
if (NOT CMAKE_CUDA_COMPILER)
  message(STATUS "skipping: CUDA examples, CUDA not found")
else()
  message(STATUS "CUDA FOUND: building CUDA examples")
  enable_language(CUDA)

  # pi_cuda is built from pi_cuda.cu against the top-level include/ directory
  # and requests CUDA standard 17.
  add_executable(pi_cuda pi_cuda.cu)
  target_include_directories(pi_cuda PRIVATE ${CMAKE_SOURCE_DIR}/include)
  set_target_properties(pi_cuda PROPERTIES CUDA_STANDARD 17)
endif()

61 changes: 61 additions & 0 deletions examples/pi_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,64 @@
// SOFTWARE. *
//********************************************************************************
// @HEADER

/**
* Compute Pi using the Monte Carlo method.
*
* For simplicity, we omit the usual CUDA error checking here.
*/

#include <curand_kernel.h>
#include <openrand/tyche.h>

#include <cmath>
#include <iostream>

// Problem size and launch geometry. These are compile-time integer constants,
// which lets the kernel below use them directly.
const int N = 100000000;                      // Total number of sample points
const int SAMPLES_PER_THREAD = 1000;          // Points drawn by each thread
const int NTHREADS = N / SAMPLES_PER_THREAD;  // Threads that contribute samples
const int THREADS_PER_BLOCK = 256;            // Threads per block

// The host divides the hit count by N, so the contributing threads must draw
// exactly N points in total.
static_assert(N % SAMPLES_PER_THREAD == 0,
              "N must be a multiple of SAMPLES_PER_THREAD");

typedef openrand::Tyche RNG;

/**
 * Count the random points that satisfy x*x + y*y <= 1.
 *
 * Every thread whose global index is below NTHREADS draws SAMPLES_PER_THREAD
 * points (x, y) from its own generator, seeded with the thread index, and
 * adds its private hit count to *d_sum with a single atomicAdd.
 *
 * Launch layout: 1D grid of 1D blocks holding at least NTHREADS threads in
 * total. Threads with an index of NTHREADS or above return without sampling.
 *
 * @param d_sum Device pointer to the global hit counter. The kernel only adds
 *              to it, so the caller must zero it before the launch.
 */
__global__ void monteCarloPi(int *d_sum) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;

  // A grid made of whole blocks can hold more than NTHREADS threads. Surplus
  // threads must not sample, otherwise more than N points are counted while
  // the estimate is still normalised by N.
  if (idx >= NTHREADS) return;

  RNG rng(idx, 0);
  int localHits = 0;

  for (int i = 0; i < SAMPLES_PER_THREAD; i++) {
    // Generate random numbers in [0, 1]
    float x = rng.rand();
    float y = rng.rand();
    if (x * x + y * y <= 1.0f) localHits++;
  }

  // One atomic per thread rather than one per sample.
  atomicAdd(d_sum, localHits);
}

/**
 * Estimate Pi on the GPU and print the result.
 *
 * Allocates and zeroes a single device-side hit counter, launches
 * monteCarloPi, copies the counter back and prints 4 * hits / N.
 * CUDA return codes are not checked, in keeping with the note at the top of
 * this file.
 *
 * @return 0 always.
 */
int main() {
  std::cout << "Number of samples: " << N << std::endl;
  std::cout << "Number of samples per thread: " << SAMPLES_PER_THREAD
            << std::endl;
  std::cout << "Number of threads: " << NTHREADS << std::endl;

  int *d_sum = nullptr;
  cudaMalloc(&d_sum, sizeof(int));
  cudaMemset(d_sum, 0, sizeof(int));

  // Round the block count up so that at least NTHREADS threads are launched.
  // The execution configuration is <<<blocks, threads per block>>>.
  const int nblocks = (NTHREADS + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
  monteCarloPi<<<nblocks, THREADS_PER_BLOCK>>>(d_sum);

  // Blocking copy: it does not return until the kernel above has finished.
  int h_sum = 0;
  cudaMemcpy(&h_sum, d_sum, sizeof(int), cudaMemcpyDeviceToHost);

  // Stay in double: the hit count (~7.85e7) exceeds what a float mantissa
  // represents exactly.
  const double pi = 4.0 * static_cast<double>(h_sum) / N;

  std::cout << "Approximated value of Pi: " << pi << std::endl;

  cudaFree(d_sum);

  return 0;
}
27 changes: 0 additions & 27 deletions examples/pi_kokkos.cpp

This file was deleted.

0 comments on commit 4059ed2

Please sign in to comment.