Skip to content

Commit

Permalink
Philox: Solves a bug where importing philox on cuda led to identifier …
Browse files Browse the repository at this point in the history
…not found error (#10)

* Solves a bug where importing philox on cuda led to identifier not found error

* CMakeLists.txt: Don't run benchmarks as part of ctest; run_stat_tests.py: Increase practrand to 8GB

* clang-format
  • Loading branch information
Shihab-Shahriar authored Oct 26, 2023
1 parent 400cd5c commit d55850e
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 30 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
GIT_TAG main
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
)
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(google_benchmark)
endif()

Expand Down
11 changes: 8 additions & 3 deletions examples/pi_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@
*/

#include <curand_kernel.h>
#include <openrand/phillox.h>
#include <openrand/tyche.h>
#include <openrand/threefry.h>
#include <openrand/squares.h>

#include <cmath>
#include <iostream>
Expand All @@ -42,7 +45,7 @@ const int SAMPLES_PER_THREAD = 1000; // Number of samples per thread
const int NTHREADS = N / SAMPLES_PER_THREAD; // Number of threads
const int THREADS_PER_BLOCK = 256; // Number of threads per block

typedef openrand::Tyche RNG;
typedef openrand::Phillox RNG;

__global__ void monteCarloPi(int *d_sum) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
Expand Down Expand Up @@ -77,9 +80,11 @@ int main() {
int h_sum;
cudaMemcpy(&h_sum, d_sum, sizeof(int), cudaMemcpyDeviceToHost);

float pi = 4.0 * (float)h_sum / N;
double pi_estimate = 4.0 * (float)h_sum / N;

std::cout << "Approximated value of Pi: " << pi << std::endl;
constexpr double pi = 3.14159265358979323846;
std::cout << "pi_estimate: " << pi_estimate << std::endl;
std::cout << "log10(|pi - pi_estimate|): " << std::log10(std::abs(pi - pi_estimate)) << std::endl;

cudaFree(d_sum);

Expand Down
2 changes: 2 additions & 0 deletions examples/pi_openmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ int main() {
double pi_estimate = compute_pi();

constexpr double pi = 3.14159265358979323846;

std::cout << "pi_estimate: " << pi_estimate << std::endl;
std::cout << "log10(|pi - pi_estimate|): " << std::log10(std::abs(pi - pi_estimate)) << std::endl;

return 0;
Expand Down
50 changes: 24 additions & 26 deletions include/openrand/phillox.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,37 +35,18 @@
#include <iostream>
#include <limits>

namespace {

// Multipliers applied to counter words 0 and 2 inside round().
constexpr uint32_t PHILOX_M4x32_0 = 0xD2511F53;
constexpr uint32_t PHILOX_M4x32_1 = 0xCD9E8D57;
// Increments added to the two key words between successive rounds.
constexpr uint32_t PHILOX_W32_0 = 0x9E3779B9;
constexpr uint32_t PHILOX_W32_1 = 0xBB67AE85;

/**
 * @brief Full 32x32 -> 64 bit multiplication, split into two 32-bit halves.
 * @param L   first factor
 * @param R   second factor
 * @param hip receives the upper 32 bits of the product
 * @return the lower 32 bits of the product
 */
inline DEVICE uint32_t mulhilo(uint32_t L, uint32_t R, uint32_t *hip) {
  // Widening one operand is enough: the other is promoted to 64 bits too.
  const uint64_t wide = static_cast<uint64_t>(L) * R;
  *hip = static_cast<uint32_t>(wide >> 32);
  return static_cast<uint32_t>(wide);
}

/**
 * @brief Applies one mixing round to the 4-word counter, in place.
 * @param key two key words XOR-ed into the new ctr[0] and ctr[2]
 * @param ctr four-word state, overwritten with the round output
 */
inline DEVICE void round(const uint32_t (&key)[2], uint32_t (&ctr)[4]) {
  uint32_t upper0;
  uint32_t upper2;
  const uint32_t lower0 = mulhilo(PHILOX_M4x32_0, ctr[0], &upper0);
  const uint32_t lower2 = mulhilo(PHILOX_M4x32_1, ctr[2], &upper2);

  // Compute both XOR mixes from the old state before any word is overwritten.
  const uint32_t mixed0 = upper2 ^ ctr[1] ^ key[0];
  const uint32_t mixed2 = upper0 ^ ctr[3] ^ key[1];

  ctr[0] = mixed0;
  ctr[1] = lower2;
  ctr[2] = mixed2;
  ctr[3] = lower0;
}
}  // namespace
// Key increments: the round loop adds PHILOX_W0 to key[0] and PHILOX_W1 to
// key[1] before every round except the first.
#define PHILOX_W0 0x9E3779B9
#define PHILOX_W1 0xBB67AE85
// Multipliers passed to mulhilo() by round(): PHILOX_M0 is multiplied with
// ctr[0], PHILOX_M1 with ctr[2].
#define PHILOX_M0 0xD2511F53
#define PHILOX_M1 0xCD9E8D57

namespace openrand {

/**
* @class Phillox
* @brief Phillox generator
* @note This is a modified version of Phillox generator from Random123 library.
* This uses 4x 32-bit counter, 2x 32-bit key along with 10 rounds.
*/
class Phillox : public BaseRNG<Phillox> {
public:
Expand Down Expand Up @@ -118,14 +99,31 @@ class Phillox : public BaseRNG<Phillox> {

for (int r = 0; r < 10; r++) {
if (r > 0) {
key[0] += PHILOX_W32_0;
key[1] += PHILOX_W32_1;
key[0] += PHILOX_W0;
key[1] += PHILOX_W1;
}
round(key, _out);
}
_ctr++;
}

/**
 * @brief Multiplies two 32-bit values and returns the 64-bit result in halves.
 * @param L   first factor
 * @param R   second factor
 * @param hip output: upper 32 bits of L * R
 * @return lower 32 bits of L * R
 */
inline DEVICE uint32_t mulhilo(uint32_t L, uint32_t R, uint32_t *hip) {
  const uint64_t lhs = L;
  const uint64_t rhs = R;
  const uint64_t wide = lhs * rhs;
  *hip = static_cast<uint32_t>(wide >> 32);
  return static_cast<uint32_t>(wide);
}

/**
 * @brief Performs a single mixing round on the 4-word counter, in place.
 *
 * ctr[0] and ctr[2] are multiplied by PHILOX_M0 / PHILOX_M1; the high halves
 * are XOR-ed with the neighbouring counter word and a key word, while the low
 * halves are stored unchanged into ctr[3] and ctr[1] respectively.
 *
 * @param key two key words mixed into the new ctr[0] and ctr[2]
 * @param ctr four-word state, overwritten with the round output
 */
inline DEVICE void round(const uint32_t (&key)[2], uint32_t (&ctr)[4]) {
  uint32_t high_a;
  uint32_t high_b;
  const uint32_t low_a = mulhilo(PHILOX_M0, ctr[0], &high_a);
  const uint32_t low_b = mulhilo(PHILOX_M1, ctr[2], &high_b);

  // Derive the XOR-mixed words from the old state before overwriting anything.
  const uint32_t next0 = high_b ^ ctr[1] ^ key[0];
  const uint32_t next2 = high_a ^ ctr[3] ^ key[1];

  ctr[0] = next0;
  ctr[1] = low_b;
  ctr[2] = next2;
  ctr[3] = low_a;
}

// User provided seed and counter broken up, constant throughout
// the lifetime of the object
const uint32_t seed_hi, seed_lo;
Expand Down
2 changes: 1 addition & 1 deletion tests/run_stat_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
PRACT_RND_EXEC = os.path.join(BUILD_DIR, "Practrand", "RNG_test")

for gen in ["philox", "tyche", "threefry", "squares"]:
command = f"{BUILD_DIR}/tests/pract_rand_multi {gen} | {PRACT_RND_EXEC} stdin32 -multithreaded -tlmax 4GB > {RES_DIR}/practrandm_{gen}.txt"
command = f"{BUILD_DIR}/tests/pract_rand_multi {gen} | {PRACT_RND_EXEC} stdin32 -multithreaded -tlmax 8GB > {RES_DIR}/practrandm_{gen}.txt"

p = subprocess.Popen(command, shell=True)
p.name = f"practrand_{gen}"
Expand Down

0 comments on commit d55850e

Please sign in to comment.