Skip to content

Commit

Permalink
Merge pull request #731 from ROCm/rvs-rel-6.1
Browse files Browse the repository at this point in the history
Merge pull request #730 from jkottiku/master
  • Loading branch information
vamovsik authored Apr 10, 2024
2 parents 8ac8ffc + ddc928b commit f590ccb
Show file tree
Hide file tree
Showing 7 changed files with 538 additions and 145 deletions.
6 changes: 4 additions & 2 deletions babel.so/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,13 @@ set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -DHIP_VERSION_MAJOR=${HIP_VERSIO
set(HIP_HCC_BUILD_FLAGS)
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC ${HCC_CXX_FLAGS} -I${HSA_PATH}/include ${ASAN_CXX_FLAGS}")

set(HIP_STREAM_BUILD_FLAGS "-DNONTEMPORAL=1 -DDWORDS_PER_LANE=4 -DTBSIZE=1024 -DCHUNKS_PER_BLOCK=2 -O3 -std=c++17")

# Set compiler and compiler flags
set(CMAKE_CXX_COMPILER "${HIPCC_PATH}/bin/hipcc")
set(CMAKE_C_COMPILER "${HIPCC_PATH}/bin/hipcc")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_HCC_BUILD_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_HCC_BUILD_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_HCC_BUILD_FLAGS} ${HIP_STREAM_BUILD_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_HCC_BUILD_FLAGS} ${HIP_STREAM_BUILD_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_LD_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${ASAN_LD_FLAGS}")

Expand Down
62 changes: 49 additions & 13 deletions babel.so/include/HIPStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,62 @@
// For full license terms please see the LICENSE file distributed with this
// source code

#ifndef MEM_SO_INCLUDE_HIP_STREAM_H_
#define MEM_SO_INCLUDE_HIP_STREAM_H_
#pragma once

#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <sstream>

#include "Stream.h"
#include "hip/hip_runtime.h"
#ifndef __HIP_PLATFORM_NVCC__
#include "hip/hip_ext.h"
#endif

#define IMPLEMENTATION_STRING "HIP"

template <class T>
class HIPStream : public Stream<T>
{
#ifdef __HIP_PLATFORM_NVCC__
#ifndef DWORDS_PER_LANE
#define DWORDS_PER_LANE 1
#endif
#ifndef CHUNKS_PER_BLOCK
#define CHUNKS_PER_BLOCK 8
#endif
#else
#ifndef DWORDS_PER_LANE
#define DWORDS_PER_LANE 4
#endif
#ifndef CHUNKS_PER_BLOCK
#define CHUNKS_PER_BLOCK 1
#endif
#endif
// make sure that either:
// DWORDS_PER_LANE * sizeof(unsigned int) (the per-lane size in bytes) is less
// than sizeof(T), in which case we default to 1 element per lane
// or
// DWORDS_PER_LANE * sizeof(unsigned int) is an exact multiple of sizeof(T)
static_assert((DWORDS_PER_LANE * sizeof(unsigned int) < sizeof(T)) ||
(DWORDS_PER_LANE * sizeof(unsigned int) % sizeof(T) == 0),
"DWORDS_PER_LANE not divisible by sizeof(element_type)");

static constexpr unsigned int chunks_per_block{CHUNKS_PER_BLOCK};
// take into account the datatype size
// that is, if we specify 4 DWORDS_PER_LANE, this is 2 FP64 elements
// and 4 FP32 elements
static constexpr unsigned int elements_per_lane{
(DWORDS_PER_LANE * sizeof(unsigned int)) < sizeof(T) ? 1 : (
DWORDS_PER_LANE * sizeof(unsigned int) / sizeof(T))};
protected:
// Size of arrays
unsigned int array_size;
const unsigned int array_size;
const unsigned int block_cnt;
const bool evt_timing;
hipEvent_t start_ev;
hipEvent_t stop_ev;
hipEvent_t coherent_ev;

// Host array for partial sums for dot kernel
T *sums;
Expand All @@ -30,22 +69,19 @@ class HIPStream : public Stream<T>
T *d_a;
T *d_b;
T *d_c;
T *d_sum;


public:

HIPStream(const unsigned int, const int);
HIPStream(const unsigned int, const bool, const int);
~HIPStream();

virtual void copy() override;
virtual void add() override;
virtual void mul() override;
virtual void triad() override;
virtual float read() override;
virtual float write() override;
virtual float copy() override;
virtual float add() override;
virtual float mul() override;
virtual float triad() override;
virtual T dot() override;

virtual void init_arrays(T initA, T initB, T initC) override;
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;

};
#endif
17 changes: 7 additions & 10 deletions babel.so/include/Stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
// For full license terms please see the LICENSE file distributed with this
// source code


#ifndef RVS_INCLUDE_STREAM_H_
#define RVS_INCLUDE_STREAM_H_
#pragma once

#include <vector>
#include <string>
Expand All @@ -27,10 +25,12 @@ class Stream

// Kernels
// These must be blocking calls
virtual void copy() = 0;
virtual void mul() = 0;
virtual void add() = 0;
virtual void triad() = 0;
virtual float read() = 0;
virtual float write() = 0;
virtual float copy() = 0;
virtual float mul() = 0;
virtual float add() = 0;
virtual float triad() = 0;
virtual T dot() = 0;

// Copy memory between host and device
Expand All @@ -44,6 +44,3 @@ class Stream
void listDevices(void);
std::string getDeviceName(const int);
std::string getDeviceDriver(const int);

#endif

4 changes: 2 additions & 2 deletions babel.so/src/rvs_memworker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ void MemWorker::run() {
std::pair<int, uint16_t> device;

// log MEM stress test - start message
msg = "[" + action_name + "] " + MODULE_NAME + " " +
std::to_string(gpu_id) + " " + " Starting the Memory stress test ";
msg = "[" + action_name + "] " + "[GPU:: " +
std::to_string(gpu_id) + "] " + "Starting the Babel memory stress test";
rvs::lp::Log(msg, rvs::logresults);

/* Device Index */
Expand Down
Loading

0 comments on commit f590ccb

Please sign in to comment.