Skip to content

Commit

Permalink
add axpy example
Browse files Browse the repository at this point in the history
  • Loading branch information
Berenger Bramas authored and Berenger Bramas committed Feb 16, 2024
1 parent 5e8a48e commit a98edc5
Show file tree
Hide file tree
Showing 2 changed files with 203 additions and 0 deletions.
55 changes: 55 additions & 0 deletions Benchmark/axpy/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
###########################################################################
# SPECX - Berenger Bramas MPCDF - 2016
# Under LGPL Licence, please you must read the LICENCE file.
###########################################################################
# Declare the benchmark directory as its own C++ project.
project(BENCHMARK_CP_SPECX CXX)

# Apply the flags held in SPECX_CXX_FLAGS (defined outside this file).
ADD_DEFINITIONS(${SPECX_CXX_FLAGS})

if($ENV{VERBOSE})
    # Print the variable that is actually applied above. The previously printed
    # BENCHMARK_CP_SPECX_CXX_FLAGS is never set in this file, so the message
    # always showed an empty value.
    MESSAGE(STATUS "Benchmark -- SPECX_CXX_FLAGS : ${SPECX_CXX_FLAGS}")
endif()


# Collect every .cpp file found recursively below this directory.
file(GLOB_RECURSE source_tests_files ./*.cpp)


# Expose the Src directories of both the build tree and the source tree
# to the compiler as include paths.
include_directories(
    ${SPECX_BINARY_DIR}/Src
    ${SPECX_SOURCE_DIR}/Src
)


# Add execs - 1 cpp = 1 exec
foreach(exec ${source_tests_files})
    # Sources whose name contains "-mpi." are only built when MPI is enabled.
    #  - The variable is referenced by name (not expanded) so that an undefined
    #    SPECX_COMPILE_WITH_MPI evaluates to false instead of producing a
    #    malformed condition.
    #  - The dot is escaped with a double backslash: inside a quoted CMake
    #    argument "\." collapses to ".", which as a regex matches any character.
    if(SPECX_COMPILE_WITH_MPI OR NOT "${exec}" MATCHES "-mpi\\.")
        # The executable is named after the source file, without directory or extension.
        get_filename_component(execname "${exec}" NAME_WE)

        if($ENV{VERBOSE})
            MESSAGE(STATUS "Benchmark -- Add ${execname}")
        endif()

        # With CUDA enabled the source is compiled as CUDA code.
        if(SPECX_COMPILE_WITH_CUDA)
            set_source_files_properties("${exec}" PROPERTIES LANGUAGE CUDA)
        endif()

        add_executable(${execname} "${exec}")

        target_link_libraries(
            ${execname}
            specx
            ${SPECX_LIBRARIES}
        )
    endif()
endforeach(exec)
148 changes: 148 additions & 0 deletions Benchmark/axpy/axpy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
///////////////////////////////////////////////////////////////////////////
// Specx - Berenger Bramas MPCDF - 2017
// Under LGPL Licence, please you must read the LICENCE file.
///////////////////////////////////////////////////////////////////////////
#include <cassert>
#include <chrono>
#include <cstddef>
#include <iostream>
#include <thread>
#include <utility>
#include <vector>

#include <clsimple.hpp>

#include "Data/SpDataAccessMode.hpp"
#include "Utils/SpUtils.hpp"
#include "Task/SpTask.hpp"

#include "Compute/SpComputeEngine.hpp"
#include "Compute/SpWorkerTeamBuilder.hpp"
#include "TaskGraph/SpTaskGraph.hpp"
#include "Config/SpConfig.hpp"
#include "Utils/SpTimer.hpp"

/**
 * Thin wrapper around a std::vector<NumType> that adds the memmov* member
 * functions used to copy its content to and from a device buffer.
 *
 * @tparam NumType element type stored in the vector.
 */
template <class NumType>
struct Vector{
    /// Host-side storage.
    std::vector<NumType> data;

    /////////////////////////////////////////////////////////////

    /// Small descriptor returned by memmovHostToDevice; it only records an element count.
    class DataDescr {
        std::size_t nbElements;
    public:
        explicit DataDescr(const std::size_t inSize = 0) : nbElements(inSize){}

        /// @return the element count given at construction.
        auto getSize() const{
            return nbElements;
        }
    };

    using DataDescriptor = DataDescr;

    /// @return the number of bytes occupied by the elements of `data`.
    std::size_t memmovNeededSize() const{
        return data.size()*sizeof(NumType);
    }

    /**
     * Copies the whole content of `data` into the device buffer.
     *
     * @param mover     object providing copyHostToDevice(dst, src, nbBytes).
     * @param devicePtr destination buffer.
     * @param size      size of the destination in bytes; only checked by an
     *                  assert against memmovNeededSize().
     * @return a DataDescr holding the number of elements copied.
     */
    template <class DeviceMemmov>
    auto memmovHostToDevice(DeviceMemmov& mover, void* devicePtr,[[maybe_unused]] std::size_t size){
        assert(size == memmovNeededSize());
        NumType* typedDevicePtr = static_cast<NumType*>(devicePtr);
        mover.copyHostToDevice(typedDevicePtr, data.data(), memmovNeededSize());
        return DataDescr(data.size());
    }

    /**
     * Copies the device buffer back into `data` (whose current size defines
     * how many bytes are transferred).
     *
     * @param mover     object providing copyDeviceToHost(dst, src, nbBytes).
     * @param devicePtr source buffer.
     * @param size      size of the source in bytes; only checked by an assert
     *                  against memmovNeededSize().
     * The descriptor argument is accepted but not used.
     */
    template <class DeviceMemmov>
    void memmovDeviceToHost(DeviceMemmov& mover, void* devicePtr,[[maybe_unused]] std::size_t size, const DataDescr& /*inDataDescr*/){
        assert(size == memmovNeededSize());
        NumType* typedDevicePtr = static_cast<NumType*>(devicePtr);
        mover.copyDeviceToHost(data.data(), typedDevicePtr, memmovNeededSize());
    }
};


#ifdef SPECX_COMPILE_WITH_CUDA
/**
 * CUDA kernel computing out[i] = a*x[i] + y[i].
 *
 * Each thread derives one index from its block and thread coordinates and
 * handles that single element; threads whose index is >= n do nothing.
 *
 * @param n   number of elements to process.
 * @param a   scalar multiplier.
 * @param x   first input array.
 * @param y   second input array.
 * @param out output array.
 */
template <class NumType>
__global__ void cu_axpy(int n, NumType a, NumType *x, NumType *y, NumType *out)
{
    const int globalIdx = blockIdx.x*blockDim.x + threadIdx.x;

    // Threads past the end of the arrays have nothing to do.
    if (globalIdx >= n){
        return;
    }

    out[globalIdx] = a*x[globalIdx] + y[globalIdx];
}
#endif


/**
 * Runs the axpy benchmark.
 *
 * Builds three vectors of `size` floats (x and y filled with 1, z with 0),
 * submits one task computing z = a*x + y with a = 2 (a CUDA kernel when
 * SPECX_COMPILE_WITH_CUDA is defined, a CPU loop otherwise), waits for the
 * task graph to finish and writes an execution trace to ./axpy-simu.svg.
 *
 * Command line options:
 *  - help      : print the help and return.
 *  - sz / size : number of elements per vector.
 *  - th        : number of threads per CUDA block (only used by the CUDA path).
 *
 * @param argc argument count forwarded from main.
 * @param argv argument values forwarded from main.
 */
void BenchmarkTest(int argc, char** argv){
    CLsimple args("Axpy", argc, argv);

    args.addParameterNoArg({"help"}, "help");

    int size = 100;
    args.addParameter<int>({"sz" ,"size"}, "Size", size, 1024);

    // Initialised so the value is well defined whatever the parser does with it.
    int nbthreads = 256;
    args.addParameter<int>({"th"}, "nbthreads", nbthreads, 256);

    args.parse();

    if(!args.isValid() || args.hasKey("help")){
        // Print the help
        args.printHelp(std::cout);
        return;
    }

    // A negative size would be converted to a huge unsigned value by resize(),
    // and a non-positive block size would divide by zero when computing the
    // number of CUDA blocks.
    if(size <= 0 || nbthreads <= 0){
        std::cerr << "[ERROR] size and nbthreads must be strictly positive" << std::endl;
        return;
    }

    Vector<float> x;
    x.data.resize(size, 1);
    Vector<float> y;
    y.data.resize(size, 1);
    Vector<float> z;
    z.data.resize(size, 0);
    const float a = 2;

#ifdef SPECX_COMPILE_WITH_CUDA
    SpCudaUtils::PrintInfo();
    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuCudaWorkers());
#else
    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuWorkers());
#endif
    SpTaskGraph tg;

    tg.computeOn(ce);

    tg.task(SpCommutativeWrite(z),SpRead(x),SpRead(y),
#ifndef SPECX_COMPILE_WITH_CUDA
        SpCpu([ta=a](Vector<float>& tz, const Vector<float>& tx, const Vector<float>& ty) {
            // axpy: z = a*x + y, the same formula as the cu_axpy kernel
            // (the previous CPU code multiplied by y instead of adding it).
            for(std::size_t idx = 0 ; idx < tz.data.size() ; ++idx){
                tz.data[idx] = ta*tx.data[idx] + ty.data[idx];
            }
        })
#else
        SpCuda([a, nbthreads](SpDeviceDataView<Vector<float>> paramZ,
                              const SpDeviceDataView<const Vector<float>> paramX,
                              const SpDeviceDataView<const Vector<float>> paramY) {
            const int size = paramZ.data().getSize();
            // Round up so that every element is covered by a thread.
            const int nbBlocks = (size + nbthreads-1)/nbthreads;
            cu_axpy<float><<<nbBlocks, nbthreads,0,SpCudaUtils::GetCurrentStream()>>>
                (size, a, (float*)paramX.getRawPtr(), (float*)paramY.getRawPtr(), (float*)paramZ.getRawPtr());
        })
#endif
    );

#ifdef SPECX_COMPILE_WITH_CUDA
    // Empty CPU task with write access on z.
    // NOTE(review): presumably inserted so that z is brought back to host
    // memory before the graph completes - confirm against the Specx runtime.
    tg.task(SpWrite(z),
        SpCpu([](Vector<float>&) {
        })
    );
#endif

    tg.waitAllTasks();

    std::cout << "Generate trace ./axpy-simu.svg" << std::endl;
    tg.generateTrace("./axpy-simu.svg", false);
}


/// Program entry point: hands the command line over to BenchmarkTest and
/// always reports success to the caller.
int main(int argc, char** argv)
{
    BenchmarkTest(argc, argv);
    return 0;
}

0 comments on commit a98edc5

Please sign in to comment.