Feature: multiprocess support based on MPI (#88)
* edit: MPI with indep var selection

* fix: compile errors

* edit: further rename functions

* add: fit mpi

* fix: runtime errors

* fix: unused variables

* add: bw sel mpi

* fix: bugs caused in non mpi mode

* fix: indep var sel mpi runtime error

* fix: bw sel runtime errors

* edit: activate all tests

* add: compile condition around mpi code

* edit: mpi with omp

* add: mpi with cuda

* fix: fitMpi S send error

* edit: call GWRBasic in MGWR

* fix: split fit core functions

* add: fit core omp and cuda

* edit: use min distance

* add: cubase static methods to create and destroy handle

* edit: CRSDistance use cumat

* fix: GWR cuda bug

* edit: MGWR move global fit earlier

* add: mpi mat mul

* add: MGWR mpi fit

* fix: mpi mat mul error

* edit: test matrix size

* edit: use scatter instead of broadcast

* fix: mgwr mpi run error

* add: bw criterion cv aic mpi mode

* fix: mgwr mpi omp

* edit: mpi with cuda

* edit: MPI code conditional compile

* edit: mgwr set golden bounds of gwr basic

* edit: pre compute b_rows_i

* fix: setGroupSize use std::size_t

* fix: syntax error

* fix(test): MGWR

* fix: type mismatch on armadillo and RcppArmadillo

* fix: use MY_MPI_UWORD to gather sizes

It is unsigned long when ARMA_32BIT_WORD is defined; otherwise, unsigned long long.

* edit: rename MY_MPI_UWORD

* edit: use int to record process status

* add(workflow): MPI tests

* Revert "add(workflow): MPI tests"

This reverts commit 5bbb210.

---------

Co-authored-by: rd21411 <[email protected]>
HPDell and rd21411 authored Jul 21, 2024
1 parent 2adab8e commit be40076
Showing 24 changed files with 2,067 additions and 1,595 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -6,6 +6,7 @@ set(CMAKE_MODULE_PATH cmake)

option(ENABLE_OpenMP "Determines whether OpenMP support should be built" ON)
option(ENABLE_CUDA "Determines whether CUDA support should be built" OFF)
option(ENABLE_MPI "Determines whether MPI support should be built" OFF)
option(WITH_TESTS "Determines whether to build and run tests" ON)

if(ENABLE_CUDA)
228 changes: 123 additions & 105 deletions include/gwmodelpp/GWRBasic.h

Large diffs are not rendered by default.

314 changes: 56 additions & 258 deletions include/gwmodelpp/GWRMultiscale.h

Large diffs are not rendered by default.

21 changes: 19 additions & 2 deletions include/gwmodelpp/IParallelizable.h
@@ -16,7 +16,11 @@ enum ParallelType
{
SerialOnly = 1 << 0, //!< \~english Use no parallel methods. \~chinese 不并行。
OpenMP = 1 << 1, //!< \~english Use multithread methods. \~chinese 多线程并行。
CUDA = 1 << 2 //!< \~english Use CUDA accelerated methods. \~chinese CUDA加速。
CUDA = 1 << 2, //!< \~english Use CUDA accelerated methods. \~chinese CUDA加速。
MPI = (1 << 3), //!< \~english Use MPI parallel methods. \~chinese MPI 多进程并行。
MPI_Serial = (1 << 3) | (1 << 0), //!< \~english MPI among processes, serial within each process. \~chinese MPI 多进程并行，进程内串行。
MPI_MP = (1 << 3) | (1 << 1), //!< \~english MPI among processes, multithreaded within each process. \~chinese MPI 多进程并行，进程内多线程。
MPI_CUDA = (1 << 3) | (1 << 2) //!< \~english MPI among processes, CUDA accelerated within each process. \~chinese MPI 多进程并行，进程内 CUDA 加速。
};

/**
@@ -137,10 +141,23 @@ struct IParallelCudaEnabled
* 对于大多数 GPU 可选择值 64。
*
*/
virtual void setGroupSize(const size_t size) = 0;
virtual void setGroupSize(const std::size_t size) = 0;

};

struct IParallelMpiEnabled
{
virtual int workerId() = 0; //!< \~english Get the rank of the current process. \~chinese 获取当前进程的编号。
virtual void setWorkerId(int id) = 0; //!< \~english Set the rank of the current process. \~chinese 设置当前进程的编号。
virtual void setWorkerNum(int size) = 0; //!< \~english Set the total number of processes. \~chinese 设置进程总数。
};

#define GWM_MPI_MASTER_BEGIN if (workerId() == 0) {
#define GWM_MPI_MASTER_END }
#define GWM_MPI_WORKER_BEGIN if (workerId() != 0) {
#define GWM_MPI_WORKER_END }
#define GWM_MPI_MASTER_WORKER_SWITCH } else {

}

#endif // IPARALLELIZABLE_H
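
Since the MPI variants above are bit combinations, callers can test for MPI membership with a single bitwise AND instead of enumerating every combined value, and the master/worker macros expand to a plain if/else on workerId(). A minimal sketch of both (the usesMpi helper is illustrative, not part of the header):

#include "gwmodelpp/IParallelizable.h"

// True for MPI, MPI_Serial, MPI_MP and MPI_CUDA, which all carry the (1 << 3) bit.
bool usesMpi(ParallelType type)
{
return (type & ParallelType::MPI) != 0;
}

// Within a method of a class implementing IParallelMpiEnabled,
// the macros bracket rank-0 and worker code paths:
// GWM_MPI_MASTER_BEGIN
//     ... rank 0: scatter tasks, gather partial results ...
// GWM_MPI_MASTER_WORKER_SWITCH
//     ... other ranks: compute the local share ...
// GWM_MPI_WORKER_END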
12 changes: 3 additions & 9 deletions include/gwmodelpp/spatialweight/CRSDistance.h
@@ -4,6 +4,7 @@
#include "Distance.h"

#ifdef ENABLE_CUDA
#include "gwmodelpp/utils/cumat.hpp"
#include "gwmodelpp/spatialweight/cuda/CRSDistanceKernel.h"
#include "gwmodelpp/spatialweight/cuda/ISpatialCudaEnabled.h"
#endif // ENABLE_CUDA
@@ -146,13 +147,6 @@ class CRSDistance : public Distance

virtual ~CRSDistance()
{
#ifdef ENABLE_CUDA
if (mCudaPrepared)
{
cudaFree(mCudaDp);
cudaFree(mCudaFp);
}
#endif
}

virtual Distance * clone() const override
@@ -222,8 +216,8 @@
CalculatorType mCalculator = &EuclideanDistance; //!< \~english Calculator \~chinese 距离计算方法

#ifdef ENABLE_CUDA
double* mCudaDp = 0; //!< \~english Device pointer to data points \~chinese 指向数据点的设备指针
double* mCudaFp = 0; //!< \~english Device pointer to focus points \~chinese 指向关注点的设备指针
cumat mCudaDp; //!< \~english Data points on the device \~chinese 设备上的数据点
cumat mCudaFp; //!< \~english Focus points on the device \~chinese 设备上的关注点
CalculatorCudaType mCalculatorCuda = &eu_dist_cuda; //!< \~english CUDA based Calculator \~chinese 基于 CUDA 的距离计算方法
#endif

4 changes: 2 additions & 2 deletions include/gwmodelpp/spatialweight/SpatialWeight.h
@@ -377,7 +377,7 @@ class SpatialWeight
* @param focus 当前样本的索引值。
* @return vec 当前样本到其他所有样本的空间权重向量。
*/
virtual arma::vec weightVector(arma::uword focus)
virtual arma::vec weightVector(arma::uword focus) const
{
return mWeight->weight(mDistance->distance(focus));
}
@@ -412,7 +412,7 @@
* @param d_weights \~english Device pointer to distances \~chinese 指向输出权重的设备指针
* @return cudaError_t \~english CUDA error or success \~chinese CUDA 错误或成功
*/
virtual cudaError_t weightVector(arma::uword focus, double* d_dists, double* d_weights)
virtual cudaError_t weightVector(arma::uword focus, double* d_dists, double* d_weights) const
{
cudaError_t error;
size_t elems = 0;
10 changes: 10 additions & 0 deletions include/gwmodelpp/utils/armampi.h
@@ -0,0 +1,10 @@
#include "armadillo_config.h"
#include "mpi.h"

#ifdef ARMA_32BIT_WORD
#define GWM_MPI_UWORD MPI_UNSIGNED_LONG
#else // ARMA_32BIT_WORD
#define GWM_MPI_UWORD MPI_UNSIGNED_LONG_LONG
#endif // ARMA_32BIT_WORD

void mat_mul_mpi(arma::mat& a, arma::mat& b, arma::mat& c, const int ip, const int np, const size_t range);
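
A hedged usage sketch for mat_mul_mpi: judging from the signature alone, a plausible contract is that each rank passes its local row block of the operands and receives its block of the product, with ip and np taken from MPI_Comm_rank and MPI_Comm_size. The header does not document the meaning of range (assumed below to be the global matrix extent), so treat the shapes as illustrative only.

#include <mpi.h>
#include "gwmodelpp/utils/armampi.h"

int main(int argc, char** argv)
{
MPI_Init(&argc, &argv);
int ip = 0, np = 1;
MPI_Comm_rank(MPI_COMM_WORLD, &ip);
MPI_Comm_size(MPI_COMM_WORLD, &np);
const size_t n = 400; // global (square) extent; the meaning of `range` is an assumption
arma::mat a_local(n / np, n, arma::fill::randn); // this rank's row block of A (assumed layout)
arma::mat b_local(n / np, n, arma::fill::randn); // this rank's row block of B (assumed layout)
arma::mat c_local; // on return: this rank's row block of A*B (assumed)
mat_mul_mpi(a_local, b_local, c_local, ip, np, n);
MPI_Finalize();
return 0;
}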
43 changes: 43 additions & 0 deletions include/gwmodelpp/utils/cumat.hpp
@@ -47,6 +47,21 @@ class cubase
{
public:
static cublasHandle_t handle; //!< Save handle for cublas
static auto create_handle()
{
if (handle == nullptr) return cublasCreate(&handle);
else return CUBLAS_STATUS_SUCCESS;
}
static auto destory_handle()
{
if (handle != nullptr)
{
auto error = cublasDestroy(handle);
handle = nullptr;
return error;
}
else return CUBLAS_STATUS_SUCCESS;
}
constexpr static const double alpha1 = 1.0;
constexpr static const double beta0 = 0.0;
constexpr static const double beta1 = 1.0;
@@ -245,6 +260,32 @@ class cumat : public cubase
*/
const cuop_trans<cumat> t() const;

void resize(size_t rows, size_t cols)
{
if (dMem != nullptr && mIsRelease)
{
cudaFree(dMem);
}
cudaMalloc(&dMem, sizeof(double) * rows * cols);
// Record the new shape so nbytes() and later copies see the right size.
mRows = rows;
mCols = cols;
mIsRelease = true;
}

cumat& operator=(const cumat& right)
{
if (this == &right) return *this;
resize(right.mRows, right.mCols);
cudaMemcpy(dMem, right.dMem, nbytes(), cudaMemcpyDeviceToDevice);
return *this;
}

cumat& operator=(cumat&& right)
{
// Free any device memory this object still owns before taking over.
if (dMem != nullptr && mIsRelease)
{
cudaFree(dMem);
}
mRows = right.mRows;
mCols = right.mCols;
dMem = right.dMem;
mIsRelease = true;
right.mIsRelease = false;
return *this;
}

cumat& operator=(const cuop_trans<cumat>& right);

template<class L, class R>
@@ -282,6 +323,8 @@
size_t mCols = 0;
};

void print(const cumat& mat);

/**
* @brief \~english Strided matrix. \~chinese 条带矩阵。
*
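
The new cubase static methods make the shared cuBLAS handle lifecycle explicit and idempotent: create_handle is a no-op when the handle already exists, and destory_handle (spelled as in the source) nulls the pointer so a later create succeeds. A minimal bracket, with error handling elided; the cumat(rows, cols) constructor shape and the sizes are assumptions for illustration:

cubase::create_handle(); // safe to call repeatedly; creates the handle once
{
cumat x(1000, 30); // device allocations that use the shared handle (assumed ctor)
// ... run CUDA-accelerated fits ...
} // cumat destructor frees its device memory here
cubase::destory_handle(); // idempotent; resets the shared handle to nullptr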
26 changes: 26 additions & 0 deletions src/CMakeLists.txt
@@ -15,6 +15,15 @@ if(OpenMP_FOUND AND OpenMP_C_FOUND AND OpenMP_CXX_FOUND)
endif(OpenMP_FOUND AND OpenMP_C_FOUND AND OpenMP_CXX_FOUND)
endif()

if(ENABLE_MPI)
find_package(MPI REQUIRED)
add_definitions(-DENABLE_MPI)
include_directories(${MPI_CXX_HEADER_DIR})
add_link_options(${MPI_CXX_LINK_FLAGS})
add_compile_options(${MPI_CXX_COMPILE_OPTIONS})
add_definitions(${MPI_CXX_COMPILE_DEFINITIONS})
endif(ENABLE_MPI)

find_package(GSL REQUIRED)
if(GSL_FOUND)
include_directories(${GSL_INCLUDE_DIRS})
@@ -137,6 +146,17 @@ if(ENABLE_CUDA)
list(PREPEND SOURCES_ALL ${SOURCES_CUDA})
endif(ENABLE_CUDA)

if(ENABLE_MPI)
set(HEADERS_MPI
../include/gwmodelpp/utils/armampi.h
)
set(SOURCES_MPI
gwmodelpp/utils/armampi.cpp
)
list(PREPEND HEADERS_ALL ${HEADERS_MPI})
list(PREPEND SOURCES_ALL ${SOURCES_MPI})
endif(ENABLE_MPI)

add_library(gwmodel STATIC ${HEADERS_ALL} ${SOURCES_ALL})
set_property(TARGET gwmodel PROPERTY POSITION_INDEPENDENT_CODE ON)

@@ -194,6 +214,12 @@ if(OpenMP_FOUND)
)
endif(OpenMP_FOUND)

if(ENABLE_MPI AND MPI_FOUND)
target_link_libraries(gwmodel
${MPI_CXX_LIBRARIES}
)
endif()

if(USE_CUDA_SHARED)
set(HEADERS_CUDA_SHARED
../include/gwmodelcuda/StdTelegram.h
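
With these build changes, MPI support is opt-in: configuring with -DENABLE_MPI=ON runs find_package(MPI REQUIRED), defines ENABLE_MPI for the conditional-compilation blocks, compiles the new armampi sources, and links MPI_CXX_LIBRARIES into the static gwmodel target. For example: cmake -S . -B build -DENABLE_MPI=ON, followed by cmake --build build.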
