diff --git a/include/gwmodelpp/GWRBasic.h b/include/gwmodelpp/GWRBasic.h index 30665d2..5e6905f 100644 --- a/include/gwmodelpp/GWRBasic.h +++ b/include/gwmodelpp/GWRBasic.h @@ -51,10 +51,25 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe typedef arma::mat (GWRBasic::*FitCoreCalculator)(const arma::mat&, const arma::vec&, const SpatialWeight&); //!< \~english Fit function declaration. \~chinese 拟合函数声明。 typedef arma::mat (GWRBasic::*FitCoreSHatCalculator)(const arma::mat&, const arma::vec&, const SpatialWeight&, arma::vec&); //!< \~english Fit function declaration. \~chinese 拟合函数声明。 typedef arma::mat (GWRBasic::*FitCoreCVCalculator)(const arma::mat&, const arma::vec&, const SpatialWeight&); //!< \~english Fit function declaration. \~chinese 拟合函数声明。 + typedef void (GWRBasic::*FTestCalculator)(); + typedef double (GWRBasic::*TrQtQCalculator)(); + typedef double (GWRBasic::*TrQtQCoreCalculator)(); + typedef arma::vec (GWRBasic::*DiagBCalculator)(arma::uword); + typedef arma::vec (GWRBasic::*DiagBCoreCalculator)(arma::uword, const arma::vec&); typedef double (GWRBasic::*BandwidthSelectionCriterionCalculator)(BandwidthWeight*); //!< \~english Declaration of criterion calculator for bandwidth selection. \~chinese 带宽优选指标计算函数声明。 typedef double (GWRBasic::*IndepVarsSelectCriterionCalculator)(const std::vector&); //!< \~english Declaration of criterion calculator for variable selection. \~chinese 变量优选指标计算函数声明。 + struct FTestResult + { + double s = 0.0; + double df1 = 0.0; + double df2 = 0.0; + double p = 0.0; + }; + + using FTestResultCombine = std::tuple, FTestResult>; + private: /** @@ -388,6 +403,15 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe void setStoreC(bool flag) { mStoreC = flag; } + bool isDoFTest() { return mIsDoFTest; }; + + void setIsDoFtest(bool value) { mIsDoFTest = value; } + + FTestResultCombine fTestResults() + { + return std::make_tuple(mF1Test, mF2Test, mF3Test, mF4Test); + } + public: // Implement Algorithm bool isValid() override; @@ -396,6 +420,11 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe arma::mat fit() override; + void fTest() + { + (this->*mFTestFunction)(); + } + public: // Implement IVariableSelectable Status getCriterion(const std::vector& variables, double& criterion) override { @@ -508,6 +537,11 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe */ arma::mat fitBase(); + void fTestBase(); + + double calcTrQtQBase(); + + arma::vec calcDiagBBase(arma::uword i); private: @@ -517,6 +551,10 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe arma::mat fitCoreCVSerial(const arma::mat& x, const arma::vec& y, const SpatialWeight& sw); + double calcTrQtQCoreSerial(); + + arma::vec calcDiagBCoreSerial(arma::uword i, const arma::vec& c); + #ifdef ENABLE_OPENMP /** @@ -593,6 +631,10 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe */ arma::mat fitCoreSHatOmp(const arma::mat& x, const arma::vec& y, const SpatialWeight& sw, arma::vec& shat); + double calcTrQtQCoreOmp(); + + arma::vec calcDiagBCoreOmp(arma::uword i, const arma::vec& c); + #endif #ifdef ENABLE_CUDA @@ -678,6 +720,8 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe double bandwidthSizeCriterionCVMpi(BandwidthWeight* bandwidthWeight); double bandwidthSizeCriterionAICMpi(BandwidthWeight* bandwidthWeight); arma::mat fitMpi(); + double calcTrQtQMpi(); + arma::vec calcDiagBMpi(arma::uword i); #endif // ENABLE_MPI public: // Implement IParallelizable @@ -772,6 +816,17 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe arma::cube mC;//!< \~english All \f$S\f$ matrices. \~chinese 所有 \f$C\f$ 矩阵。 bool mStoreS = false; //!< \~english Whether to save S \~chinese 是否保存 S 矩阵 bool mStoreC = false; //!< \~english Whether to save C \~chinese 是否保存 C 矩阵 + + bool mIsDoFTest = false; + FTestResult mF1Test; + FTestResult mF2Test; + std::vector mF3Test; + FTestResult mF4Test; + FTestCalculator mFTestFunction = &GWRBasic::fTestBase; + TrQtQCalculator mCalcTrQtQFunction = &GWRBasic::calcTrQtQBase; + TrQtQCoreCalculator mCalcTrQtQCoreFunction = &GWRBasic::calcTrQtQCoreSerial; + DiagBCalculator mCalcDiagBFunction = &GWRBasic::calcDiagBBase; + DiagBCoreCalculator mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreSerial; }; } diff --git a/include/gwmodelpp/utils/armampi.h b/include/gwmodelpp/utils/armampi.h index d1866a0..9fa1758 100644 --- a/include/gwmodelpp/utils/armampi.h +++ b/include/gwmodelpp/utils/armampi.h @@ -7,4 +7,8 @@ #define GWM_MPI_UWORD MPI_UNSIGNED_LONG_LONG #endif // ARMA_32BIT_WORD -void mat_mul_mpi(arma::mat& a, arma::mat& b, arma::mat& c, const int ip, const int np, const size_t range); +void mat_mul_mpi(arma::mat& a, arma::mat& b, arma::mat& c, const int ip, const int np); + +void mat_quad_mpi(arma::mat& a, arma::mat& aTa, const int ip, const int np); + +double mat_trace_mpi(arma::mat& a, const int ip, const int np); diff --git a/src/gwmodelpp/GWRBasic.cpp b/src/gwmodelpp/GWRBasic.cpp index 12720c7..acd771c 100644 --- a/src/gwmodelpp/GWRBasic.cpp +++ b/src/gwmodelpp/GWRBasic.cpp @@ -1,3 +1,4 @@ +#include #include "GWRBasic.h" #include "BandwidthSelector.h" #include "VariableForwardSelector.h" @@ -158,6 +159,12 @@ mat GWRBasic::fit() GWM_LOG_STAGE("Model fitting"); mBetas = (this->*mFitFunction)(); + + if (mIsDoFTest) + { + GWM_LOG_STAGE("F Test"); + (this->*mFTestFunction)(); + } #ifdef ENABLE_CUDA if (mParallelType & ParallelType::CUDA) @@ -417,6 +424,174 @@ double gwm::GWRBasic::indepVarsSelectionCriterion(const vector& indepVar } } +void gwm::GWRBasic::fTestBase() +{ + double v1 = mSHat(0), v2 = mSHat(1); + double nDp = double(mCoords.n_rows), nVar = double(mX.n_cols); + double RSSg = RSS(mX, mY, mBetas); + vec betaOls = (mX.t() * mX).i() * (mX.t() * mY); + vec residualOls = mY - mX * betaOls; + double RSSo = sum(residualOls % residualOls); + double DFo = nDp - nVar; + double delta1 = nDp - 2.0 * v1 + v2; + double sigma21 = RSSg / delta1; + double lDelta1 = sum(mQDiag), lDelta2 = (this->*mCalcTrQtQFunction)(); + //========= + // F1 Test + //========= + mF1Test.s = (RSSg/lDelta1)/(RSSo/DFo); + mF1Test.df1 = lDelta1 * lDelta1 / lDelta2; + mF1Test.df2 = DFo; + mF1Test.p = gsl_cdf_fdist_P(mF1Test.s, mF1Test.df1, mF1Test.df2); + //========= + // F2 Test + //========= + mF2Test.s = ((RSSo-RSSg)/(DFo-lDelta1))/(RSSo/DFo); + mF2Test.df1 = (DFo-lDelta1) * (DFo-lDelta1) / (DFo - 2 * lDelta1 + lDelta2); + mF2Test.df2 = DFo; + mF2Test.p = gsl_cdf_fdist_Q(mF2Test.s, mF2Test.df1, mF2Test.df2); + //========= + // F3 Test + //========= + mF3Test.resize(mX.n_cols); + vec vk2(nVar, arma::fill::zeros); + for (int i = 0; i < nVar; i++) + { + vec betasi = mBetas.col(i); + vec betasJndp = vec(nDp, fill::ones) * (sum(betasi) * 1.0 / nDp); + vk2(i) = (1.0 / nDp) * det(trans(betasi - betasJndp) * betasi); + } + + for (uword i = 0; i < mX.n_cols; i++) + { + vec diagB = (this->*mCalcDiagBFunction)(i); + double g1 = diagB(0), g2 = diagB(1); + double numdf = g1 * g1 / g2; + FTestResult f3i; + f3i.s = (vk2(i) / g1) / sigma21; + f3i.df1 = numdf; + f3i.df2 = mF1Test.df1; + f3i.p = gsl_cdf_fdist_Q(f3i.s, numdf, mF1Test.df1); + mF3Test[i] = f3i; + } + //========= + // F4 Test + //========= + mF4Test.s = RSSg / RSSo; + mF4Test.df1 = delta1; + mF4Test.df2 = DFo; + mF4Test.p = gsl_cdf_fdist_P(mF4Test.s, mF4Test.df1, mF4Test.df2); +} + +double GWRBasic::calcTrQtQBase() +{ + double trQtQ = 0.0; + uword nDp = mCoords.n_rows; + if (isStoreS()) + { + mat EmS = eye(nDp, nDp) - mS; + mat Q = trans(EmS) * EmS; + trQtQ = sum(diagvec(trans(Q) * Q)); + } + else + { + trQtQ = (this->*mCalcTrQtQCoreFunction)(); + } + return trQtQ; +} + +double GWRBasic::calcTrQtQCoreSerial() +{ + double trQtQ = 0.0; + uword nDp = mCoords.n_rows, nVar = mCoords.n_cols; + mat wspan(1, nVar, fill::ones); + std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); + for (uword i = workRange.first; i < workRange.second; i++) + { + vec wi = mSpatialWeight.weightVector(i); + mat xtwi = trans(mX % (wi * wspan)); + try + { + mat xtwxR = inv_sympd(xtwi * mX); + mat ci = xtwxR * xtwi; + mat si = mX.row(i) * inv(xtwi * mX) * xtwi; + vec pi = -trans(si); + pi(i) += 1.0; + double qi = sum(pi % pi); + trQtQ += qi * qi; + for (arma::uword j = i + 1; j < nDp; j++) + { + vec wj = mSpatialWeight.weightVector(j); + mat xtwj = trans(mX % (wj * wspan)); + try { + mat sj = mX.row(j) * inv_sympd(xtwj * mX) * xtwj; + vec pj = -trans(sj); + pj(j) += 1.0; + double qj = sum(pi % pj); + trQtQ += qj * qj * 2.0; + } + catch (const std::exception& e) + { + return DBL_MAX; + } + } + } + catch(const std::exception& e) + { + return DBL_MAX; + } + + } + return trQtQ; +} + +arma::vec GWRBasic::calcDiagBBase(arma::uword i) +{ + arma::uword nDp = mX.n_rows; + vec c(nDp, fill::zeros); + for (arma::uword j = 0; j < nDp; j++) + { + vec w = mSpatialWeight.weightVector(j); + mat xtw = trans(mX.each_col() % w); + try + { + mat C = trans(xtw) * inv_sympd(xtw * mX); + c += C.col(i); + } + catch (const std::exception& e) + { + return { DBL_MAX, DBL_MAX }; + } + } + vec diagB = (this->*mCalcDiagBCoreFunction)(i, c); + diagB = 1.0 / nDp * diagB; + return { sum(diagB), sum(diagB % diagB) }; +} + +vec GWRBasic::calcDiagBCoreSerial(uword i, const vec& c) +{ + arma::uword nDp = mX.n_rows; + vec diagB(nDp, fill::zeros); + std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); + for (arma::uword k = workRange.first; k < workRange.second; k++) + { + vec w = mSpatialWeight.weightVector(k); + mat xtw = (mX.each_col() % w).t(); + try + { + mat C = xtw.t() * inv_sympd(xtw * mX); + vec b = C.col(i); + diagB += (b % b - (1.0 / nDp) * (b % c)); + } + catch (const std::exception& e) + { + diagB.fill(DBL_MAX); + return diagB; + } + } + return diagB; +} + #ifdef ENABLE_OPENMP mat GWRBasic::predictOmp(const mat& locations, const mat& x, const vec& y) { @@ -582,6 +757,87 @@ arma::mat GWRBasic::fitCoreSHatOmp(const arma::mat& x, const arma::vec& y, const shat = sum(shat_all, 1); return betas.t(); } + +double GWRBasic::calcTrQtQCoreOmp() +{ + double trQtQ = 0.0; + uword nDp = mCoords.n_rows, nVar = mCoords.n_cols; + mat wspan(1, nVar, fill::ones); + std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); + int flag = true; +#pragma omp parallel for num_threads(mOmpThreadNum) + for (uword i = workRange.first; i < workRange.second; i++) + { + if (flag) { + vec wi = mSpatialWeight.weightVector(i); + mat xtwi = trans(mX % (wi * wspan)); + try + { + mat xtwxR = inv_sympd(xtwi * mX); + mat ci = xtwxR * xtwi; + mat si = mX.row(i) * inv(xtwi * mX) * xtwi; + vec pi = -trans(si); + pi(i) += 1.0; + double qi = sum(pi % pi); + trQtQ += qi * qi; + for (arma::uword j = i + 1; j < nDp; j++) + { + vec wj = mSpatialWeight.weightVector(j); + mat xtwj = trans(mX % (wj * wspan)); + try { + mat sj = mX.row(j) * inv_sympd(xtwj * mX) * xtwj; + vec pj = -trans(sj); + pj(j) += 1.0; + double qj = sum(pi % pj); + trQtQ += qj * qj * 2.0; + } + catch (const std::exception& e) + { + flag = false; + } + } + } + catch(const std::exception& e) + { + flag = false; + } + } + } + return flag ? trQtQ : DBL_MAX; +} + +vec GWRBasic::calcDiagBCoreOmp(uword i, const vec& c) +{ + arma::uword nDp = mX.n_rows; + mat diagB_all(nDp, mOmpThreadNum, fill::zeros); + std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); + int flag = true; +#pragma omp parallel for num_threads(mOmpThreadNum) + for (arma::uword k = workRange.first; k < workRange.second; k++) + { + if (flag) { + int thread = omp_get_thread_num(); + vec w = mSpatialWeight.weightVector(k); + mat xtw = trans(mX.each_col() % w); + try + { + mat C = trans(xtw) * inv_sympd(xtw * mX); + vec b = C.col(i); + diagB_all.col(thread) += (b % b - (1.0 / nDp) * (b % c)); + } + catch (const std::exception& e) + { + flag = false; + } + } + } + if (!flag) + { + diagB_all.fill(DBL_MAX); + } + vec diagB = sum(diagB_all, 1); + return diagB; +} #endif #ifdef ENABLE_CUDA @@ -1020,26 +1276,51 @@ arma::mat gwm::GWRBasic::fitMpi() return mBetas; } -// double GWRBasic::indepVarsSelectionCriterion(const mat& x, const vec& y, vec& shat) -// { -// try -// { -// mat S; -// mat betas = (this->*mFitCoreSHatFunction)(x, y, mSpatialWeight, shat, S); -// GWM_LOG_PROGRESS(++mIndepVarSelectionProgressCurrent, mIndepVarSelectionProgressTotal); -// if (mStatus == Status::Success) -// { -// double rss = GWRBase::RSS(x, y, betas); -// return rss; -// } -// else return DBL_MAX; -// } -// catch(const std::exception& e) -// { -// return DBL_MAX; -// } - -// } +double GWRBasic::calcTrQtQMpi() +{ + double trQtQ = 0.0; + uword nDp = mCoords.n_rows; + if (isStoreS()) + { + std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); + mat EmS = eye(nDp, nDp).eval().rows(workRange.first, workRange.second - 1) - mS; + mat Q, QtQ; + mat_quad_mpi(EmS, Q, mWorkerId, mWorkerNum); + mat_quad_mpi(Q, QtQ, mWorkerId, mWorkerNum); + trQtQ = mat_trace_mpi(QtQ, mWorkerId, mWorkerNum); + } + else + { + double trQtQi = (this->*mCalcTrQtQCoreFunction)(); + MPI_Allreduce(&trQtQi, &trQtQ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + } + return trQtQ; +} + +vec GWRBasic::calcDiagBMpi(uword i) +{ + arma::uword nDp = mX.n_rows; + vec c(nDp, fill::zeros); + for (arma::uword j = 0; j < nDp; j++) + { + vec w = mSpatialWeight.weightVector(j); + mat xtw = trans(mX.each_col() % w); + try + { + mat C = trans(xtw) * inv_sympd(xtw * mX); + c += C.col(i); + } + catch (const std::exception& e) + { + return { DBL_MAX, DBL_MAX }; + } + } + vec diagBi = (this->*mCalcDiagBCoreFunction)(i, c); + vec diagB(nDp, arma::fill::zeros); + MPI_Allreduce(diagBi.memptr(), diagB.memptr(), int(nDp), MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + diagB = 1.0 / double(nDp) * diagB; + return { sum(diagB), sum(diagB % diagB) }; +} #endif // ENABLE_MPI void GWRBasic::setBandwidthSelectionCriterion(const BandwidthSelectionCriterionType& criterion) @@ -1087,6 +1368,8 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitCoreFunction = &GWRBasic::fitCoreSerial; mFitCoreCVFunction = &GWRBasic::fitCoreCVSerial; mFitCoreSHatFunction = &GWRBasic::fitCoreSHatSerial; + mCalcTrQtQCoreFunction = &GWRBasic::calcTrQtQCoreSerial; + mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreSerial; break; #ifdef ENABLE_OPENMP case ParallelType::OpenMP: @@ -1095,6 +1378,8 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitCoreFunction = &GWRBasic::fitCoreOmp; mFitCoreCVFunction = &GWRBasic::fitCoreCVOmp; mFitCoreSHatFunction = &GWRBasic::fitCoreSHatOmp; + mCalcTrQtQCoreFunction = &GWRBasic::calcTrQtQCoreOmp; + mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreOmp; break; #endif // ENABLE_OPENMP #ifdef ENABLE_CUDA @@ -1104,6 +1389,8 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitCoreFunction = &GWRBasic::fitCoreCuda; mFitCoreCVFunction = &GWRBasic::fitCoreCVCuda; mFitCoreSHatFunction = &GWRBasic::fitCoreSHatCuda; + mCalcTrQtQCoreFunction = &GWRBasic::calcTrQtQCoreSerial; + mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreSerial; break; #endif // ENABLE_CUDA default: @@ -1111,6 +1398,8 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitCoreFunction = &GWRBasic::fitCoreSerial; mFitCoreCVFunction = &GWRBasic::fitCoreCVSerial; mFitCoreSHatFunction = &GWRBasic::fitCoreSHatSerial; + mCalcTrQtQCoreFunction = &GWRBasic::calcTrQtQCoreSerial; + mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreSerial; break; } #ifdef ENABLE_MPI @@ -1118,11 +1407,17 @@ void GWRBasic::setParallelType(const ParallelType& type) { mFitFunction = &GWRBasic::fitMpi; mIndepVarsSelectionCriterionFunction = &GWRBasic::indepVarsSelectionCriterionMpi; + mFTestFunction = &GWRBasic::fTestBase; + mCalcTrQtQFunction = &GWRBasic::calcTrQtQMpi; + mCalcDiagBFunction = &GWRBasic::calcDiagBMpi; } else { mFitFunction = &GWRBasic::fitBase; mIndepVarsSelectionCriterionFunction = &GWRBasic::indepVarsSelectionCriterion; + mFTestFunction = &GWRBasic::fTestBase; + mCalcTrQtQFunction = &GWRBasic::calcTrQtQBase; + mCalcDiagBFunction = &GWRBasic::calcDiagBBase; } #endif setBandwidthSelectionCriterion(mBandwidthSelectionCriterion); diff --git a/src/gwmodelpp/GWRMultiscale.cpp b/src/gwmodelpp/GWRMultiscale.cpp index 3e03f4a..41a2a55 100644 --- a/src/gwmodelpp/GWRMultiscale.cpp +++ b/src/gwmodelpp/GWRMultiscale.cpp @@ -954,7 +954,7 @@ vec GWRMultiscale::fitVarMpi(const size_t var) if (mHasHatMatrix) { mat SArrayi = mSArray.slice(var) - mS0; - mat_mul_mpi(S, SArrayi, mSArray.slice(var), mWorkerId, mWorkerNum, mWorkRangeSize); + mat_mul_mpi(S, SArrayi, mSArray.slice(var), mWorkerId, mWorkerNum); mSArray.slice(var) += S; mS0 = mSArray.slice(var) - SArrayi; } diff --git a/src/gwmodelpp/utils/armampi.cpp b/src/gwmodelpp/utils/armampi.cpp index 024db9b..69c3f1b 100644 --- a/src/gwmodelpp/utils/armampi.cpp +++ b/src/gwmodelpp/utils/armampi.cpp @@ -5,7 +5,7 @@ using namespace std; using namespace arma; -void mat_mul_mpi(mat& a, mat& b, mat& c, const int ip, const int np, const size_t range) +void mat_mul_mpi(mat& a, mat& b, mat& c, const int ip, const int np) { arma::uword m = a.n_rows, n = b.n_cols; int k = (int) b.n_rows; @@ -23,3 +23,33 @@ void mat_mul_mpi(mat& a, mat& b, mat& c, const int ip, const int np, const size_ MPI_Reduce(ci.memptr(), c.memptr(), ci.n_elem, MPI_DOUBLE, MPI_SUM, pi, MPI_COMM_WORLD); } } + +void mat_quad_mpi(mat& a, mat& aTa, const int ip, const int np) +{ + arma::uword m = a.n_cols, n = a.n_rows; + int k = (int) a.n_rows; + arma::Col a_rows(np, arma::fill::zeros); + MPI_Allgather(&k, 1, MPI_INT, a_rows.memptr(), 1, MPI_INT, MPI_COMM_WORLD); + aTa = mat(n, m, arma::fill::zeros); + mat a_buf; + for (int pi = 0; pi < np; pi++) + { + arma::Col a_disp = arma::cumsum(a_rows) - a_rows; + a_buf = a.cols(a_disp(pi), a_disp(pi) + a_rows(pi) - 1); + mat aTai = a_buf.t() * a; + MPI_Reduce(aTai.memptr(), aTa.memptr(), aTai.n_elem, MPI_DOUBLE, MPI_SUM, pi, MPI_COMM_WORLD); + } +} + +double mat_trace_mpi(arma::mat& a, const int ip, const int np) +{ + int k = (int) a.n_rows; + arma::Col a_rows(np, arma::fill::zeros); + MPI_Allgather(&k, 1, MPI_INT, a_rows.memptr(), 1, MPI_INT, MPI_COMM_WORLD); + arma::Col a_disp = arma::cumsum(a_rows) - a_rows; + mat ai = a.cols(a_disp(ip), a_disp(ip) + a_rows(ip) - 1); + double ti = trace(ai); + double tr = 0.0; + MPI_Allreduce(&ti, &tr, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + return tr; +} diff --git a/test/mpi/testGWRBasicMpi.cpp b/test/mpi/testGWRBasicMpi.cpp index 2c44061..d435540 100644 --- a/test/mpi/testGWRBasicMpi.cpp +++ b/test/mpi/testGWRBasicMpi.cpp @@ -21,6 +21,11 @@ using namespace std; using namespace arma; using namespace gwm; +array convFTestArray(GWRBasic::FTestResult f) +{ + return { f.s, f.df1, f.df2, f.p }; +} + TEST_CASE("BasicGWR: LondonHP") { int iProcess, nProcess; @@ -303,6 +308,79 @@ TEST_CASE("BasicGWR: LondonHP") } } + SECTION("F test | adaptive bandwidth | no bandwidth optimization | no variable optimization") { + auto parallel = GENERATE_REF(values(parallel_list)); + INFO("Parallel:" << ParallelTypeDict.at(parallel)); + + CRSDistance distance(false); + BandwidthWeight bandwidth(36, true, BandwidthWeight::Gaussian); + SpatialWeight spatial(&bandwidth, &distance); + + GWRBasic algorithm; + algorithm.setCoords(londonhp100_coord); + algorithm.setDependentVariable(y); + algorithm.setIndependentVariables(x); + algorithm.setSpatialWeight(spatial); + algorithm.setParallelType(parallel); + algorithm.setWorkerId(iProcess); + algorithm.setWorkerNum(nProcess); +#ifdef ENABLE_OPENMP + if (parallel == ParallelType::OpenMP) + { + algorithm.setOmpThreadNum(omp_get_num_threads()); + } +#endif // ENABLE_OPENMP +#ifdef ENABLE_CUDA + if (parallel == ParallelType::CUDA) + { + algorithm.setGPUId(0); + algorithm.setGroupSize(64); + } +#endif // ENABLE_CUDA + algorithm.setIsDoFtest(true); + REQUIRE_NOTHROW(algorithm.fit()); + if (iProcess == 0) + { + auto results = algorithm.fTestResults(); + auto f1 = convFTestArray(get<0>(results)); + auto f2 = convFTestArray(get<1>(results)); + vector> f3; + for (auto &&i : get<2>(results)) + { + f3.push_back(convFTestArray(i)); + } + auto f4 = convFTestArray(get<3>(results)); + REQUIRE_THAT(f1[0], Catch::Matchers::WithinAbs(0.9342, 1e-3)); + REQUIRE_THAT(f1[1], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f1[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f1[3], Catch::Matchers::WithinAbs(0.3710, 1e-3)); + REQUIRE_THAT(f2[0], Catch::Matchers::WithinAbs(1.9762, 1e-3)); + REQUIRE_THAT(f2[1], Catch::Matchers::WithinAbs(13.1571, 1e-3)); + REQUIRE_THAT(f2[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f2[3], Catch::Matchers::WithinAbs(0.0303, 1e-3)); + REQUIRE_THAT(f4[0], Catch::Matchers::WithinAbs(0.8752, 1e-3)); + REQUIRE_THAT(f4[1], Catch::Matchers::WithinAbs(89.9377, 1e-3)); + REQUIRE_THAT(f4[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f4[3], Catch::Matchers::WithinAbs(0.2619, 1e-3)); + REQUIRE_THAT(f3[0][0], Catch::Matchers::WithinAbs(0.4655, 1e-3)); + REQUIRE_THAT(f3[0][1], Catch::Matchers::WithinAbs(27.1298, 1e-3)); + REQUIRE_THAT(f3[0][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[0][3], Catch::Matchers::WithinAbs(0.9872, 1e-3)); + REQUIRE_THAT(f3[1][0], Catch::Matchers::WithinAbs(0.5022, 1e-3)); + REQUIRE_THAT(f3[1][1], Catch::Matchers::WithinAbs(18.3315, 1e-3)); + REQUIRE_THAT(f3[1][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[1][3], Catch::Matchers::WithinAbs(0.9526, 1e-3)); + REQUIRE_THAT(f3[2][0], Catch::Matchers::WithinAbs(0.6415, 1e-3)); + REQUIRE_THAT(f3[2][1], Catch::Matchers::WithinAbs(29.6827, 1e-3)); + REQUIRE_THAT(f3[2][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[2][3], Catch::Matchers::WithinAbs(0.9151, 1e-3)); + REQUIRE_THAT(f3[3][0], Catch::Matchers::WithinAbs(0.3019, 1e-3)); + REQUIRE_THAT(f3[3][1], Catch::Matchers::WithinAbs(24.7164, 1e-3)); + REQUIRE_THAT(f3[3][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[3][3], Catch::Matchers::WithinAbs(0.9994, 1e-3)); + } + } + } int main(int argc, char *argv[]) diff --git a/test/mpi/testMpiMatMul.cpp b/test/mpi/testMpiMatMul.cpp index dc70b5c..2bb7299 100644 --- a/test/mpi/testMpiMatMul.cpp +++ b/test/mpi/testMpiMatMul.cpp @@ -32,7 +32,7 @@ TEST_CASE("mat mul mpi") A = A_all.rows(from, to - 1); B = B_all.rows(from, to - 1); // printf("process %d begin mat mul\n", ip); - REQUIRE_NOTHROW(mat_mul_mpi(A, B, C, ip, np, size)); + REQUIRE_NOTHROW(mat_mul_mpi(A, B, C, ip, np)); // printf("process %d end mat mul\n", ip); if (ip == 0) @@ -59,6 +59,91 @@ TEST_CASE("mat mul mpi") } } +TEST_CASE("mat quad mpi") +{ + int ip, np; + MPI_Comm_size(MPI_COMM_WORLD, &np); + MPI_Comm_rank(MPI_COMM_WORLD, &ip); + + uword n = 100; + uword size = n / np + (n % np == 0 ? 0 : 1); + uword from = ip * size, to = min(from + size, n); + + // printf("range from %llu to %llu\n", from, to); + + mat A_all, C_all; + A_all = mat(n, n, arma::fill::randn); + if (ip == 0) + { + C_all = A_all.t() * A_all; + } + + mat A, C; + A = A_all.rows(from, to - 1); + // printf("process %d begin mat mul\n", ip); + REQUIRE_NOTHROW(mat_quad_mpi(A, C, ip, np)); + // printf("process %d end mat mul\n", ip); + + if (ip == 0) + { + mat C_gather(n, n); + C_gather.rows(from, to - 1) = C; + uvec received(np, fill::zeros); + received(0) = 1; + auto bufsize = size * n; + double *buf = new double[size * n]; + while (!all(received == 1)) + { + MPI_Status status; + MPI_Recv(buf, bufsize, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); + received(status.MPI_SOURCE) = 1; + uword ifrom = status.MPI_SOURCE * size, ito = min(ifrom + size, n), irange = ito - ifrom; + C_gather.rows(ifrom, ito - 1) = mat(buf, irange, n); + } + REQUIRE(approx_equal(C_gather, C_all, "absdiff", 1e-6)); + } + else + { + MPI_Send(C.memptr(), C.n_elem, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); + } +} + +TEST_CASE("mat trace mpi") +{ + int ip, np; + MPI_Comm_size(MPI_COMM_WORLD, &np); + MPI_Comm_rank(MPI_COMM_WORLD, &ip); + + uword n = 100; + uword size = n / np + (n % np == 0 ? 0 : 1); + uword from = ip * size, to = min(from + size, n); + + // printf("range from %llu to %llu\n", from, to); + + mat A_all; + double trA_all; + A_all = mat(n, n, arma::fill::randn); + if (ip == 0) + { + trA_all = trace(A_all); + } + + mat A; + double trA = 0.0; + A = A_all.rows(from, to - 1); + // printf("process %d begin mat mul\n", ip); + REQUIRE_NOTHROW(([&]() + { + trA = mat_trace_mpi(A, ip, np); + })()); + // printf("process %d end mat mul\n", ip); + + if (ip == 0) + { + REQUIRE_THAT(trA, Catch::Matchers::WithinAbs(trA_all, 1e-6)); + } +} + int main(int argc, char *argv[]) { MPI_Init(&argc, &argv); diff --git a/test/testGWRBasic.cpp b/test/testGWRBasic.cpp index b1e76f0..f490ab9 100644 --- a/test/testGWRBasic.cpp +++ b/test/testGWRBasic.cpp @@ -1,6 +1,7 @@ #define CATCH_CONFIG_MAIN #include +#include #include #include #include @@ -20,6 +21,11 @@ using namespace std; using namespace arma; using namespace gwm; +array convFTestArray(GWRBasic::FTestResult f) +{ + return { f.s, f.df1, f.df2, f.p }; +} + TEST_CASE("BasicGWR: LondonHP") { mat londonhp100_coord, londonhp100_data; @@ -259,6 +265,73 @@ TEST_CASE("BasicGWR: LondonHP") REQUIRE_THAT(diagnostic.RSquareAdjust, Catch::Matchers::WithinAbs(0.678982114793865, 1e-8)); } + SECTION("F test | adaptive bandwidth | no bandwidth optimization | no variable optimization") { + auto parallel = GENERATE_REF(values(parallel_list)); + INFO("Parallel:" << ParallelTypeDict.at(parallel)); + + CRSDistance distance(false); + BandwidthWeight bandwidth(36, true, BandwidthWeight::Gaussian); + SpatialWeight spatial(&bandwidth, &distance); + + GWRBasic algorithm; + algorithm.setCoords(londonhp100_coord); + algorithm.setDependentVariable(y); + algorithm.setIndependentVariables(x); + algorithm.setSpatialWeight(spatial); + algorithm.setParallelType(parallel); +#ifdef ENABLE_OPENMP + if (parallel == ParallelType::OpenMP) + { + algorithm.setOmpThreadNum(omp_get_num_threads()); + } +#endif // ENABLE_OPENMP +#ifdef ENABLE_CUDA + if (parallel == ParallelType::CUDA) + { + algorithm.setGPUId(0); + algorithm.setGroupSize(64); + } +#endif // ENABLE_CUDA + algorithm.setIsDoFtest(true); + REQUIRE_NOTHROW(algorithm.fit()); + auto results = algorithm.fTestResults(); + auto f1 = convFTestArray(get<0>(results)); + auto f2 = convFTestArray(get<1>(results)); + vector> f3; + for (auto &&i : get<2>(results)) + { + f3.push_back(convFTestArray(i)); + } + auto f4 = convFTestArray(get<3>(results)); + REQUIRE_THAT(f1[0], Catch::Matchers::WithinAbs(0.9342, 1e-3)); + REQUIRE_THAT(f1[1], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f1[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f1[3], Catch::Matchers::WithinAbs(0.3710, 1e-3)); + REQUIRE_THAT(f2[0], Catch::Matchers::WithinAbs(1.9762, 1e-3)); + REQUIRE_THAT(f2[1], Catch::Matchers::WithinAbs(13.1571, 1e-3)); + REQUIRE_THAT(f2[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f2[3], Catch::Matchers::WithinAbs(0.0303, 1e-3)); + REQUIRE_THAT(f4[0], Catch::Matchers::WithinAbs(0.8752, 1e-3)); + REQUIRE_THAT(f4[1], Catch::Matchers::WithinAbs(89.9377, 1e-3)); + REQUIRE_THAT(f4[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f4[3], Catch::Matchers::WithinAbs(0.2619, 1e-3)); + REQUIRE_THAT(f3[0][0], Catch::Matchers::WithinAbs(0.4655, 1e-3)); + REQUIRE_THAT(f3[0][1], Catch::Matchers::WithinAbs(27.1298, 1e-3)); + REQUIRE_THAT(f3[0][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[0][3], Catch::Matchers::WithinAbs(0.9872, 1e-3)); + REQUIRE_THAT(f3[1][0], Catch::Matchers::WithinAbs(0.5022, 1e-3)); + REQUIRE_THAT(f3[1][1], Catch::Matchers::WithinAbs(18.3315, 1e-3)); + REQUIRE_THAT(f3[1][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[1][3], Catch::Matchers::WithinAbs(0.9526, 1e-3)); + REQUIRE_THAT(f3[2][0], Catch::Matchers::WithinAbs(0.6415, 1e-3)); + REQUIRE_THAT(f3[2][1], Catch::Matchers::WithinAbs(29.6827, 1e-3)); + REQUIRE_THAT(f3[2][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[2][3], Catch::Matchers::WithinAbs(0.9151, 1e-3)); + REQUIRE_THAT(f3[3][0], Catch::Matchers::WithinAbs(0.3019, 1e-3)); + REQUIRE_THAT(f3[3][1], Catch::Matchers::WithinAbs(24.7164, 1e-3)); + REQUIRE_THAT(f3[3][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[3][3], Catch::Matchers::WithinAbs(0.9994, 1e-3)); + } }