From 69a64179997b56ba29804286cd70b81fd92eeea6 Mon Sep 17 00:00:00 2001 From: HPDell Date: Thu, 1 Aug 2024 13:14:44 +0100 Subject: [PATCH 01/11] add: calc trQtQ and diagB (serial) --- include/gwmodelpp/GWRBasic.h | 18 +++++++ src/gwmodelpp/GWRBasic.cpp | 99 ++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/include/gwmodelpp/GWRBasic.h b/include/gwmodelpp/GWRBasic.h index 30665d2..f18f489 100644 --- a/include/gwmodelpp/GWRBasic.h +++ b/include/gwmodelpp/GWRBasic.h @@ -51,6 +51,10 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe typedef arma::mat (GWRBasic::*FitCoreCalculator)(const arma::mat&, const arma::vec&, const SpatialWeight&); //!< \~english Fit function declaration. \~chinese 拟合函数声明。 typedef arma::mat (GWRBasic::*FitCoreSHatCalculator)(const arma::mat&, const arma::vec&, const SpatialWeight&, arma::vec&); //!< \~english Fit function declaration. \~chinese 拟合函数声明。 typedef arma::mat (GWRBasic::*FitCoreCVCalculator)(const arma::mat&, const arma::vec&, const SpatialWeight&); //!< \~english Fit function declaration. \~chinese 拟合函数声明。 + typedef double (GWRBasic::*TrQtQCalculator)(); + typedef double (GWRBasic::*TrQtQCoreCalculator)(); + typedef arma::vec (GWRBasic::*DiagBCalculator)(arma::uword); + typedef arma::vec (GWRBasic::*DiagBCoreCalculator)(arma::uword); typedef double (GWRBasic::*BandwidthSelectionCriterionCalculator)(BandwidthWeight*); //!< \~english Declaration of criterion calculator for bandwidth selection. \~chinese 带宽优选指标计算函数声明。 typedef double (GWRBasic::*IndepVarsSelectCriterionCalculator)(const std::vector&); //!< \~english Declaration of criterion calculator for variable selection. \~chinese 变量优选指标计算函数声明。 @@ -508,6 +512,12 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe */ arma::mat fitBase(); + double calcTrQtQBase(); + + arma::vec calcDiagBBase(arma::uword i) + { + return (this->*mCalcDiagBCoreFunction)(i); + } private: @@ -517,6 +527,10 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe arma::mat fitCoreCVSerial(const arma::mat& x, const arma::vec& y, const SpatialWeight& sw); + double calcTrQtQCoreSerial(); + + arma::vec calcDiagBCoreSerial(arma::uword i); + #ifdef ENABLE_OPENMP /** @@ -772,6 +786,10 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe arma::cube mC;//!< \~english All \f$S\f$ matrices. \~chinese 所有 \f$C\f$ 矩阵。 bool mStoreS = false; //!< \~english Whether to save S \~chinese 是否保存 S 矩阵 bool mStoreC = false; //!< \~english Whether to save C \~chinese 是否保存 C 矩阵 + TrQtQCalculator mCalcTrQtQFunction = &GWRBasic::calcTrQtQBase; + TrQtQCoreCalculator mCalcTrQtQCoreFUnction = &GWRBasic::calcTrQtQCoreSerial; + DiagBCalculator mCalcDiagBFunction = &GWRBasic::calcDiagBBase; + DiagBCoreCalculator mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreSerial; }; } diff --git a/src/gwmodelpp/GWRBasic.cpp b/src/gwmodelpp/GWRBasic.cpp index 12720c7..67669fd 100644 --- a/src/gwmodelpp/GWRBasic.cpp +++ b/src/gwmodelpp/GWRBasic.cpp @@ -416,6 +416,105 @@ double gwm::GWRBasic::indepVarsSelectionCriterion(const vector& indepVar return DBL_MAX; } } +double GWRBasic::calcTrQtQBase() +{ + double trQtQ = 0.0; + uword nDp = mCoords.n_rows; + if (isStoreS()) + { + mat EmS = eye(nDp, nDp) - mS; + mat Q = trans(EmS) * EmS; + trQtQ = sum(diagvec(trans(Q) * Q)); + } + else + { + trQtQ = (this->*mCalcTrQtQCoreFUnction)(); + } + return trQtQ; +} + +double GWRBasic::calcTrQtQCoreSerial() +{ + double trQtQ = 0.0; + uword nDp = mCoords.n_rows, nVar = mCoords.n_cols; + mat wspan(1, nVar, fill::ones); + for (uword i = 0; i < nDp; i++) + { + vec wi = mSpatialWeight.weightVector(i); + mat xtwi = trans(mX % (wi * wspan)); + try + { + mat xtwxR = inv_sympd(xtwi * mX); + mat ci = xtwxR * xtwi; + mat si = mX.row(i) * inv(xtwi * mX) * xtwi; + vec pi = -trans(si); + pi(i) += 1.0; + double qi = sum(pi % pi); + trQtQ += qi * qi; + for (arma::uword j = i + 1; j < nDp; j++) + { + vec wj = mSpatialWeight.weightVector(j); + mat xtwj = trans(mX % (wj * wspan)); + try { + mat sj = mX.row(j) * inv_sympd(xtwj * mX) * xtwj; + vec pj = -trans(sj); + pj(j) += 1.0; + double qj = sum(pi % pj); + trQtQ += qj * qj * 2.0; + } + catch (const std::exception& e) + { + return DBL_MAX; + } + } + } + catch(const std::exception& e) + { + return DBL_MAX; + } + + } + return trQtQ; +} + +vec GWRBasic::calcDiagBCoreSerial(uword i) +{ + arma::uword nDp = mX.n_rows, nVar = mX.n_cols; + vec diagB(nDp, fill::zeros), c(nDp, fill::zeros); + mat ek = eye(nVar, nVar); + mat wspan(1, nVar, fill::ones); + for (arma::uword j = 0; j < nDp; j++) + { + vec w = mSpatialWeight.weightVector(j); + mat xtw = trans(mX % (w * wspan)); + try + { + mat C = trans(xtw) * inv_sympd(xtw * mX); + c += C.col(i); + } + catch (const std::exception& e) + { + return { DBL_MAX, DBL_MAX }; + } + } + for (arma::uword k = 0; k < nDp; k++) + { + vec w = mSpatialWeight.weightVector(k); + mat xtw = trans(mX % (w * wspan)); + try + { + mat C = trans(xtw) * inv_sympd(xtw * mX); + vec b = C.col(i); + diagB += (b % b - (1.0 / nDp) * (b % c)); + } + catch (const std::exception& e) + { + return { DBL_MAX, DBL_MAX }; + } + } + diagB = 1.0 / nDp * diagB; + return { sum(diagB), sum(diagB % diagB) }; +} #ifdef ENABLE_OPENMP mat GWRBasic::predictOmp(const mat& locations, const mat& x, const vec& y) From d4573b3b7a139a5fc1dbcad51cb0684f1a05b8cc Mon Sep 17 00:00:00 2001 From: HPDell Date: Thu, 1 Aug 2024 13:16:02 +0100 Subject: [PATCH 02/11] add: f test base --- include/gwmodelpp/GWRBasic.h | 22 +++++++++++++ src/gwmodelpp/GWRBasic.cpp | 62 ++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/include/gwmodelpp/GWRBasic.h b/include/gwmodelpp/GWRBasic.h index f18f489..9f89e1d 100644 --- a/include/gwmodelpp/GWRBasic.h +++ b/include/gwmodelpp/GWRBasic.h @@ -51,6 +51,7 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe typedef arma::mat (GWRBasic::*FitCoreCalculator)(const arma::mat&, const arma::vec&, const SpatialWeight&); //!< \~english Fit function declaration. \~chinese 拟合函数声明。 typedef arma::mat (GWRBasic::*FitCoreSHatCalculator)(const arma::mat&, const arma::vec&, const SpatialWeight&, arma::vec&); //!< \~english Fit function declaration. \~chinese 拟合函数声明。 typedef arma::mat (GWRBasic::*FitCoreCVCalculator)(const arma::mat&, const arma::vec&, const SpatialWeight&); //!< \~english Fit function declaration. \~chinese 拟合函数声明。 + typedef void (GWRBasic::*FTestCalculator)(); typedef double (GWRBasic::*TrQtQCalculator)(); typedef double (GWRBasic::*TrQtQCoreCalculator)(); typedef arma::vec (GWRBasic::*DiagBCalculator)(arma::uword); @@ -59,6 +60,14 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe typedef double (GWRBasic::*BandwidthSelectionCriterionCalculator)(BandwidthWeight*); //!< \~english Declaration of criterion calculator for bandwidth selection. \~chinese 带宽优选指标计算函数声明。 typedef double (GWRBasic::*IndepVarsSelectCriterionCalculator)(const std::vector&); //!< \~english Declaration of criterion calculator for variable selection. \~chinese 变量优选指标计算函数声明。 + struct FTestResult + { + double s = 0.0; + double df1 = 0.0; + double df2 = 0.0; + double p = 0.0; + }; + private: /** @@ -400,6 +409,11 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe arma::mat fit() override; + void fTest() + { + (this->*mFTestFunction)(); + } + public: // Implement IVariableSelectable Status getCriterion(const std::vector& variables, double& criterion) override { @@ -512,6 +526,8 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe */ arma::mat fitBase(); + void fTestBase(); + double calcTrQtQBase(); arma::vec calcDiagBBase(arma::uword i) @@ -786,6 +802,12 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe arma::cube mC;//!< \~english All \f$S\f$ matrices. \~chinese 所有 \f$C\f$ 矩阵。 bool mStoreS = false; //!< \~english Whether to save S \~chinese 是否保存 S 矩阵 bool mStoreC = false; //!< \~english Whether to save C \~chinese 是否保存 C 矩阵 + + FTestResult mF1Test; + FTestResult mF2Test; + std::vector mF3Test; + FTestResult mF4Test; + FTestCalculator mFTestFunction = &GWRBasic::fTestBase; TrQtQCalculator mCalcTrQtQFunction = &GWRBasic::calcTrQtQBase; TrQtQCoreCalculator mCalcTrQtQCoreFUnction = &GWRBasic::calcTrQtQCoreSerial; DiagBCalculator mCalcDiagBFunction = &GWRBasic::calcDiagBBase; diff --git a/src/gwmodelpp/GWRBasic.cpp b/src/gwmodelpp/GWRBasic.cpp index 67669fd..e3b336f 100644 --- a/src/gwmodelpp/GWRBasic.cpp +++ b/src/gwmodelpp/GWRBasic.cpp @@ -1,3 +1,4 @@ +#include #include "GWRBasic.h" #include "BandwidthSelector.h" #include "VariableForwardSelector.h" @@ -416,6 +417,67 @@ double gwm::GWRBasic::indepVarsSelectionCriterion(const vector& indepVar return DBL_MAX; } } + +void gwm::GWRBasic::fTestBase() +{ + double v1 = mSHat(0), v2 = mSHat(1); + double nDp = double(mCoords.n_rows), nVar = double(mX.n_cols); + double RSSg = RSS(mX, mY, mBetas); + vec betaOls = solve(mX, mY); + vec residualOls = mY - mX * betaOls; + double RSSo = sum(residualOls % residualOls); + double DFo = nDp - nVar; + double delta1 = 1.0 * nDp - 2.0 * v1 + v2; + double sigma21 = RSSg / delta1; + double lDelta1 = sum(mQDiag), lDelta2 = (this->*mCalcTrQtQFunction)(); + //========= + // F1 Test + //========= + mF1Test.s = (RSSg/lDelta1)/(RSSo/DFo); + mF1Test.df1 = lDelta1 * lDelta1 / lDelta2; + mF1Test.df2 = DFo; + mF1Test.p = gsl_cdf_fdist_P(mF1Test.s, mF1Test.df1, mF1Test.df2); + //========= + // F2 Test + //========= + mF2Test.s = ((RSSo-RSSg)/(DFo-lDelta1))/(RSSo/DFo); + mF2Test.df1 = (DFo-lDelta1) * (DFo-lDelta1) / (DFo - 2 * lDelta1 + lDelta2); + mF2Test.df2 = DFo; + mF2Test.p = gsl_cdf_fdist_Q(mF2Test.s, mF2Test.df1, mF2Test.df2); + //========= + // F3 Test + //========= + mF3Test.resize(mX.n_cols); + vec vk2(nVar, arma::fill::zeros); + for (int i = 0; i < nVar; i++) + { + vec betasi = mBetas.col(i); + vec betasJndp = vec(nDp, fill::ones) * (sum(betasi) * 1.0 / nDp); + vk2(i) = (1.0 / nDp) * det(trans(betasi - betasJndp) * betasi); + } + + for (uword i = 0; i < mX.n_cols; i++) + { + vec diagB = (this->*mCalcDiagBFunction)(i); + double g1 = diagB(0); + double g2 = diagB(1); + double numdf = g1 * g1 / g2; + FTestResult f3i; + f3i.s = (vk2(i) / g1) / sigma21; + f3i.df1 = numdf; + f3i.df2 = mF1Test.df1; + f3i.p = gsl_cdf_fdist_Q(f3i.s, numdf, mF1Test.df1); + mF3Test[i] = f3i; + } + //========= + // F4 Test + //========= + mF4Test.s = RSSg / RSSo; + mF4Test.df1 = delta1; + mF4Test.df2 = DFo; + mF4Test.p = gsl_cdf_fdist_P(mF4Test.s, mF4Test.df1, mF4Test.df2); +} + double GWRBasic::calcTrQtQBase() { double trQtQ = 0.0; From a27ff1ac843658ed9b49dff1c8bd98e2eaf3e596 Mon Sep 17 00:00:00 2001 From: HPDell Date: Thu, 1 Aug 2024 14:43:31 +0100 Subject: [PATCH 03/11] add: multithreading implementation --- include/gwmodelpp/GWRBasic.h | 4 ++ src/gwmodelpp/GWRBasic.cpp | 97 ++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/include/gwmodelpp/GWRBasic.h b/include/gwmodelpp/GWRBasic.h index 9f89e1d..ed9b050 100644 --- a/include/gwmodelpp/GWRBasic.h +++ b/include/gwmodelpp/GWRBasic.h @@ -623,6 +623,10 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe */ arma::mat fitCoreSHatOmp(const arma::mat& x, const arma::vec& y, const SpatialWeight& sw, arma::vec& shat); + double calcTrQtQCoreOmp(); + + arma::vec calcDiagBCoreOmp(arma::uword i); + #endif #ifdef ENABLE_CUDA diff --git a/src/gwmodelpp/GWRBasic.cpp b/src/gwmodelpp/GWRBasic.cpp index e3b336f..dbf11ed 100644 --- a/src/gwmodelpp/GWRBasic.cpp +++ b/src/gwmodelpp/GWRBasic.cpp @@ -743,6 +743,91 @@ arma::mat GWRBasic::fitCoreSHatOmp(const arma::mat& x, const arma::vec& y, const shat = sum(shat_all, 1); return betas.t(); } + +double GWRBasic::calcTrQtQCoreOmp() +{ + double trQtQ = 0.0; + uword nDp = mCoords.n_rows, nVar = mCoords.n_cols; + mat wspan(1, nVar, fill::ones); +#pragma omp parallel for num_threads(mOmpThreadNum) + for (uword i = 0; i < nDp; i++) + { + vec wi = mSpatialWeight.weightVector(i); + mat xtwi = trans(mX % (wi * wspan)); + try + { + mat xtwxR = inv_sympd(xtwi * mX); + mat ci = xtwxR * xtwi; + mat si = mX.row(i) * inv(xtwi * mX) * xtwi; + vec pi = -trans(si); + pi(i) += 1.0; + double qi = sum(pi % pi); + trQtQ += qi * qi; + for (arma::uword j = i + 1; j < nDp; j++) + { + vec wj = mSpatialWeight.weightVector(j); + mat xtwj = trans(mX % (wj * wspan)); + try { + mat sj = mX.row(j) * inv_sympd(xtwj * mX) * xtwj; + vec pj = -trans(sj); + pj(j) += 1.0; + double qj = sum(pi % pj); + trQtQ += qj * qj * 2.0; + } + catch (const std::exception& e) + { + return DBL_MAX; + } + } + } + catch(const std::exception& e) + { + return DBL_MAX; + } + + } + return trQtQ; +} + +vec GWRBasic::calcDiagBCoreOmp(uword i) +{ + arma::uword nDp = mX.n_rows, nVar = mX.n_cols; + vec diagB(nDp, fill::zeros), c(nDp, fill::zeros); + mat ek = eye(nVar, nVar); + mat wspan(1, nVar, fill::ones); + for (arma::uword j = 0; j < nDp; j++) + { + vec w = mSpatialWeight.weightVector(j); + mat xtw = trans(mX % (w * wspan)); + try + { + mat C = trans(xtw) * inv_sympd(xtw * mX); + c += C.col(i); + } + catch (const std::exception& e) + { + return { DBL_MAX, DBL_MAX }; + } + } +#pragma omp parallel for num_threads(mOmpThreadNum) + for (arma::uword k = 0; k < nDp; k++) + { + vec w = mSpatialWeight.weightVector(k); + mat xtw = trans(mX % (w * wspan)); + try + { + mat C = trans(xtw) * inv_sympd(xtw * mX); + vec b = C.col(i); + diagB += (b % b - (1.0 / nDp) * (b % c)); + } + catch (const std::exception& e) + { + return { DBL_MAX, DBL_MAX }; + } + } + diagB = 1.0 / nDp * diagB; + return { sum(diagB), sum(diagB % diagB) }; +} #endif #ifdef ENABLE_CUDA @@ -1248,6 +1333,8 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitCoreFunction = &GWRBasic::fitCoreSerial; mFitCoreCVFunction = &GWRBasic::fitCoreCVSerial; mFitCoreSHatFunction = &GWRBasic::fitCoreSHatSerial; + mCalcTrQtQCoreFUnction = &GWRBasic::calcTrQtQCoreSerial; + mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreSerial; break; #ifdef ENABLE_OPENMP case ParallelType::OpenMP: @@ -1256,6 +1343,8 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitCoreFunction = &GWRBasic::fitCoreOmp; mFitCoreCVFunction = &GWRBasic::fitCoreCVOmp; mFitCoreSHatFunction = &GWRBasic::fitCoreSHatOmp; + mCalcTrQtQCoreFUnction = &GWRBasic::calcTrQtQCoreOmp; + mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreOmp; break; #endif // ENABLE_OPENMP #ifdef ENABLE_CUDA @@ -1272,6 +1361,8 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitCoreFunction = &GWRBasic::fitCoreSerial; mFitCoreCVFunction = &GWRBasic::fitCoreCVSerial; mFitCoreSHatFunction = &GWRBasic::fitCoreSHatSerial; + mCalcTrQtQCoreFUnction = &GWRBasic::calcTrQtQCoreSerial; + mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreSerial; break; } #ifdef ENABLE_MPI @@ -1279,11 +1370,17 @@ void GWRBasic::setParallelType(const ParallelType& type) { mFitFunction = &GWRBasic::fitMpi; mIndepVarsSelectionCriterionFunction = &GWRBasic::indepVarsSelectionCriterionMpi; + mFTestFunction = &GWRBasic::fTestBase; + mCalcTrQtQFunction = &GWRBasic::calcTrQtQBase; + mCalcDiagBFunction = &GWRBasic::calcDiagBBase; } else { mFitFunction = &GWRBasic::fitBase; mIndepVarsSelectionCriterionFunction = &GWRBasic::indepVarsSelectionCriterion; + mFTestFunction = &GWRBasic::fTestBase; + mCalcTrQtQFunction = &GWRBasic::calcTrQtQBase; + mCalcDiagBFunction = &GWRBasic::calcDiagBBase; } #endif setBandwidthSelectionCriterion(mBandwidthSelectionCriterion); From 5c388f7fe07309d7701f2f05374765428ee736ee Mon Sep 17 00:00:00 2001 From: HPDell Date: Thu, 1 Aug 2024 16:48:50 +0100 Subject: [PATCH 04/11] fix: return in omp --- src/gwmodelpp/GWRBasic.cpp | 88 +++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/src/gwmodelpp/GWRBasic.cpp b/src/gwmodelpp/GWRBasic.cpp index dbf11ed..d3ab9c1 100644 --- a/src/gwmodelpp/GWRBasic.cpp +++ b/src/gwmodelpp/GWRBasic.cpp @@ -749,44 +749,46 @@ double GWRBasic::calcTrQtQCoreOmp() double trQtQ = 0.0; uword nDp = mCoords.n_rows, nVar = mCoords.n_cols; mat wspan(1, nVar, fill::ones); + int flag = true; #pragma omp parallel for num_threads(mOmpThreadNum) for (uword i = 0; i < nDp; i++) { - vec wi = mSpatialWeight.weightVector(i); - mat xtwi = trans(mX % (wi * wspan)); - try - { - mat xtwxR = inv_sympd(xtwi * mX); - mat ci = xtwxR * xtwi; - mat si = mX.row(i) * inv(xtwi * mX) * xtwi; - vec pi = -trans(si); - pi(i) += 1.0; - double qi = sum(pi % pi); - trQtQ += qi * qi; - for (arma::uword j = i + 1; j < nDp; j++) + if (flag) { + vec wi = mSpatialWeight.weightVector(i); + mat xtwi = trans(mX % (wi * wspan)); + try { - vec wj = mSpatialWeight.weightVector(j); - mat xtwj = trans(mX % (wj * wspan)); - try { - mat sj = mX.row(j) * inv_sympd(xtwj * mX) * xtwj; - vec pj = -trans(sj); - pj(j) += 1.0; - double qj = sum(pi % pj); - trQtQ += qj * qj * 2.0; - } - catch (const std::exception& e) + mat xtwxR = inv_sympd(xtwi * mX); + mat ci = xtwxR * xtwi; + mat si = mX.row(i) * inv(xtwi * mX) * xtwi; + vec pi = -trans(si); + pi(i) += 1.0; + double qi = sum(pi % pi); + trQtQ += qi * qi; + for (arma::uword j = i + 1; j < nDp; j++) { - return DBL_MAX; + vec wj = mSpatialWeight.weightVector(j); + mat xtwj = trans(mX % (wj * wspan)); + try { + mat sj = mX.row(j) * inv_sympd(xtwj * mX) * xtwj; + vec pj = -trans(sj); + pj(j) += 1.0; + double qj = sum(pi % pj); + trQtQ += qj * qj * 2.0; + } + catch (const std::exception& e) + { + flag = false; + } } } + catch(const std::exception& e) + { + flag = false; + } } - catch(const std::exception& e) - { - return DBL_MAX; - } - } - return trQtQ; + return flag ? trQtQ : DBL_MAX; } vec GWRBasic::calcDiagBCoreOmp(uword i) @@ -809,22 +811,28 @@ vec GWRBasic::calcDiagBCoreOmp(uword i) return { DBL_MAX, DBL_MAX }; } } + int flag = true; #pragma omp parallel for num_threads(mOmpThreadNum) for (arma::uword k = 0; k < nDp; k++) { - vec w = mSpatialWeight.weightVector(k); - mat xtw = trans(mX % (w * wspan)); - try - { - mat C = trans(xtw) * inv_sympd(xtw * mX); - vec b = C.col(i); - diagB += (b % b - (1.0 / nDp) * (b % c)); - } - catch (const std::exception& e) - { - return { DBL_MAX, DBL_MAX }; + if (flag) { + vec w = mSpatialWeight.weightVector(k); + mat xtw = trans(mX % (w * wspan)); + try + { + mat C = trans(xtw) * inv_sympd(xtw * mX); + vec b = C.col(i); + diagB += (b % b - (1.0 / nDp) * (b % c)); + } + catch (const std::exception& e) + { + flag = false; + } } } + if (!flag) { + return { DBL_MAX, DBL_MAX }; + } diagB = 1.0 / nDp * diagB; return { sum(diagB), sum(diagB % diagB) }; } From a759c695c8a4577d8f6d7df4502b296de1498b92 Mon Sep 17 00:00:00 2001 From: HPDell Date: Thu, 1 Aug 2024 17:34:25 +0100 Subject: [PATCH 05/11] add: mat quad mpi --- include/gwmodelpp/utils/armampi.h | 4 ++- src/gwmodelpp/GWRMultiscale.cpp | 2 +- src/gwmodelpp/utils/armampi.cpp | 19 +++++++++++- test/mpi/testMpiMatMul.cpp | 51 ++++++++++++++++++++++++++++++- 4 files changed, 72 insertions(+), 4 deletions(-) diff --git a/include/gwmodelpp/utils/armampi.h b/include/gwmodelpp/utils/armampi.h index d1866a0..9382261 100644 --- a/include/gwmodelpp/utils/armampi.h +++ b/include/gwmodelpp/utils/armampi.h @@ -7,4 +7,6 @@ #define GWM_MPI_UWORD MPI_UNSIGNED_LONG_LONG #endif // ARMA_32BIT_WORD -void mat_mul_mpi(arma::mat& a, arma::mat& b, arma::mat& c, const int ip, const int np, const size_t range); +void mat_mul_mpi(arma::mat& a, arma::mat& b, arma::mat& c, const int ip, const int np); + +void mat_quad_mpi(arma::mat& a, arma::mat& aTa, const int ip, const int np); diff --git a/src/gwmodelpp/GWRMultiscale.cpp b/src/gwmodelpp/GWRMultiscale.cpp index 3e03f4a..41a2a55 100644 --- a/src/gwmodelpp/GWRMultiscale.cpp +++ b/src/gwmodelpp/GWRMultiscale.cpp @@ -954,7 +954,7 @@ vec GWRMultiscale::fitVarMpi(const size_t var) if (mHasHatMatrix) { mat SArrayi = mSArray.slice(var) - mS0; - mat_mul_mpi(S, SArrayi, mSArray.slice(var), mWorkerId, mWorkerNum, mWorkRangeSize); + mat_mul_mpi(S, SArrayi, mSArray.slice(var), mWorkerId, mWorkerNum); mSArray.slice(var) += S; mS0 = mSArray.slice(var) - SArrayi; } diff --git a/src/gwmodelpp/utils/armampi.cpp b/src/gwmodelpp/utils/armampi.cpp index 024db9b..97594af 100644 --- a/src/gwmodelpp/utils/armampi.cpp +++ b/src/gwmodelpp/utils/armampi.cpp @@ -5,7 +5,7 @@ using namespace std; using namespace arma; -void mat_mul_mpi(mat& a, mat& b, mat& c, const int ip, const int np, const size_t range) +void mat_mul_mpi(mat& a, mat& b, mat& c, const int ip, const int np) { arma::uword m = a.n_rows, n = b.n_cols; int k = (int) b.n_rows; @@ -23,3 +23,20 @@ void mat_mul_mpi(mat& a, mat& b, mat& c, const int ip, const int np, const size_ MPI_Reduce(ci.memptr(), c.memptr(), ci.n_elem, MPI_DOUBLE, MPI_SUM, pi, MPI_COMM_WORLD); } } + +void mat_quad_mpi(mat& a, mat& aTa, const int ip, const int np) +{ + arma::uword m = a.n_cols, n = a.n_rows; + int k = (int) a.n_rows; + arma::Col a_rows(np, arma::fill::zeros); + MPI_Allgather(&k, 1, MPI_INT, a_rows.memptr(), 1, MPI_INT, MPI_COMM_WORLD); + aTa = mat(n, m, arma::fill::zeros); + mat a_buf; + for (int pi = 0; pi < np; pi++) + { + arma::Col a_disp = arma::cumsum(a_rows) - a_rows; + a_buf = a.cols(a_disp(pi), a_disp(pi) + a_rows(pi) - 1); + mat aTai = a_buf.t() * a; + MPI_Reduce(aTai.memptr(), aTa.memptr(), aTai.n_elem, MPI_DOUBLE, MPI_SUM, pi, MPI_COMM_WORLD); + } +} diff --git a/test/mpi/testMpiMatMul.cpp b/test/mpi/testMpiMatMul.cpp index dc70b5c..c2979a4 100644 --- a/test/mpi/testMpiMatMul.cpp +++ b/test/mpi/testMpiMatMul.cpp @@ -32,7 +32,56 @@ TEST_CASE("mat mul mpi") A = A_all.rows(from, to - 1); B = B_all.rows(from, to - 1); // printf("process %d begin mat mul\n", ip); - REQUIRE_NOTHROW(mat_mul_mpi(A, B, C, ip, np, size)); + REQUIRE_NOTHROW(mat_mul_mpi(A, B, C, ip, np)); + // printf("process %d end mat mul\n", ip); + + if (ip == 0) + { + mat C_gather(n, n); + C_gather.rows(from, to - 1) = C; + uvec received(np, fill::zeros); + received(0) = 1; + auto bufsize = size * n; + double *buf = new double[size * n]; + while (!all(received == 1)) + { + MPI_Status status; + MPI_Recv(buf, bufsize, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); + received(status.MPI_SOURCE) = 1; + uword ifrom = status.MPI_SOURCE * size, ito = min(ifrom + size, n), irange = ito - ifrom; + C_gather.rows(ifrom, ito - 1) = mat(buf, irange, n); + } + REQUIRE(approx_equal(C_gather, C_all, "absdiff", 1e-6)); + } + else + { + MPI_Send(C.memptr(), C.n_elem, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); + } +} + +TEST_CASE("mat quad mpi") +{ + int ip, np; + MPI_Comm_size(MPI_COMM_WORLD, &np); + MPI_Comm_rank(MPI_COMM_WORLD, &ip); + + uword n = 100; + uword size = n / np + (n % np == 0 ? 0 : 1); + uword from = ip * size, to = min(from + size, n); + + // printf("range from %llu to %llu\n", from, to); + + mat A_all, C_all; + A_all = mat(n, n, arma::fill::randn); + if (ip == 0) + { + C_all = A_all.t() * A_all; + } + + mat A, C; + A = A_all.rows(from, to - 1); + // printf("process %d begin mat mul\n", ip); + REQUIRE_NOTHROW(mat_quad_mpi(A, C, ip, np)); // printf("process %d end mat mul\n", ip); if (ip == 0) From d4fcd5c1808e5718bf202b0d4259d35ee0e186fe Mon Sep 17 00:00:00 2001 From: HPDell Date: Thu, 1 Aug 2024 17:51:28 +0100 Subject: [PATCH 06/11] add: mat trace mpi --- include/gwmodelpp/utils/armampi.h | 2 ++ src/gwmodelpp/utils/armampi.cpp | 13 +++++++++++ test/mpi/testMpiMatMul.cpp | 36 +++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/include/gwmodelpp/utils/armampi.h b/include/gwmodelpp/utils/armampi.h index 9382261..9fa1758 100644 --- a/include/gwmodelpp/utils/armampi.h +++ b/include/gwmodelpp/utils/armampi.h @@ -10,3 +10,5 @@ void mat_mul_mpi(arma::mat& a, arma::mat& b, arma::mat& c, const int ip, const int np); void mat_quad_mpi(arma::mat& a, arma::mat& aTa, const int ip, const int np); + +double mat_trace_mpi(arma::mat& a, const int ip, const int np); diff --git a/src/gwmodelpp/utils/armampi.cpp b/src/gwmodelpp/utils/armampi.cpp index 97594af..69c3f1b 100644 --- a/src/gwmodelpp/utils/armampi.cpp +++ b/src/gwmodelpp/utils/armampi.cpp @@ -40,3 +40,16 @@ void mat_quad_mpi(mat& a, mat& aTa, const int ip, const int np) MPI_Reduce(aTai.memptr(), aTa.memptr(), aTai.n_elem, MPI_DOUBLE, MPI_SUM, pi, MPI_COMM_WORLD); } } + +double mat_trace_mpi(arma::mat& a, const int ip, const int np) +{ + int k = (int) a.n_rows; + arma::Col a_rows(np, arma::fill::zeros); + MPI_Allgather(&k, 1, MPI_INT, a_rows.memptr(), 1, MPI_INT, MPI_COMM_WORLD); + arma::Col a_disp = arma::cumsum(a_rows) - a_rows; + mat ai = a.cols(a_disp(ip), a_disp(ip) + a_rows(ip) - 1); + double ti = trace(ai); + double tr = 0.0; + MPI_Allreduce(&ti, &tr, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + return tr; +} diff --git a/test/mpi/testMpiMatMul.cpp b/test/mpi/testMpiMatMul.cpp index c2979a4..2bb7299 100644 --- a/test/mpi/testMpiMatMul.cpp +++ b/test/mpi/testMpiMatMul.cpp @@ -108,6 +108,42 @@ TEST_CASE("mat quad mpi") } } +TEST_CASE("mat trace mpi") +{ + int ip, np; + MPI_Comm_size(MPI_COMM_WORLD, &np); + MPI_Comm_rank(MPI_COMM_WORLD, &ip); + + uword n = 100; + uword size = n / np + (n % np == 0 ? 0 : 1); + uword from = ip * size, to = min(from + size, n); + + // printf("range from %llu to %llu\n", from, to); + + mat A_all; + double trA_all; + A_all = mat(n, n, arma::fill::randn); + if (ip == 0) + { + trA_all = trace(A_all); + } + + mat A; + double trA = 0.0; + A = A_all.rows(from, to - 1); + // printf("process %d begin mat mul\n", ip); + REQUIRE_NOTHROW(([&]() + { + trA = mat_trace_mpi(A, ip, np); + })()); + // printf("process %d end mat mul\n", ip); + + if (ip == 0) + { + REQUIRE_THAT(trA, Catch::Matchers::WithinAbs(trA_all, 1e-6)); + } +} + int main(int argc, char *argv[]) { MPI_Init(&argc, &argv); From 8fffcd6e289ed60d383fea31ce586248664ea38e Mon Sep 17 00:00:00 2001 From: HPDell Date: Fri, 2 Aug 2024 11:42:46 +0100 Subject: [PATCH 07/11] add: MPI version of FTest --- include/gwmodelpp/GWRBasic.h | 22 ++++--- src/gwmodelpp/GWRBasic.cpp | 112 ++++++++++++++++++----------------- 2 files changed, 72 insertions(+), 62 deletions(-) diff --git a/include/gwmodelpp/GWRBasic.h b/include/gwmodelpp/GWRBasic.h index ed9b050..395b48c 100644 --- a/include/gwmodelpp/GWRBasic.h +++ b/include/gwmodelpp/GWRBasic.h @@ -55,7 +55,7 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe typedef double (GWRBasic::*TrQtQCalculator)(); typedef double (GWRBasic::*TrQtQCoreCalculator)(); typedef arma::vec (GWRBasic::*DiagBCalculator)(arma::uword); - typedef arma::vec (GWRBasic::*DiagBCoreCalculator)(arma::uword); + typedef arma::vec (GWRBasic::*DiagBCoreCalculator)(arma::uword, const arma::vec&); typedef double (GWRBasic::*BandwidthSelectionCriterionCalculator)(BandwidthWeight*); //!< \~english Declaration of criterion calculator for bandwidth selection. \~chinese 带宽优选指标计算函数声明。 typedef double (GWRBasic::*IndepVarsSelectCriterionCalculator)(const std::vector&); //!< \~english Declaration of criterion calculator for variable selection. \~chinese 变量优选指标计算函数声明。 @@ -68,6 +68,8 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe double p = 0.0; }; + using FTestResultCombine = std::tuple, FTestResult>; + private: /** @@ -401,6 +403,11 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe void setStoreC(bool flag) { mStoreC = flag; } + FTestResultCombine fTestResults() + { + return std::make_tuple(mF1Test, mF2Test, mF3Test, mF4Test); + } + public: // Implement Algorithm bool isValid() override; @@ -530,10 +537,7 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe double calcTrQtQBase(); - arma::vec calcDiagBBase(arma::uword i) - { - return (this->*mCalcDiagBCoreFunction)(i); - } + arma::vec calcDiagBBase(arma::uword i); private: @@ -545,7 +549,7 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe double calcTrQtQCoreSerial(); - arma::vec calcDiagBCoreSerial(arma::uword i); + arma::vec calcDiagBCoreSerial(arma::uword i, const arma::vec& c); #ifdef ENABLE_OPENMP @@ -625,7 +629,7 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe double calcTrQtQCoreOmp(); - arma::vec calcDiagBCoreOmp(arma::uword i); + arma::vec calcDiagBCoreOmp(arma::uword i, const arma::vec& c); #endif @@ -712,6 +716,8 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe double bandwidthSizeCriterionCVMpi(BandwidthWeight* bandwidthWeight); double bandwidthSizeCriterionAICMpi(BandwidthWeight* bandwidthWeight); arma::mat fitMpi(); + double calcTrQtQMpi(); + arma::vec calcDiagBMpi(arma::uword i); #endif // ENABLE_MPI public: // Implement IParallelizable @@ -813,7 +819,7 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe FTestResult mF4Test; FTestCalculator mFTestFunction = &GWRBasic::fTestBase; TrQtQCalculator mCalcTrQtQFunction = &GWRBasic::calcTrQtQBase; - TrQtQCoreCalculator mCalcTrQtQCoreFUnction = &GWRBasic::calcTrQtQCoreSerial; + TrQtQCoreCalculator mCalcTrQtQCoreFunction = &GWRBasic::calcTrQtQCoreSerial; DiagBCalculator mCalcDiagBFunction = &GWRBasic::calcDiagBBase; DiagBCoreCalculator mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreSerial; }; diff --git a/src/gwmodelpp/GWRBasic.cpp b/src/gwmodelpp/GWRBasic.cpp index d3ab9c1..97dfd8e 100644 --- a/src/gwmodelpp/GWRBasic.cpp +++ b/src/gwmodelpp/GWRBasic.cpp @@ -490,7 +490,7 @@ double GWRBasic::calcTrQtQBase() } else { - trQtQ = (this->*mCalcTrQtQCoreFUnction)(); + trQtQ = (this->*mCalcTrQtQCoreFunction)(); } return trQtQ; } @@ -500,7 +500,8 @@ double GWRBasic::calcTrQtQCoreSerial() double trQtQ = 0.0; uword nDp = mCoords.n_rows, nVar = mCoords.n_cols; mat wspan(1, nVar, fill::ones); - for (uword i = 0; i < nDp; i++) + std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); + for (uword i = workRange.first; i < workRange.second; i++) { vec wi = mSpatialWeight.weightVector(i); mat xtwi = trans(mX % (wi * wspan)); @@ -539,16 +540,14 @@ double GWRBasic::calcTrQtQCoreSerial() return trQtQ; } -vec GWRBasic::calcDiagBCoreSerial(uword i) +arma::vec GWRBasic::calcDiagBBase(arma::uword i) { arma::uword nDp = mX.n_rows, nVar = mX.n_cols; - vec diagB(nDp, fill::zeros), c(nDp, fill::zeros); - mat ek = eye(nVar, nVar); - mat wspan(1, nVar, fill::ones); + vec c(nDp, fill::zeros); for (arma::uword j = 0; j < nDp; j++) { vec w = mSpatialWeight.weightVector(j); - mat xtw = trans(mX % (w * wspan)); + mat xtw = trans(mX.each_col() % w); try { mat C = trans(xtw) * inv_sympd(xtw * mX); @@ -559,7 +558,16 @@ vec GWRBasic::calcDiagBCoreSerial(uword i) return { DBL_MAX, DBL_MAX }; } } - for (arma::uword k = 0; k < nDp; k++) + return (this->*mCalcDiagBCoreFunction)(i); +} + +vec GWRBasic::calcDiagBCoreSerial(uword i, const vec& c) +{ + arma::uword nDp = mX.n_rows, nVar = mX.n_cols; + vec diagB(nDp, fill::zeros) + mat ek = eye(nVar, nVar); + std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); + for (arma::uword k = workRange.first; k < workRange.second; k++) { vec w = mSpatialWeight.weightVector(k); mat xtw = trans(mX % (w * wspan)); @@ -749,9 +757,10 @@ double GWRBasic::calcTrQtQCoreOmp() double trQtQ = 0.0; uword nDp = mCoords.n_rows, nVar = mCoords.n_cols; mat wspan(1, nVar, fill::ones); + std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); int flag = true; #pragma omp parallel for num_threads(mOmpThreadNum) - for (uword i = 0; i < nDp; i++) + for (uword i = workRange.first; i < workRange.second; i++) { if (flag) { vec wi = mSpatialWeight.weightVector(i); @@ -791,33 +800,19 @@ double GWRBasic::calcTrQtQCoreOmp() return flag ? trQtQ : DBL_MAX; } -vec GWRBasic::calcDiagBCoreOmp(uword i) +vec GWRBasic::calcDiagBCoreOmp(uword i, const vec& c) { arma::uword nDp = mX.n_rows, nVar = mX.n_cols; - vec diagB(nDp, fill::zeros), c(nDp, fill::zeros); + vec diagB(nDp, fill::zeros); mat ek = eye(nVar, nVar); - mat wspan(1, nVar, fill::ones); - for (arma::uword j = 0; j < nDp; j++) - { - vec w = mSpatialWeight.weightVector(j); - mat xtw = trans(mX % (w * wspan)); - try - { - mat C = trans(xtw) * inv_sympd(xtw * mX); - c += C.col(i); - } - catch (const std::exception& e) - { - return { DBL_MAX, DBL_MAX }; - } - } + std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); int flag = true; #pragma omp parallel for num_threads(mOmpThreadNum) - for (arma::uword k = 0; k < nDp; k++) + for (arma::uword k = workRange.first; k < workRange.second; k++) { if (flag) { vec w = mSpatialWeight.weightVector(k); - mat xtw = trans(mX % (w * wspan)); + mat xtw = trans(mX.each_col() % w); try { mat C = trans(xtw) * inv_sympd(xtw * mX); @@ -830,7 +825,8 @@ vec GWRBasic::calcDiagBCoreOmp(uword i) } } } - if (!flag) { + if (!flag) + { return { DBL_MAX, DBL_MAX }; } diagB = 1.0 / nDp * diagB; @@ -1274,26 +1270,34 @@ arma::mat gwm::GWRBasic::fitMpi() return mBetas; } -// double GWRBasic::indepVarsSelectionCriterion(const mat& x, const vec& y, vec& shat) -// { -// try -// { -// mat S; -// mat betas = (this->*mFitCoreSHatFunction)(x, y, mSpatialWeight, shat, S); -// GWM_LOG_PROGRESS(++mIndepVarSelectionProgressCurrent, mIndepVarSelectionProgressTotal); -// if (mStatus == Status::Success) -// { -// double rss = GWRBase::RSS(x, y, betas); -// return rss; -// } -// else return DBL_MAX; -// } -// catch(const std::exception& e) -// { -// return DBL_MAX; -// } - -// } +double GWRBasic::calcTrQtQMpi() +{ + double trQtQ = 0.0; + uword nDp = mCoords.n_rows; + if (isStoreS()) + { + std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); + mat EmS = eye(nDp, nDp).eval().rows(workRange.first, workRange.second - 1) - mS; + mat Q, QtQ; + mat_quad_mpi(EmS, Q, mWorkerId, mWorkerNum); + mat_quad_mpi(Q, QtQ, mWorkerId, mWorkerNum); + trQtQ = mat_trace_mpi(QtQ, mWorkerId, mWorkerNum); + } + else + { + double trQtQi = (this->*mCalcTrQtQCoreFunction)(); + MPI_Allreduce(&trQtQi, &trQtQ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + } + return trQtQ; +} + +vec GWRBasic::calcDiagBMpi(uword i) +{ + vec diagBi = calcDiagBBase(i); + vec diagB; + MPI_Allreduce(diagBi.memptr(), diagB.memptr(), 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + return diagB; +} #endif // ENABLE_MPI void GWRBasic::setBandwidthSelectionCriterion(const BandwidthSelectionCriterionType& criterion) @@ -1341,7 +1345,7 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitCoreFunction = &GWRBasic::fitCoreSerial; mFitCoreCVFunction = &GWRBasic::fitCoreCVSerial; mFitCoreSHatFunction = &GWRBasic::fitCoreSHatSerial; - mCalcTrQtQCoreFUnction = &GWRBasic::calcTrQtQCoreSerial; + mCalcTrQtQCoreFunction = &GWRBasic::calcTrQtQCoreSerial; mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreSerial; break; #ifdef ENABLE_OPENMP @@ -1351,7 +1355,7 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitCoreFunction = &GWRBasic::fitCoreOmp; mFitCoreCVFunction = &GWRBasic::fitCoreCVOmp; mFitCoreSHatFunction = &GWRBasic::fitCoreSHatOmp; - mCalcTrQtQCoreFUnction = &GWRBasic::calcTrQtQCoreOmp; + mCalcTrQtQCoreFunction = &GWRBasic::calcTrQtQCoreOmp; mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreOmp; break; #endif // ENABLE_OPENMP @@ -1369,7 +1373,7 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitCoreFunction = &GWRBasic::fitCoreSerial; mFitCoreCVFunction = &GWRBasic::fitCoreCVSerial; mFitCoreSHatFunction = &GWRBasic::fitCoreSHatSerial; - mCalcTrQtQCoreFUnction = &GWRBasic::calcTrQtQCoreSerial; + mCalcTrQtQCoreFunction = &GWRBasic::calcTrQtQCoreSerial; mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreSerial; break; } @@ -1379,8 +1383,8 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitFunction = &GWRBasic::fitMpi; mIndepVarsSelectionCriterionFunction = &GWRBasic::indepVarsSelectionCriterionMpi; mFTestFunction = &GWRBasic::fTestBase; - mCalcTrQtQFunction = &GWRBasic::calcTrQtQBase; - mCalcDiagBFunction = &GWRBasic::calcDiagBBase; + mCalcTrQtQFunction = &GWRBasic::calcTrQtQMpi; + mCalcDiagBFunction = &GWRBasic::calcDiagBMpi; } else { From 29c885312d6eeef19724253b6f85da73217e2bc3 Mon Sep 17 00:00:00 2001 From: HPDell Date: Fri, 2 Aug 2024 11:46:17 +0100 Subject: [PATCH 08/11] fix: remove ek in diag B calculator --- src/gwmodelpp/GWRBasic.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/gwmodelpp/GWRBasic.cpp b/src/gwmodelpp/GWRBasic.cpp index 97dfd8e..81d6491 100644 --- a/src/gwmodelpp/GWRBasic.cpp +++ b/src/gwmodelpp/GWRBasic.cpp @@ -558,22 +558,21 @@ arma::vec GWRBasic::calcDiagBBase(arma::uword i) return { DBL_MAX, DBL_MAX }; } } - return (this->*mCalcDiagBCoreFunction)(i); + return (this->*mCalcDiagBCoreFunction)(i, c); } vec GWRBasic::calcDiagBCoreSerial(uword i, const vec& c) { arma::uword nDp = mX.n_rows, nVar = mX.n_cols; vec diagB(nDp, fill::zeros) - mat ek = eye(nVar, nVar); std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); for (arma::uword k = workRange.first; k < workRange.second; k++) { vec w = mSpatialWeight.weightVector(k); - mat xtw = trans(mX % (w * wspan)); + mat xtw = (mX.each_col() % w).t(); try { - mat C = trans(xtw) * inv_sympd(xtw * mX); + mat C = xtw.t() * inv_sympd(xtw * mX); vec b = C.col(i); diagB += (b % b - (1.0 / nDp) * (b % c)); } @@ -804,7 +803,6 @@ vec GWRBasic::calcDiagBCoreOmp(uword i, const vec& c) { arma::uword nDp = mX.n_rows, nVar = mX.n_cols; vec diagB(nDp, fill::zeros); - mat ek = eye(nVar, nVar); std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); int flag = true; #pragma omp parallel for num_threads(mOmpThreadNum) From 811731689dbb7b7e0f2316bd37f59835dd1e0da3 Mon Sep 17 00:00:00 2001 From: HPDell Date: Fri, 2 Aug 2024 12:33:18 +0100 Subject: [PATCH 09/11] add: test for GWR f test --- include/gwmodelpp/GWRBasic.h | 5 +++ src/gwmodelpp/GWRBasic.cpp | 18 ++++++--- test/testGWRBasic.cpp | 73 ++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 6 deletions(-) diff --git a/include/gwmodelpp/GWRBasic.h b/include/gwmodelpp/GWRBasic.h index 395b48c..5e6905f 100644 --- a/include/gwmodelpp/GWRBasic.h +++ b/include/gwmodelpp/GWRBasic.h @@ -403,6 +403,10 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe void setStoreC(bool flag) { mStoreC = flag; } + bool isDoFTest() { return mIsDoFTest; }; + + void setIsDoFtest(bool value) { mIsDoFTest = value; } + FTestResultCombine fTestResults() { return std::make_tuple(mF1Test, mF2Test, mF3Test, mF4Test); @@ -813,6 +817,7 @@ class GWRBasic : public GWRBase, public IBandwidthSelectable, public IVarialbeSe bool mStoreS = false; //!< \~english Whether to save S \~chinese 是否保存 S 矩阵 bool mStoreC = false; //!< \~english Whether to save C \~chinese 是否保存 C 矩阵 + bool mIsDoFTest = false; FTestResult mF1Test; FTestResult mF2Test; std::vector mF3Test; diff --git a/src/gwmodelpp/GWRBasic.cpp b/src/gwmodelpp/GWRBasic.cpp index 81d6491..3906913 100644 --- a/src/gwmodelpp/GWRBasic.cpp +++ b/src/gwmodelpp/GWRBasic.cpp @@ -159,6 +159,12 @@ mat GWRBasic::fit() GWM_LOG_STAGE("Model fitting"); mBetas = (this->*mFitFunction)(); + + if (mIsDoFTest) + { + GWM_LOG_STAGE("F Test"); + (this->*mFTestFunction)(); + } #ifdef ENABLE_CUDA if (mParallelType & ParallelType::CUDA) @@ -423,11 +429,11 @@ void gwm::GWRBasic::fTestBase() double v1 = mSHat(0), v2 = mSHat(1); double nDp = double(mCoords.n_rows), nVar = double(mX.n_cols); double RSSg = RSS(mX, mY, mBetas); - vec betaOls = solve(mX, mY); + vec betaOls = (mX.t() * mX).i() * (mX.t() * mY); vec residualOls = mY - mX * betaOls; double RSSo = sum(residualOls % residualOls); double DFo = nDp - nVar; - double delta1 = 1.0 * nDp - 2.0 * v1 + v2; + double delta1 = nDp - 2.0 * v1 + v2; double sigma21 = RSSg / delta1; double lDelta1 = sum(mQDiag), lDelta2 = (this->*mCalcTrQtQFunction)(); //========= @@ -542,7 +548,7 @@ double GWRBasic::calcTrQtQCoreSerial() arma::vec GWRBasic::calcDiagBBase(arma::uword i) { - arma::uword nDp = mX.n_rows, nVar = mX.n_cols; + arma::uword nDp = mX.n_rows; vec c(nDp, fill::zeros); for (arma::uword j = 0; j < nDp; j++) { @@ -563,8 +569,8 @@ arma::vec GWRBasic::calcDiagBBase(arma::uword i) vec GWRBasic::calcDiagBCoreSerial(uword i, const vec& c) { - arma::uword nDp = mX.n_rows, nVar = mX.n_cols; - vec diagB(nDp, fill::zeros) + arma::uword nDp = mX.n_rows; + vec diagB(nDp, fill::zeros); std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); for (arma::uword k = workRange.first; k < workRange.second; k++) { @@ -801,7 +807,7 @@ double GWRBasic::calcTrQtQCoreOmp() vec GWRBasic::calcDiagBCoreOmp(uword i, const vec& c) { - arma::uword nDp = mX.n_rows, nVar = mX.n_cols; + arma::uword nDp = mX.n_rows; vec diagB(nDp, fill::zeros); std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); int flag = true; diff --git a/test/testGWRBasic.cpp b/test/testGWRBasic.cpp index b1e76f0..f490ab9 100644 --- a/test/testGWRBasic.cpp +++ b/test/testGWRBasic.cpp @@ -1,6 +1,7 @@ #define CATCH_CONFIG_MAIN #include +#include #include #include #include @@ -20,6 +21,11 @@ using namespace std; using namespace arma; using namespace gwm; +array convFTestArray(GWRBasic::FTestResult f) +{ + return { f.s, f.df1, f.df2, f.p }; +} + TEST_CASE("BasicGWR: LondonHP") { mat londonhp100_coord, londonhp100_data; @@ -259,6 +265,73 @@ TEST_CASE("BasicGWR: LondonHP") REQUIRE_THAT(diagnostic.RSquareAdjust, Catch::Matchers::WithinAbs(0.678982114793865, 1e-8)); } + SECTION("F test | adaptive bandwidth | no bandwidth optimization | no variable optimization") { + auto parallel = GENERATE_REF(values(parallel_list)); + INFO("Parallel:" << ParallelTypeDict.at(parallel)); + + CRSDistance distance(false); + BandwidthWeight bandwidth(36, true, BandwidthWeight::Gaussian); + SpatialWeight spatial(&bandwidth, &distance); + + GWRBasic algorithm; + algorithm.setCoords(londonhp100_coord); + algorithm.setDependentVariable(y); + algorithm.setIndependentVariables(x); + algorithm.setSpatialWeight(spatial); + algorithm.setParallelType(parallel); +#ifdef ENABLE_OPENMP + if (parallel == ParallelType::OpenMP) + { + algorithm.setOmpThreadNum(omp_get_num_threads()); + } +#endif // ENABLE_OPENMP +#ifdef ENABLE_CUDA + if (parallel == ParallelType::CUDA) + { + algorithm.setGPUId(0); + algorithm.setGroupSize(64); + } +#endif // ENABLE_CUDA + algorithm.setIsDoFtest(true); + REQUIRE_NOTHROW(algorithm.fit()); + auto results = algorithm.fTestResults(); + auto f1 = convFTestArray(get<0>(results)); + auto f2 = convFTestArray(get<1>(results)); + vector> f3; + for (auto &&i : get<2>(results)) + { + f3.push_back(convFTestArray(i)); + } + auto f4 = convFTestArray(get<3>(results)); + REQUIRE_THAT(f1[0], Catch::Matchers::WithinAbs(0.9342, 1e-3)); + REQUIRE_THAT(f1[1], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f1[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f1[3], Catch::Matchers::WithinAbs(0.3710, 1e-3)); + REQUIRE_THAT(f2[0], Catch::Matchers::WithinAbs(1.9762, 1e-3)); + REQUIRE_THAT(f2[1], Catch::Matchers::WithinAbs(13.1571, 1e-3)); + REQUIRE_THAT(f2[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f2[3], Catch::Matchers::WithinAbs(0.0303, 1e-3)); + REQUIRE_THAT(f4[0], Catch::Matchers::WithinAbs(0.8752, 1e-3)); + REQUIRE_THAT(f4[1], Catch::Matchers::WithinAbs(89.9377, 1e-3)); + REQUIRE_THAT(f4[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f4[3], Catch::Matchers::WithinAbs(0.2619, 1e-3)); + REQUIRE_THAT(f3[0][0], Catch::Matchers::WithinAbs(0.4655, 1e-3)); + REQUIRE_THAT(f3[0][1], Catch::Matchers::WithinAbs(27.1298, 1e-3)); + REQUIRE_THAT(f3[0][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[0][3], Catch::Matchers::WithinAbs(0.9872, 1e-3)); + REQUIRE_THAT(f3[1][0], Catch::Matchers::WithinAbs(0.5022, 1e-3)); + REQUIRE_THAT(f3[1][1], Catch::Matchers::WithinAbs(18.3315, 1e-3)); + REQUIRE_THAT(f3[1][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[1][3], Catch::Matchers::WithinAbs(0.9526, 1e-3)); + REQUIRE_THAT(f3[2][0], Catch::Matchers::WithinAbs(0.6415, 1e-3)); + REQUIRE_THAT(f3[2][1], Catch::Matchers::WithinAbs(29.6827, 1e-3)); + REQUIRE_THAT(f3[2][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[2][3], Catch::Matchers::WithinAbs(0.9151, 1e-3)); + REQUIRE_THAT(f3[3][0], Catch::Matchers::WithinAbs(0.3019, 1e-3)); + REQUIRE_THAT(f3[3][1], Catch::Matchers::WithinAbs(24.7164, 1e-3)); + REQUIRE_THAT(f3[3][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[3][3], Catch::Matchers::WithinAbs(0.9994, 1e-3)); + } } From 24404242c0387e66081bb9788c518d308808b869 Mon Sep 17 00:00:00 2001 From: HPDell Date: Fri, 2 Aug 2024 13:12:54 +0100 Subject: [PATCH 10/11] fix: MPI problems --- src/gwmodelpp/GWRBasic.cpp | 49 +++++++++++++++------- test/mpi/testGWRBasicMpi.cpp | 78 ++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 15 deletions(-) diff --git a/src/gwmodelpp/GWRBasic.cpp b/src/gwmodelpp/GWRBasic.cpp index 3906913..4c56b1d 100644 --- a/src/gwmodelpp/GWRBasic.cpp +++ b/src/gwmodelpp/GWRBasic.cpp @@ -465,8 +465,7 @@ void gwm::GWRBasic::fTestBase() for (uword i = 0; i < mX.n_cols; i++) { vec diagB = (this->*mCalcDiagBFunction)(i); - double g1 = diagB(0); - double g2 = diagB(1); + double g1 = diagB(0), g2 = diagB(1); double numdf = g1 * g1 / g2; FTestResult f3i; f3i.s = (vk2(i) / g1) / sigma21; @@ -564,7 +563,9 @@ arma::vec GWRBasic::calcDiagBBase(arma::uword i) return { DBL_MAX, DBL_MAX }; } } - return (this->*mCalcDiagBCoreFunction)(i, c); + vec diagB = (this->*mCalcDiagBCoreFunction)(i, c); + diagB = 1.0 / nDp * diagB; + return { sum(diagB), sum(diagB % diagB) }; } vec GWRBasic::calcDiagBCoreSerial(uword i, const vec& c) @@ -584,11 +585,11 @@ vec GWRBasic::calcDiagBCoreSerial(uword i, const vec& c) } catch (const std::exception& e) { - return { DBL_MAX, DBL_MAX }; + diagB.fill(DBL_MAX); + return diagB; } } - diagB = 1.0 / nDp * diagB; - return { sum(diagB), sum(diagB % diagB) }; + return diagB; } #ifdef ENABLE_OPENMP @@ -808,20 +809,21 @@ double GWRBasic::calcTrQtQCoreOmp() vec GWRBasic::calcDiagBCoreOmp(uword i, const vec& c) { arma::uword nDp = mX.n_rows; - vec diagB(nDp, fill::zeros); + mat diagB_all(nDp, mOmpThreadNum, fill::zeros); std::pair workRange = mWorkRange.value_or(make_pair(0, nDp)); int flag = true; #pragma omp parallel for num_threads(mOmpThreadNum) for (arma::uword k = workRange.first; k < workRange.second; k++) { if (flag) { + int thread = omp_get_thread_num(); vec w = mSpatialWeight.weightVector(k); mat xtw = trans(mX.each_col() % w); try { mat C = trans(xtw) * inv_sympd(xtw * mX); vec b = C.col(i); - diagB += (b % b - (1.0 / nDp) * (b % c)); + diagB_all.col(thread) += (b % b - (1.0 / nDp) * (b % c)); } catch (const std::exception& e) { @@ -831,10 +833,10 @@ vec GWRBasic::calcDiagBCoreOmp(uword i, const vec& c) } if (!flag) { - return { DBL_MAX, DBL_MAX }; + diagB_all.fill(DBL_MAX); } - diagB = 1.0 / nDp * diagB; - return { sum(diagB), sum(diagB % diagB) }; + vec diagB = sum(diagB_all, 1); + return diagB; } #endif @@ -1297,10 +1299,27 @@ double GWRBasic::calcTrQtQMpi() vec GWRBasic::calcDiagBMpi(uword i) { - vec diagBi = calcDiagBBase(i); - vec diagB; - MPI_Allreduce(diagBi.memptr(), diagB.memptr(), 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - return diagB; + arma::uword nDp = mX.n_rows; + vec c(nDp, fill::zeros); + for (arma::uword j = 0; j < nDp; j++) + { + vec w = mSpatialWeight.weightVector(j); + mat xtw = trans(mX.each_col() % w); + try + { + mat C = trans(xtw) * inv_sympd(xtw * mX); + c += C.col(i); + } + catch (const std::exception& e) + { + return { DBL_MAX, DBL_MAX }; + } + } + vec diagBi = (this->*mCalcDiagBCoreFunction)(i, c); + vec diagB(nDp, arma::fill::zeros); + MPI_Allreduce(diagBi.memptr(), diagB.memptr(), int(nDp), MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + diagB = 1.0 / double(nDp) * diagB; + return { sum(diagB), sum(diagB % diagB) }; } #endif // ENABLE_MPI diff --git a/test/mpi/testGWRBasicMpi.cpp b/test/mpi/testGWRBasicMpi.cpp index 2c44061..d435540 100644 --- a/test/mpi/testGWRBasicMpi.cpp +++ b/test/mpi/testGWRBasicMpi.cpp @@ -21,6 +21,11 @@ using namespace std; using namespace arma; using namespace gwm; +array convFTestArray(GWRBasic::FTestResult f) +{ + return { f.s, f.df1, f.df2, f.p }; +} + TEST_CASE("BasicGWR: LondonHP") { int iProcess, nProcess; @@ -303,6 +308,79 @@ TEST_CASE("BasicGWR: LondonHP") } } + SECTION("F test | adaptive bandwidth | no bandwidth optimization | no variable optimization") { + auto parallel = GENERATE_REF(values(parallel_list)); + INFO("Parallel:" << ParallelTypeDict.at(parallel)); + + CRSDistance distance(false); + BandwidthWeight bandwidth(36, true, BandwidthWeight::Gaussian); + SpatialWeight spatial(&bandwidth, &distance); + + GWRBasic algorithm; + algorithm.setCoords(londonhp100_coord); + algorithm.setDependentVariable(y); + algorithm.setIndependentVariables(x); + algorithm.setSpatialWeight(spatial); + algorithm.setParallelType(parallel); + algorithm.setWorkerId(iProcess); + algorithm.setWorkerNum(nProcess); +#ifdef ENABLE_OPENMP + if (parallel == ParallelType::OpenMP) + { + algorithm.setOmpThreadNum(omp_get_num_threads()); + } +#endif // ENABLE_OPENMP +#ifdef ENABLE_CUDA + if (parallel == ParallelType::CUDA) + { + algorithm.setGPUId(0); + algorithm.setGroupSize(64); + } +#endif // ENABLE_CUDA + algorithm.setIsDoFtest(true); + REQUIRE_NOTHROW(algorithm.fit()); + if (iProcess == 0) + { + auto results = algorithm.fTestResults(); + auto f1 = convFTestArray(get<0>(results)); + auto f2 = convFTestArray(get<1>(results)); + vector> f3; + for (auto &&i : get<2>(results)) + { + f3.push_back(convFTestArray(i)); + } + auto f4 = convFTestArray(get<3>(results)); + REQUIRE_THAT(f1[0], Catch::Matchers::WithinAbs(0.9342, 1e-3)); + REQUIRE_THAT(f1[1], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f1[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f1[3], Catch::Matchers::WithinAbs(0.3710, 1e-3)); + REQUIRE_THAT(f2[0], Catch::Matchers::WithinAbs(1.9762, 1e-3)); + REQUIRE_THAT(f2[1], Catch::Matchers::WithinAbs(13.1571, 1e-3)); + REQUIRE_THAT(f2[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f2[3], Catch::Matchers::WithinAbs(0.0303, 1e-3)); + REQUIRE_THAT(f4[0], Catch::Matchers::WithinAbs(0.8752, 1e-3)); + REQUIRE_THAT(f4[1], Catch::Matchers::WithinAbs(89.9377, 1e-3)); + REQUIRE_THAT(f4[2], Catch::Matchers::WithinAbs(96.0000, 1e-3)); + REQUIRE_THAT(f4[3], Catch::Matchers::WithinAbs(0.2619, 1e-3)); + REQUIRE_THAT(f3[0][0], Catch::Matchers::WithinAbs(0.4655, 1e-3)); + REQUIRE_THAT(f3[0][1], Catch::Matchers::WithinAbs(27.1298, 1e-3)); + REQUIRE_THAT(f3[0][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[0][3], Catch::Matchers::WithinAbs(0.9872, 1e-3)); + REQUIRE_THAT(f3[1][0], Catch::Matchers::WithinAbs(0.5022, 1e-3)); + REQUIRE_THAT(f3[1][1], Catch::Matchers::WithinAbs(18.3315, 1e-3)); + REQUIRE_THAT(f3[1][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[1][3], Catch::Matchers::WithinAbs(0.9526, 1e-3)); + REQUIRE_THAT(f3[2][0], Catch::Matchers::WithinAbs(0.6415, 1e-3)); + REQUIRE_THAT(f3[2][1], Catch::Matchers::WithinAbs(29.6827, 1e-3)); + REQUIRE_THAT(f3[2][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[2][3], Catch::Matchers::WithinAbs(0.9151, 1e-3)); + REQUIRE_THAT(f3[3][0], Catch::Matchers::WithinAbs(0.3019, 1e-3)); + REQUIRE_THAT(f3[3][1], Catch::Matchers::WithinAbs(24.7164, 1e-3)); + REQUIRE_THAT(f3[3][2], Catch::Matchers::WithinAbs(93.3300, 1e-3)); + REQUIRE_THAT(f3[3][3], Catch::Matchers::WithinAbs(0.9994, 1e-3)); + } + } + } int main(int argc, char *argv[]) From 4de2600d92649aaf468fe99240430fba6f881058 Mon Sep 17 00:00:00 2001 From: HPDell Date: Fri, 2 Aug 2024 13:54:46 +0100 Subject: [PATCH 11/11] edit: use serial implementation under CUDA parallel by default --- src/gwmodelpp/GWRBasic.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gwmodelpp/GWRBasic.cpp b/src/gwmodelpp/GWRBasic.cpp index 4c56b1d..acd771c 100644 --- a/src/gwmodelpp/GWRBasic.cpp +++ b/src/gwmodelpp/GWRBasic.cpp @@ -1389,6 +1389,8 @@ void GWRBasic::setParallelType(const ParallelType& type) mFitCoreFunction = &GWRBasic::fitCoreCuda; mFitCoreCVFunction = &GWRBasic::fitCoreCVCuda; mFitCoreSHatFunction = &GWRBasic::fitCoreSHatCuda; + mCalcTrQtQCoreFunction = &GWRBasic::calcTrQtQCoreSerial; + mCalcDiagBCoreFunction = &GWRBasic::calcDiagBCoreSerial; break; #endif // ENABLE_CUDA default: