diff --git a/Grid/qcd/QCD.h b/Grid/qcd/QCD.h index dbedfa7c49..575071dee7 100644 --- a/Grid/qcd/QCD.h +++ b/Grid/qcd/QCD.h @@ -99,25 +99,25 @@ const int GparityFlavourTensorIndex = 3; //TensorLevel counts from the bottom! // s,sp,c,spc,lc -template using iSinglet = iScalar > >; -template using iSpinMatrix = iScalar, Ns> >; -template using iColourMatrix = iScalar > > ; -template using iSpinColourMatrix = iScalar, Ns> >; -template using iLorentzColourMatrix = iVector >, Nd > ; -template using iLorentzComplex = iVector >, Nd > ; -template using iDoubleStoredColourMatrix = iVector >, Nds > ; -template using iSpinVector = iScalar, Ns> >; -template using iColourVector = iScalar > >; -template using iSpinColourVector = iScalar, Ns> >; -template using iHalfSpinVector = iScalar, Nhs> >; -template using iHalfSpinColourVector = iScalar, Nhs> >; - template using iSpinColourSpinColourMatrix = iScalar, Ns>, Nc>, Ns> >; - - -template using iGparityFlavourVector = iVector >, Ngp>; -template using iGparitySpinColourVector = iVector, Ns>, Ngp >; -template using iGparityHalfSpinColourVector = iVector, Nhs>, Ngp >; -template using iGparityFlavourMatrix = iMatrix >, Ngp>; +template using iSinglet = iScalar > >; +template using iSpinMatrix = iScalar, Ns> >; +template using iColourMatrix = iScalar > > ; +template using iSpinColourMatrix = iScalar, Ns> >; +template using iLorentzColourMatrix = iVector >, Nd > ; +template using iLorentzComplex = iVector >, Nd > ; +template using iDoubleStoredColourMatrix = iVector >, Nds > ; +template using iSpinVector = iScalar, Ns> >; +template using iColourVector = iScalar > >; +template using iSpinColourVector = iScalar, Ns> >; +template using iHalfSpinVector = iScalar, Nhs> >; +template using iHalfSpinColourVector = iScalar, Nhs> >; +template using iSpinColourSpinColourMatrix = iScalar, Ns>, Nc>, Ns> >; + + +template using iGparityFlavourVector = iVector >, Ngp>; +template using iGparitySpinColourVector = iVector, Ns>, Ngp >; +template using iGparityHalfSpinColourVector = iVector, Nhs>, Ngp >; +template using iGparityFlavourMatrix = iMatrix >, Ngp>; // Spin matrix typedef iSpinMatrix SpinMatrix; diff --git a/Grid/qcd/action/fermion/StaggeredImpl.h b/Grid/qcd/action/fermion/StaggeredImpl.h index f44d12f4f8..01c516b7b3 100644 --- a/Grid/qcd/action/fermion/StaggeredImpl.h +++ b/Grid/qcd/action/fermion/StaggeredImpl.h @@ -163,7 +163,7 @@ class StaggeredImpl : public PeriodicGaugeImpl(outerProduct(Btilde,A)); + link = outerProduct(Btilde,A); PokeIndex(mat,link,mu); } diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index e98e9b879d..a55f8d4a71 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -27,7 +27,7 @@ directory *************************************************************************************/ /* @file HISQSmearing.h - @brief Declares classes related to HISQ smearing + @brief Declares classes related to the HISQ action */ @@ -35,42 +35,19 @@ directory #include #include #include - - NAMESPACE_BEGIN(Grid); -// TODO: find a way to fold this into the stencil header. need to access grid to get -// Nd, since you don't want to inherit from QCD.h -/*! @brief append arbitrary shift path to shifts */ -template -void appendShift(std::vector& shifts, int dir, Args... args) { - Coordinate shift(Nd,0); - generalShift(shift, dir, args...); - // push_back creates an element at the end of shifts and - // assigns the data in the argument to it. - shifts.push_back(shift); -} - - -/*! @brief figure out the stencil index from mu and nu */ -accelerator_inline int stencilIndex(int mu, int nu) { - // Nshifts depends on how you built the stencil - int Nshifts = 6; - return Nshifts*nu + Nd*Nshifts*mu; -} - - -/*! @brief structure holding the link treatment */ -struct HISQSmearingParameters{ - HISQSmearingParameters(){} - Real c_1; // 1 link - Real c_naik; // Naik term - Real c_3; // 3 link - Real c_5; // 5 link - Real c_7; // 7 link - Real c_lp; // 5 link Lepage - HISQSmearingParameters(Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) +/*! @brief structure holding the link treatment for a given smear */ +template +struct SmearingParameters{ + floatT c_1; // 1 link + floatT c_naik; // Naik term + floatT c_3; // 3 link + floatT c_5; // 5 link + floatT c_7; // 7 link + floatT c_lp; // Lepage + SmearingParameters(floatT c1, floatT cnaik, floatT c3, floatT c5, floatT c7, floatT clp) : c_1(c1), c_naik(cnaik), c_3(c3), @@ -80,252 +57,572 @@ struct HISQSmearingParameters{ }; -/*! @brief create fat links from link variables */ +// There are 6 quarks in nature, and 3 never need a Naik epsilon +int const GRID_MAX_NAIK = 3; + + +/*! @brief structure holding all input parameters related to the HISQ action */ +template +struct HISQParameters{ + // Structure from QOP/QDP + int n_naiks; + std::array eps_naiks; + floatT fat7_c1 ; floatT fat7_c3 ; floatT fat7_c5 ; floatT fat7_c7 ; floatT fat7_clp; + floatT asqtad_c1; floatT asqtad_c3; floatT asqtad_c5; floatT asqtad_c7; floatT asqtad_clp; floatT asqtad_cnaik; + floatT diff_c1 ; floatT diff_cnaik; + HISQParameters(int n_naiks_in, std::array eps_naiks_in, + floatT fat7_one_link , floatT fat7_three_staple , floatT fat7_five_staple , floatT fat7_seven_staple , floatT fat7_lepage, + floatT asqtad_one_link, floatT asqtad_three_staple, floatT asqtad_five_staple, floatT asqtad_seven_staple, floatT asqtad_lepage, + floatT asqtad_naik , floatT difference_one_link, floatT difference_naik) + : n_naiks(n_naiks_in), + eps_naiks(eps_naiks_in), + fat7_c1(fat7_one_link), + fat7_c3(fat7_three_staple), + fat7_c5(fat7_five_staple), + fat7_c7(fat7_seven_staple), + fat7_clp(fat7_lepage), + asqtad_c1(asqtad_one_link), + asqtad_c3(asqtad_three_staple), + asqtad_c5(asqtad_five_staple), + asqtad_c7(asqtad_seven_staple), + asqtad_clp(asqtad_lepage), + asqtad_cnaik(asqtad_naik), + diff_c1(difference_one_link), + diff_cnaik(difference_naik) {} +}; + + +/*! @brief Sometimes in the U(3) projection we use SVD cuts; here we collect related parameters */ +template +struct HISQReunitSVDParameters{ + // Structure from QOP/QDP + bool allow_svd; + bool svd_only; + floatT svd_rel_error; + floatT svd_abs_error; + floatT force_filter; + HISQReunitSVDParameters(bool allow, bool only, floatT rel, floatT abs, floatT filter) + : allow_svd(allow), + svd_only(only), + svd_rel_error(rel), + svd_abs_error(abs), + force_filter(filter) {} +}; + + +/*! @brief Get the link U_mu(x). */ +template accelerator_inline +auto getLink(const link& __restrict__ U, GeneralStencilEntry* x, int mu) { + return coalescedReadGeneralPermute(U[x->_offset](mu), x->_permute, Nd); +} +#define setLink coalescedWrite + + +/*! @brief Figure out the stencil index from mu and nu. */ +accelerator_inline +int HISQStencilIndex(int mu, int nu, int rho=0, int sig=0, std::string kind="3STAPLE") { + int res; + if (kind=="3STAPLE") + res = 5*(nu + Nd*mu); + else if (kind=="5STAPLE") + res = 17*(rho + Nd*nu + Nd*Nd*mu); + else if (kind=="7STAPLE") + // seems correct + res = 46*(sig + Nd*rho + Nd*Nd*nu + Nd*Nd*Nd*mu); + else + Grid_error("Unknown staple kind",kind); + return res; +} + + +/*! @brief Create various stencils needed for HISQ calculations */ +inline +std::vector createHISQStencil(std::string kind="3STAPLE") { + std::vector shifts; + // We allow nu=mu and rho=nu, rho=mu to make indexing easier, but these + // entries will not be used. + if (kind=="3STAPLE") { + for(int mu=0;mu(shifts,mu); + appendShift(shifts,nu); + appendShift(shifts,shiftSignal::NO_SHIFT); + appendShift(shifts,mu,Back(nu)); + appendShift(shifts,Back(nu)); + } + } else if (kind=="5STAPLE") { + for(int mu =0;mu (shifts,nu,Back(rho)); + appendShift(shifts,nu); + appendShift(shifts,Back(rho)); + appendShift(shifts,shiftSignal::NO_SHIFT); + appendShift(shifts,rho); + appendShift(shifts,Back(nu),Back(rho)); + appendShift(shifts,Back(nu)); + appendShift(shifts,Back(nu),rho); + appendShift(shifts,mu,nu,Back(rho)); + appendShift(shifts,mu,nu); + appendShift(shifts,mu,Back(rho)); + appendShift(shifts,mu); + appendShift(shifts,mu,rho); + appendShift(shifts,mu,Back(nu),Back(rho)); + appendShift(shifts,mu,Back(nu)); + appendShift(shifts,mu,Back(nu),rho); + appendShift(shifts,nu,rho); + } + } else if (kind=="7STAPLE") { + for(int mu =0;mu (shifts,shiftSignal::NO_SHIFT); + appendShift(shifts,mu); + appendShift(shifts,mu,nu); + appendShift(shifts,mu,nu,rho); + appendShift(shifts,mu,nu,rho,Back(sig)); + appendShift(shifts,mu,nu,Back(rho)); + appendShift(shifts,mu,nu,Back(rho),Back(sig)); + appendShift(shifts,mu,nu,Back(sig)); + appendShift(shifts,mu,Back(nu)); + appendShift(shifts,mu,Back(nu),rho); + appendShift(shifts,mu,Back(nu),rho,sig); + appendShift(shifts,mu,Back(nu),rho,Back(sig)); + appendShift(shifts,mu,Back(nu),Back(rho)); + appendShift(shifts,mu,Back(nu),Back(rho),sig); + appendShift(shifts,mu,Back(nu),Back(rho),Back(sig)); + appendShift(shifts,mu,Back(nu),sig); + appendShift(shifts,mu,Back(nu),Back(sig)); + appendShift(shifts,mu,rho); + appendShift(shifts,mu,rho,sig); + appendShift(shifts,mu,rho,Back(sig)); + appendShift(shifts,mu,Back(rho)); + appendShift(shifts,mu,Back(rho),sig); + appendShift(shifts,mu,Back(rho),Back(sig)); + appendShift(shifts,mu,sig); + appendShift(shifts,mu,Back(sig)); + appendShift(shifts,nu); + appendShift(shifts,nu,rho); + appendShift(shifts,nu,rho,sig); + appendShift(shifts,nu,rho,Back(sig)); + appendShift(shifts,nu,Back(rho)); + appendShift(shifts,nu,Back(rho),sig); + appendShift(shifts,nu,Back(rho),Back(sig)); + appendShift(shifts,rho); + appendShift(shifts,rho,Back(nu)); + appendShift(shifts,rho,sig); + appendShift(shifts,rho,Back(sig)); + appendShift(shifts,Back(nu)); + appendShift(shifts,Back(nu),rho); + appendShift(shifts,Back(nu),rho,sig); + appendShift(shifts,Back(nu),rho,Back(sig)); + appendShift(shifts,Back(nu),Back(rho)); + appendShift(shifts,Back(nu),Back(rho),sig); + appendShift(shifts,Back(nu),Back(rho),Back(sig)); + appendShift(shifts,Back(rho)); + appendShift(shifts,Back(rho),sig); + appendShift(shifts,Back(rho),Back(sig)); + } + } else { + Grid_error("Unknown staple kind",kind); + } + return shifts; +} + + +/*! @brief Retrieve 3-link stencil entries. */ +template accelerator_inline +std::tuple +get3StaplePoints(acc sView, int sIndex, int site) { + GeneralStencilEntry* x_p_mu = sView.GetEntry(sIndex+0,site); + GeneralStencilEntry* x_p_nu = sView.GetEntry(sIndex+1,site); + GeneralStencilEntry* x = sView.GetEntry(sIndex+2,site); + GeneralStencilEntry* x_p_mu_m_nu = sView.GetEntry(sIndex+3,site); + GeneralStencilEntry* x_m_nu = sView.GetEntry(sIndex+4,site); + return { x_p_mu, x_p_nu, x, x_p_mu_m_nu, x_m_nu }; +} + + +/*! @brief Retrieve 5-link stencil entries. */ +template accelerator_inline +std::tuple +get5StaplePoints(acc sView, int sIndex, int site) { + GeneralStencilEntry* x_p_nu_m_rho = sView.GetEntry(sIndex+0 ,site); + GeneralStencilEntry* x_p_nu = sView.GetEntry(sIndex+1 ,site); + GeneralStencilEntry* x_m_rho = sView.GetEntry(sIndex+2 ,site); + GeneralStencilEntry* x = sView.GetEntry(sIndex+3 ,site); + GeneralStencilEntry* x_p_rho = sView.GetEntry(sIndex+4 ,site); + GeneralStencilEntry* x_m_nu_m_rho = sView.GetEntry(sIndex+5 ,site); + GeneralStencilEntry* x_m_nu = sView.GetEntry(sIndex+6 ,site); + GeneralStencilEntry* x_m_nu_p_rho = sView.GetEntry(sIndex+7 ,site); + GeneralStencilEntry* x_p_mu_p_nu_m_rho = sView.GetEntry(sIndex+8 ,site); + GeneralStencilEntry* x_p_mu_p_nu = sView.GetEntry(sIndex+9 ,site); + GeneralStencilEntry* x_p_mu_m_rho = sView.GetEntry(sIndex+10,site); + GeneralStencilEntry* x_p_mu = sView.GetEntry(sIndex+11,site); + GeneralStencilEntry* x_p_mu_p_rho = sView.GetEntry(sIndex+12,site); + GeneralStencilEntry* x_p_mu_m_nu_m_rho = sView.GetEntry(sIndex+13,site); + GeneralStencilEntry* x_p_mu_m_nu = sView.GetEntry(sIndex+14,site); + GeneralStencilEntry* x_p_mu_m_nu_p_rho = sView.GetEntry(sIndex+15,site); + GeneralStencilEntry* x_p_nu_p_rho = sView.GetEntry(sIndex+16,site); + return {x_p_nu_m_rho , x_p_nu , x_m_rho , + x , x_p_rho , x_m_nu_m_rho , + x_m_nu , x_m_nu_p_rho , x_p_mu_p_nu_m_rho, + x_p_mu_p_nu , x_p_mu_m_rho , x_p_mu , + x_p_mu_p_rho , x_p_mu_m_nu_m_rho, x_p_mu_m_nu , + x_p_mu_m_nu_p_rho, x_p_nu_p_rho}; +} + + +/*! @brief Retrieve 7-link stencil entries. */ +template accelerator_inline +std::tuple +get7StaplePoints(acc sView, int sIndex, int site) { + GeneralStencilEntry* x = sView.GetEntry(sIndex+0 ,site); + GeneralStencilEntry* x_p_mu = sView.GetEntry(sIndex+1 ,site); + GeneralStencilEntry* x_p_mu_p_nu = sView.GetEntry(sIndex+2 ,site); + GeneralStencilEntry* x_p_mu_p_nu_p_rho = sView.GetEntry(sIndex+3 ,site); + GeneralStencilEntry* x_p_mu_p_nu_p_rho_m_sig = sView.GetEntry(sIndex+4 ,site); + GeneralStencilEntry* x_p_mu_p_nu_m_rho = sView.GetEntry(sIndex+5 ,site); + GeneralStencilEntry* x_p_mu_p_nu_m_rho_m_sig = sView.GetEntry(sIndex+6 ,site); + GeneralStencilEntry* x_p_mu_p_nu_m_sig = sView.GetEntry(sIndex+7 ,site); + GeneralStencilEntry* x_p_mu_m_nu = sView.GetEntry(sIndex+8 ,site); + GeneralStencilEntry* x_p_mu_m_nu_p_rho = sView.GetEntry(sIndex+9 ,site); + GeneralStencilEntry* x_p_mu_m_nu_p_rho_p_sig = sView.GetEntry(sIndex+10,site); + GeneralStencilEntry* x_p_mu_m_nu_p_rho_m_sig = sView.GetEntry(sIndex+11,site); + GeneralStencilEntry* x_p_mu_m_nu_m_rho = sView.GetEntry(sIndex+12,site); + GeneralStencilEntry* x_p_mu_m_nu_m_rho_p_sig = sView.GetEntry(sIndex+13,site); + GeneralStencilEntry* x_p_mu_m_nu_m_rho_m_sig = sView.GetEntry(sIndex+14,site); + GeneralStencilEntry* x_p_mu_m_nu_p_sig = sView.GetEntry(sIndex+15,site); + GeneralStencilEntry* x_p_mu_m_nu_m_sig = sView.GetEntry(sIndex+16,site); + GeneralStencilEntry* x_p_mu_p_rho = sView.GetEntry(sIndex+17,site); + GeneralStencilEntry* x_p_mu_p_rho_p_sig = sView.GetEntry(sIndex+18,site); + GeneralStencilEntry* x_p_mu_p_rho_m_sig = sView.GetEntry(sIndex+19,site); + GeneralStencilEntry* x_p_mu_m_rho = sView.GetEntry(sIndex+20,site); + GeneralStencilEntry* x_p_mu_m_rho_p_sig = sView.GetEntry(sIndex+21,site); + GeneralStencilEntry* x_p_mu_m_rho_m_sig = sView.GetEntry(sIndex+22,site); + GeneralStencilEntry* x_p_mu_p_sig = sView.GetEntry(sIndex+23,site); + GeneralStencilEntry* x_p_mu_m_sig = sView.GetEntry(sIndex+24,site); + GeneralStencilEntry* x_p_nu = sView.GetEntry(sIndex+25,site); + GeneralStencilEntry* x_p_nu_p_rho = sView.GetEntry(sIndex+26,site); + GeneralStencilEntry* x_p_nu_p_rho_p_sig = sView.GetEntry(sIndex+27,site); + GeneralStencilEntry* x_p_nu_p_rho_m_sig = sView.GetEntry(sIndex+28,site); + GeneralStencilEntry* x_p_nu_m_rho = sView.GetEntry(sIndex+29,site); + GeneralStencilEntry* x_p_nu_m_rho_p_sig = sView.GetEntry(sIndex+30,site); + GeneralStencilEntry* x_p_nu_m_rho_m_sig = sView.GetEntry(sIndex+31,site); + GeneralStencilEntry* x_p_rho = sView.GetEntry(sIndex+32,site); + GeneralStencilEntry* x_p_rho_m_nu = sView.GetEntry(sIndex+33,site); + GeneralStencilEntry* x_p_rho_p_sig = sView.GetEntry(sIndex+34,site); + GeneralStencilEntry* x_p_rho_m_sig = sView.GetEntry(sIndex+35,site); + GeneralStencilEntry* x_m_nu = sView.GetEntry(sIndex+36,site); + GeneralStencilEntry* x_m_nu_p_rho = sView.GetEntry(sIndex+37,site); + GeneralStencilEntry* x_m_nu_p_rho_p_sig = sView.GetEntry(sIndex+38,site); + GeneralStencilEntry* x_m_nu_p_rho_m_sig = sView.GetEntry(sIndex+39,site); + GeneralStencilEntry* x_m_nu_m_rho = sView.GetEntry(sIndex+40,site); + GeneralStencilEntry* x_m_nu_m_rho_p_sig = sView.GetEntry(sIndex+41,site); + GeneralStencilEntry* x_m_nu_m_rho_m_sig = sView.GetEntry(sIndex+42,site); + GeneralStencilEntry* x_m_rho = sView.GetEntry(sIndex+43,site); + GeneralStencilEntry* x_m_rho_p_sig = sView.GetEntry(sIndex+44,site); + GeneralStencilEntry* x_m_rho_m_sig = sView.GetEntry(sIndex+45,site); + return {x , x_p_mu , x_p_mu_p_nu , x_p_mu_p_nu_p_rho , + x_p_mu_p_nu_p_rho_m_sig, x_p_mu_p_nu_m_rho , x_p_mu_p_nu_m_rho_m_sig, x_p_mu_p_nu_m_sig , + x_p_mu_m_nu , x_p_mu_m_nu_p_rho , x_p_mu_m_nu_p_rho_p_sig, x_p_mu_m_nu_p_rho_m_sig, + x_p_mu_m_nu_m_rho , x_p_mu_m_nu_m_rho_p_sig, x_p_mu_m_nu_m_rho_m_sig, x_p_mu_m_nu_p_sig , + x_p_mu_m_nu_m_sig , x_p_mu_p_rho , x_p_mu_p_rho_p_sig , x_p_mu_p_rho_m_sig , + x_p_mu_m_rho , x_p_mu_m_rho_p_sig , x_p_mu_m_rho_m_sig , x_p_mu_p_sig , + x_p_mu_m_sig , x_p_nu , x_p_nu_p_rho , x_p_nu_p_rho_p_sig , + x_p_nu_p_rho_m_sig , x_p_nu_m_rho , x_p_nu_m_rho_p_sig , x_p_nu_m_rho_m_sig , + x_p_rho , x_p_rho_m_nu , x_p_rho_p_sig , x_p_rho_m_sig , + x_m_nu , x_m_nu_p_rho , x_m_nu_p_rho_p_sig , x_m_nu_p_rho_m_sig , + x_m_nu_m_rho , x_m_nu_m_rho_p_sig , x_m_nu_m_rho_m_sig , x_m_rho , + x_m_rho_p_sig , x_m_rho_m_sig}; +} + + +/*! @brief Allows for ASQTAD-like smearings. */ template class Smear_HISQ : public Gimpl { +public: -private: GridCartesian* const _grid; - HISQSmearingParameters _linkTreatment; - -public: + // Sort out the Gimpl. This handles BCs and part of the precision. INHERIT_GIMPL_TYPES(Gimpl); - typedef typename Gimpl::GaugeField GF; - typedef typename Gimpl::GaugeLinkField LF; - typedef typename Gimpl::ComplexField CF; + typedef typename Gimpl::GaugeField GF; + typedef typename Gimpl::GaugeLinkField LF; + typedef typename Gimpl::ComplexField CF; + typedef typename Gimpl::Scalar ComplexScalar; + typedef decltype(real(ComplexScalar())) RealScalar; + typedef iColourMatrix ComplexColourMatrix; + + RealScalar _Scut; // Cutoff for U(3) projection eigenvalues, set at initialization + int _HaloDepth=1; + + SmearingParameters _linkTreatment; + + void initialize() { + if (sizeof(RealScalar)==4) { + _Scut=1e-5; // Maybe should be higher? e.g. 1e-4 + } else if (sizeof(RealScalar)==8) { + _Scut=1e-8; + } else { + Grid_error("HISQ smearing only implemented for single and double"); + } + assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3"); + assert(Nd == 4 && "HISQ smearing only defined for Nd==4"); + } - // Don't allow default values here. - Smear_HISQ(GridCartesian* grid, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) + Smear_HISQ(GridCartesian* grid, RealScalar c1, RealScalar cnaik, RealScalar c3, RealScalar c5, RealScalar c7, RealScalar clp) : _grid(grid), _linkTreatment(c1,cnaik,c3,c5,c7,clp) { - assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3"); - assert(Nd == 4 && "HISQ smearing only defined for Nd==4"); + initialize(); } - // Allow to pass a pointer to a C-style, double array for MILC convenience + // Allow to pass a pointer to a C-style array for MILC convenience Smear_HISQ(GridCartesian* grid, double* coeff) : _grid(grid), _linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { - assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3"); - assert(Nd == 4 && "HISQ smearing only defined for Nd==4"); + initialize(); + } + Smear_HISQ(GridCartesian* grid, float* coeff) + : _grid(grid), + _linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { + initialize(); } ~Smear_HISQ() {} - // Intent: OUT--u_smr, u_naik - // IN--u_thin + + // Intent: OUT--U_3link (sum of left and right 3-staples attached to U) + // U_fat (accmulates the fat smearing) + // IN--U (thin links) + // gStencil (3-link stencil) + // mu + void threeLinkStaple(GF& U_fat, GF& U_3link, GF& U, GeneralLocalStencil gStencil, int mu) const { + + SmearingParameters lt = this->_linkTreatment; + autoView(U_v , U , AcceleratorRead); + autoView(U_fat_v , U_fat , AcceleratorWrite); + autoView(U_3link_v , U_3link , AcceleratorWrite); + auto gStencil_v = gStencil.View(AcceleratorRead); + typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; + int Nsites = U_v.size(); + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix res; + for(int nu=0;nu_offset](nu), res); + + // The index operator (x) returns the coalesced read on GPU. The view [] index returns + // a reference to the vector object. The [x](mu) returns a reference to the densely + // packed (contiguous in memory) mu-th element of the vector object. + setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_3*res); + } + }) + return; + } + + + // Intent: OUT--U_5link (sum of 5-staples attached to U) + // U_fat (accmulates the fat smearing) + // IN--U (thin links) + // U_3link (sum of left and right 3-staples attached to U) + // gStencil (3-link stencil) + // mu + // updateFatLinks (in the force, you only want U_5link_v) + void fiveLinkStaple(GF& U_fat, GF& U_5linkA, GF& U_5linkB, GF& U_3link, + GF& U, GeneralLocalStencil gStencil, int mu) const { + + SmearingParameters lt = this->_linkTreatment; + autoView(U_v , U , AcceleratorRead); + autoView(U_fat_v , U_fat , AcceleratorWrite); + autoView(U_3link_v , U_3link , AcceleratorWrite); + autoView(U_5linkA_v, U_5linkA, AcceleratorWrite); + autoView(U_5linkB_v, U_5linkB, AcceleratorWrite); + auto gStencil_v = gStencil.View(AcceleratorRead); + typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; + int Nsites = U_v.size(); + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix res; + int sigmaIndex = 0; + for(int nu=0;nu_offset](rho), res); + else + setLink(U_5linkB_v[x->_offset](rho), res); + + setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_5*res); + sigmaIndex++; + } + } + }) + return; + } + + + // Intent: OUT--U_fat (accmulates the fat smearing) + // IN--U (thin links) + // U_5link (sum of 5-staples attached to U) + // gStencil (3-link stencil) + // mu + void sevenLinkStaple(GF& U_fat, GF& U_5linkA, GF& U_5linkB, GF& U, GeneralLocalStencil gStencil, int mu) const { + + SmearingParameters lt = this->_linkTreatment; + autoView(U_v , U , AcceleratorRead); + autoView(U_fat_v , U_fat , AcceleratorWrite); + autoView(U_5linkA_v, U_5linkA, AcceleratorWrite); + autoView(U_5linkB_v, U_5linkB, AcceleratorWrite); + auto gStencil_v = gStencil.View(AcceleratorRead); + typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; + int Nsites = U_v.size(); + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix res; + int sigmaIndex = 0; + for(int nu=0;nu_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_7*res); + sigmaIndex++; + } + } + }) + return; + } + + + // Intent: OUT--u_smr (smeared links) + // u_naik (Naik links) + // IN--u_thin (thin links) void smear(GF& u_smr, GF& u_naik, GF& u_thin) const { - HISQSmearingParameters lt = this->_linkTreatment; + SmearingParameters lt = this->_linkTreatment; auto grid = this->_grid; // Create a padded cell of extra padding depth=1 and fill the padding. - int depth = 1; - PaddedCell Ghost(depth,grid); + PaddedCell Ghost(_HaloDepth,grid); GF Ughost = Ghost.Exchange(u_thin); - // This is where auxiliary N-link fields and the final smear will be stored. - GF Ughost_fat(Ughost.Grid()); - GF Ughost_3link(Ughost.Grid()); - GF Ughost_5linkA(Ughost.Grid()); - GF Ughost_5linkB(Ughost.Grid()); + // This is where auxiliary N-link fields and the final smear will be stored. As + // implemented, this uses about 25% more memory than necessary. + GF U_fat(Ughost.Grid()); + GF U_3link(Ughost.Grid()); + GF U_5linkA(Ughost.Grid()); + GF U_5linkB(Ughost.Grid()); - // mu-nu plane stencil. We allow mu==nu to make indexing the stencil easier, - // but these entries will not be used. - std::vector shifts; - for(int mu=0;mu shifts = createHISQStencil("3STAPLE"); // A GeneralLocalStencil has two indices: a site and stencil index GeneralLocalStencil gStencil(Ughost.Grid(),shifts); - // This is where contributions from the smearing get added together - Ughost_fat=Zero(); - - // This loop handles 3-, 5-, and 7-link constructs, minus Lepage and Naik. + // Store sum of 3-, 5-, 7-link contributions in U_fat + U_fat=Zero(); for(int mu=0;mu_permute,Nd)) U3matrix; - - int Nsites = U_v.size(); - auto gStencil_v = gStencil.View(AcceleratorRead); - - accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs - stencilElement SE0, SE1, SE2, SE3, SE4, SE5; - U3matrix U0, U1, U2, U3, U4, U5, W; - for(int nu=0;nu_offset; - SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; - SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; - SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; - SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; - SE5 = gStencil_v.GetEntry(s+5,site); int x_m_mu = SE5->_offset; - - // When you're deciding whether to take an adjoint, the question is: how is the - // stored link oriented compared to the one you want? If I imagine myself travelling - // with the to-be-updated link, I have two possible, alternative 3-link paths I can - // take, one starting by going to the left, the other starting by going to the right. - U0 = coalescedReadGeneralPermute(U_v[x_p_mu ](nu),SE0->_permute,Nd); - U1 = coalescedReadGeneralPermute(U_v[x_p_nu ](mu),SE1->_permute,Nd); - U2 = coalescedReadGeneralPermute(U_v[x ](nu),SE2->_permute,Nd); - U3 = coalescedReadGeneralPermute(U_v[x_p_mu_m_nu](nu),SE3->_permute,Nd); - U4 = coalescedReadGeneralPermute(U_v[x_m_nu ](mu),SE4->_permute,Nd); - U5 = coalescedReadGeneralPermute(U_v[x_m_nu ](nu),SE4->_permute,Nd); - - // "left" "right" - W = U2*U1*adj(U0) + adj(U5)*U4*U3; - - // Save 3-link construct for later and add to smeared field. - coalescedWrite(U_3link_v[x](nu), W); - - // The index operator (x) returns the coalesced read on GPU. The view [] index returns - // a reference to the vector object. The [x](mu) returns a reference to the densely - // packed (contiguous in memory) mu-th element of the vector object. On CPU, - // coalescedRead/Write is the identity mapping assigning vector object to vector object. - // But on GPU it's non-trivial and maps scalar object to vector object and vice versa. - coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W); - } - }) - - accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link - stencilElement SE0, SE1, SE2, SE3, SE4, SE5; - U3matrix U0, U1, U2, U3, U4, U5, W; - int sigmaIndex = 0; - for(int nu=0;nu_offset; - SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; - SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; - SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; - SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; - - U0 = coalescedReadGeneralPermute( U_v[x_p_mu ](nu ),SE0->_permute,Nd); - U1 = coalescedReadGeneralPermute(U_3link_v[x_p_nu ](rho),SE1->_permute,Nd); - U2 = coalescedReadGeneralPermute( U_v[x ](nu ),SE2->_permute,Nd); - U3 = coalescedReadGeneralPermute( U_v[x_p_mu_m_nu](nu ),SE3->_permute,Nd); - U4 = coalescedReadGeneralPermute(U_3link_v[x_m_nu ](rho),SE4->_permute,Nd); - U5 = coalescedReadGeneralPermute( U_v[x_m_nu ](nu ),SE4->_permute,Nd); - - W = U2*U1*adj(U0) + adj(U5)*U4*U3; - - if(sigmaIndex<3) { - coalescedWrite(U_5linkA_v[x](rho), W); - } else { - coalescedWrite(U_5linkB_v[x](rho), W); - } - - coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_5*W); - sigmaIndex++; - } - } - }) - - accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link - stencilElement SE0, SE1, SE2, SE3, SE4, SE5; - U3matrix U0, U1, U2, U3, U4, U5, W; - int sigmaIndex = 0; - for(int nu=0;nu_offset; - SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; - SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; - SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; - SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; - - U0 = coalescedReadGeneralPermute(U_v[x_p_mu](nu),SE0->_permute,Nd); - if(sigmaIndex<3) { - U1 = coalescedReadGeneralPermute(U_5linkB_v[x_p_nu](rho),SE1->_permute,Nd); - } else { - U1 = coalescedReadGeneralPermute(U_5linkA_v[x_p_nu](rho),SE1->_permute,Nd); - } - U2 = coalescedReadGeneralPermute(U_v[x](nu),SE2->_permute,Nd); - U3 = coalescedReadGeneralPermute(U_v[x_p_mu_m_nu](nu),SE3->_permute,Nd); - if(sigmaIndex<3) { - U4 = coalescedReadGeneralPermute(U_5linkB_v[x_m_nu](rho),SE4->_permute,Nd); - } else { - U4 = coalescedReadGeneralPermute(U_5linkA_v[x_m_nu](rho),SE4->_permute,Nd); - } - U5 = coalescedReadGeneralPermute(U_v[x_m_nu](nu),SE4->_permute,Nd); - - W = U2*U1*adj(U0) + adj(U5)*U4*U3; - - coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_7*W); - sigmaIndex++; - } - } - }) - - } // end mu loop - - // c1, c3, c5, c7 construct contributions - u_smr = Ghost.Extract(Ughost_fat) + lt.c_1*u_thin; + // Add 1-link contribution + u_smr = Ghost.Extract(U_fat) + lt.c_1*u_thin; // Load up U and V std::vectors to access thin and smeared links. std::vector U(Nd, grid); std::vector V(Nd, grid); std::vector Vnaik(Nd, grid); for (int mu = 0; mu < Nd; mu++) { - U[mu] = PeekIndex(u_thin, mu); - V[mu] = PeekIndex(u_smr, mu); + U[mu] = PeekIndex(u_thin, mu); + V[mu] = PeekIndex(u_smr, mu); + Vnaik[mu] = Zero(); } for(int mu=0;mu_grid; + // Open up the views + autoView(uproj_v, u_proj, AcceleratorWrite); + autoView(umu_v , u_mu , AcceleratorRead); - LF V(grid), Q(grid), sqrtQinv(grid), id_3(grid), diff(grid); - CF c0(grid), c1(grid), c2(grid), g0(grid), g1(grid), g2(grid), S(grid), R(grid), theta(grid), - u(grid), v(grid), w(grid), den(grid), f0(grid), f1(grid), f2(grid); + // Make sure everyone is using the same Grid + conformable(u_proj,u_mu); // Follow MILC 10.1103/PhysRevD.82.074501, eqs (B2-B3) and (C1-C8) + accelerator_for(ss,umu_v.size(),Simd::Nsimd(),{ +#ifdef GRID_SIMT + { int blane=acceleratorSIMTlane(Simd::Nsimd());// +#else + for(int blane=0;blane +class Force_HISQ : public Gimpl { +public: + + GridCartesian* const _grid; + GridRedBlackCartesian* _gridRB; + + // Sort out the Gimpl. This handles BCs and part of the precision. + INHERIT_GIMPL_TYPES(Gimpl); + typedef typename Gimpl::FermionField FF; + typedef typename Gimpl::GaugeField GF; + typedef typename Gimpl::GaugeLinkField LF; + typedef typename Gimpl::ComplexField CF; + typedef typename Gimpl::Scalar ComplexScalar; + typedef decltype(real(ComplexScalar())) RealScalar; + typedef iColourMatrix ComplexColourMatrix; + + RealScalar _Scut=-1; // Cutoff for U(3) projection eigenvalues, set at initialization + int _HaloDepth=1; // Depth of padded cell + + HISQParameters _linkParams; + HISQReunitSVDParameters _reunitParams; + GF _Umu, _Vmu, _Wmu; + + void initialize() { + if (sizeof(RealScalar)==4) { + _Scut=1e-5; // Maybe should be higher? e.g. 1e-4 + } else if (sizeof(RealScalar)==8) { + _Scut=1e-8; + } else { + Grid_error("HISQ force only implemented for single and double"); + } + _gridRB = SpaceTimeGrid::makeFourDimRedBlackGrid(_grid); + assert(Nc == 3 && "HISQ force currently implemented only for Nc==3"); + assert(Nd == 4 && "HISQ force only defined for Nd==4"); + } + + Force_HISQ(GridCartesian* grid, HISQParameters linkParams, GF Wmu, GF Vmu, GF Umu, + HISQReunitSVDParameters reunitParams) + : _grid(grid), + _linkParams(linkParams), + _Wmu(Wmu), + _Vmu(Vmu), + _Umu(Umu), + _reunitParams(reunitParams) { + initialize(); + } + + ~Force_HISQ() {} + + + // Intent: OUT--u_deriv (dW/dV slotted into force) + // IN--u_mu (fat links), + // u_force (slot derivative into this force), + // delta (force cutoff) + // Follow MILC 10.1103/PhysRevD.82.074501 + void projU3Deriv(GF& u_deriv, GF& u_mu, GF& u_force, RealScalar const delta=5e-5) { + + conformable(u_force,u_mu); + conformable(u_deriv,u_mu); + + autoView(uderiv_v, u_deriv, AcceleratorWrite); + autoView(umu_v , u_mu , AcceleratorRead); + autoView(uforce_v, u_force, AcceleratorRead); + + // Follow MILC 10.1103/PhysRevD.82.074501, eqs (B2-B3) and (C1-C8) + accelerator_for(ss,umu_v.size(),Simd::Nsimd(),{ +#ifdef GRID_SIMT + { int blane=acceleratorSIMTlane(Simd::Nsimd());// +#else + for(int blane=0;blane 1e-5) { SVD } + + RealScalar u = sqrt(g0) + sqrt(g1) + sqrt(g2); + RealScalar v = sqrt(g0*g1) + sqrt(g0*g2) + sqrt(g1*g2); + RealScalar w = sqrt(g0*g1*g2); + RealScalar den = w*(u*v-w); + RealScalar f0 = (-w*(u*u+v)+u*v*v)/den; + RealScalar f1 = (-w-u*u*u+2.*u*v)/den; + RealScalar f2 = u/den; + + auto Qinvsq = f0 + f1*Q + f2*Q*Q; + + force() = forcemu(mu); + auto forcedag = adj(force); + + RealScalar u2, u3, u4, u5, u6, u7, u8, v2 ,v3, v4, v5, v6, w2, w3, w4, w5; + + u2 = u*u; u3 = u2*u; u4 = u3*u; u5 = u4*u; u6 = u5*u; u7 = u6*u; u8 = u7*u; + v2 = v*v; v3 = v2*v; v4 = v3*v; v5 = v4*v; v6 = v5*v; + w2 = w*w; w3 = w2*w; w4 = w3*w; w5 = w4*w; + + // eq (C10) + auto d = 2*w3*(u*v-w)*(u*v-w)*(u*v-w); + + // eq (C11) + auto C00 = ( -w3*u6 + 3*v*w3*u4 + 3*v4*w*u4 - v6*u3 - 4*w4*u3 - 12*v3*w2*u3 + 16*v2*w3*u2 + + 3*v5*w*u2 - 8*v*w4*u - 3*v4*w2*u + w5 + v3*w3 )/d; + auto C01 = ( -w2*u7 - v2*w*u6 + v4*u5 + 6*v*w2*u5 - 5*w3*u4 - v3*w*u4 - 2*v5*u3 - 6*v2*w2*u3 + + 10*v*w3*u2 + 6*v4*w*u2 - 3*w4*u - 6*v3*w2*u + 2*v2*w3 )/d; + auto C02 = ( w2*u5 + v2*w*u4 - v4*u3 - 4*v*w2*u3 + 4*w3*u2 +3*v3*w*u2 - 3*v2*w2*u + v*w3 )/d; + auto C11 = ( -w*u8 - v2*u7 + 7*v*w*u6 + 4*v3*u5 - 5*w2*u5 - 16*v2*w*u4 - 4*v4*u3 + 16*v*w2*u3 + - 3*w3*u2 + 12*v3*w*u2 - 12*v2*w2*u + 3*v*w3 )/d; + auto C12 = ( w*u6 + v2*u5 - 5*v*w*u4 - 2*v3*u3 + 4*w2*u3 + 6*v2*w*u2 - 6*v*w2*u + w3 )/d; + auto C22 = ( -w*u4 - v2*u3 + 3*v*w*u2 - 3*w2*u )/d; + + // These are all used in the loop over color entries, and we want to avoid recomputing + // these products, which should be broadcast to all sites, 3*3*3*3=81 times. + auto Vdag = adj(V); + auto VVdag = V*Vdag; + auto VQ = V*Q; + auto VQ2 = VQ*Q; + auto VQVdag = VQ*Vdag; + auto QVdag = Q*Vdag; + auto Q2Vdag = Q*QVdag; + + // eqs (C17-C19) + auto PVdag = ( C00 + C01*Q + C02*Q*Q )*Vdag; + auto RVdag = ( C01 + C11*Q + C12*Q*Q )*Vdag; + auto SVdag = ( C02 + C12*Q + C22*Q*Q )*Vdag; + + // eqs (C20) and (C21) + ComplexColourMatrix res = Zero(); + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) { + + ComplexScalar deriv = 0.; // dWij/dVkl + + if (k == i) deriv += Qinvsq()()(l,j); + if (l == j) deriv += f1*VVdag()()(i,k)+f2*VQVdag()()(i,k); + + deriv += f2*VVdag()()(i,k)*Q()()(l,j) + V()()(i,j)*PVdag()()(l,k) + + VQ()()(i,j)*RVdag()()(l,k) + VQ2()()(i,j)*SVdag()()(l,k); + + res()()(l,k) = res()()(l,k) + deriv*force()()(j,i); + + // dWij^+/dVkl + deriv = (f1*Vdag()()(i,k)+f2*QVdag()()(i,k))*Vdag()()(l,j) + + f2*Vdag()()(i,k)*QVdag()()(l,j) + Vdag()()(i,j)*PVdag()()(l,k) + + QVdag()()(i,j)*RVdag()()(l,k)+Q2Vdag()()(i,j)*SVdag()()(l,k); + + res()()(l,k) = res()()(l,k) + deriv*forcedag()()(j,i); + } + + insertLane(blane,uderiv_v[ss](mu),res()); + } + } + }); + }; + + + // vecx (contains |X> and |Y>) + // l (rat approx and Naik index) + // sep (separation between |X> and |Y>) + GF outerProductHISQ(std::vector& vecx, std::vector vecdt, std::vector n_orders_naik, int n_naiks, int sep) { + + auto grid = this->_grid; + auto gridRB = this->_gridRB; + + GF XY(grid), XY_l(grid); + FF X(grid), Y(grid), RB(gridRB); + LF XYnu(grid), YXnu(grid); + + XY = Zero(); + + // These four lines control the loop over rational approximation contributions. As explained in force(), + // l indexes over both Naik epsilon and rational approximation order. + int l = 0; + for (int inaik = 0; inaik < n_naiks; inaik++) { + int rat_order = n_orders_naik[inaik]; + for (int i=0; i(XY_l,(YXnu-XYnu),nu); + } + XY += vecdt[l]*XY_l; + l++; + } + } + return XY; + } + + + // Intent: OUT--Fghost (accumulates 3-link derivative contribution) + // IN--Ughost (thin links) + // XYCghost (outer product) + // gStencil3 (3-link stencil) + // c3 + // mu + void threeLinkDeriv(GF& Fghost, GF& Ughost, GF& XYCghost, GeneralLocalStencil gStencil3, Real c3, int mu) const { + + autoView(U_v , Ughost , AcceleratorRead); + autoView(XY_v, XYCghost, AcceleratorRead); + autoView(F_v , Fghost , AcceleratorWrite); + int Nsites = U_v.size(); + auto gStencil3_v = gStencil3.View(AcceleratorRead); + typedef decltype(getLink(U_v,gStencil3.GetEntry(0,0),0)) U3matrix; + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix res; + for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + c3*adj(res)); + } + }) + } + + + // Intent: OUT--Fghost (accumulates 5-link derivative contribution) + // IN--Ughost (thin links) + // XYCghost (outer product) + // gStencil5 (5-link stencil) + // c5 + // mu + template + void fiveLinkDeriv(GF& Fghost, GF& Ughost, GF& XYCghost, GeneralLocalStencil gStencil5, Real c5, int mu) const { + + autoView(U_v , Ughost , AcceleratorRead); + autoView(XY_v, XYCghost, AcceleratorRead); + autoView(F_v , Fghost , AcceleratorWrite); + int Nsites = U_v.size(); + auto gStencil5_v = gStencil5.View(AcceleratorRead); + typedef decltype(getLink(U_v,gStencil5.GetEntry(0,0),0)) U3matrix; + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix res; + for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + c5*res); + } + } + }) + } + + + template + void sevenLinkDeriv(GF& Fghost, GF& Ughost, GF& XYCghost, GeneralLocalStencil gStencil7, Real c7, int mu) const { + + autoView(U_v , Ughost , AcceleratorRead); + autoView(XY_v, XYCghost, AcceleratorRead); + autoView(F_v , Fghost , AcceleratorWrite); + int Nsites = U_v.size(); + auto gStencil7_v = gStencil7.View(AcceleratorRead); + typedef decltype(getLink(U_v,gStencil7.GetEntry(0,0),0)) U3matrix; + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix res, U1; + for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + c7*res); + } + } + } + }) + + } + + + GF naikLinkDeriv(std::vector vecdt, std::vector& vecx, std::vector n_orders_naik, int n_naiks, Real cnaik) { + auto grid = this->_grid; + GF temp(grid); + std::vector Wv(Nd, grid); + std::vector XYdag(Nd, grid); + std::vector ddW(Nd, grid); + temp = outerProductHISQ(vecx, vecdt, n_orders_naik, n_naiks, 3); + for (int mu = 0; mu < Nd; mu++) { + Wv[mu] = PeekIndex(_Wmu, mu); + XYdag[mu] = PeekIndex(temp, mu); + ddW[mu] = Zero(); + } + for (int mu = 0; mu < Nd; mu++) { + ddW[mu] = Cshift( Wv[mu],mu, 1)*Cshift( Wv[mu],mu, 2)* XYdag[mu] + + Cshift( Wv[mu],mu, 1)*Cshift(XYdag[mu],mu,-1)*Cshift( Wv[mu],mu,-1) + + Cshift(XYdag[mu],mu,-2)*Cshift( Wv[mu],mu,-2)*Cshift( Wv[mu],mu,-1); + } + for (int mu = 0; mu < Nd; mu++) { + PokeIndex(temp, ddW[mu], mu); + } + return cnaik*temp; + } + + + GF lepageLinkDeriv(GF& XY, Real clp) { + + auto grid = this->_grid; + GF temp(grid); + std::vector Wv(Nd, grid); + std::vector XYdag(Nd, grid); + std::vector ddW(Nd, grid); + + for (int mu = 0; mu < Nd; mu++) { + Wv[mu] = PeekIndex(_Wmu, mu); + ddW[mu] = Zero(); + XYdag[mu] = adj(PeekIndex(XY, mu)); + } + + for (int mu = 0; mu < Nd; mu++) + for (int nu = 0; nu < Nd; nu++) { + if(mu==nu) continue; + + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftIdentityBackward(XYdag[nu],nu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftIdentityForward(XYdag[nu],nu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftBackward(XYdag[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(XYdag[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftForward(XYdag[mu],mu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[nu],nu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftForward(XYdag[mu],mu, + Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftIdentityForward(Wv[nu],nu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftForward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftIdentityBackward(Wv[nu],nu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftBackward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftIdentityForward(Wv[nu],nu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + } + for (int mu = 0; mu < Nd; mu++) { - V = PeekIndex(u_mu, mu); - Q = adj(V)*V; - c0 = real(trace(Q)); - c1 = (1/2.)*real(trace(Q*Q)); - c2 = (1/3.)*real(trace(Q*Q*Q)); - S = (1/3.)*c1-(1/18.)*c0*c0; - if (norm2(S)<1e-28) { - g0 = (1/3.)*c0; g1 = g0; g2 = g1; - } else { - R = (1/2.)*c2-(1/3. )*c0*c1+(1/27.)*c0*c0*c0; - theta = acos(R*pow(S,-1.5)); - g0 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta-2*M_PI/3.); - g1 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta ); - g2 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta+2*M_PI/3.); + PokeIndex(temp, ddW[mu], mu); + } + + return clp*temp; + } + + + // We are calculating the force using the rational approximation. The goal is that we can approximate + // (Mdag M)^(-nf/4) = alpha_0 + sum_l alpha_l/(M^dag M + beta_l). Hence the index l runs over the + // introduce a different "Naik epsilon" for each M. Hence, we can think of the total application + // of this operator as having an index inaik, running over the different Naik epsilons; for each inaik + // there is a possibly different order_inaik, then the operator has an index l running up to order_inaik. + // All terms with inaik=0 correspond to epsilon_Naik = 0. + // + // Intent: OUT--u_force + // IN--vecdt: Monte Carlo separation vector times alpha_{inaik,0}. + // vecx: A vector of fermion fields coming from the MILC code. It is organized so that + // |X_l> = (Mdag M + beta_l)^-1 |Phi> is on even sites, |Y_l>=D|X_l> is on odd sites. + // All the |X_l> for i=0 come first in memory, followed by all the |X_l> with + // i=1 in memory, and so on. + // n_orders_naik: Indexed by unique naik epsilon. + void force(GF& u_force, std::vector vecdt, std::vector& vecx, std::vector n_orders_naik) { + + HISQParameters hp = this->_linkParams; + auto grid = this->_grid; + + GF XY(grid); // outer product field + GF temp(grid); // used to accumulate N-link force contributions and projU3Deriv + + u_force = Zero(); + + if(hp.asqtad_cnaik!=0) u_force += naikLinkDeriv(vecdt, vecx, n_orders_naik, hp.n_naiks, hp.asqtad_cnaik); + + XY = outerProductHISQ(vecx, vecdt, n_orders_naik, hp.n_naiks, 1); + u_force += hp.asqtad_c1*XY; + + if(hp.asqtad_clp!=0) u_force += lepageLinkDeriv(XY, hp.asqtad_clp); + + + // ---------------------------------- N-LINK DERIVATIVES (ASQTAD) + + PaddedCell Ghost(_HaloDepth,grid); + + temp = Zero(); + + GF UWghost = Ghost.Exchange(_Wmu); // Plays role of U or W + GF XYCghost = Ghost.Exchange(XY); // Plays role of XY or chain rule = dW/dV*dX/dW + GF Fghost = Ghost.Exchange(temp); + + std::vector shifts3 = createHISQStencil("3STAPLE"); + std::vector shifts5 = createHISQStencil("5STAPLE"); + std::vector shifts7 = createHISQStencil("7STAPLE"); + GeneralLocalStencil gStencil3(UWghost.Grid(),shifts3); + GeneralLocalStencil gStencil5(UWghost.Grid(),shifts5); + GeneralLocalStencil gStencil7(UWghost.Grid(),shifts7); + + for(int mu=0;mu( Fghost, UWghost, XYCghost, gStencil5, hp.asqtad_c5, mu); + fiveLinkDeriv<1>( Fghost, UWghost, XYCghost, gStencil5, hp.asqtad_c5, mu); } -// if (fabs(Q.determinant()/(g0*g1*g2)-1.0) > 1e-5) { SVD } - u = sqrt(g0) + sqrt(g1) + sqrt(g2); - v = sqrt(g0*g1) + sqrt(g0*g2) + sqrt(g1*g2); - w = sqrt(g0*g1*g2); - den = w*(u*v-w); - f0 = (-w*(u*u+v)+u*v*v)/den; - f1 = (-w-u*u*u+2.*u*v)/den; - f2 = u/den; - id_3 = 1.; - - sqrtQinv = f0*id_3 + f1*Q + f2*Q*Q; - - PokeIndex(u_proj, V*sqrtQinv, mu); + if(hp.asqtad_c7!=0) { + sevenLinkDeriv<0>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<1>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<2>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<3>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<4>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<5>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<6>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<7>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<8>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<9>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<10>(Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<11>(Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<12>(Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<13>(Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + } } - }; + u_force += Ghost.Extract(Fghost); + + // ------------------------------------------- U3 PROJ DERIVATIVE + + projU3Deriv(temp, _Vmu, u_force, 5e-5); + u_force = hp.fat7_c1*temp; + + // ------------------------------------ N-LINK DERIVATIVES (FAT7) + + UWghost = Ghost.Exchange(_Umu); + XYCghost = Ghost.Exchange(temp); + Fghost = Zero(); + + for(int mu=0;mu( Fghost, UWghost, XYCghost, gStencil5, hp.fat7_c5, mu); + fiveLinkDeriv<1>( Fghost, UWghost, XYCghost, gStencil5, hp.fat7_c5, mu); + } + if(hp.fat7_c7!=0) { + sevenLinkDeriv<0>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<1>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<2>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<3>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<4>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<5>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<6>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<7>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<8>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<9>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<10>(Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<11>(Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<12>(Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<13>(Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + } + } // end mu loop + + u_force += Ghost.Extract(Fghost); + + // Close the loop: Multiply on the left by U_mu(x) + LF force_mu(grid); + for (int mu = 0; mu < Nd; mu++) { + force_mu = PeekIndex(_Umu, mu) * PeekIndex(u_force, mu); + PokeIndex(u_force, force_mu, mu); + } + } -// void derivative(const GaugeField& Gauge) const { -// }; }; diff --git a/Grid/stencil/GeneralLocalStencil.h b/Grid/stencil/GeneralLocalStencil.h index 66d25bc4b5..c2bc9111f4 100644 --- a/Grid/stencil/GeneralLocalStencil.h +++ b/Grid/stencil/GeneralLocalStencil.h @@ -156,12 +156,11 @@ class GeneralLocalStencil : public GeneralLocalStencilView { class shiftSignal { public: enum { - BACKWARD_CONST = 16, + BACKWARD_CONST = GRID_MAX_LATTICE_DIMENSION + 2, NO_SHIFT = -1 }; }; -// TODO: put a check somewhere that BACKWARD_CONST > Nd! /*! @brief signals that you want to go backwards in direction dir */ inline int Back(const int dir) { @@ -170,6 +169,7 @@ inline int Back(const int dir) { return dir + shiftSignal::BACKWARD_CONST; } + /*! @brief shift one unit in direction dir */ template void generalShift(Coordinate& shift, int dir) { @@ -183,6 +183,7 @@ void generalShift(Coordinate& shift, int dir) { } } + /*! @brief follow a path of directions, shifting one unit in each direction */ template void generalShift(Coordinate& shift, int dir, Args... args) { @@ -198,5 +199,16 @@ void generalShift(Coordinate& shift, int dir, Args... args) { } +/*! @brief append arbitrary shift path to shifts of dimension d */ +template +void appendShift(std::vector& shifts, int dir, Args... args) { + Coordinate shift(d,0); + generalShift(shift, dir, args...); + // push_back creates an element at the end of shift and + // assigns the data in the argument to it. + shifts.push_back(shift); +} + + NAMESPACE_END(Grid); diff --git a/Makefile.am b/Makefile.am index 9addcbf5bb..656ce475ac 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,3 +1,5 @@ +# part of the GNU Automake system, which simplifies the generation of `Makefile.in` files. + # additional include paths necessary to compile the C++ library SUBDIRS = Grid benchmarks tests examples HMC diff --git a/tests/forces/Makefile.am b/tests/forces/Makefile.am index 481efd3f55..f46e5005f7 100644 --- a/tests/forces/Makefile.am +++ b/tests/forces/Makefile.am @@ -12,3 +12,4 @@ check: tests ./Test_dwf_gpforce ./Test_mobius_force ./Test_zmobius_force + ./Test_HISQ_force diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc new file mode 100644 index 0000000000..3d06c509b5 --- /dev/null +++ b/tests/forces/Test_HISQ_force.cc @@ -0,0 +1,278 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/smearing/Test_HISQ_force.cc + +Copyright (C) 2024 + +Author: D. A. Clarke + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* + @file Test_HISQ_force.cc + @brief test of the HISQ fermion force +*/ + + +#include +#include +#include +#include +using namespace Grid; + + +//#define USE_DOUBLE true +#define USE_DOUBLE true + +#if USE_DOUBLE + #define PREC double + typedef LatticeGaugeFieldD LGF; + typedef StaggeredImplD GIMPL; + typedef vComplexD COMP; +#else + #define PREC float + typedef LatticeGaugeFieldF LGF; + typedef StaggeredImplF GIMPL; + typedef vComplexF COMP; +#endif + +typedef typename GIMPL::FermionField FF; + + + + +// This is a sort of contrived test situation. The goal is to make sure the fermion force +// code is stable against future changes and get an idea how the HISQ force interface works. +bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, std::string testKind, + Real fat7_c1 , Real fat7_c3 , Real fat7_c5 , Real fat7_c7 , Real cnaik, + Real asqtad_c1, Real asqtad_c3, Real asqtad_c5, Real asqtad_c7, Real asqtad_clp) { + + LGF Vmu(&GRID), Wmu(&GRID), Nmu(&GRID), Umom(&GRID); + + // The n_orders_naik array is indexed according to the unique Naik epsilon values. By convention, + // index 0 always corresponds to zero Naik epsilon. n_orders_naik[0] is the sum of the RHMC + // molecular dynamics orders of all the pseudofermion fermions with zero Naik epsilon at the + // head of the rational function parameter file. n_orders_naik[1] is the sum of orders for the + // first nonzero Naik epsilon. The sum of all the n_orders_naik elements equals the size of the + // vecx and vecdt arrays. Elements of those arrays are grouped according to n_orders_naik, so + // the first n_orders_naik[0] elements of vecx and epsv correspond to the pseudofermions with + // nonzero Naik epsilon. That group lumps together terms for each of the zero-epsilon + // pseudofermions. + int n_naiks = 1; // Just a charm + std::array eps_naik = {0,0,0}; + std::vector n_orders_naik = {1,1}; + std::vector vecdt = {0.1,0.1}; + + HISQParameters hisq_param(n_naiks , eps_naik , + fat7_c1 , fat7_c3 , fat7_c5 , fat7_c7 , 0., + asqtad_c1, asqtad_c3, asqtad_c5, asqtad_c7, asqtad_clp, + cnaik , 0. , 0.); + HISQReunitSVDParameters hisq_reunit_svd(false, false, 1, 1, 1); + + Smear_HISQ fat7(&GRID,fat7_c1,0.,fat7_c3,fat7_c5,fat7_c7,0.); + fat7.smear(Vmu,Nmu,Umu); // Populate fat7 and Naik links Vmu and Nmu + fat7.projectU3(Wmu,Vmu); // Populate U(3) projection Wmu + Force_HISQ hisq_force(&GRID, hisq_param, Wmu, Vmu, Umu, hisq_reunit_svd); + + std::vector seeds({1,2,3,4}); + GridParallelRNG pRNG(&GRID); pRNG.SeedFixedIntegers(seeds); + + // Construct a vecx. Eventually it would be nice if this generated the correct distribution, + // but for now use Gaussian random variables as a placeholder. + std::vector vecx; + int l = 0; + for (int inaik = 0; inaik < hisq_param.n_naiks; inaik++) { + int rat_order = n_orders_naik[inaik]; + FF PHI(&GRID); + PHI = Zero(); + for (int i=0; i eps_naik = {0,0,0}; + + Real fat7_c1 = 1/8.; Real asqtad_c1 = 1.; + Real fat7_c3 = 1/16.; Real asqtad_c3 = 1/16.; + Real fat7_c5 = 1/64.; Real asqtad_c5 = 1/64.; + Real fat7_c7 = 1/384.; Real asqtad_c7 = 1/384.; + Real cnaik = -1/24.+eps_naik[0]/8; Real asqtad_clp = -1/8.; + + + LGF Vmu(&GRID), Wmu(&GRID), Nmu(&GRID); + Smear_HISQ fat7(&GRID,fat7_c1,0.,fat7_c3,fat7_c5,fat7_c7,0.); + + fat7.smear(Vmu,Nmu,Umu); // Populate fat7 and Naik links Vmu and Nmu + fat7.projectU3(Wmu,Vmu); // Populate U(3) projection Wmu + + HISQParameters hisq_param(n_naiks , eps_naik , + fat7_c1 , fat7_c3 , fat7_c5 , fat7_c7 , 0., + asqtad_c1, asqtad_c3, asqtad_c5, asqtad_c7, asqtad_clp, + cnaik , 0. , 0.); + + HISQReunitSVDParameters hisq_reunit_svd(false, false, 1, 1, 1); + + Force_HISQ hisq_force(&GRID, hisq_param, Wmu, Vmu, Umu, hisq_reunit_svd); + + Grid_log(""); + Grid_log(" TEST DERIVATIVE U3 PROJECTION"); + Grid_log(""); + + LGF diff(&GRID); + hisq_force.projU3Deriv(Uforce, Umu, Umu, 5e-5); + diff = Ucontrol-Uforce; + auto absDiff = norm2(diff)/norm2(Ucontrol); + if (absDiff < 1e-30) { + Grid_pass(" |Umu-Usmr|/|Umu| = ",absDiff); + return true; + } else { + Grid_error(" |Umu-Usmr|/|Umu| = ",absDiff); + return false; + } +// NerscIO::writeConfiguration(Uforce,"nersc.l8t4b3360.ddVU3"); +} + + +int main (int argc, char** argv) { + + // Params for the test. + int Ns = 8; + int Nt = 4; + Coordinate latt_size(Nd,0); latt_size[0]=Ns; latt_size[1]=Ns; latt_size[2]=Ns; latt_size[3]=Nt; + std::string conf_in = "nersc.l8t4b3360"; + int threads = GridThread::GetThreads(); + + // Initialize the Grid + Grid_init(&argc,&argv); + Coordinate simd_layout = GridDefaultSimd(Nd,COMP::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); + Grid_log("mpi = ",mpi_layout); + Grid_log("simd = ",simd_layout); + Grid_log("latt = ",latt_size); + Grid_log("threads = ",threads); + GridCartesian GRID(latt_size,simd_layout,mpi_layout); + + LGF Umu(&GRID), Ucontrol(&GRID); + +#if USE_DOUBLE // NerscIO is hard-coded to double. + + // Read the configuration into Umu + FieldMetaData header; + NerscIO::readConfiguration(Umu, header, conf_in); + + bool pass=true; + + // Check derivative of projection + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.ddVU3.control"); + pass *= testddUProj(GRID,Umu,Ucontrol); + + // Check the 1-link (outer product) + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.XY.control"); + pass *= testForce(GRID, Umu, Ucontrol, "1-link", + 1, 0, 0, 0, 0, + 1, 0, 0, 0, 0 ); + + // Check the 3-link + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.3link.control"); + pass *= testForce(GRID, Umu, Ucontrol, "3-link", + 1, 0, 0, 0, 0, + 1, 1, 0, 0, 0 ); + + // Check the LePage-link + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.lp.control"); + pass *= testForce(GRID, Umu, Ucontrol, "LePage", + 1, 0, 0, 0, 0, + 1, 0, 0, 0, 1 ); + + // Check the Naik-link + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.naik.control"); + pass *= testForce(GRID, Umu, Ucontrol, "Naik", + 1, 0, 0, 0, 1, + 1, 0, 0, 0, 0 ); + + // Check the 5-link + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.5link.control"); + pass *= testForce(GRID, Umu, Ucontrol, "5-link", + 1, 0, 0, 0, 0, + 1, 0, 1, 0, 0 ); + + // Check the 7-link + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.7link.control"); + pass *= testForce(GRID, Umu, Ucontrol, "7-link", + 1, 0, 0, 0, 0, + 1, 0, 0, 1, 0 ); + + // Check level 2 smearing + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.level2.control"); + pass *= testForce(GRID, Umu, Ucontrol, "level 2", + 1, 0, 0, 0, -1/24., + 1, -1/16., 1/64., -1/384., -1/8. ); + + // Check level 1+2 smearing + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.level12.control"); + pass *= testForce(GRID, Umu, Ucontrol, "level 1+2", + 1/8., -1/16., 1/64., -1/384., -1/24., + 1 , -1/16., 1/64., -1/384., -1/8. ); + + + if(pass){ + Grid_pass("All tests passed."); + } else { + Grid_error("At least one test failed."); + } + +#endif + + Grid_finalize(); +} \ No newline at end of file diff --git a/tests/smearing/Test_fatLinks.cc b/tests/smearing/Test_fatLinks.cc index 04d5b165db..4d9e38a343 100644 --- a/tests/smearing/Test_fatLinks.cc +++ b/tests/smearing/Test_fatLinks.cc @@ -38,24 +38,26 @@ directory using namespace Grid; -/*! @brief parameter file to easily adjust Nloop */ -struct ConfParameters: Serializable { - GRID_SERIALIZABLE_CLASS_MEMBERS( - ConfParameters, - int, benchmark, - int, Nloop); - - template - ConfParameters(Reader& Reader){ - read(Reader, "parameters", *this); - } -}; - +//#define USE_DOUBLE true +#define USE_DOUBLE true -bool testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD Usmr, LatticeGaugeFieldD Unaik, - LatticeGaugeFieldD Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) { - Smear_HISQ hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); - LatticeGaugeFieldD diff(&GRID), Uproj(&GRID); +#if USE_DOUBLE + #define PREC double + typedef LatticeGaugeFieldD LGF; + typedef PeriodicGimplD GIMPL; + typedef vComplexD COMP; +#else + #define PREC float + typedef LatticeGaugeFieldF LGF; + typedef PeriodicGimplF GIMPL; + typedef vComplexF COMP; +#endif + + +bool testSmear(GridCartesian& GRID, LGF Umu, LGF Usmr, LGF Unaik, + LGF Ucontrol, PREC c1, PREC cnaik, PREC c3, PREC c5, PREC c7, PREC clp) { + Smear_HISQ hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); + LGF diff(&GRID), Uproj(&GRID); hisq_fat.smear(Usmr, Unaik, Umu); bool result; if (cnaik < 1e-30) { // Testing anything but Naik term @@ -85,6 +87,16 @@ bool testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD U } +void hotStartSmear(GridCartesian& GRID) { + LGF Uproj(&GRID), Uhot(&GRID); + GridParallelRNG pRNG(&GRID); pRNG.SeedFixedIntegers(std::vector({111,222,333,444})); + SU::HotConfiguration(pRNG,Uhot); + Smear_HISQ hisq_fat(&GRID,1/8.,0.,1/16.,1/64.,1/384.,-1/8.); + hisq_fat.projectU3(Uproj,Uhot); + Grid_log("norm2(Uproj) = ",norm2(Uproj)/(Nc*Nd*GRID.gSites())); +} + + int main (int argc, char** argv) { // Params for the test. @@ -94,11 +106,15 @@ int main (int argc, char** argv) { std::string conf_in = "nersc.l8t4b3360"; int threads = GridThread::GetThreads(); - typedef LatticeGaugeFieldD LGF; + if (sizeof(PREC)==4) { + Grid_log("Run in single precision."); + } else { + Grid_log("Run in double precision."); + } // Initialize the Grid Grid_init(&argc,&argv); - Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); + Coordinate simd_layout = GridDefaultSimd(Nd,COMP::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); Grid_log("mpi = ",mpi_layout); Grid_log("simd = ",simd_layout); @@ -106,12 +122,10 @@ int main (int argc, char** argv) { Grid_log("threads = ",threads); GridCartesian GRID(latt_size,simd_layout,mpi_layout); - XmlReader Reader("fatParams.xml",false,"grid"); - ConfParameters param(Reader); - if(param.benchmark) Grid_log(" Nloop = ",param.Nloop); - LGF Umu(&GRID), Usmr(&GRID), Unaik(&GRID), Ucontrol(&GRID); +#if USE_DOUBLE // NerscIO is hard-coded to double. + // Read the configuration into Umu FieldMetaData header; NerscIO::readConfiguration(Umu, header, conf_in); @@ -136,46 +150,17 @@ int main (int argc, char** argv) { Grid_error("At least one test failed."); } +#endif + + // Does a small hot start cause an issue? + hotStartSmear(GRID); + latt_size[0]=16; latt_size[1]=16; latt_size[2]=16; latt_size[3]=16; + GridCartesian SYMM(latt_size,simd_layout,mpi_layout); + hotStartSmear(SYMM); + // Test a C-style instantiation double path_coeff[6] = {1, 2, 3, 4, 5, 6}; - Smear_HISQ hisq_fat_Cstyle(&GRID,path_coeff); - - if (param.benchmark) { - - autoView(U_v, Umu, CpuRead); // Gauge accessor - - // Read in lattice sequentially, Nloop times - double lookupTime = 0.; - for(int i=0;i hisq_fat_Cstyle(&GRID,path_coeff); Grid_finalize(); } \ No newline at end of file