From 588b69b30cf551dc8dfcad1b5b38f7cda0ca391c Mon Sep 17 00:00:00 2001 From: david clarke Date: Sun, 28 Apr 2024 23:23:35 -0600 Subject: [PATCH 01/32] starting dW/dV --- Grid/qcd/smearing/HISQSmearing.h | 197 +++++++++++++++++++++++------ Grid/stencil/GeneralLocalStencil.h | 16 ++- tests/smearing/Test_fatLinks.cc | 15 ++- 3 files changed, 187 insertions(+), 41 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 529ea090c5..d52e81e0f8 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -40,26 +40,6 @@ directory NAMESPACE_BEGIN(Grid); -// TODO: find a way to fold this into the stencil header. need to access grid to get -// Nd, since you don't want to inherit from QCD.h -/*! @brief append arbitrary shift path to shifts */ -template -void appendShift(std::vector& shifts, int dir, Args... args) { - Coordinate shift(Nd,0); - generalShift(shift, dir, args...); - // push_back creates an element at the end of shifts and - // assigns the data in the argument to it. - shifts.push_back(shift); -} - - -/*! @brief figure out the stencil index from mu and nu */ -accelerator_inline int stencilIndex(int mu, int nu) { - // Nshifts depends on how you built the stencil - int Nshifts = 6; - return Nshifts*nu + Nd*Nshifts*mu; -} - /*! 
@brief structure holding the link treatment */ struct SmearingParameters{ @@ -86,8 +66,16 @@ class Smear_HISQ : public Gimpl { private: GridCartesian* const _grid; + Real const _Scut = 1e-16; // Cutoff for U(3) projection eigenvalues SmearingParameters _linkTreatment; + // figure out the stencil index from mu and nu + accelerator_inline int stencilIndex(int mu, int nu) const { + // Nshifts depends on how you built the stencil + int Nshifts = 6; + return Nshifts*nu + Nd*Nshifts*mu; + } + public: INHERIT_GIMPL_TYPES(Gimpl); @@ -95,7 +83,6 @@ class Smear_HISQ : public Gimpl { typedef typename Gimpl::GaugeLinkField LF; typedef typename Gimpl::ComplexField CF; - // Don't allow default values here. Smear_HISQ(GridCartesian* grid, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) : _grid(grid), _linkTreatment(c1,cnaik,c3,c5,c7,clp) { @@ -113,8 +100,9 @@ class Smear_HISQ : public Gimpl { ~Smear_HISQ() {} - // Intent: OUT--u_smr, u_naik - // IN--u_thin + // Intent: OUT--u_smr (smeared links), + // u_naik (Naik links), + // IN--u_thin (think links) void smear(GF& u_smr, GF& u_naik, GF& u_thin) const { SmearingParameters lt = this->_linkTreatment; @@ -136,12 +124,12 @@ class Smear_HISQ : public Gimpl { std::vector shifts; for(int mu=0;mu(shifts,mu); + appendShift(shifts,nu); + appendShift(shifts,shiftSignal::NO_SHIFT); + appendShift(shifts,mu,Back(nu)); + appendShift(shifts,Back(nu)); + appendShift(shifts,Back(mu)); } // A GeneralLocalStencil has two indices: a site and stencil index @@ -177,7 +165,7 @@ class Smear_HISQ : public Gimpl { U3matrix U0, U1, U2, U3, U4, U5, W; for(int nu=0;nustencilIndex(mu,nu); // The stencil gives us support points in the mu-nu plane that we will use to // grab the links we need. 
@@ -220,7 +208,7 @@ class Smear_HISQ : public Gimpl { int sigmaIndex = 0; for(int nu=0;nustencilIndex(mu,nu); for(int rho=0;rhostencilIndex(mu,nu); for(int rho=0;rho_grid; - LF V(grid), Q(grid), sqrtQinv(grid), id_3(grid), diff(grid); + LF V(grid), Q(grid), sqrtQinv(grid), id_3(grid); CF c0(grid), c1(grid), c2(grid), g0(grid), g1(grid), g2(grid), S(grid), R(grid), theta(grid), u(grid), v(grid), w(grid), den(grid), f0(grid), f1(grid), f2(grid); + id_3 = 1.; + // Follow MILC 10.1103/PhysRevD.82.074501, eqs (B2-B3) and (C1-C8) for (int mu = 0; mu < Nd; mu++) { V = PeekIndex(u_mu, mu); @@ -355,7 +345,7 @@ class Smear_HISQ : public Gimpl { c1 = (1/2.)*real(trace(Q*Q)); c2 = (1/3.)*real(trace(Q*Q*Q)); S = (1/3.)*c1-(1/18.)*c0*c0; - if (norm2(S)<1e-28) { + if (norm2(S)_Scut) { g0 = (1/3.)*c0; g1 = g0; g2 = g1; } else { R = (1/2.)*c2-(1/3. )*c0*c1+(1/27.)*c0*c0*c0; @@ -365,14 +355,13 @@ class Smear_HISQ : public Gimpl { g2 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta+2*M_PI/3.); } // if (fabs(Q.determinant()/(g0*g1*g2)-1.0) > 1e-5) { SVD } - u = sqrt(g0) + sqrt(g1) + sqrt(g2); + u = sqrt(g0) + sqrt(g1) + sqrt(g2); v = sqrt(g0*g1) + sqrt(g0*g2) + sqrt(g1*g2); w = sqrt(g0*g1*g2); den = w*(u*v-w); f0 = (-w*(u*u+v)+u*v*v)/den; f1 = (-w-u*u*u+2.*u*v)/den; f2 = u/den; - id_3 = 1.; sqrtQinv = f0*id_3 + f1*Q + f2*Q*Q; @@ -380,6 +369,138 @@ class Smear_HISQ : public Gimpl { } }; + // Intent: OUT--u_deriv (dW/dV slotted into force) + // IN--u_mu (fat links), + // u_force (slot derivative into this force), + // delta (force cutoff) + // Follow MILC 10.1103/PhysRevD.82.074501 + void ddVprojectU3(GF& u_deriv, GF& u_mu, GF& u_force, Real const delta=5e-5) { + + auto grid = this->_grid; + + LF V(grid), Q(grid), sqrtQinv(grid), id_3(grid), res(grid), force(grid), forcedag(grid), + Vdag(grid), VVdag(grid), VQVdag(grid), PVdag(grid), VQ(grid), RVdag(grid), VQ2(grid), + SVdag(grid), QVdag(grid), Q2Vdag(grid); + + CF c0(grid), c1(grid), c2(grid), g0(grid), g1(grid), g2(grid), S(grid), R(grid), 
theta(grid), + u(grid), v(grid), w(grid), den(grid), f0(grid), f1(grid), f2(grid), delta(grid), + u2(grid), u3(grid), u4(grid), u5(grid), u6(grid), u7(grid), u8(grid), + v2(grid), v3(grid), v4(grid), v5(grid), v6(grid), + w2(grid), w3(grid), w4(grid), w5(grid), d(grid), + C00(grid), C01(grid), C02(grid), C11(grid), C12(grid), C22(grid), deriv(grid); + + id_3 = 1.; + + // eqs (B2-B3) and (C1-C8) + for (int mu = 0; mu < Nd; mu++) { + V = PeekIndex(u_mu, mu); + Q = adj(V)*V; + c0 = real(trace(Q)); + c1 = (1/2.)*real(trace(Q*Q)); + c2 = (1/3.)*real(trace(Q*Q*Q)); + S = (1/3.)*c1-(1/18.)*c0*c0; + if (norm2(S)_Scut) { + g0 = (1/3.)*c0; g1 = g0; g2 = g1; + } else { + R = (1/2.)*c2-(1/3. )*c0*c1+(1/27.)*c0*c0*c0; + theta = acos(R*pow(S,-1.5)); + g0 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta-2*M_PI/3.); + g1 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta ); + g2 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta+2*M_PI/3.); + } +// if (g0 < delta || g1 < delta || g2 < delta) { +// // force filter eq (C23) +// g0 += g0 + delta; +// g1 += g1 + delta; +// g2 += g2 + delta; +// Q += delta*id_3; +// } +// if (fabs(Q.determinant()/(g0*g1*g2)-1.0) > 1e-5) { SVD } + u = sqrt(g0) + sqrt(g1) + sqrt(g2); + v = sqrt(g0*g1) + sqrt(g0*g2) + sqrt(g1*g2); + w = sqrt(g0*g1*g2); + den = w*(u*v-w); + f0 = (-w*(u*u+v)+u*v*v)/den; + f1 = (-w-u*u*u+2.*u*v)/den; + f2 = u/den; + + sqrtQinv = f0*id_3 + f1*Q + f2*Q*Q; + + force = PeekIndex(u_force, mu); + forcedag = adj(force); + + // Ask Peter: is this necessary/helpful? 
+ u2 = u * u; + u3 = u2 * u; + u4 = u3 * u; + u5 = u4 * u; + u6 = u5 * u; + u7 = u6 * u; + u8 = u7 * u; + v2 = v * v; + v3 = v2 * v; + v4 = v3 * v; + v5 = v4 * v; + v6 = v5 * v; + w2 = w * w; + w3 = w2 * w; + w4 = w3 * w; + w5 = w4 * w; + + // eq (C10) + d = 2*w3*(u*v-w)*(u*v-w)*(u*v-w); + + // eq (C11) + C00 = ( -w3*u6 + 3*v*w3*u4 + 3*v4*w*u4 - v6*u3 - 4*w4*u3 - 12*v3*w2*u3 + 16*v2*w3*u2 + + 3*v5*w*u2 - 8*v*w4*u - 3*v4*w2*u + w5 + v3*w3 )/d; + C01 = ( -w2*u7 - v2*w*u6 + v4*u5 + 6*v*w2*u5 - 5*w3*u4 - v3*w*u4 - 2*v5*u3 - 6*v2*w2*u3 + + 10*v*w3*u2 + 6*v4*w*u2 - 3*w4*u - 6*v3*w2*u + 2*v2*w3 )/d; + C02 = ( w2*u5 + v2*w*u4 - v4*u3 - 4*v*w2*u3 + 4*w3*u2 +3*v3*w*u2 - 3*v2*w2*u + v*w3 )/d; + C11 = ( -w*u8 - v2*u7 + 7*v*w*u6 + 4*v3*u5 - 5*w2*u5 - 16*v2*w*u4 - 4*v4*u3 + 16*v*w2*u3 + - 3*w3*u2 + 12*v3*w*u2 - 12*v2*w2*u + 3*v*w3 )/d; + C12 = ( w*u6 + v2*u5 - 5*v*w*u4 - 2*v3*u3 + 4*w2*u3 + 6*v2*w*u2 - 6*v*w2*u + w3 )/d; + C22 = ( -w*u4 - v2*u3 + 3*v*w*u2 - 3*w2*u )/d; + + Vd = adj(V); + VVd = V*Vd; + VQVd = V*Q*Vd; + VQ = V*Q; + VQ2 = V*Q*Q; + QVd = Q*Vd; + Q2Vd = Q*Q*Vd; + + // eqs (C17-C19) + PVd = ( C00*id_3 + C01*Q + C02*Q*Q )*Vd; + RVd = ( C01*id_3 + C11*Q + C12*Q*Q )*Vd; + SVd = ( C02*id_3 + C12*Q + C22*Q*Q )*Vd; + + // eqs (C20) and (C21) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) { + + deriv = Zero(); // dWij/dVkl + + if (k == i) deriv += Qinvsq()()(l,j); + if (l == j) deriv += f1*VVd()()(i,k)+f2*VQVd()()(i,k); + + deriv += f2*VVd()()(i,k)*Q()()(l,j) + V()()(i,j)*PVd()()(l,k) + + VQ()()(i,j)*RVd()()(l,k) + VQ2()()(i,j)*SVd()()(l,k); + + res()()(l,k) = res()()(l,k) + deriv*force()()(j,i); + + // dWij^+/dVkl + deriv = (f1*Vd()()(i,k)+f2*QVd()()(i,k))*Vd()()(l,j) + + f2*Vd()()(i,k)*QVd()()(l,j) + Vd()()(i,j)*PVd()()(l,k) + + QVd()()(i,j)*RVd()()(l,k)+Q2Vd()()(i,j)*SVd()()(l,k); + + res()()(l,k) = res()()(l,k) + deriv*forcedag()()(j,i); + } + + PokeIndex(u_deriv, res, mu); + } + }; // void 
derivative(const GaugeField& Gauge) const { // }; diff --git a/Grid/stencil/GeneralLocalStencil.h b/Grid/stencil/GeneralLocalStencil.h index 9af9b834bb..f19eaf5945 100644 --- a/Grid/stencil/GeneralLocalStencil.h +++ b/Grid/stencil/GeneralLocalStencil.h @@ -145,12 +145,11 @@ class GeneralLocalStencil : public GeneralLocalStencilView { class shiftSignal { public: enum { - BACKWARD_CONST = 16, + BACKWARD_CONST = GRID_MAX_LATTICE_DIMENSION + 2, NO_SHIFT = -1 }; }; -// TODO: put a check somewhere that BACKWARD_CONST > Nd! /*! @brief signals that you want to go backwards in direction dir */ inline int Back(const int dir) { @@ -159,6 +158,7 @@ inline int Back(const int dir) { return dir + shiftSignal::BACKWARD_CONST; } + /*! @brief shift one unit in direction dir */ template void generalShift(Coordinate& shift, int dir) { @@ -172,6 +172,7 @@ void generalShift(Coordinate& shift, int dir) { } } + /*! @brief follow a path of directions, shifting one unit in each direction */ template void generalShift(Coordinate& shift, int dir, Args... args) { @@ -187,5 +188,16 @@ void generalShift(Coordinate& shift, int dir, Args... args) { } +/*! @brief append arbitrary shift path to shifts of dimension d */ +template +void appendShift(std::vector& shifts, int dir, Args... args) { + Coordinate shift(d,0); + generalShift(shift, dir, args...); + // push_back creates an element at the end of shifts and + // assigns the data in the argument to it. 
+ shifts.push_back(shift); +} + + NAMESPACE_END(Grid); diff --git a/tests/smearing/Test_fatLinks.cc b/tests/smearing/Test_fatLinks.cc index 04d5b165db..ad1a9b1813 100644 --- a/tests/smearing/Test_fatLinks.cc +++ b/tests/smearing/Test_fatLinks.cc @@ -84,6 +84,14 @@ bool testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD U return result; } +void hotStartSmear(GridCartesian& GRID) { + LatticeGaugeFieldD Uproj(&GRID), Uhot(&GRID); + GridParallelRNG pRNG(&GRID); pRNG.SeedFixedIntegers(std::vector({111,222,333,444})); + SU::HotConfiguration(pRNG,Uhot); + Smear_HISQ hisq_fat(&GRID,1/8.,0.,1/16.,1/64.,1/384.,-1/8.); + hisq_fat.projectU3(Uproj,Uhot); + Grid_log("norm2(Uproj) = ",norm2(Uproj)); +} int main (int argc, char** argv) { @@ -93,7 +101,6 @@ int main (int argc, char** argv) { Coordinate latt_size(Nd,0); latt_size[0]=Ns; latt_size[1]=Ns; latt_size[2]=Ns; latt_size[3]=Nt; std::string conf_in = "nersc.l8t4b3360"; int threads = GridThread::GetThreads(); - typedef LatticeGaugeFieldD LGF; // Initialize the Grid @@ -136,6 +143,12 @@ int main (int argc, char** argv) { Grid_error("At least one test failed."); } + // Does a small hot start cause an issue? 
+ hotStartSmear(GRID); + latt_size[0]=16; latt_size[1]=16; latt_size[2]=16; latt_size[3]=16; + GridCartesian SYMM(latt_size,simd_layout,mpi_layout); + hotStartSmear(SYMM); + // Test a C-style instantiation double path_coeff[6] = {1, 2, 3, 4, 5, 6}; Smear_HISQ hisq_fat_Cstyle(&GRID,path_coeff); From 0aa12f1e675e928f89d8508726fc8373d54f20e7 Mon Sep 17 00:00:00 2001 From: david clarke Date: Tue, 21 May 2024 13:49:37 -0600 Subject: [PATCH 02/32] ddVprojectU3 now compiles --- Grid/qcd/QCD.h | 38 +-- Grid/qcd/smearing/HISQSmearing.h | 415 ++++++++++++++++--------------- Grid/util/FlightRecorder.cc | 2 + tests/forces/Test_HISQ_force.cc | 111 +++++++++ tests/smearing/Test_fatLinks.cc | 2 +- 5 files changed, 354 insertions(+), 214 deletions(-) create mode 100644 tests/forces/Test_HISQ_force.cc diff --git a/Grid/qcd/QCD.h b/Grid/qcd/QCD.h index dbedfa7c49..575071dee7 100644 --- a/Grid/qcd/QCD.h +++ b/Grid/qcd/QCD.h @@ -99,25 +99,25 @@ const int GparityFlavourTensorIndex = 3; //TensorLevel counts from the bottom! 
// s,sp,c,spc,lc -template using iSinglet = iScalar > >; -template using iSpinMatrix = iScalar, Ns> >; -template using iColourMatrix = iScalar > > ; -template using iSpinColourMatrix = iScalar, Ns> >; -template using iLorentzColourMatrix = iVector >, Nd > ; -template using iLorentzComplex = iVector >, Nd > ; -template using iDoubleStoredColourMatrix = iVector >, Nds > ; -template using iSpinVector = iScalar, Ns> >; -template using iColourVector = iScalar > >; -template using iSpinColourVector = iScalar, Ns> >; -template using iHalfSpinVector = iScalar, Nhs> >; -template using iHalfSpinColourVector = iScalar, Nhs> >; - template using iSpinColourSpinColourMatrix = iScalar, Ns>, Nc>, Ns> >; - - -template using iGparityFlavourVector = iVector >, Ngp>; -template using iGparitySpinColourVector = iVector, Ns>, Ngp >; -template using iGparityHalfSpinColourVector = iVector, Nhs>, Ngp >; -template using iGparityFlavourMatrix = iMatrix >, Ngp>; +template using iSinglet = iScalar > >; +template using iSpinMatrix = iScalar, Ns> >; +template using iColourMatrix = iScalar > > ; +template using iSpinColourMatrix = iScalar, Ns> >; +template using iLorentzColourMatrix = iVector >, Nd > ; +template using iLorentzComplex = iVector >, Nd > ; +template using iDoubleStoredColourMatrix = iVector >, Nds > ; +template using iSpinVector = iScalar, Ns> >; +template using iColourVector = iScalar > >; +template using iSpinColourVector = iScalar, Ns> >; +template using iHalfSpinVector = iScalar, Nhs> >; +template using iHalfSpinColourVector = iScalar, Nhs> >; +template using iSpinColourSpinColourMatrix = iScalar, Ns>, Nc>, Ns> >; + + +template using iGparityFlavourVector = iVector >, Ngp>; +template using iGparitySpinColourVector = iVector, Ns>, Ngp >; +template using iGparityHalfSpinColourVector = iVector, Nhs>, Ngp >; +template using iGparityFlavourMatrix = iMatrix >, Ngp>; // Spin matrix typedef iSpinMatrix SpinMatrix; diff --git a/Grid/qcd/smearing/HISQSmearing.h 
b/Grid/qcd/smearing/HISQSmearing.h index d52e81e0f8..1ff2dc2e50 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -35,7 +35,7 @@ directory #include #include #include - +//#include NAMESPACE_BEGIN(Grid); @@ -60,14 +60,23 @@ struct SmearingParameters{ }; +// I think that "coalesced..." functions are extremely general, which is nice, +// but in the HISQ context it boils down to link reading and writing. +template accelerator_inline +vobj getLink(const vobj & __restrict__ vec,GeneralStencilEntry* SE) { + return coalescedReadGeneralPermute(vec, SE->_permute, Nd); +} +#define setLink coalescedWrite + + /*! @brief create fat links from link variables */ template class Smear_HISQ : public Gimpl { +public: -private: GridCartesian* const _grid; - Real const _Scut = 1e-16; // Cutoff for U(3) projection eigenvalues - SmearingParameters _linkTreatment; + Real const Scut = 1e-16; // Cutoff for U(3) projection eigenvalues + SmearingParameters linkTreatment; // figure out the stencil index from mu and nu accelerator_inline int stencilIndex(int mu, int nu) const { @@ -76,8 +85,6 @@ class Smear_HISQ : public Gimpl { return Nshifts*nu + Nd*Nshifts*mu; } -public: - INHERIT_GIMPL_TYPES(Gimpl); typedef typename Gimpl::GaugeField GF; typedef typename Gimpl::GaugeLinkField LF; @@ -85,7 +92,7 @@ class Smear_HISQ : public Gimpl { Smear_HISQ(GridCartesian* grid, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) : _grid(grid), - _linkTreatment(c1,cnaik,c3,c5,c7,clp) { + linkTreatment(c1,cnaik,c3,c5,c7,clp) { assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3"); assert(Nd == 4 && "HISQ smearing only defined for Nd==4"); } @@ -93,7 +100,7 @@ class Smear_HISQ : public Gimpl { // Allow to pass a pointer to a C-style, double array for MILC convenience Smear_HISQ(GridCartesian* grid, double* coeff) : _grid(grid), - _linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { + 
linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3"); assert(Nd == 4 && "HISQ smearing only defined for Nd==4"); } @@ -102,10 +109,10 @@ class Smear_HISQ : public Gimpl { // Intent: OUT--u_smr (smeared links), // u_naik (Naik links), - // IN--u_thin (think links) + // IN--u_thin (thin links) void smear(GF& u_smr, GF& u_naik, GF& u_thin) const { - SmearingParameters lt = this->_linkTreatment; + SmearingParameters lt = this->linkTreatment; auto grid = this->_grid; // Create a padded cell of extra padding depth=1 and fill the padding. @@ -155,7 +162,7 @@ class Smear_HISQ : public Gimpl { // We infer some types that will be needed in the calculation. typedef decltype(gStencil.GetEntry(0,0)) stencilElement; - typedef decltype(coalescedReadGeneralPermute(U_v[0](0),gStencil.GetEntry(0,0)->_permute,Nd)) U3matrix; + typedef decltype(getLink(U_v[0](0),gStencil.GetEntry(0,0))) U3matrix; int Nsites = U_v.size(); auto gStencil_v = gStencil.View(); @@ -165,7 +172,7 @@ class Smear_HISQ : public Gimpl { U3matrix U0, U1, U2, U3, U4, U5, W; for(int nu=0;nustencilIndex(mu,nu); + int s = stencilIndex(mu,nu); // The stencil gives us support points in the mu-nu plane that we will use to // grab the links we need. @@ -180,25 +187,23 @@ class Smear_HISQ : public Gimpl { // stored link oriented compared to the one you want? If I imagine myself travelling // with the to-be-updated link, I have two possible, alternative 3-link paths I can // take, one starting by going to the left, the other starting by going to the right. 
- U0 = coalescedReadGeneralPermute(U_v[x_p_mu ](nu),SE0->_permute,Nd); - U1 = coalescedReadGeneralPermute(U_v[x_p_nu ](mu),SE1->_permute,Nd); - U2 = coalescedReadGeneralPermute(U_v[x ](nu),SE2->_permute,Nd); - U3 = coalescedReadGeneralPermute(U_v[x_p_mu_m_nu](nu),SE3->_permute,Nd); - U4 = coalescedReadGeneralPermute(U_v[x_m_nu ](mu),SE4->_permute,Nd); - U5 = coalescedReadGeneralPermute(U_v[x_m_nu ](nu),SE4->_permute,Nd); + U0 = getLink(U_v[x_p_mu ](nu),SE0); + U1 = getLink(U_v[x_p_nu ](mu),SE1); + U2 = getLink(U_v[x ](nu),SE2); + U3 = getLink(U_v[x_p_mu_m_nu](nu),SE3); + U4 = getLink(U_v[x_m_nu ](mu),SE4); + U5 = getLink(U_v[x_m_nu ](nu),SE4); // "left" "right" W = U2*U1*adj(U0) + adj(U5)*U4*U3; // Save 3-link construct for later and add to smeared field. - coalescedWrite(U_3link_v[x](nu), W); + setLink(U_3link_v[x](nu), W); // The index operator (x) returns the coalesced read on GPU. The view [] index returns // a reference to the vector object. The [x](mu) returns a reference to the densely - // packed (contiguous in memory) mu-th element of the vector object. On CPU, - // coalescedRead/Write is the identity mapping assigning vector object to vector object. - // But on GPU it's non-trivial and maps scalar object to vector object and vice versa. - coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W); + // packed (contiguous in memory) mu-th element of the vector object. 
+ setLink(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W); } }) @@ -208,7 +213,7 @@ class Smear_HISQ : public Gimpl { int sigmaIndex = 0; for(int nu=0;nustencilIndex(mu,nu); + int s = stencilIndex(mu,nu); for(int rho=0;rho_offset; SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; - U0 = coalescedReadGeneralPermute( U_v[x_p_mu ](nu ),SE0->_permute,Nd); - U1 = coalescedReadGeneralPermute(U_3link_v[x_p_nu ](rho),SE1->_permute,Nd); - U2 = coalescedReadGeneralPermute( U_v[x ](nu ),SE2->_permute,Nd); - U3 = coalescedReadGeneralPermute( U_v[x_p_mu_m_nu](nu ),SE3->_permute,Nd); - U4 = coalescedReadGeneralPermute(U_3link_v[x_m_nu ](rho),SE4->_permute,Nd); - U5 = coalescedReadGeneralPermute( U_v[x_m_nu ](nu ),SE4->_permute,Nd); + U0 = getLink( U_v[x_p_mu ](nu ),SE0); + U1 = getLink(U_3link_v[x_p_nu ](rho),SE1); + U2 = getLink( U_v[x ](nu ),SE2); + U3 = getLink( U_v[x_p_mu_m_nu](nu ),SE3); + U4 = getLink(U_3link_v[x_m_nu ](rho),SE4); + U5 = getLink( U_v[x_m_nu ](nu ),SE4); W = U2*U1*adj(U0) + adj(U5)*U4*U3; if(sigmaIndex<3) { - coalescedWrite(U_5linkA_v[x](rho), W); + setLink(U_5linkA_v[x](rho), W); } else { - coalescedWrite(U_5linkB_v[x](rho), W); + setLink(U_5linkB_v[x](rho), W); } - coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_5*W); + setLink(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_5*W); sigmaIndex++; } } @@ -245,7 +250,7 @@ class Smear_HISQ : public Gimpl { int sigmaIndex = 0; for(int nu=0;nustencilIndex(mu,nu); + int s = stencilIndex(mu,nu); for(int rho=0;rho_offset; SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; - U0 = coalescedReadGeneralPermute(U_v[x_p_mu](nu),SE0->_permute,Nd); + U0 = getLink(U_v[x_p_mu](nu),SE0); if(sigmaIndex<3) { - U1 = coalescedReadGeneralPermute(U_5linkB_v[x_p_nu](rho),SE1->_permute,Nd); + U1 = getLink(U_5linkB_v[x_p_nu](rho),SE1); } else { - U1 = coalescedReadGeneralPermute(U_5linkA_v[x_p_nu](rho),SE1->_permute,Nd); + U1 = getLink(U_5linkA_v[x_p_nu](rho),SE1); } - U2 = 
coalescedReadGeneralPermute(U_v[x](nu),SE2->_permute,Nd); - U3 = coalescedReadGeneralPermute(U_v[x_p_mu_m_nu](nu),SE3->_permute,Nd); + U2 = getLink(U_v[x](nu),SE2); + U3 = getLink(U_v[x_p_mu_m_nu](nu),SE3); if(sigmaIndex<3) { - U4 = coalescedReadGeneralPermute(U_5linkB_v[x_m_nu](rho),SE4->_permute,Nd); + U4 = getLink(U_5linkB_v[x_m_nu](rho),SE4); } else { - U4 = coalescedReadGeneralPermute(U_5linkA_v[x_m_nu](rho),SE4->_permute,Nd); + U4 = getLink(U_5linkA_v[x_m_nu](rho),SE4); } - U5 = coalescedReadGeneralPermute(U_v[x_m_nu](nu),SE4->_permute,Nd); + U5 = getLink(U_v[x_m_nu](nu),SE4); W = U2*U1*adj(U0) + adj(U5)*U4*U3; - coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_7*W); + setLink(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_7*W); sigmaIndex++; } } @@ -329,46 +334,58 @@ class Smear_HISQ : public Gimpl { // IN--u_mu (to-be-projected links) void projectU3(GF& u_proj, GF& u_mu) const { - auto grid = this->_grid; - - LF V(grid), Q(grid), sqrtQinv(grid), id_3(grid); - CF c0(grid), c1(grid), c2(grid), g0(grid), g1(grid), g2(grid), S(grid), R(grid), theta(grid), - u(grid), v(grid), w(grid), den(grid), f0(grid), f1(grid), f2(grid); + // Open up the views + autoView(uproj_v, u_proj, AcceleratorWrite); + autoView(umu_v , u_mu , AcceleratorRead); - id_3 = 1.; + // Make sure everyone is using the same Grid + conformable(u_proj,u_mu); // Follow MILC 10.1103/PhysRevD.82.074501, eqs (B2-B3) and (C1-C8) - for (int mu = 0; mu < Nd; mu++) { - V = PeekIndex(u_mu, mu); - Q = adj(V)*V; - c0 = real(trace(Q)); - c1 = (1/2.)*real(trace(Q*Q)); - c2 = (1/3.)*real(trace(Q*Q*Q)); - S = (1/3.)*c1-(1/18.)*c0*c0; - if (norm2(S)_Scut) { - g0 = (1/3.)*c0; g1 = g0; g2 = g1; - } else { - R = (1/2.)*c2-(1/3. 
)*c0*c1+(1/27.)*c0*c0*c0; - theta = acos(R*pow(S,-1.5)); - g0 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta-2*M_PI/3.); - g1 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta ); - g2 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta+2*M_PI/3.); - } -// if (fabs(Q.determinant()/(g0*g1*g2)-1.0) > 1e-5) { SVD } - u = sqrt(g0) + sqrt(g1) + sqrt(g2); - v = sqrt(g0*g1) + sqrt(g0*g2) + sqrt(g1*g2); - w = sqrt(g0*g1*g2); - den = w*(u*v-w); - f0 = (-w*(u*u+v)+u*v*v)/den; - f1 = (-w-u*u*u+2.*u*v)/den; - f2 = u/den; - - sqrtQinv = f0*id_3 + f1*Q + f2*Q*Q; - - PokeIndex(u_proj, V*sqrtQinv, mu); - } + accelerator_for(ss,umu_v.size(),vLorentzColourMatrixD::Nsimd(),{ +#ifdef GRID_SIMT + { int blane=acceleratorSIMTlane(vLorentzColourMatrixD::Nsimd());// +#else + for(int blane=0;blane_grid; - - LF V(grid), Q(grid), sqrtQinv(grid), id_3(grid), res(grid), force(grid), forcedag(grid), - Vdag(grid), VVdag(grid), VQVdag(grid), PVdag(grid), VQ(grid), RVdag(grid), VQ2(grid), - SVdag(grid), QVdag(grid), Q2Vdag(grid); + conformable(u_force,u_mu); + conformable(u_deriv,u_mu); - CF c0(grid), c1(grid), c2(grid), g0(grid), g1(grid), g2(grid), S(grid), R(grid), theta(grid), - u(grid), v(grid), w(grid), den(grid), f0(grid), f1(grid), f2(grid), delta(grid), - u2(grid), u3(grid), u4(grid), u5(grid), u6(grid), u7(grid), u8(grid), - v2(grid), v3(grid), v4(grid), v5(grid), v6(grid), - w2(grid), w3(grid), w4(grid), w5(grid), d(grid), - C00(grid), C01(grid), C02(grid), C11(grid), C12(grid), C22(grid), deriv(grid); + autoView(uderiv_v, u_deriv, AcceleratorWrite); + autoView(umu_v , u_mu , AcceleratorRead); + autoView(uforce_v, u_force, AcceleratorRead); - id_3 = 1.; + // Follow MILC 10.1103/PhysRevD.82.074501, eqs (B2-B3) and (C1-C8) + accelerator_for(ss,umu_v.size(),vLorentzColourMatrixD::Nsimd(),{ +#ifdef GRID_SIMT + { int blane=acceleratorSIMTlane(vLorentzColourMatrixD::Nsimd());// +#else + for(int blane=0;blane(u_mu, mu); - Q = adj(V)*V; - c0 = real(trace(Q)); - c1 = (1/2.)*real(trace(Q*Q)); - c2 = (1/3.)*real(trace(Q*Q*Q)); 
- S = (1/3.)*c1-(1/18.)*c0*c0; - if (norm2(S)_Scut) { - g0 = (1/3.)*c0; g1 = g0; g2 = g1; - } else { - R = (1/2.)*c2-(1/3. )*c0*c1+(1/27.)*c0*c0*c0; - theta = acos(R*pow(S,-1.5)); - g0 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta-2*M_PI/3.); - g1 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta ); - g2 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta+2*M_PI/3.); - } -// if (g0 < delta || g1 < delta || g2 < delta) { -// // force filter eq (C23) -// g0 += g0 + delta; -// g1 += g1 + delta; -// g2 += g2 + delta; -// Q += delta*id_3; -// } -// if (fabs(Q.determinant()/(g0*g1*g2)-1.0) > 1e-5) { SVD } - u = sqrt(g0) + sqrt(g1) + sqrt(g2); - v = sqrt(g0*g1) + sqrt(g0*g2) + sqrt(g1*g2); - w = sqrt(g0*g1*g2); - den = w*(u*v-w); - f0 = (-w*(u*u+v)+u*v*v)/den; - f1 = (-w-u*u*u+2.*u*v)/den; - f2 = u/den; - - sqrtQinv = f0*id_3 + f1*Q + f2*Q*Q; - - force = PeekIndex(u_force, mu); - forcedag = adj(force); - - // Ask Peter: is this necessary/helpful? - u2 = u * u; - u3 = u2 * u; - u4 = u3 * u; - u5 = u4 * u; - u6 = u5 * u; - u7 = u6 * u; - u8 = u7 * u; - v2 = v * v; - v3 = v2 * v; - v4 = v3 * v; - v5 = v4 * v; - v6 = v5 * v; - w2 = w * w; - w3 = w2 * w; - w4 = w3 * w; - w5 = w4 * w; - - // eq (C10) - d = 2*w3*(u*v-w)*(u*v-w)*(u*v-w); - - // eq (C11) - C00 = ( -w3*u6 + 3*v*w3*u4 + 3*v4*w*u4 - v6*u3 - 4*w4*u3 - 12*v3*w2*u3 + 16*v2*w3*u2 - + 3*v5*w*u2 - 8*v*w4*u - 3*v4*w2*u + w5 + v3*w3 )/d; - C01 = ( -w2*u7 - v2*w*u6 + v4*u5 + 6*v*w2*u5 - 5*w3*u4 - v3*w*u4 - 2*v5*u3 - 6*v2*w2*u3 - + 10*v*w3*u2 + 6*v4*w*u2 - 3*w4*u - 6*v3*w2*u + 2*v2*w3 )/d; - C02 = ( w2*u5 + v2*w*u4 - v4*u3 - 4*v*w2*u3 + 4*w3*u2 +3*v3*w*u2 - 3*v2*w2*u + v*w3 )/d; - C11 = ( -w*u8 - v2*u7 + 7*v*w*u6 + 4*v3*u5 - 5*w2*u5 - 16*v2*w*u4 - 4*v4*u3 + 16*v*w2*u3 - - 3*w3*u2 + 12*v3*w*u2 - 12*v2*w2*u + 3*v*w3 )/d; - C12 = ( w*u6 + v2*u5 - 5*v*w*u4 - 2*v3*u3 + 4*w2*u3 + 6*v2*w*u2 - 6*v*w2*u + w3 )/d; - C22 = ( -w*u4 - v2*u3 + 3*v*w*u2 - 3*w2*u )/d; - - Vd = adj(V); - VVd = V*Vd; - VQVd = V*Q*Vd; - VQ = V*Q; - VQ2 = V*Q*Q; - QVd = Q*Vd; - Q2Vd = 
Q*Q*Vd; - - // eqs (C17-C19) - PVd = ( C00*id_3 + C01*Q + C02*Q*Q )*Vd; - RVd = ( C01*id_3 + C11*Q + C12*Q*Q )*Vd; - SVd = ( C02*id_3 + C12*Q + C22*Q*Q )*Vd; - - // eqs (C20) and (C21) - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) { - - deriv = Zero(); // dWij/dVkl + if (g0 < delta || g1 < delta || g2 < delta) { + // force filter eq (C23) + g0 += delta; + g1 += delta; + g2 += delta; + Q = Q + delta; + } +// if (fabs(Q.determinant()/(g0*g1*g2)-1.0) > 1e-5) { SVD } + + auto u = sqrt(g0) + sqrt(g1) + sqrt(g2); + auto v = sqrt(g0*g1) + sqrt(g0*g2) + sqrt(g1*g2); + auto w = sqrt(g0*g1*g2); + auto den = w*(u*v-w); + auto f0 = (-w*(u*u+v)+u*v*v)/den; + auto f1 = (-w-u*u*u+2.*u*v)/den; + auto f2 = u/den; + + auto Qinvsq = f0 + f1*Q + f2*Q*Q; + + force() = forcemu(mu); + auto forcedag = adj(force); + + auto u2 = u * u; + auto u3 = u2 * u; + auto u4 = u3 * u; + auto u5 = u4 * u; + auto u6 = u5 * u; + auto u7 = u6 * u; + auto u8 = u7 * u; + auto v2 = v * v; + auto v3 = v2 * v; + auto v4 = v3 * v; + auto v5 = v4 * v; + auto v6 = v5 * v; + auto w2 = w * w; + auto w3 = w2 * w; + auto w4 = w3 * w; + auto w5 = w4 * w; - if (k == i) deriv += Qinvsq()()(l,j); - if (l == j) deriv += f1*VVd()()(i,k)+f2*VQVd()()(i,k); + // eq (C10) + auto d = 2*w3*(u*v-w)*(u*v-w)*(u*v-w); - deriv += f2*VVd()()(i,k)*Q()()(l,j) + V()()(i,j)*PVd()()(l,k) - + VQ()()(i,j)*RVd()()(l,k) + VQ2()()(i,j)*SVd()()(l,k); - - res()()(l,k) = res()()(l,k) + deriv*force()()(j,i); + // eq (C11) + auto C00 = ( -w3*u6 + 3*v*w3*u4 + 3*v4*w*u4 - v6*u3 - 4*w4*u3 - 12*v3*w2*u3 + 16*v2*w3*u2 + + 3*v5*w*u2 - 8*v*w4*u - 3*v4*w2*u + w5 + v3*w3 )/d; + auto C01 = ( -w2*u7 - v2*w*u6 + v4*u5 + 6*v*w2*u5 - 5*w3*u4 - v3*w*u4 - 2*v5*u3 - 6*v2*w2*u3 + + 10*v*w3*u2 + 6*v4*w*u2 - 3*w4*u - 6*v3*w2*u + 2*v2*w3 )/d; + auto C02 = ( w2*u5 + v2*w*u4 - v4*u3 - 4*v*w2*u3 + 4*w3*u2 +3*v3*w*u2 - 3*v2*w2*u + v*w3 )/d; + auto C11 = ( -w*u8 - v2*u7 + 7*v*w*u6 + 4*v3*u5 - 5*w2*u5 
- 16*v2*w*u4 - 4*v4*u3 + 16*v*w2*u3 + - 3*w3*u2 + 12*v3*w*u2 - 12*v2*w2*u + 3*v*w3 )/d; + auto C12 = ( w*u6 + v2*u5 - 5*v*w*u4 - 2*v3*u3 + 4*w2*u3 + 6*v2*w*u2 - 6*v*w2*u + w3 )/d; + auto C22 = ( -w*u4 - v2*u3 + 3*v*w*u2 - 3*w2*u )/d; + + // These are all used in the loop over color entries, and we want to avoid recomputing + // these products, which should be broadcast to all sites, 3*3*3*3=81 times. + auto Vdag = adj(V); + auto VVdag = V*Vdag; + auto VQ = V*Q; + auto VQ2 = VQ*Q; + auto VQVdag = VQ*Vdag; + auto QVdag = Q*Vdag; + auto Q2Vdag = Q*QVdag; - // dWij^+/dVkl - deriv = (f1*Vd()()(i,k)+f2*QVd()()(i,k))*Vd()()(l,j) - + f2*Vd()()(i,k)*QVd()()(l,j) + Vd()()(i,j)*PVd()()(l,k) - + QVd()()(i,j)*RVd()()(l,k)+Q2Vd()()(i,j)*SVd()()(l,k); + // eqs (C17-C19) + auto PVdag = ( C00 + C01*Q + C02*Q*Q )*Vdag; + auto RVdag = ( C01 + C11*Q + C12*Q*Q )*Vdag; + auto SVdag = ( C02 + C12*Q + C22*Q*Q )*Vdag; - res()()(l,k) = res()()(l,k) + deriv*forcedag()()(j,i); - } + // eqs (C20) and (C21) + ColourMatrix res = Zero(); + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) { + + Complex deriv = Zero(); // dWij/dVkl + + if (k == i) deriv += Qinvsq()()(l,j); + if (l == j) deriv += f1*VVdag()()(i,k)+f2*VQVdag()()(i,k); - PokeIndex(u_deriv, res, mu); - } + deriv += f2*VVdag()()(i,k)*Q()()(l,j) + V()()(i,j)*PVdag()()(l,k) + + VQ()()(i,j)*RVdag()()(l,k) + VQ2()()(i,j)*SVdag()()(l,k); + + res()()(l,k) = res()()(l,k) + deriv*force()()(j,i); + + // dWij^+/dVkl + deriv = (f1*Vdag()()(i,k)+f2*QVdag()()(i,k))*Vdag()()(l,j) + + f2*Vdag()()(i,k)*QVdag()()(l,j) + Vdag()()(i,j)*PVdag()()(l,k) + + QVdag()()(i,j)*RVdag()()(l,k)+Q2Vdag()()(i,j)*SVdag()()(l,k); + + res()()(l,k) = res()()(l,k) + deriv*forcedag()()(j,i); + } + + insertLane(blane,uderiv_v[ss](mu),res()); + } + } + }); }; // void derivative(const GaugeField& Gauge) const { diff --git a/Grid/util/FlightRecorder.cc b/Grid/util/FlightRecorder.cc index 4b8e03461e..32fcd48bfc 
100644 --- a/Grid/util/FlightRecorder.cc +++ b/Grid/util/FlightRecorder.cc @@ -290,7 +290,9 @@ void FlightRecorder::xmitLog(void *buf,uint64_t bytes) deviceVector dev(1); acceleratorCopyToDevice(&word,&dev[0],sizeof(uint64_t)); acceleratorCopySynchronise(); +#ifndef GRID_COMMS_NONE MPI_Barrier(MPI_COMM_WORLD); +#endif } } void FlightRecorder::recvLog(void *buf,uint64_t bytes,int rank) diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc new file mode 100644 index 0000000000..cf41f182a2 --- /dev/null +++ b/tests/forces/Test_HISQ_force.cc @@ -0,0 +1,111 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/smearing/Test_fatLinks.cc + +Copyright (C) 2024 + +Author: D. A. Clarke + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* + @file Test_HISQ_force.cc + @brief test of the HISQ fermion force +*/ + + +#include +#include +#include +#include +using namespace Grid; + + +//bool testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD Usmr, LatticeGaugeFieldD Unaik, +// LatticeGaugeFieldD Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) { +// Smear_HISQ hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); +// LatticeGaugeFieldD diff(&GRID), Uproj(&GRID); +// hisq_fat.smear(Usmr, Unaik, Umu); +// bool result; +// if (cnaik < 1e-30) { // Testing anything but Naik term +// diff = Ucontrol-Usmr; +// auto absDiff = norm2(diff)/norm2(Ucontrol); +// if (absDiff < 1e-30) { +// Grid_pass(" |Umu-Usmr|/|Umu| = ",absDiff); +// result = true; +// } else { +// Grid_error(" |Umu-Usmr|/|Umu| = ",absDiff); +// result = false; +// } +// } else { // Testing Naik specifically +// diff = Ucontrol-Unaik; +// auto absDiff = norm2(diff)/norm2(Ucontrol); +// if (absDiff < 1e-30) { +// Grid_pass(" |Umu-Unaik|/|Umu| = ",absDiff); +// result = true; +// } else { +// Grid_error(" |Umu-Unaik|/|Umu| = ",absDiff); +// result = false; +// } +// hisq_fat.projectU3(Uproj,Ucontrol); +//// NerscIO::writeConfiguration(Unaik,"nersc.l8t4b3360.naik"); +// } +// return result; +//} + + +int main (int argc, char** argv) { + + // Params for the test. 
+ int Ns = 8; + int Nt = 4; + Coordinate latt_size(Nd,0); latt_size[0]=Ns; latt_size[1]=Ns; latt_size[2]=Ns; latt_size[3]=Nt; + std::string conf_in = "nersc.l8t4b3360"; + int threads = GridThread::GetThreads(); + typedef LatticeGaugeFieldD LGF; + + // Initialize the Grid + Grid_init(&argc,&argv); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); + Grid_log("mpi = ",mpi_layout); + Grid_log("simd = ",simd_layout); + Grid_log("latt = ",latt_size); + Grid_log("threads = ",threads); + GridCartesian GRID(latt_size,simd_layout,mpi_layout); + + XmlReader Reader("HISQParams.xml",false,"grid"); + + LGF Umu(&GRID), dWdV(&GRID); + + // Read the configuration into Umu + FieldMetaData header; + NerscIO::readConfiguration(Umu, header, conf_in); + + bool pass=true; + + if(pass){ + Grid_pass("All tests passed."); + } else { + Grid_error("At least one test failed."); + } + + Grid_finalize(); +} \ No newline at end of file diff --git a/tests/smearing/Test_fatLinks.cc b/tests/smearing/Test_fatLinks.cc index ad1a9b1813..e120289841 100644 --- a/tests/smearing/Test_fatLinks.cc +++ b/tests/smearing/Test_fatLinks.cc @@ -90,7 +90,7 @@ void hotStartSmear(GridCartesian& GRID) { SU::HotConfiguration(pRNG,Uhot); Smear_HISQ hisq_fat(&GRID,1/8.,0.,1/16.,1/64.,1/384.,-1/8.); hisq_fat.projectU3(Uproj,Uhot); - Grid_log("norm2(Uproj) = ",norm2(Uproj)); + Grid_log("norm2(Uproj) = ",norm2(Uproj)/(Nc*Nd*GRID.gSites())); } int main (int argc, char** argv) { From 90eb0b6ff9f4b749b09a381551a9b589571d902e Mon Sep 17 00:00:00 2001 From: david clarke Date: Mon, 27 May 2024 20:29:06 -0600 Subject: [PATCH 03/32] added dWdV tests; passes float and double --- Grid/qcd/smearing/HISQSmearing.h | 18 +++++----- Makefile.am | 2 ++ tests/forces/Makefile.am | 1 + tests/forces/Test_HISQ_force.cc | 62 +++++++++++++------------------- tests/smearing/Test_fatLinks.cc | 16 ++++----- 5 files changed, 45 insertions(+), 54 deletions(-) diff --git 
a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index fca91078c0..557921fbdf 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -342,14 +342,14 @@ class Smear_HISQ : public Gimpl { conformable(u_proj,u_mu); // Follow MILC 10.1103/PhysRevD.82.074501, eqs (B2-B3) and (C1-C8) - accelerator_for(ss,umu_v.size(),vLorentzColourMatrixD::Nsimd(),{ + accelerator_for(ss,umu_v.size(),vLorentzColourMatrix::Nsimd(),{ #ifdef GRID_SIMT - { int blane=acceleratorSIMTlane(vLorentzColourMatrixD::Nsimd());// + { int blane=acceleratorSIMTlane(vLorentzColourMatrix::Nsimd());// #else - for(int blane=0;blane hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); -// LatticeGaugeFieldD diff(&GRID), Uproj(&GRID); -// hisq_fat.smear(Usmr, Unaik, Umu); -// bool result; -// if (cnaik < 1e-30) { // Testing anything but Naik term -// diff = Ucontrol-Usmr; -// auto absDiff = norm2(diff)/norm2(Ucontrol); -// if (absDiff < 1e-30) { -// Grid_pass(" |Umu-Usmr|/|Umu| = ",absDiff); -// result = true; -// } else { -// Grid_error(" |Umu-Usmr|/|Umu| = ",absDiff); -// result = false; -// } -// } else { // Testing Naik specifically -// diff = Ucontrol-Unaik; -// auto absDiff = norm2(diff)/norm2(Ucontrol); -// if (absDiff < 1e-30) { -// Grid_pass(" |Umu-Unaik|/|Umu| = ",absDiff); -// result = true; -// } else { -// Grid_error(" |Umu-Unaik|/|Umu| = ",absDiff); -// result = false; -// } -// hisq_fat.projectU3(Uproj,Ucontrol); -//// NerscIO::writeConfiguration(Unaik,"nersc.l8t4b3360.naik"); -// } -// return result; -//} +bool testForce(GridCartesian& GRID, LatticeGaugeField Umu, LatticeGaugeField Uforce, + LatticeGaugeField Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) { + Smear_HISQ hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); + LatticeGaugeField diff(&GRID); + hisq_fat.ddVprojectU3(Uforce, Umu, Umu, 5e-5); + bool result; + diff = Ucontrol-Uforce; + auto absDiff = norm2(diff)/norm2(Ucontrol); + if (absDiff < 1e-30) { + Grid_pass(" |Umu-Usmr|/|Umu| = 
",absDiff); + result = true; + } else { + Grid_error(" |Umu-Usmr|/|Umu| = ",absDiff); + result = false; + } +// NerscIO::writeConfiguration(Uforce,"nersc.l8t4b3360.ddVU3"); + return result; +} int main (int argc, char** argv) { @@ -79,11 +66,11 @@ int main (int argc, char** argv) { Coordinate latt_size(Nd,0); latt_size[0]=Ns; latt_size[1]=Ns; latt_size[2]=Ns; latt_size[3]=Nt; std::string conf_in = "nersc.l8t4b3360"; int threads = GridThread::GetThreads(); - typedef LatticeGaugeFieldD LGF; + typedef LatticeGaugeField LGF; // Initialize the Grid Grid_init(&argc,&argv); - Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); Grid_log("mpi = ",mpi_layout); Grid_log("simd = ",simd_layout); @@ -91,9 +78,7 @@ int main (int argc, char** argv) { Grid_log("threads = ",threads); GridCartesian GRID(latt_size,simd_layout,mpi_layout); - XmlReader Reader("HISQParams.xml",false,"grid"); - - LGF Umu(&GRID), dWdV(&GRID); + LGF Umu(&GRID), Ucontrol(&GRID); // Read the configuration into Umu FieldMetaData header; @@ -101,6 +86,9 @@ int main (int argc, char** argv) { bool pass=true; + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.ddVU3.control"); + pass *= testForce(GRID,Umu,Umu,Ucontrol,0.,0.,0.,0.,0.,0.); + if(pass){ Grid_pass("All tests passed."); } else { diff --git a/tests/smearing/Test_fatLinks.cc b/tests/smearing/Test_fatLinks.cc index e120289841..cc8eef32a5 100644 --- a/tests/smearing/Test_fatLinks.cc +++ b/tests/smearing/Test_fatLinks.cc @@ -52,10 +52,10 @@ struct ConfParameters: Serializable { }; -bool testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD Usmr, LatticeGaugeFieldD Unaik, - LatticeGaugeFieldD Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) { - Smear_HISQ hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); - LatticeGaugeFieldD diff(&GRID), Uproj(&GRID); +bool testSmear(GridCartesian& GRID, 
LatticeGaugeField Umu, LatticeGaugeField Usmr, LatticeGaugeField Unaik, + LatticeGaugeField Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) { + Smear_HISQ hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); + LatticeGaugeField diff(&GRID), Uproj(&GRID); hisq_fat.smear(Usmr, Unaik, Umu); bool result; if (cnaik < 1e-30) { // Testing anything but Naik term @@ -85,7 +85,7 @@ bool testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD U } void hotStartSmear(GridCartesian& GRID) { - LatticeGaugeFieldD Uproj(&GRID), Uhot(&GRID); + LatticeGaugeField Uproj(&GRID), Uhot(&GRID); GridParallelRNG pRNG(&GRID); pRNG.SeedFixedIntegers(std::vector({111,222,333,444})); SU::HotConfiguration(pRNG,Uhot); Smear_HISQ hisq_fat(&GRID,1/8.,0.,1/16.,1/64.,1/384.,-1/8.); @@ -101,11 +101,11 @@ int main (int argc, char** argv) { Coordinate latt_size(Nd,0); latt_size[0]=Ns; latt_size[1]=Ns; latt_size[2]=Ns; latt_size[3]=Nt; std::string conf_in = "nersc.l8t4b3360"; int threads = GridThread::GetThreads(); - typedef LatticeGaugeFieldD LGF; + typedef LatticeGaugeField LGF; // Initialize the Grid Grid_init(&argc,&argv); - Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); Grid_log("mpi = ",mpi_layout); Grid_log("simd = ",simd_layout); @@ -151,7 +151,7 @@ int main (int argc, char** argv) { // Test a C-style instantiation double path_coeff[6] = {1, 2, 3, 4, 5, 6}; - Smear_HISQ hisq_fat_Cstyle(&GRID,path_coeff); + Smear_HISQ hisq_fat_Cstyle(&GRID,path_coeff); if (param.benchmark) { From 7044ff73e8169133629ae49bbabce25ff387c351 Mon Sep 17 00:00:00 2001 From: "D. A. 
Clarke" Date: Tue, 11 Jun 2024 15:13:39 -0600 Subject: [PATCH 04/32] remove unused stencil elements --- Grid/qcd/smearing/HISQSmearing.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 557921fbdf..34613fcdaf 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -35,7 +35,6 @@ directory #include #include #include -//#include NAMESPACE_BEGIN(Grid); @@ -89,6 +88,7 @@ class Smear_HISQ : public Gimpl { typedef typename Gimpl::GaugeField GF; typedef typename Gimpl::GaugeLinkField LF; typedef typename Gimpl::ComplexField CF; +// typedef typename Gimpl::(real part of a complex) Smear_HISQ(GridCartesian* grid, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) : _grid(grid), @@ -168,7 +168,7 @@ class Smear_HISQ : public Gimpl { auto gStencil_v = gStencil.View(AcceleratorRead); accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs - stencilElement SE0, SE1, SE2, SE3, SE4, SE5; + stencilElement SE0, SE1, SE2, SE3, SE4; U3matrix U0, U1, U2, U3, U4, U5, W; for(int nu=0;nu_offset; SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; - SE5 = gStencil_v.GetEntry(s+5,site); int x_m_mu = SE5->_offset; // When you're deciding whether to take an adjoint, the question is: how is the // stored link oriented compared to the one you want? If I imagine myself travelling @@ -208,7 +207,7 @@ class Smear_HISQ : public Gimpl { }) accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link - stencilElement SE0, SE1, SE2, SE3, SE4, SE5; + stencilElement SE0, SE1, SE2, SE3, SE4; U3matrix U0, U1, U2, U3, U4, U5, W; int sigmaIndex = 0; for(int nu=0;nu Date: Thu, 18 Jul 2024 18:49:31 -0400 Subject: [PATCH 05/32] single/double issues should be fixed now... 
--- Grid/qcd/smearing/HISQSmearing.h | 181 +++++++++++++++++-------------- tests/forces/Test_HISQ_force.cc | 31 +++++- tests/smearing/Test_fatLinks.cc | 100 ++++++----------- 3 files changed, 162 insertions(+), 150 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 34613fcdaf..7239a3b644 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -41,15 +41,16 @@ NAMESPACE_BEGIN(Grid); /*! @brief structure holding the link treatment */ +template struct SmearingParameters{ SmearingParameters(){} - Real c_1; // 1 link - Real c_naik; // Naik term - Real c_3; // 3 link - Real c_5; // 5 link - Real c_7; // 7 link - Real c_lp; // 5 link Lepage - SmearingParameters(Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) + floatT c_1; // 1 link + floatT c_naik; // Naik term + floatT c_3; // 3 link + floatT c_5; // 5 link + floatT c_7; // 7 link + floatT c_lp; // 5 link Lepage + SmearingParameters(floatT c1, floatT cnaik, floatT c3, floatT c5, floatT c7, floatT clp) : c_1(c1), c_naik(cnaik), c_3(c3), @@ -74,39 +75,59 @@ class Smear_HISQ : public Gimpl { public: GridCartesian* const _grid; - Real const Scut = 1e-16; // Cutoff for U(3) projection eigenvalues - SmearingParameters linkTreatment; - - // figure out the stencil index from mu and nu - accelerator_inline int stencilIndex(int mu, int nu) const { - // Nshifts depends on how you built the stencil - int Nshifts = 6; - return Nshifts*nu + Nd*Nshifts*mu; - } + // Sort out the Gimpl. This handles BCs and part of the precision. 
INHERIT_GIMPL_TYPES(Gimpl); typedef typename Gimpl::GaugeField GF; typedef typename Gimpl::GaugeLinkField LF; typedef typename Gimpl::ComplexField CF; -// typedef typename Gimpl::(real part of a complex) + typedef typename Gimpl::Scalar ComplexScalar; + typedef decltype(real(ComplexScalar())) RealScalar; + typedef iColourMatrix ComplexColourMatrix; + + RealScalar Scut=-1; // Cutoff for U(3) projection eigenvalues, set at initialization + int Nsimd=0; + SmearingParameters linkTreatment; + + void initialize() { + if (sizeof(RealScalar)==4) { + Scut=1e-6; + } else if (sizeof(RealScalar)==8) { + Scut=1e-8; + } else { + Grid_error("HISQ smearing only implemented for single and double"); + } + assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3"); + assert(Nd == 4 && "HISQ smearing only defined for Nd==4"); + } - Smear_HISQ(GridCartesian* grid, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) + Smear_HISQ(GridCartesian* grid, RealScalar c1, RealScalar cnaik, RealScalar c3, RealScalar c5, RealScalar c7, RealScalar clp) : _grid(grid), linkTreatment(c1,cnaik,c3,c5,c7,clp) { - assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3"); - assert(Nd == 4 && "HISQ smearing only defined for Nd==4"); + initialize(); } - // Allow to pass a pointer to a C-style, double array for MILC convenience + // Allow to pass a pointer to a C-style array for MILC convenience Smear_HISQ(GridCartesian* grid, double* coeff) : _grid(grid), linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { - assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3"); - assert(Nd == 4 && "HISQ smearing only defined for Nd==4"); + initialize(); + } + Smear_HISQ(GridCartesian* grid, float* coeff) + : _grid(grid), + linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { + initialize(); } ~Smear_HISQ() {} + // figure out the stencil index from mu and nu + accelerator_inline int stencilIndex(int mu, int nu) const { + // Nshifts 
depends on how you built the stencil + int Nshifts = 6; + return Nshifts*nu + Nd*Nshifts*mu; + } + // Intent: OUT--u_smr (smeared links), // u_naik (Naik links), // IN--u_thin (thin links) @@ -341,40 +362,40 @@ class Smear_HISQ : public Gimpl { conformable(u_proj,u_mu); // Follow MILC 10.1103/PhysRevD.82.074501, eqs (B2-B3) and (C1-C8) - accelerator_for(ss,umu_v.size(),vLorentzColourMatrix::Nsimd(),{ + accelerator_for(ss,umu_v.size(),Simd::Nsimd(),{ #ifdef GRID_SIMT - { int blane=acceleratorSIMTlane(vLorentzColourMatrix::Nsimd());// + { int blane=acceleratorSIMTlane(Simd::Nsimd());// #else - for(int blane=0;blane 1e-5) { SVD } - auto u = sqrt(g0) + sqrt(g1) + sqrt(g2); - auto v = sqrt(g0*g1) + sqrt(g0*g2) + sqrt(g1*g2); - auto w = sqrt(g0*g1*g2); - auto den = w*(u*v-w); - auto f0 = (-w*(u*u+v)+u*v*v)/den; - auto f1 = (-w-u*u*u+2.*u*v)/den; - auto f2 = u/den; + RealScalar u = sqrt(g0) + sqrt(g1) + sqrt(g2); + RealScalar v = sqrt(g0*g1) + sqrt(g0*g2) + sqrt(g1*g2); + RealScalar w = sqrt(g0*g1*g2); + RealScalar den = w*(u*v-w); + RealScalar f0 = (-w*(u*u+v)+u*v*v)/den; + RealScalar f1 = (-w-u*u*u+2.*u*v)/den; + RealScalar f2 = u/den; auto Qinvsq = f0 + f1*Q + f2*Q*Q; force() = forcemu(mu); auto forcedag = adj(force); - auto u2 = u * u; - auto u3 = u2 * u; - auto u4 = u3 * u; - auto u5 = u4 * u; - auto u6 = u5 * u; - auto u7 = u6 * u; - auto u8 = u7 * u; - auto v2 = v * v; - auto v3 = v2 * v; - auto v4 = v3 * v; - auto v5 = v4 * v; - auto v6 = v5 * v; - auto w2 = w * w; - auto w3 = w2 * w; - auto w4 = w3 * w; - auto w5 = w4 * w; + RealScalar u2 = u * u; + RealScalar u3 = u2 * u; + RealScalar u4 = u3 * u; + RealScalar u5 = u4 * u; + RealScalar u6 = u5 * u; + RealScalar u7 = u6 * u; + RealScalar u8 = u7 * u; + RealScalar v2 = v * v; + RealScalar v3 = v2 * v; + RealScalar v4 = v3 * v; + RealScalar v5 = v4 * v; + RealScalar v6 = v5 * v; + RealScalar w2 = w * w; + RealScalar w3 = w2 * w; + RealScalar w4 = w3 * w; + RealScalar w5 = w4 * w; // eq (C10) auto d = 
2*w3*(u*v-w)*(u*v-w)*(u*v-w); @@ -498,13 +519,13 @@ class Smear_HISQ : public Gimpl { auto SVdag = ( C02 + C12*Q + C22*Q*Q )*Vdag; // eqs (C20) and (C21) - ColourMatrix res = Zero(); + ComplexColourMatrix res = Zero(); for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) { - Complex deriv = 0.; // dWij/dVkl + ComplexScalar deriv = 0.; // dWij/dVkl if (k == i) deriv += Qinvsq()()(l,j); if (l == j) deriv += f1*VVdag()()(i,k)+f2*VQVdag()()(i,k); diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc index c8b69637e1..9954dfaabe 100644 --- a/tests/forces/Test_HISQ_force.cc +++ b/tests/forces/Test_HISQ_force.cc @@ -38,10 +38,26 @@ directory using namespace Grid; -bool testForce(GridCartesian& GRID, LatticeGaugeField Umu, LatticeGaugeField Uforce, - LatticeGaugeField Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) { - Smear_HISQ hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); - LatticeGaugeField diff(&GRID); +//#define USE_DOUBLE true +#define USE_DOUBLE false + +#if USE_DOUBLE + #define PREC double + typedef LatticeGaugeFieldD LGF; + typedef PeriodicGimplD GIMPL; + typedef vComplexD COMP; +#else + #define PREC float + typedef LatticeGaugeFieldF LGF; + typedef PeriodicGimplF GIMPL; + typedef vComplexF COMP; +#endif + + +bool testForce(GridCartesian& GRID, LGF Umu, LGF Uforce, + LGF Ucontrol, PREC c1, PREC cnaik, PREC c3, PREC c5, PREC c7, PREC clp) { + Smear_HISQ hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); + LGF diff(&GRID); hisq_fat.ddVprojectU3(Uforce, Umu, Umu, 5e-5); bool result; diff = Ucontrol-Uforce; @@ -66,11 +82,10 @@ int main (int argc, char** argv) { Coordinate latt_size(Nd,0); latt_size[0]=Ns; latt_size[1]=Ns; latt_size[2]=Ns; latt_size[3]=Nt; std::string conf_in = "nersc.l8t4b3360"; int threads = GridThread::GetThreads(); - typedef LatticeGaugeField LGF; // Initialize the Grid Grid_init(&argc,&argv); - Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + 
Coordinate simd_layout = GridDefaultSimd(Nd,COMP::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); Grid_log("mpi = ",mpi_layout); Grid_log("simd = ",simd_layout); @@ -80,6 +95,8 @@ int main (int argc, char** argv) { LGF Umu(&GRID), Ucontrol(&GRID); +#if USE_DOUBLE // NerscIO is hard-coded to double. + // Read the configuration into Umu FieldMetaData header; NerscIO::readConfiguration(Umu, header, conf_in); @@ -95,5 +112,7 @@ int main (int argc, char** argv) { Grid_error("At least one test failed."); } +#endif + Grid_finalize(); } \ No newline at end of file diff --git a/tests/smearing/Test_fatLinks.cc b/tests/smearing/Test_fatLinks.cc index cc8eef32a5..5a699bdec9 100644 --- a/tests/smearing/Test_fatLinks.cc +++ b/tests/smearing/Test_fatLinks.cc @@ -38,24 +38,26 @@ directory using namespace Grid; -/*! @brief parameter file to easily adjust Nloop */ -struct ConfParameters: Serializable { - GRID_SERIALIZABLE_CLASS_MEMBERS( - ConfParameters, - int, benchmark, - int, Nloop); - - template - ConfParameters(Reader& Reader){ - read(Reader, "parameters", *this); - } -}; - - -bool testSmear(GridCartesian& GRID, LatticeGaugeField Umu, LatticeGaugeField Usmr, LatticeGaugeField Unaik, - LatticeGaugeField Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) { - Smear_HISQ hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); - LatticeGaugeField diff(&GRID), Uproj(&GRID); +//#define USE_DOUBLE true +#define USE_DOUBLE false + +#if USE_DOUBLE + #define PREC double + typedef LatticeGaugeFieldD LGF; + typedef PeriodicGimplD GIMPL; + typedef vComplexD COMP; +#else + #define PREC float + typedef LatticeGaugeFieldF LGF; + typedef PeriodicGimplF GIMPL; + typedef vComplexF COMP; +#endif + + +bool testSmear(GridCartesian& GRID, LGF Umu, LGF Usmr, LGF Unaik, + LGF Ucontrol, PREC c1, PREC cnaik, PREC c3, PREC c5, PREC c7, PREC clp) { + Smear_HISQ hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); + LGF diff(&GRID), Uproj(&GRID); hisq_fat.smear(Usmr, Unaik, Umu); bool result; if (cnaik < 1e-30) { // 
Testing anything but Naik term @@ -84,15 +86,17 @@ bool testSmear(GridCartesian& GRID, LatticeGaugeField Umu, LatticeGaugeField Usm return result; } + void hotStartSmear(GridCartesian& GRID) { - LatticeGaugeField Uproj(&GRID), Uhot(&GRID); + LGF Uproj(&GRID), Uhot(&GRID); GridParallelRNG pRNG(&GRID); pRNG.SeedFixedIntegers(std::vector({111,222,333,444})); SU::HotConfiguration(pRNG,Uhot); - Smear_HISQ hisq_fat(&GRID,1/8.,0.,1/16.,1/64.,1/384.,-1/8.); + Smear_HISQ hisq_fat(&GRID,1/8.,0.,1/16.,1/64.,1/384.,-1/8.); hisq_fat.projectU3(Uproj,Uhot); Grid_log("norm2(Uproj) = ",norm2(Uproj)/(Nc*Nd*GRID.gSites())); } + int main (int argc, char** argv) { // Params for the test. @@ -101,11 +105,16 @@ int main (int argc, char** argv) { Coordinate latt_size(Nd,0); latt_size[0]=Ns; latt_size[1]=Ns; latt_size[2]=Ns; latt_size[3]=Nt; std::string conf_in = "nersc.l8t4b3360"; int threads = GridThread::GetThreads(); - typedef LatticeGaugeField LGF; + + if (sizeof(PREC)==4) { + Grid_log("Run in single precision."); + } else { + Grid_log("Run in double precision."); + } // Initialize the Grid Grid_init(&argc,&argv); - Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + Coordinate simd_layout = GridDefaultSimd(Nd,COMP::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); Grid_log("mpi = ",mpi_layout); Grid_log("simd = ",simd_layout); @@ -113,12 +122,10 @@ int main (int argc, char** argv) { Grid_log("threads = ",threads); GridCartesian GRID(latt_size,simd_layout,mpi_layout); - XmlReader Reader("fatParams.xml",false,"grid"); - ConfParameters param(Reader); - if(param.benchmark) Grid_log(" Nloop = ",param.Nloop); - LGF Umu(&GRID), Usmr(&GRID), Unaik(&GRID), Ucontrol(&GRID); +#if USE_DOUBLE // NerscIO is hard-coded to double. 
+ // Read the configuration into Umu FieldMetaData header; NerscIO::readConfiguration(Umu, header, conf_in); @@ -143,6 +150,8 @@ int main (int argc, char** argv) { Grid_error("At least one test failed."); } +#endif + // Does a small hot start cause an issue? hotStartSmear(GRID); latt_size[0]=16; latt_size[1]=16; latt_size[2]=16; latt_size[3]=16; @@ -151,44 +160,7 @@ int main (int argc, char** argv) { // Test a C-style instantiation double path_coeff[6] = {1, 2, 3, 4, 5, 6}; - Smear_HISQ hisq_fat_Cstyle(&GRID,path_coeff); - - if (param.benchmark) { - - autoView(U_v, Umu, CpuRead); // Gauge accessor - - // Read in lattice sequentially, Nloop times - double lookupTime = 0.; - for(int i=0;i hisq_fat_Cstyle(&GRID,path_coeff); Grid_finalize(); } \ No newline at end of file From 8996763fd8eb7aae813c6a4975c6c34a685e6379 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Mon, 19 Aug 2024 15:04:54 -0600 Subject: [PATCH 06/32] first attempt at 3-link --- Grid/qcd/smearing/HISQSmearing.h | 89 +++++++++++++++++++++++++++++--- tests/smearing/Test_fatLinks.cc | 2 +- 2 files changed, 84 insertions(+), 7 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 7239a3b644..3065896d23 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -86,12 +86,13 @@ class Smear_HISQ : public Gimpl { typedef iColourMatrix ComplexColourMatrix; RealScalar Scut=-1; // Cutoff for U(3) projection eigenvalues, set at initialization - int Nsimd=0; + int HaloDepth=1; + SmearingParameters linkTreatment; void initialize() { if (sizeof(RealScalar)==4) { - Scut=1e-6; + Scut=1e-5; // Maybe should be higher? e.g. 1e-4 } else if (sizeof(RealScalar)==8) { Scut=1e-8; } else { @@ -137,8 +138,7 @@ class Smear_HISQ : public Gimpl { auto grid = this->_grid; // Create a padded cell of extra padding depth=1 and fill the padding. 
- int depth = 1; - PaddedCell Ghost(depth,grid); + PaddedCell Ghost(HaloDepth,grid); GF Ughost = Ghost.Exchange(u_thin); // This is where auxiliary N-link fields and the final smear will be stored. @@ -162,6 +162,7 @@ class Smear_HISQ : public Gimpl { // A GeneralLocalStencil has two indices: a site and stencil index GeneralLocalStencil gStencil(Ughost.Grid(),shifts); + typedef decltype(gStencil.GetEntry(0,0)) stencilElement; // This is where contributions from the smearing get added together Ughost_fat=Zero(); @@ -181,8 +182,7 @@ class Smear_HISQ : public Gimpl { autoView(U_5linkA_v, Ughost_5linkA, AcceleratorWrite); autoView(U_5linkB_v, Ughost_5linkB, AcceleratorWrite); - // We infer some types that will be needed in the calculation. - typedef decltype(gStencil.GetEntry(0,0)) stencilElement; + // We infer a type that will be needed in the calculation. typedef decltype(getLink(U_v[0](0),gStencil.GetEntry(0,0))) U3matrix; int Nsites = U_v.size(); @@ -549,6 +549,83 @@ class Smear_HISQ : public Gimpl { }); }; + + void ddV_3link(GF& u_deriv, GF& u_mu, GF& u_force) { + + SmearingParameters lt = this->linkTreatment; + auto grid = this->_grid; + + PaddedCell Ghost(HaloDepth,grid); + GF Ughost = Ghost.Exchange(u_mu); + GF Fghost = Ghost.Exchange(u_force); + + GF Ughost_deriv(Ughost.Grid()); + + Ughost_deriv = Zero(); + + std::vector shifts; + for(int mu=0;mu(shifts,mu); + appendShift(shifts,nu); + appendShift(shifts,shiftSignal::NO_SHIFT); + appendShift(shifts,mu,Back(nu)); + appendShift(shifts,Back(nu)); + appendShift(shifts,Back(mu)); + } + + GeneralLocalStencil gStencil(Ughost.Grid(),shifts); + typedef decltype(gStencil.GetEntry(0,0)) stencilElement; + + for(int mu=0;mu_offset; + SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; + SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; + SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; + SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; + + U0 = getLink(U_v[x_p_mu 
](nu),SE0); + U1 = getLink(U_v[x_p_nu ](mu),SE1); + U2 = getLink(U_v[x ](nu),SE2); + U3 = getLink(U_v[x_p_mu_m_nu](nu),SE3); + U4 = getLink(U_v[x_m_nu ](mu),SE4); + U5 = getLink(U_v[x_m_nu ](nu),SE4); + + F0 = getLink(F_v[x_p_mu ](nu),SE0); + F1 = getLink(F_v[x_p_nu ](mu),SE1); + F2 = getLink(F_v[x ](nu),SE2); + F3 = getLink(F_v[x_p_mu_m_nu](nu),SE3); + F4 = getLink(F_v[x_m_nu ](mu),SE4); + F5 = getLink(F_v[x_m_nu ](nu),SE4); + + W = adj(F2)*U1*adj(U0) + U2 *adj(F1)*adj(U0) + U2 *U1* F0 + + F5 *U4* U3 + adj(U5)*adj(F4)* U3 + adj(U5)*U4*adj(F3); + + setLink(U_deriv_v[x](mu), U_deriv_v(x)(mu) + lt.c_3*W); + } + }) + } // end mu loop + + u_deriv = Ghost.Extract(Ughost_deriv); + } + // void derivative(const GaugeField& Gauge) const { // }; }; diff --git a/tests/smearing/Test_fatLinks.cc b/tests/smearing/Test_fatLinks.cc index 5a699bdec9..4d9e38a343 100644 --- a/tests/smearing/Test_fatLinks.cc +++ b/tests/smearing/Test_fatLinks.cc @@ -39,7 +39,7 @@ using namespace Grid; //#define USE_DOUBLE true -#define USE_DOUBLE false +#define USE_DOUBLE true #if USE_DOUBLE #define PREC double From 0f5404009e5dc83b95c4300c840f8165d1d05826 Mon Sep 17 00:00:00 2001 From: "D. A. 
Clarke" Date: Tue, 20 Aug 2024 15:15:27 -0600 Subject: [PATCH 07/32] remove unused element from stencil --- Grid/qcd/smearing/HISQSmearing.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 3065896d23..5560174587 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -125,7 +125,7 @@ class Smear_HISQ : public Gimpl { // figure out the stencil index from mu and nu accelerator_inline int stencilIndex(int mu, int nu) const { // Nshifts depends on how you built the stencil - int Nshifts = 6; + int Nshifts = 5; return Nshifts*nu + Nd*Nshifts*mu; } @@ -157,7 +157,6 @@ class Smear_HISQ : public Gimpl { appendShift(shifts,shiftSignal::NO_SHIFT); appendShift(shifts,mu,Back(nu)); appendShift(shifts,Back(nu)); - appendShift(shifts,Back(mu)); } // A GeneralLocalStencil has two indices: a site and stencil index From 0f587932e7b38d34743167c7a5bc61b3b32b5bb2 Mon Sep 17 00:00:00 2001 From: Michael Lynch Date: Tue, 20 Aug 2024 14:17:25 -0700 Subject: [PATCH 08/32] Add ddV_naik function to Smear_HISQ --- Grid/qcd/smearing/HISQSmearing.h | 76 ++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 5560174587..09a11ac2e0 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -625,6 +625,82 @@ class Smear_HISQ : public Gimpl { u_deriv = Ghost.Extract(Ughost_deriv); } + void ddV_naik(GF& u_deriv, GF& u_mu, GF& u_force) { + + SmearingParameters lt = this->linkTreatment; + auto grid = this->_grid; + + PaddedCell Ghost(3,grid); + GF Ughost = Ghost.Exchange(u_mu); + GF Fghost = Ghost.Exchange(u_force); + + GF Ughost_deriv(Ughost.Grid()); + + Ughost_deriv = Zero(); + + std::vector shifts; + for(int mu=0;mu(shifts, shiftSignal::NO_SHIFT); + appendShift(shifts, mu); + appendShift(shifts, mu, mu); + + appendShift(shifts, Back(mu)); + 
appendShift(shifts, Back(mu), Back(mu)); + appendShift(shifts, Back(mu), Back(mu), Back(mu)); + } + + GeneralLocalStencil gStencil(Ughost.Grid(),shifts); + typedef decltype(gStencil.GetEntry(0,0)) stencilElement; + + autoView(U_v , Ughost , AcceleratorRead); + autoView(F_v , Fghost , AcceleratorRead); + autoView(U_deriv_v, Ughost_deriv, AcceleratorWrite); + + typedef decltype(getLink(U_v[0](0),gStencil.GetEntry(0,0))) U3matrix; + + int Nsites = U_v.size(); + auto gStencil_v = gStencil.View(AcceleratorRead); + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + stencilElement SE0, SE1, SE2, SE3, SE4; + U3matrix U0, U1, U2, U3, U4, U5, F0, F1, F2, F3, F4, F5, V; + int s = 0 + for(int mu=0;mu_offset; + SE1 = gStencil_v.GetEntry(s+1,site); int x_p_mu = SE1->_offset; + SE2 = gStencil_v.GetEntry(s+2,site); int x_p_2mu = SE2->_offset; + SE3 = gStencil_v.GetEntry(s+3,site); int x_m_mu = SE3->_offset; + SE4 = gStencil_v.GetEntry(s+4,site); int x_m_2mu = SE4->_offset; + SE5 = gStencil_v.GetEntry(s+5,site); int x_m_3mu = SE5->_offset; + + U0 = getLink(U_v[x ](mu),SE0); + U1 = getLink(U_v[x_p_mu ](mu),SE1); + U2 = getLink(U_v[x_p_2mu](mu),SE2); + U3 = getLink(U_v[x_m_mu ](mu),SE3); + U4 = getLink(U_v[x_m_2mu](mu),SE4); + U5 = getLink(U_v[x_m_3mu](mu),SE5); + + F0 = getLink(F_v[x ](mu),SE0); + F1 = getLink(F_v[x_p_mu ](mu),SE1); + F2 = getLink(F_v[x_p_2mu](mu),SE2); + F3 = getLink(F_v[x_m_mu ](mu),SE3); + F4 = getLink(F_v[x_m_2mu](mu),SE4); + F5 = getLink(F_v[x_m_3mu](mu),SE5); + + // ********Forward******** *******Backward******** + V = (adj(F2)* U1 * U0 )+(adj(U5)*adj(U4)* F3 ) + +( U2 *adj(F1)* U0 )+(adj(U5)* F4 *adj(U3)) + +( U2 * U1 *adj(F0))+( F5 *adj(U4)*adj(U3)); + + setLink(U_deriv_v[x](mu), U_deriv_v(x)(mu) + lt.c_naik*V); + + s += 6; + } + }); + u_deriv = Ghost.Extract(Ughost_deriv); + } + // void derivative(const GaugeField& Gauge) const { // }; }; From cf83dee09c817986cf0993ac88560cc12498ee10 Mon Sep 17 00:00:00 2001 From: "D. A. 
Clarke" Date: Mon, 26 Aug 2024 14:07:10 -0400 Subject: [PATCH 09/32] starting refactor for Force_HISQ class --- Grid/qcd/smearing/HISQSmearing.h | 80 +++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 12 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 09a11ac2e0..6ba9de47ab 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -69,6 +69,15 @@ vobj getLink(const vobj & __restrict__ vec,GeneralStencilEntry* SE) { #define setLink coalescedWrite +// figure out the stencil index from mu and nu +accelerator_inline int stencilIndex(int mu, int nu) { + // Nshifts depends on how you built the stencil + int Nshifts = 5; + return Nshifts*nu + Nd*Nshifts*mu; +} + + + /*! @brief create fat links from link variables */ template class Smear_HISQ : public Gimpl { @@ -122,13 +131,6 @@ class Smear_HISQ : public Gimpl { ~Smear_HISQ() {} - // figure out the stencil index from mu and nu - accelerator_inline int stencilIndex(int mu, int nu) const { - // Nshifts depends on how you built the stencil - int Nshifts = 5; - return Nshifts*nu + Nd*Nshifts*mu; - } - // Intent: OUT--u_smr (smeared links), // u_naik (Naik links), // IN--u_thin (thin links) @@ -404,6 +406,63 @@ class Smear_HISQ : public Gimpl { }); }; +}; + + + +/*! @brief compute force from link variables */ +template +class Force_HISQ : public Gimpl { +public: + + GridCartesian* const _grid; + + // Sort out the Gimpl. This handles BCs and part of the precision. 
+ INHERIT_GIMPL_TYPES(Gimpl); + typedef typename Gimpl::GaugeField GF; + typedef typename Gimpl::GaugeLinkField LF; + typedef typename Gimpl::ComplexField CF; + typedef typename Gimpl::Scalar ComplexScalar; + typedef decltype(real(ComplexScalar())) RealScalar; + typedef iColourMatrix ComplexColourMatrix; + + RealScalar Scut=-1; // Cutoff for U(3) projection eigenvalues, set at initialization + int HaloDepth=1; + + SmearingParameters linkTreatment; + + void initialize() { + if (sizeof(RealScalar)==4) { + Scut=1e-5; // Maybe should be higher? e.g. 1e-4 + } else if (sizeof(RealScalar)==8) { + Scut=1e-8; + } else { + Grid_error("HISQ force only implemented for single and double"); + } + assert(Nc == 3 && "HISQ force currently implemented only for Nc==3"); + assert(Nd == 4 && "HISQ force only defined for Nd==4"); + } + + Force_HISQ(GridCartesian* grid, RealScalar c1, RealScalar cnaik, RealScalar c3, RealScalar c5, RealScalar c7, RealScalar clp) + : _grid(grid), + linkTreatment(c1,cnaik,c3,c5,c7,clp) { + initialize(); + } + + // Allow to pass a pointer to a C-style array for MILC convenience + Force_HISQ(GridCartesian* grid, double* coeff) + : _grid(grid), + linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { + initialize(); + } + Force_HISQ(GridCartesian* grid, float* coeff) + : _grid(grid), + linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { + initialize(); + } + + ~Force_HISQ() {} + // Intent: OUT--u_deriv (dW/dV slotted into force) // IN--u_mu (fat links), @@ -643,7 +702,6 @@ class Smear_HISQ : public Gimpl { appendShift(shifts, shiftSignal::NO_SHIFT); appendShift(shifts, mu); appendShift(shifts, mu, mu); - appendShift(shifts, Back(mu)); appendShift(shifts, Back(mu), Back(mu)); appendShift(shifts, Back(mu), Back(mu), Back(mu)); @@ -662,9 +720,9 @@ class Smear_HISQ : public Gimpl { auto gStencil_v = gStencil.View(AcceleratorRead); accelerator_for(site,Nsites,Simd::Nsimd(),{ - stencilElement SE0, SE1, SE2, SE3, SE4; + 
stencilElement SE0, SE1, SE2, SE3, SE4, SE5; U3matrix U0, U1, U2, U3, U4, U5, F0, F1, F2, F3, F4, F5, V; - int s = 0 + int s = 0; for(int mu=0;mu_offset; @@ -701,8 +759,6 @@ class Smear_HISQ : public Gimpl { u_deriv = Ghost.Extract(Ughost_deriv); } -// void derivative(const GaugeField& Gauge) const { -// }; }; From eae54253a0a943919841d98c935380cda7651769 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Mon, 16 Sep 2024 16:44:05 -0600 Subject: [PATCH 10/32] 3-link deriv compiles; still need to fix tensor product --- Grid/qcd/action/fermion/StaggeredImpl.h | 2 +- Grid/qcd/smearing/HISQSmearing.h | 186 ++++++++++++++++-------- tests/forces/Test_HISQ_force.cc | 48 ++++-- 3 files changed, 166 insertions(+), 70 deletions(-) diff --git a/Grid/qcd/action/fermion/StaggeredImpl.h b/Grid/qcd/action/fermion/StaggeredImpl.h index f44d12f4f8..01c516b7b3 100644 --- a/Grid/qcd/action/fermion/StaggeredImpl.h +++ b/Grid/qcd/action/fermion/StaggeredImpl.h @@ -163,7 +163,7 @@ class StaggeredImpl : public PeriodicGaugeImpl(outerProduct(Btilde,A)); + link = outerProduct(Btilde,A); PokeIndex(mat,link,mu); } diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 6ba9de47ab..db17a1165c 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -27,7 +27,7 @@ directory *************************************************************************************/ /* @file HISQSmearing.h - @brief Declares classes related to HISQ smearing + @brief Declares classes related to the HISQ action */ @@ -35,21 +35,18 @@ directory #include #include #include - NAMESPACE_BEGIN(Grid); - -/*! @brief structure holding the link treatment */ +/*! 
@brief structure holding the link treatment for a given smear */ template struct SmearingParameters{ - SmearingParameters(){} floatT c_1; // 1 link floatT c_naik; // Naik term floatT c_3; // 3 link floatT c_5; // 5 link floatT c_7; // 7 link - floatT c_lp; // 5 link Lepage + floatT c_lp; // Lepage SmearingParameters(floatT c1, floatT cnaik, floatT c3, floatT c5, floatT c7, floatT clp) : c_1(c1), c_naik(cnaik), @@ -60,6 +57,69 @@ struct SmearingParameters{ }; +// There are 6 quarks in nature, and 3 never need a Naik epsilon +int const GRID_MAX_NAIK = 3; + + +/*! @brief structure holding all input parameters related to the HISQ action */ +template +struct HISQParameters{ + // Structure from QOP/QDP + int n_naiks; + std::array eps_naiks; + floatT fat7_c1; + floatT fat7_c3; + floatT fat7_c5; + floatT fat7_c7; + floatT fat7_clp; + floatT asqtad_c1; + floatT asqtad_c3; + floatT asqtad_c5; + floatT asqtad_c7; + floatT asqtad_clp; + floatT asqtad_cnaik; + floatT diff_c1; + floatT diff_cnaik; + HISQParameters(int n_naiks_in, std::array eps_naiks_in, + floatT fat7_one_link , floatT fat7_three_staple , floatT fat7_five_staple , floatT fat7_seven_staple , floatT fat7_lepage, + floatT asqtad_one_link, floatT asqtad_three_staple, floatT asqtad_five_staple, floatT asqtad_seven_staple, floatT asqtad_lepage, + floatT asqtad_naik , floatT difference_one_link, floatT difference_naik) + : n_naiks(n_naiks_in), + eps_naiks(eps_naiks_in), + fat7_c1(fat7_one_link), + fat7_c3(fat7_three_staple), + fat7_c5(fat7_five_staple), + fat7_c7(fat7_seven_staple), + fat7_clp(fat7_lepage), + asqtad_c1(asqtad_one_link), + asqtad_c3(asqtad_three_staple), + asqtad_c5(asqtad_five_staple), + asqtad_c7(asqtad_seven_staple), + asqtad_clp(asqtad_lepage), + asqtad_cnaik(asqtad_naik), + diff_c1(difference_one_link), + diff_cnaik(difference_naik) {} +}; + + +/*! 
@brief sometimes in the U(3) projection we use SVD cuts; here we collect related parameters */ +template +struct HISQReunitSVDParameters{ + // Structure from QOP/QDP + bool allow_svd; + bool svd_only; + floatT svd_rel_error; + floatT svd_abs_error; + floatT force_filter; + HISQReunitSVDParameters(bool allow, bool only, floatT rel, floatT abs, floatT filter) + : allow_svd(allow), + svd_only(only), + svd_rel_error(rel), + svd_abs_error(abs), + force_filter(filter) {} +}; + + // I think that "coalesced..." functions are extremely general, which is nice, // but in the HISQ context it boils down to link reading and writing. template accelerator_inline @@ -94,16 +154,16 @@ class Smear_HISQ : public Gimpl { typedef decltype(real(ComplexScalar())) RealScalar; typedef iColourMatrix ComplexColourMatrix; - RealScalar Scut=-1; // Cutoff for U(3) projection eigenvalues, set at initialization - int HaloDepth=1; + RealScalar _Scut; // Cutoff for U(3) projection eigenvalues, set at initialization + int _HaloDepth=1; - SmearingParameters linkTreatment; + SmearingParameters _linkTreatment; void initialize() { if (sizeof(RealScalar)==4) { - Scut=1e-5; // Maybe should be higher? e.g.
1e-4 } else if (sizeof(RealScalar)==8) { - Scut=1e-8; + _Scut=1e-8; } else { Grid_error("HISQ smearing only implemented for single and double"); } @@ -113,19 +173,19 @@ class Smear_HISQ : public Gimpl { Smear_HISQ(GridCartesian* grid, RealScalar c1, RealScalar cnaik, RealScalar c3, RealScalar c5, RealScalar c7, RealScalar clp) : _grid(grid), - linkTreatment(c1,cnaik,c3,c5,c7,clp) { + _linkTreatment(c1,cnaik,c3,c5,c7,clp) { initialize(); } // Allow to pass a pointer to a C-style array for MILC convenience Smear_HISQ(GridCartesian* grid, double* coeff) : _grid(grid), - linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { + _linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { initialize(); } Smear_HISQ(GridCartesian* grid, float* coeff) : _grid(grid), - linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { + _linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { initialize(); } @@ -136,11 +196,11 @@ class Smear_HISQ : public Gimpl { // IN--u_thin (thin links) void smear(GF& u_smr, GF& u_naik, GF& u_thin) const { - SmearingParameters lt = this->linkTreatment; + SmearingParameters lt = this->_linkTreatment; auto grid = this->_grid; // Create a padded cell of extra padding depth=1 and fill the padding. - PaddedCell Ghost(HaloDepth,grid); + PaddedCell Ghost(_HaloDepth,grid); GF Ughost = Ghost.Exchange(u_thin); // This is where auxiliary N-link fields and the final smear will be stored. 
@@ -379,7 +439,7 @@ class Smear_HISQ : public Gimpl { RealScalar c1 = (1/2.)*real(trace(Q*Q))()()(); RealScalar c2 = (1/3.)*real(trace(Q*Q*Q))()()(); RealScalar S = (1/3.)*c1-(1/18.)*c0*c0; - if (abs(S) ComplexColourMatrix; - RealScalar Scut=-1; // Cutoff for U(3) projection eigenvalues, set at initialization - int HaloDepth=1; + RealScalar _Scut=-1; // Cutoff for U(3) projection eigenvalues, set at initialization + int _HaloDepth=1; - SmearingParameters linkTreatment; + HISQParameters _linkParams; + HISQReunitSVDParameters _reunitParams; + GF _Umu, _Vmu, _Wmu; void initialize() { if (sizeof(RealScalar)==4) { - Scut=1e-5; // Maybe should be higher? e.g. 1e-4 + _Scut=1e-5; // Maybe should be higher? e.g. 1e-4 } else if (sizeof(RealScalar)==8) { - Scut=1e-8; + _Scut=1e-8; } else { Grid_error("HISQ force only implemented for single and double"); } @@ -443,21 +506,14 @@ class Force_HISQ : public Gimpl { assert(Nd == 4 && "HISQ force only defined for Nd==4"); } - Force_HISQ(GridCartesian* grid, RealScalar c1, RealScalar cnaik, RealScalar c3, RealScalar c5, RealScalar c7, RealScalar clp) - : _grid(grid), - linkTreatment(c1,cnaik,c3,c5,c7,clp) { - initialize(); - } - - // Allow to pass a pointer to a C-style array for MILC convenience - Force_HISQ(GridCartesian* grid, double* coeff) + Force_HISQ(GridCartesian* grid, HISQParameters linkParams, GF Wmu, GF Vmu, GF Umu, + HISQReunitSVDParameters reunitParams) : _grid(grid), - linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { - initialize(); - } - Force_HISQ(GridCartesian* grid, float* coeff) - : _grid(grid), - linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) { + _linkParams(linkParams), + _Wmu(Wmu), + _Vmu(Vmu), + _Umu(Umu), + _reunitParams(reunitParams) { initialize(); } @@ -496,7 +552,7 @@ class Force_HISQ : public Gimpl { RealScalar c1 = (1/2.)*real(trace(Q*Q))()()(); RealScalar c2 = (1/3.)*real(trace(Q*Q*Q))()()(); RealScalar S = (1/3.)*c1-(1/18.)*c0*c0; - if (abs(S) vecx) { - void 
ddV_3link(GF& u_deriv, GF& u_mu, GF& u_force) { + // access ith monte carlo separation with vecdt[i] + // access ith staggered field pointer with vecx[i] - SmearingParameters lt = this->linkTreatment; + HISQParameters hp = this->_linkParams; auto grid = this->_grid; - PaddedCell Ghost(HaloDepth,grid); - GF Ughost = Ghost.Exchange(u_mu); - GF Fghost = Ghost.Exchange(u_force); + GF XY(grid); // outer product field + GF u_force(grid); // accumulates the force - GF Ughost_deriv(Ughost.Grid()); + // construct outer product... i really don't think InsertForce4D will work as is because + // i need to distinguish correctly odd sites from even sites (makes |X> shifts; for(int mu=0;mu(shifts,shiftSignal::NO_SHIFT); appendShift(shifts,mu,Back(nu)); appendShift(shifts,Back(nu)); - appendShift(shifts,Back(mu)); } GeneralLocalStencil gStencil(Ughost.Grid(),shifts); @@ -637,9 +703,9 @@ class Force_HISQ : public Gimpl { for(int mu=0;mulinkTreatment; + SmearingParameters lt = this->_linkTreatment; auto grid = this->_grid; PaddedCell Ghost(3,grid); diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc index 9954dfaabe..773a7f5d06 100644 --- a/tests/forces/Test_HISQ_force.cc +++ b/tests/forces/Test_HISQ_force.cc @@ -38,27 +38,57 @@ directory using namespace Grid; -//#define USE_DOUBLE true -#define USE_DOUBLE false +#define USE_DOUBLE true +//#define USE_DOUBLE false #if USE_DOUBLE #define PREC double typedef LatticeGaugeFieldD LGF; - typedef PeriodicGimplD GIMPL; + typedef StaggeredImplD GIMPL; typedef vComplexD COMP; #else #define PREC float typedef LatticeGaugeFieldF LGF; - typedef PeriodicGimplF GIMPL; + typedef StaggeredImplF GIMPL; typedef vComplexF COMP; #endif -bool testForce(GridCartesian& GRID, LGF Umu, LGF Uforce, - LGF Ucontrol, PREC c1, PREC cnaik, PREC c3, PREC c5, PREC c7, PREC clp) { - Smear_HISQ hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); +// Intent: IN--Umu: thin links +bool testForce(GridCartesian& GRID, LGF Umu, LGF Uforce, LGF Ucontrol) { + + 
int n_naiks = 1; + std::array eps_naik = {0,0,0}; + +// PREC fat7_c1 = 1/8.; PREC asqtad_c1 = 1.; +// PREC fat7_c3 = 1/16.; PREC asqtad_c3 = 1/16.; +// PREC fat7_c5 = 1/64.; PREC asqtad_c5 = 1/64.; +// PREC fat7_c7 = 1/384.; PREC asqtad_c7 = 1/384.; +// PREC cnaik = -1/24.+eps_naik[0]/8; PREC asqtad_clp = -1/8.; + + PREC fat7_c1 = 1.; PREC asqtad_c1 = 0.; + PREC fat7_c3 = 0.; PREC asqtad_c3 = 1/16.; + PREC fat7_c5 = 0.; PREC asqtad_c5 = 0.; + PREC fat7_c7 = 0.; PREC asqtad_c7 = 0.; + PREC cnaik = 0.; PREC asqtad_clp = 0.; + + LGF Vmu(&GRID), Wmu(&GRID), Nmu(&GRID); + Smear_HISQ fat7(&GRID,fat7_c1,0.,fat7_c3,fat7_c5,fat7_c7,0.); + + fat7.smear(Vmu,Nmu,Umu); // Populate fat7 and Naik links Vmu and Nmu + fat7.projectU3(Wmu,Vmu); // Populate U(3) projection Wmu + + HISQParameters hisq_param(n_naiks , eps_naik , + fat7_c1 , fat7_c3 , fat7_c5 , fat7_c7 , 0., + asqtad_c1, asqtad_c3, asqtad_c5, asqtad_c7, asqtad_clp, + cnaik , 0. , 0.); + + HISQReunitSVDParameters hisq_reunit_svd(false, false, 1, 1, 1); + + Force_HISQ hisq_force(&GRID, hisq_param, Wmu, Vmu, Umu, hisq_reunit_svd); + LGF diff(&GRID); - hisq_fat.ddVprojectU3(Uforce, Umu, Umu, 5e-5); + hisq_force.ddVprojectU3(Uforce, Umu, Umu, 5e-5); bool result; diff = Ucontrol-Uforce; auto absDiff = norm2(diff)/norm2(Ucontrol); @@ -104,7 +134,7 @@ int main (int argc, char** argv) { bool pass=true; NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.ddVU3.control"); - pass *= testForce(GRID,Umu,Umu,Ucontrol,0.,0.,0.,0.,0.,0.); + pass *= testForce(GRID,Umu,Umu,Ucontrol); if(pass){ Grid_pass("All tests passed."); From ec5a06acc2d9fa15d3f79015c9a8ad8717d9b29d Mon Sep 17 00:00:00 2001 From: "D. A. 
Clarke" Date: Thu, 12 Dec 2024 15:35:08 -0700 Subject: [PATCH 11/32] i think i have a working outer product --- Grid/qcd/smearing/HISQSmearing.h | 216 +++++++++++++++++++------------ 1 file changed, 134 insertions(+), 82 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index db17a1165c..be7088c36e 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -475,7 +475,8 @@ template class Force_HISQ : public Gimpl { public: - GridCartesian* const _grid; + GridCartesian* const _grid; + GridRedBlackCartesian* _gridRB; // Sort out the Gimpl. This handles BCs and part of the precision. INHERIT_GIMPL_TYPES(Gimpl); @@ -502,6 +503,7 @@ class Force_HISQ : public Gimpl { } else { Grid_error("HISQ force only implemented for single and double"); } + _gridRB = SpaceTimeGrid::makeFourDimRedBlackGrid(_grid); assert(Nc == 3 && "HISQ force currently implemented only for Nc==3"); assert(Nd == 4 && "HISQ force only defined for Nd==4"); } @@ -663,91 +665,141 @@ class Force_HISQ : public Gimpl { }); }; - void force(GF momentum, RealScalar* vecdt, std::vector vecx) { - - // access ith monte carlo separation with vecdt[i] - // access ith staggered field pointer with vecx[i] + // We are calculating the force using the rational approximation. The goal is that we can approximate + // (Mdag M)^(-nf/4) = alpha_0 + sum_l alpha_l/(M^dag M + beta_l). Hence the index l runs over the + // order of the rational approximation. The additional complication is that each M depends on the + // fermion mass, and for higher masses, in particular when there are charm quarks, we need to + // introduce a different "Naik epsilon" for each M. Hence, we can think of the total application + // of this operator as having an index inaik, running over the different Naik epsilons; for each inaik + // there is a possibly different order_inaik, then the operator has an index l running up to order_inaik. 
+ // All terms with inaik=0 correspond to epsilon_Naik = 0. + // + // Intent: OUT--momentum + // IN--vecdt: Monte Carlo separation vector times alpha_{inaik,0}. + // vecx: A vector of fermion fields coming from the MILC code. It is organized so that + // |X_l> = (Mdag M + beta_l)^-1 |Phi> is on even sites, |Y_l>=D|X_l> is on odd sites. + // All the |X_l> for i=0 come first in memory, followed by all the |X_l> with + // i=1 in memory, and so on. + // n_orders_naik: Indexed by unique naik epsilon. + void force(GF& momentum, RealScalar* vecdt, std::vector vecx, std::vector n_orders_naik) { HISQParameters hp = this->_linkParams; - auto grid = this->_grid; - - GF XY(grid); // outer product field - GF u_force(grid); // accumulates the force - - // construct outer product... i really don't think InsertForce4D will work as is because - // i need to distinguish correctly odd sites from even sites (makes |X>_grid; + auto gridRB = this->_gridRB; + + GF XY(grid), tmp(grid); // outer product field + GF u_force(grid); // accumulates the force + + XY = Zero(); + + int l = 0; +// for (int inaik = 0; inaik < hp.n_naiks; inaik++) { + for (int inaik = 0; inaik < 1; inaik++) { + + int rat_order = n_orders_naik[inaik]; + FF X(gridRB), Y(gridRB), Xnu(gridRB), Ynu(gridRB); + + for (int i=0; i shifts; - for(int mu=0;mu(shifts,mu); - appendShift(shifts,nu); - appendShift(shifts,shiftSignal::NO_SHIFT); - appendShift(shifts,mu,Back(nu)); - appendShift(shifts,Back(nu)); + l++; + } } - - GeneralLocalStencil gStencil(Ughost.Grid(),shifts); - typedef decltype(gStencil.GetEntry(0,0)) stencilElement; - - for(int mu=0;mu_offset; - SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; - SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; - SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; - SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; - - U0 = getLink(U_v[x_p_mu ](nu),SE0); - U1 = getLink(U_v[x_p_nu ](mu),SE1); - U2 = getLink(U_v[x ](nu),SE2); - 
U3 = getLink(U_v[x_p_mu_m_nu](nu),SE3); - U4 = getLink(U_v[x_m_nu ](mu),SE4); - U5 = getLink(U_v[x_m_nu ](nu),SE4); - - XY0 = getLink(XY_v[x_p_mu ](nu),SE0); - XY1 = getLink(XY_v[x_p_nu ](mu),SE1); - XY2 = getLink(XY_v[x ](nu),SE2); - XY3 = getLink(XY_v[x_p_mu_m_nu](nu),SE3); - XY4 = getLink(XY_v[x_m_nu ](mu),SE4); - XY5 = getLink(XY_v[x_m_nu ](nu),SE4); - - W = adj(XY2)*U1*adj(U0) + U2 *adj(XY1)*adj(U0) + U2 *U1* XY0 - + XY5 *U4* U3 + adj(U5)*adj(XY4)* U3 + adj(U5)*U4*adj(XY3); - - setLink(F_v[x](mu), F_v(x)(mu) + hp.c_3*W); - } - }) - } // end mu loop - - u_force = Ghost.Extract(Fghost); + momentum=XY; + +// for (int mu = 0; mu < Nd; mu++) { +// U[mu] = PeekIndex(u_thin, mu); +// V[mu] = PeekIndex(u_smr, mu); +// for (int mu = 0; mu < Nd; mu++) { +// PokeIndex(u_smr , V[mu] , mu); +// PokeIndex(u_naik, Vnaik[mu], mu); +// } } + +// PaddedCell Ghost(_HaloDepth,grid); +// GF Ughost = Ghost.Exchange(_Umu); +// GF XYghost = Ghost.Exchange(XY); +// GF Fghost = Ghost.Exchange(u_force); +// +// Fghost = Zero(); +// +// std::vector shifts; +// for(int mu=0;mu(shifts,mu); +// appendShift(shifts,nu); +// appendShift(shifts,shiftSignal::NO_SHIFT); +// appendShift(shifts,mu,Back(nu)); +// appendShift(shifts,Back(nu)); +// } + +// GeneralLocalStencil gStencil(Ughost.Grid(),shifts); +// typedef decltype(gStencil.GetEntry(0,0)) stencilElement; +// +// for(int mu=0;mu_offset; +// SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; +// SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; +// SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; +// SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; +// +// U0 = getLink(U_v[x_p_mu ](nu),SE0); +// U1 = getLink(U_v[x_p_nu ](mu),SE1); +// U2 = getLink(U_v[x ](nu),SE2); +// U3 = getLink(U_v[x_p_mu_m_nu](nu),SE3); +// U4 = getLink(U_v[x_m_nu ](mu),SE4); +// U5 = getLink(U_v[x_m_nu ](nu),SE4); +// +// XY0 = getLink(XY_v[x_p_mu ](nu),SE0); +// XY1 = getLink(XY_v[x_p_nu ](mu),SE1); +// XY2 = 
getLink(XY_v[x ](nu),SE2); +// XY3 = getLink(XY_v[x_p_mu_m_nu](nu),SE3); +// XY4 = getLink(XY_v[x_m_nu ](mu),SE4); +// XY5 = getLink(XY_v[x_m_nu ](nu),SE4); +// +// W = adj(XY2)*U1*adj(U0) + U2 *adj(XY1)*adj(U0) + U2 *U1* XY0 +// + XY5 *U4* U3 + adj(U5)*adj(XY4)* U3 + adj(U5)*U4*adj(XY3); +// +// setLink(F_v[x](mu), F_v(x)(mu) + hp.c_3*W); +// } +// }) +// } // end mu loop +// +// u_force = Ghost.Extract(Fghost); } void ddV_naik(GF& u_deriv, GF& u_mu, GF& u_force) { From d00afb5746c3996ed90610e13e71b7d408ef7f59 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Mon, 3 Feb 2025 16:07:01 -0700 Subject: [PATCH 12/32] wrote dinky unit test for outer product term --- Grid/qcd/smearing/HISQSmearing.h | 16 +++-- tests/forces/Test_HISQ_force.cc | 109 ++++++++++++++++++++++++++----- 2 files changed, 101 insertions(+), 24 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index be7088c36e..f62858c43f 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -66,7 +66,7 @@ template struct HISQParameters{ // Structure from QOP/QDP int n_naiks; - std::array eps_naiks; + std::array eps_naiks; // TODO: Change to std::vector floatT fat7_c1; floatT fat7_c3; floatT fat7_c5; @@ -681,7 +681,8 @@ class Force_HISQ : public Gimpl { // All the |X_l> for i=0 come first in memory, followed by all the |X_l> with // i=1 in memory, and so on. // n_orders_naik: Indexed by unique naik epsilon. 
- void force(GF& momentum, RealScalar* vecdt, std::vector vecx, std::vector n_orders_naik) { +// void force(GF& momentum, std::vector vecdt, std::vector vecx, std::vector n_orders_naik) { + void force(GF& momentum, std::vector vecdt, std::vector vecx, std::vector n_orders_naik) { HISQParameters hp = this->_linkParams; auto grid = this->_grid; @@ -693,11 +694,10 @@ class Force_HISQ : public Gimpl { XY = Zero(); int l = 0; -// for (int inaik = 0; inaik < hp.n_naiks; inaik++) { - for (int inaik = 0; inaik < 1; inaik++) { + for (int inaik = 0; inaik < hp.n_naiks; inaik++) { int rat_order = n_orders_naik[inaik]; - FF X(gridRB), Y(gridRB), Xnu(gridRB), Ynu(gridRB); + FF X(gridRB), Y(gridRB), Xnu(gridRB), Ynu(gridRB), FFdag(gridRB); for (int i=0; i eps_naik = {0,0,0}; + std::vector n_orders_naik = {1,1}; + std::vector vecdt = {0.1,0.1}; + + HISQParameters hisq_param(n_naiks , eps_naik , + fat7_c1 , fat7_c3 , fat7_c5 , fat7_c7 , 0., + asqtad_c1, asqtad_c3, asqtad_c5, asqtad_c7, asqtad_clp, + cnaik , 0. , 0.); + HISQReunitSVDParameters hisq_reunit_svd(false, false, 1, 1, 1); + + Smear_HISQ fat7(&GRID,fat7_c1,0.,fat7_c3,fat7_c5,fat7_c7,0.); + fat7.smear(Vmu,Nmu,Umu); // Populate fat7 and Naik links Vmu and Nmu + fat7.projectU3(Wmu,Vmu); // Populate U(3) projection Wmu + Force_HISQ hisq_force(&GRID, hisq_param, Wmu, Vmu, Umu, hisq_reunit_svd); + + std::vector seeds({1,2,3,4}); + GridParallelRNG pRNG(&GRID); pRNG.SeedFixedIntegers(seeds); + + // Construct a vecx. Eventually it would be nice if this generated the correct distribution, + // but for now use Gaussian random variables as a placeholder. 
+ std::vector vecx; + int l = 0; + for (int inaik = 0; inaik < hisq_param.n_naiks; inaik++) { + int rat_order = n_orders_naik[inaik]; + FF PHI(&GRID); + PHI = Zero(); + for (int i=0; i eps_naik = {0,0,0}; -// PREC fat7_c1 = 1/8.; PREC asqtad_c1 = 1.; -// PREC fat7_c3 = 1/16.; PREC asqtad_c3 = 1/16.; -// PREC fat7_c5 = 1/64.; PREC asqtad_c5 = 1/64.; -// PREC fat7_c7 = 1/384.; PREC asqtad_c7 = 1/384.; -// PREC cnaik = -1/24.+eps_naik[0]/8; PREC asqtad_clp = -1/8.; - - PREC fat7_c1 = 1.; PREC asqtad_c1 = 0.; - PREC fat7_c3 = 0.; PREC asqtad_c3 = 1/16.; - PREC fat7_c5 = 0.; PREC asqtad_c5 = 0.; - PREC fat7_c7 = 0.; PREC asqtad_c7 = 0.; - PREC cnaik = 0.; PREC asqtad_clp = 0.; + PREC fat7_c1 = 1/8.; PREC asqtad_c1 = 1.; + PREC fat7_c3 = 1/16.; PREC asqtad_c3 = 1/16.; + PREC fat7_c5 = 1/64.; PREC asqtad_c5 = 1/64.; + PREC fat7_c7 = 1/384.; PREC asqtad_c7 = 1/384.; + PREC cnaik = -1/24.+eps_naik[0]/8; PREC asqtad_clp = -1/8.; LGF Vmu(&GRID), Wmu(&GRID), Nmu(&GRID); Smear_HISQ fat7(&GRID,fat7_c1,0.,fat7_c3,fat7_c5,fat7_c7,0.); @@ -89,18 +159,16 @@ bool testForce(GridCartesian& GRID, LGF Umu, LGF Uforce, LGF Ucontrol) { LGF diff(&GRID); hisq_force.ddVprojectU3(Uforce, Umu, Umu, 5e-5); - bool result; diff = Ucontrol-Uforce; auto absDiff = norm2(diff)/norm2(Ucontrol); if (absDiff < 1e-30) { Grid_pass(" |Umu-Usmr|/|Umu| = ",absDiff); - result = true; + return true; } else { Grid_error(" |Umu-Usmr|/|Umu| = ",absDiff); - result = false; + return false; } // NerscIO::writeConfiguration(Uforce,"nersc.l8t4b3360.ddVU3"); - return result; } @@ -133,8 +201,15 @@ int main (int argc, char** argv) { bool pass=true; + // Check derivative of projection NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.ddVU3.control"); - pass *= testForce(GRID,Umu,Umu,Ucontrol); + pass *= testddUProj(GRID,Umu,Ucontrol); + + // Check the inner product (1-link) + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.XY.control"); + pass *= testForce(GRID, Umu, Ucontrol, + 1, 0 , 0, 0, 
0, + 0, 1/16, 0, 0, 0 ); if(pass){ Grid_pass("All tests passed."); From f67cd4c692dd5b8a1426784091a949b16fe6132a Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Mon, 10 Feb 2025 13:46:11 -0700 Subject: [PATCH 13/32] change RealScalar to Real --- Grid/qcd/smearing/HISQSmearing.h | 14 +++++++------- tests/forces/Test_HISQ_force.cc | 33 ++++++++++++++++---------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index f62858c43f..fcb5cb01c4 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -491,8 +491,8 @@ class Force_HISQ : public Gimpl { RealScalar _Scut=-1; // Cutoff for U(3) projection eigenvalues, set at initialization int _HaloDepth=1; - HISQParameters _linkParams; - HISQReunitSVDParameters _reunitParams; + HISQParameters _linkParams; + HISQReunitSVDParameters _reunitParams; GF _Umu, _Vmu, _Wmu; void initialize() { @@ -508,8 +508,8 @@ class Force_HISQ : public Gimpl { assert(Nd == 4 && "HISQ force only defined for Nd==4"); } - Force_HISQ(GridCartesian* grid, HISQParameters linkParams, GF Wmu, GF Vmu, GF Umu, - HISQReunitSVDParameters reunitParams) + Force_HISQ(GridCartesian* grid, HISQParameters linkParams, GF Wmu, GF Vmu, GF Umu, + HISQReunitSVDParameters reunitParams) : _grid(grid), _linkParams(linkParams), _Wmu(Wmu), @@ -681,10 +681,10 @@ class Force_HISQ : public Gimpl { // All the |X_l> for i=0 come first in memory, followed by all the |X_l> with // i=1 in memory, and so on. // n_orders_naik: Indexed by unique naik epsilon. 
-// void force(GF& momentum, std::vector vecdt, std::vector vecx, std::vector n_orders_naik) { - void force(GF& momentum, std::vector vecdt, std::vector vecx, std::vector n_orders_naik) { + void force(GF& momentum, std::vector vecdt, std::vector vecx, std::vector n_orders_naik) { +// void force(GF& momentum, std::vector vecdt, std::vector vecx, std::vector n_orders_naik) { - HISQParameters hp = this->_linkParams; + HISQParameters hp = this->_linkParams; auto grid = this->_grid; auto gridRB = this->_gridRB; diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc index ed3ab766de..a11157a964 100644 --- a/tests/forces/Test_HISQ_force.cc +++ b/tests/forces/Test_HISQ_force.cc @@ -38,8 +38,8 @@ directory using namespace Grid; -#define USE_DOUBLE true -//#define USE_DOUBLE false +//#define USE_DOUBLE true +#define USE_DOUBLE false #if USE_DOUBLE #define PREC double @@ -61,8 +61,8 @@ typedef typename GIMPL::FermionField FF; // This is a sort of contrived test situation. The goal is to make sure the fermion force // code is stable against future changes and get an idea how the HISQ force interface works. bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, - PREC fat7_c1 , PREC fat7_c3 , PREC fat7_c5 , PREC fat7_c7 , PREC cnaik, - PREC asqtad_c1, PREC asqtad_c3, PREC asqtad_c5, PREC asqtad_c7, PREC asqtad_clp) { + Real fat7_c1 , Real fat7_c3 , Real fat7_c5 , Real fat7_c7 , Real cnaik, + Real asqtad_c1, Real asqtad_c3, Real asqtad_c5, Real asqtad_c7, Real asqtad_clp) { LGF Vmu(&GRID), Wmu(&GRID), Nmu(&GRID), Umom(&GRID); @@ -76,15 +76,15 @@ bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, // nonzero Naik epsilon. That group lumps together terms for each of the zero-epsilon // pseudofermions. 
int n_naiks = 1; // Just a charm - std::array eps_naik = {0,0,0}; + std::array eps_naik = {0,0,0}; std::vector n_orders_naik = {1,1}; - std::vector vecdt = {0.1,0.1}; + std::vector vecdt = {0.1,0.1}; - HISQParameters hisq_param(n_naiks , eps_naik , + HISQParameters hisq_param(n_naiks , eps_naik , fat7_c1 , fat7_c3 , fat7_c5 , fat7_c7 , 0., asqtad_c1, asqtad_c3, asqtad_c5, asqtad_c7, asqtad_clp, cnaik , 0. , 0.); - HISQReunitSVDParameters hisq_reunit_svd(false, false, 1, 1, 1); + HISQReunitSVDParameters hisq_reunit_svd(false, false, 1, 1, 1); Smear_HISQ fat7(&GRID,fat7_c1,0.,fat7_c3,fat7_c5,fat7_c7,0.); fat7.smear(Vmu,Nmu,Umu); // Populate fat7 and Naik links Vmu and Nmu @@ -134,13 +134,14 @@ bool testddUProj(GridCartesian& GRID, LGF Umu, LGF Ucontrol) { LGF Uforce(&GRID); int n_naiks = 1; - std::array eps_naik = {0,0,0}; + std::array eps_naik = {0,0,0}; + + Real fat7_c1 = 1/8.; Real asqtad_c1 = 1.; + Real fat7_c3 = 1/16.; Real asqtad_c3 = 1/16.; + Real fat7_c5 = 1/64.; Real asqtad_c5 = 1/64.; + Real fat7_c7 = 1/384.; Real asqtad_c7 = 1/384.; + Real cnaik = -1/24.+eps_naik[0]/8; Real asqtad_clp = -1/8.; - PREC fat7_c1 = 1/8.; PREC asqtad_c1 = 1.; - PREC fat7_c3 = 1/16.; PREC asqtad_c3 = 1/16.; - PREC fat7_c5 = 1/64.; PREC asqtad_c5 = 1/64.; - PREC fat7_c7 = 1/384.; PREC asqtad_c7 = 1/384.; - PREC cnaik = -1/24.+eps_naik[0]/8; PREC asqtad_clp = -1/8.; LGF Vmu(&GRID), Wmu(&GRID), Nmu(&GRID); Smear_HISQ fat7(&GRID,fat7_c1,0.,fat7_c3,fat7_c5,fat7_c7,0.); @@ -148,12 +149,12 @@ bool testddUProj(GridCartesian& GRID, LGF Umu, LGF Ucontrol) { fat7.smear(Vmu,Nmu,Umu); // Populate fat7 and Naik links Vmu and Nmu fat7.projectU3(Wmu,Vmu); // Populate U(3) projection Wmu - HISQParameters hisq_param(n_naiks , eps_naik , + HISQParameters hisq_param(n_naiks , eps_naik , fat7_c1 , fat7_c3 , fat7_c5 , fat7_c7 , 0., asqtad_c1, asqtad_c3, asqtad_c5, asqtad_c7, asqtad_clp, cnaik , 0. 
, 0.); - HISQReunitSVDParameters hisq_reunit_svd(false, false, 1, 1, 1); + HISQReunitSVDParameters hisq_reunit_svd(false, false, 1, 1, 1); Force_HISQ hisq_force(&GRID, hisq_param, Wmu, Vmu, Umu, hisq_reunit_svd); From 93c6fe221c5e6433f116ff27c23c998a5ea34edc Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Tue, 18 Mar 2025 16:00:05 -0600 Subject: [PATCH 14/32] attempt at 3-link fat7 --- Grid/qcd/smearing/HISQSmearing.h | 203 +++++++++++++++---------------- tests/forces/Test_HISQ_force.cc | 7 +- 2 files changed, 97 insertions(+), 113 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index fcb5cb01c4..914bb4c8b7 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -66,20 +66,10 @@ template struct HISQParameters{ // Structure from QOP/QDP int n_naiks; - std::array eps_naiks; // TODO: Change to std::vector - floatT fat7_c1; - floatT fat7_c3; - floatT fat7_c5; - floatT fat7_c7; - floatT fat7_clp; - floatT asqtad_c1; - floatT asqtad_c3; - floatT asqtad_c5; - floatT asqtad_c7; - floatT asqtad_clp; - floatT asqtad_cnaik; - floatT diff_c1; - floatT diff_cnaik; + std::array eps_naiks; + floatT fat7_c1 ; floatT fat7_c3 ; floatT fat7_c5 ; floatT fat7_c7 ; floatT fat7_clp; + floatT asqtad_c1; floatT asqtad_c3; floatT asqtad_c5; floatT asqtad_c7; floatT asqtad_clp; floatT asqtad_cnaik; + floatT diff_c1 ; floatT diff_cnaik; HISQParameters(int n_naiks_in, std::array eps_naiks_in, floatT fat7_one_link , floatT fat7_three_staple , floatT fat7_five_staple , floatT fat7_seven_staple , floatT fat7_lepage, floatT asqtad_one_link, floatT asqtad_three_staple, floatT asqtad_five_staple, floatT asqtad_seven_staple, floatT asqtad_lepage, @@ -137,6 +127,21 @@ accelerator_inline int stencilIndex(int mu, int nu) { } +/*! @brief mu-nu plane stencil. We allow mu==nu to make indexing the stencil easier, + but these entries will not be used. 
*/ +std::vector getHISQSupport() { + std::vector shifts; + for(int mu=0;mu(shifts,mu); + appendShift(shifts,nu); + appendShift(shifts,shiftSignal::NO_SHIFT); + appendShift(shifts,mu,Back(nu)); + appendShift(shifts,Back(nu)); + } + return shifts; +} + /*! @brief create fat links from link variables */ template @@ -209,17 +214,8 @@ class Smear_HISQ : public Gimpl { GF Ughost_5linkA(Ughost.Grid()); GF Ughost_5linkB(Ughost.Grid()); - // mu-nu plane stencil. We allow mu==nu to make indexing the stencil easier, - // but these entries will not be used. - std::vector shifts; - for(int mu=0;mu(shifts,mu); - appendShift(shifts,nu); - appendShift(shifts,shiftSignal::NO_SHIFT); - appendShift(shifts,mu,Back(nu)); - appendShift(shifts,Back(nu)); - } + // mu-nu plane stencil. + std::vector shifts = getHISQSupport(); // A GeneralLocalStencil has two indices: a site and stencil index GeneralLocalStencil gStencil(Ughost.Grid(),shifts); @@ -681,8 +677,7 @@ class Force_HISQ : public Gimpl { // All the |X_l> for i=0 come first in memory, followed by all the |X_l> with // i=1 in memory, and so on. // n_orders_naik: Indexed by unique naik epsilon. 
- void force(GF& momentum, std::vector vecdt, std::vector vecx, std::vector n_orders_naik) { -// void force(GF& momentum, std::vector vecdt, std::vector vecx, std::vector n_orders_naik) { + void force(GF& momentum, std::vector vecdt, std::vector& vecx, std::vector n_orders_naik) { HISQParameters hp = this->_linkParams; auto grid = this->_grid; @@ -691,17 +686,18 @@ class Force_HISQ : public Gimpl { GF XY(grid), tmp(grid); // outer product field GF u_force(grid); // accumulates the force - XY = Zero(); + FF X(gridRB), Y(gridRB), Xnu(gridRB), Ynu(gridRB), FFdag(gridRB); + + momentum = Zero(); int l = 0; for (int inaik = 0; inaik < hp.n_naiks; inaik++) { int rat_order = n_orders_naik[inaik]; - FF X(gridRB), Y(gridRB), Xnu(gridRB), Ynu(gridRB), FFdag(gridRB); for (int i=0; i shifts = getHISQSupport(); + + GeneralLocalStencil gStencil(Ughost.Grid(),shifts); + typedef decltype(gStencil.GetEntry(0,0)) stencilElement; + + for(int mu=0;mu_offset; + SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; + SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; + SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; + SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; + + U0 = getLink(U_v[x_p_mu ](nu),SE0); + U1 = getLink(U_v[x_p_nu ](mu),SE1); + U2 = getLink(U_v[x ](nu),SE2); + U3 = getLink(U_v[x_p_mu_m_nu](nu),SE3); + U4 = getLink(U_v[x_m_nu ](mu),SE4); + U5 = getLink(U_v[x_m_nu ](nu),SE4); + + XY0 = getLink(XY_v[x_p_mu ](nu),SE0); + XY1 = getLink(XY_v[x_p_nu ](mu),SE1); + XY2 = getLink(XY_v[x ](nu),SE2); + XY3 = getLink(XY_v[x_p_mu_m_nu](nu),SE3); + XY4 = getLink(XY_v[x_m_nu ](mu),SE4); + XY5 = getLink(XY_v[x_m_nu ](nu),SE4); + + W = adj(XY2)*U1*adj(U0) + U2 *adj(XY1)*adj(U0) + U2 *U1* XY0 + + XY5 *U4* U3 + adj(U5)*adj(XY4)* U3 + adj(U5)*U4*adj(XY3); + + setLink(F_v[x](mu), F_v(x)(mu) + hp.fat7_c3*W*vecdt[l]); + } + }) + } // end mu loop + + u_force = Ghost.Extract(Fghost); + momentum += u_force; + l++; } } - momentum=XY; - -// for 
(int mu = 0; mu < Nd; mu++) { -// U[mu] = PeekIndex(u_thin, mu); -// V[mu] = PeekIndex(u_smr, mu); -// for (int mu = 0; mu < Nd; mu++) { -// PokeIndex(u_smr , V[mu] , mu); -// PokeIndex(u_naik, Vnaik[mu], mu); -// } } - -// PaddedCell Ghost(_HaloDepth,grid); -// GF Ughost = Ghost.Exchange(_Umu); -// GF XYghost = Ghost.Exchange(XY); -// GF Fghost = Ghost.Exchange(u_force); -// -// Fghost = Zero(); -// -// std::vector shifts; -// for(int mu=0;mu(shifts,mu); -// appendShift(shifts,nu); -// appendShift(shifts,shiftSignal::NO_SHIFT); -// appendShift(shifts,mu,Back(nu)); -// appendShift(shifts,Back(nu)); -// } - -// GeneralLocalStencil gStencil(Ughost.Grid(),shifts); -// typedef decltype(gStencil.GetEntry(0,0)) stencilElement; -// -// for(int mu=0;mu_offset; -// SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; -// SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; -// SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; -// SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; -// -// U0 = getLink(U_v[x_p_mu ](nu),SE0); -// U1 = getLink(U_v[x_p_nu ](mu),SE1); -// U2 = getLink(U_v[x ](nu),SE2); -// U3 = getLink(U_v[x_p_mu_m_nu](nu),SE3); -// U4 = getLink(U_v[x_m_nu ](mu),SE4); -// U5 = getLink(U_v[x_m_nu ](nu),SE4); -// -// XY0 = getLink(XY_v[x_p_mu ](nu),SE0); -// XY1 = getLink(XY_v[x_p_nu ](mu),SE1); -// XY2 = getLink(XY_v[x ](nu),SE2); -// XY3 = getLink(XY_v[x_p_mu_m_nu](nu),SE3); -// XY4 = getLink(XY_v[x_m_nu ](mu),SE4); -// XY5 = getLink(XY_v[x_m_nu ](nu),SE4); -// -// W = adj(XY2)*U1*adj(U0) + U2 *adj(XY1)*adj(U0) + U2 *U1* XY0 -// + XY5 *U4* U3 + adj(U5)*adj(XY4)* U3 + adj(U5)*U4*adj(XY3); -// -// setLink(F_v[x](mu), F_v(x)(mu) + hp.c_3*W); -// } -// }) -// } // end mu loop -// -// u_force = Ghost.Extract(Fghost); } + void ddV_naik(GF& u_deriv, GF& u_mu, GF& u_force) { SmearingParameters lt = this->_linkTreatment; diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc index 
a11157a964..045a3dc307 100644 --- a/tests/forces/Test_HISQ_force.cc +++ b/tests/forces/Test_HISQ_force.cc @@ -39,7 +39,7 @@ using namespace Grid; //#define USE_DOUBLE true -#define USE_DOUBLE false +#define USE_DOUBLE true #if USE_DOUBLE #define PREC double @@ -108,11 +108,6 @@ bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, } } - // NEXT STEPS: Make sure fat7/asqtad parameters match the interface.md. Open up Carleton's MILC - // test code, which you are going to have to understand at some point anyway, and make sure - // your parameters agree with that. Run the test and write as control. Copy control over to - // local Grid folder where it is protected. Rewrite test as genuine check. Push it. - hisq_force.force(Umom,vecdt,vecx,n_orders_naik); LGF diff(&GRID); diff = Ucontrol-Umom; From b939efb7aa746659ef5067cc306a4239d7683e96 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Wed, 19 Mar 2025 15:28:48 -0600 Subject: [PATCH 15/32] some refactoring of HISQSmearing header --- Grid/qcd/smearing/HISQSmearing.h | 207 ++++++++++++++----------------- 1 file changed, 93 insertions(+), 114 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 914bb4c8b7..02b4fed6a1 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -92,7 +92,7 @@ struct HISQParameters{ }; -/*! @brief somtimes in the U(3) projection we use SVD cuts; here we collect related parameters */ +/*! @brief Sometimes in the U(3) projection we use SVD cuts; here we collect related parameters */ template struct HISQReunitSVDParameters{ // Structure from QOP/QDP @@ -110,26 +110,26 @@ struct HISQReunitSVDParameters{ }; -// I think that "coalesced..." functions are extremely general, which is nice, -// but in the HISQ context it boils down to link reading and writing. 
-template accelerator_inline -vobj getLink(const vobj & __restrict__ vec,GeneralStencilEntry* SE) { - return coalescedReadGeneralPermute(vec, SE->_permute, Nd); +/*! @brief Get the link U_mu(x). */ +template accelerator_inline +auto getLink(const link& __restrict__ U, GeneralStencilEntry* x, int mu) { + return coalescedReadGeneralPermute(U[x->_offset](mu), x->_permute, Nd); } #define setLink coalescedWrite -// figure out the stencil index from mu and nu -accelerator_inline int stencilIndex(int mu, int nu) { +/*! @brief Figure out the stencil index from mu and nu. */ +accelerator_inline +int HISQStencilIndex(int mu, int nu) { // Nshifts depends on how you built the stencil int Nshifts = 5; return Nshifts*nu + Nd*Nshifts*mu; } -/*! @brief mu-nu plane stencil. We allow mu==nu to make indexing the stencil easier, - but these entries will not be used. */ -std::vector getHISQSupport() { +/*! @brief Create the mu-nu plane stencil. We allow mu==nu to make indexing the + stencil easier, but these entries will not be used. */ +std::vector createHISQStencil() { std::vector shifts; for(int mu=0;mu getHISQSupport() { return shifts; } +/*! @brief Retreieve the stencil entries. */ +template accelerator_inline +std::tuple +getHISQStencilEntries(acc sView, int sIndex, int site) { + GeneralStencilEntry* x_p_mu = sView.GetEntry(sIndex+0,site); + GeneralStencilEntry* x_p_nu = sView.GetEntry(sIndex+1,site); + GeneralStencilEntry* x = sView.GetEntry(sIndex+2,site); + GeneralStencilEntry* x_p_mu_m_nu = sView.GetEntry(sIndex+3,site); + GeneralStencilEntry* x_m_nu = sView.GetEntry(sIndex+4,site); + return { x_p_mu, x_p_nu, x, x_p_mu_m_nu, x_m_nu }; +} + -/*! @brief create fat links from link variables */ +/*! @brief Allows for ASQTAD-like smearings. 
*/ template class Smear_HISQ : public Gimpl { public: @@ -208,18 +220,18 @@ class Smear_HISQ : public Gimpl { PaddedCell Ghost(_HaloDepth,grid); GF Ughost = Ghost.Exchange(u_thin); - // This is where auxiliary N-link fields and the final smear will be stored. + // This is where auxiliary N-link fields and the final smear will be stored. As + // implemented, this uses about 25% more memory than necessary. GF Ughost_fat(Ughost.Grid()); GF Ughost_3link(Ughost.Grid()); GF Ughost_5linkA(Ughost.Grid()); GF Ughost_5linkB(Ughost.Grid()); // mu-nu plane stencil. - std::vector shifts = getHISQSupport(); + std::vector shifts = createHISQStencil(); // A GeneralLocalStencil has two indices: a site and stencil index GeneralLocalStencil gStencil(Ughost.Grid(),shifts); - typedef decltype(gStencil.GetEntry(0,0)) stencilElement; // This is where contributions from the smearing get added together Ughost_fat=Zero(); @@ -227,7 +239,6 @@ class Smear_HISQ : public Gimpl { // This loop handles 3-, 5-, and 7-link constructs, minus Lepage and Naik. for(int mu=0;mu_offset; - SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; - SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; - SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; - SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; + auto [x_p_mu, x_p_nu, x, x_p_mu_m_nu, x_m_nu] = getHISQStencilEntries(gStencil_v,s,site); // When you're deciding whether to take an adjoint, the question is: how is the // stored link oriented compared to the one you want? If I imagine myself travelling // with the to-be-updated link, I have two possible, alternative 3-link paths I can // take, one starting by going to the left, the other starting by going to the right. 
- U0 = getLink(U_v[x_p_mu ](nu),SE0); - U1 = getLink(U_v[x_p_nu ](mu),SE1); - U2 = getLink(U_v[x ](nu),SE2); - U3 = getLink(U_v[x_p_mu_m_nu](nu),SE3); - U4 = getLink(U_v[x_m_nu ](mu),SE4); - U5 = getLink(U_v[x_m_nu ](nu),SE4); + U0 = getLink(U_v,x_p_mu ,nu); + U1 = getLink(U_v,x_p_nu ,mu); + U2 = getLink(U_v,x ,nu); + U3 = getLink(U_v,x_p_mu_m_nu,nu); + U4 = getLink(U_v,x_m_nu ,mu); + U5 = getLink(U_v,x_m_nu ,nu); // "left" "right" W = U2*U1*adj(U0) + adj(U5)*U4*U3; // Save 3-link construct for later and add to smeared field. - setLink(U_3link_v[x](nu), W); + setLink(U_3link_v[x->_offset](nu), W); // The index operator (x) returns the coalesced read on GPU. The view [] index returns // a reference to the vector object. The [x](mu) returns a reference to the densely // packed (contiguous in memory) mu-th element of the vector object. - setLink(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W); + setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_3*W); } }) accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link - stencilElement SE0, SE1, SE2, SE3, SE4; U3matrix U0, U1, U2, U3, U4, U5, W; int sigmaIndex = 0; for(int nu=0;nu_offset; - SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; - SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; - SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; - SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; + auto [x_p_mu, x_p_nu, x, x_p_mu_m_nu, x_m_nu] = getHISQStencilEntries(gStencil_v,s,site); - U0 = getLink( U_v[x_p_mu ](nu ),SE0); - U1 = getLink(U_3link_v[x_p_nu ](rho),SE1); - U2 = getLink( U_v[x ](nu ),SE2); - U3 = getLink( U_v[x_p_mu_m_nu](nu ),SE3); - U4 = getLink(U_3link_v[x_m_nu ](rho),SE4); - U5 = getLink( U_v[x_m_nu ](nu ),SE4); + U0 = getLink( U_v,x_p_mu ,nu ); + U1 = getLink(U_3link_v,x_p_nu ,rho); + U2 = getLink( U_v,x ,nu ); + U3 = getLink( U_v,x_p_mu_m_nu,nu ); + U4 = getLink(U_3link_v,x_m_nu ,rho); + U5 = getLink( U_v,x_m_nu ,nu ); W = U2*U1*adj(U0) 
+ adj(U5)*U4*U3; if(sigmaIndex<3) { - setLink(U_5linkA_v[x](rho), W); + setLink(U_5linkA_v[x->_offset](rho), W); } else { - setLink(U_5linkB_v[x](rho), W); + setLink(U_5linkB_v[x->_offset](rho), W); } - setLink(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_5*W); + setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_5*W); sigmaIndex++; } } }) accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link - stencilElement SE0, SE1, SE2, SE3, SE4; U3matrix U0, U1, U2, U3, U4, U5, W; int sigmaIndex = 0; for(int nu=0;nu_offset; - SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; - SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; - SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; - SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; + auto [x_p_mu, x_p_nu, x, x_p_mu_m_nu, x_m_nu] = getHISQStencilEntries(gStencil_v,s,site); - U0 = getLink(U_v[x_p_mu](nu),SE0); + U0 = getLink(U_v,x_p_mu,nu); if(sigmaIndex<3) { - U1 = getLink(U_5linkB_v[x_p_nu](rho),SE1); + U1 = getLink(U_5linkB_v,x_p_nu,rho); } else { - U1 = getLink(U_5linkA_v[x_p_nu](rho),SE1); + U1 = getLink(U_5linkA_v,x_p_nu,rho); } - U2 = getLink(U_v[x](nu),SE2); - U3 = getLink(U_v[x_p_mu_m_nu](nu),SE3); + U2 = getLink(U_v,x,nu); + U3 = getLink(U_v,x_p_mu_m_nu,nu); if(sigmaIndex<3) { - U4 = getLink(U_5linkB_v[x_m_nu](rho),SE4); + U4 = getLink(U_5linkB_v,x_m_nu,rho); } else { - U4 = getLink(U_5linkA_v[x_m_nu](rho),SE4); + U4 = getLink(U_5linkA_v,x_m_nu,rho); } - U5 = getLink(U_v[x_m_nu](nu),SE4); + U5 = getLink(U_v,x_m_nu,nu); W = U2*U1*adj(U0) + adj(U5)*U4*U3; - setLink(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_7*W); + setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_7*W); sigmaIndex++; } } @@ -584,36 +581,25 @@ class Force_HISQ : public Gimpl { force() = forcemu(mu); auto forcedag = adj(force); - RealScalar u2 = u * u; - RealScalar u3 = u2 * u; - RealScalar u4 = u3 * u; - RealScalar u5 = u4 * u; - RealScalar u6 = u5 * u; - RealScalar u7 = u6 * 
u; - RealScalar u8 = u7 * u; - RealScalar v2 = v * v; - RealScalar v3 = v2 * v; - RealScalar v4 = v3 * v; - RealScalar v5 = v4 * v; - RealScalar v6 = v5 * v; - RealScalar w2 = w * w; - RealScalar w3 = w2 * w; - RealScalar w4 = w3 * w; - RealScalar w5 = w4 * w; + RealScalar u2, u3, u4, u5, u6, u7, u8, v2 ,v3, v4, v5, v6, w2, w3, w4, w5; + + u2 = u *u; u3 = u2*u; u4 = u3*u; u5 = u4*u; u6 = u5*u; u7 = u6*u; u8 = u7*u; + v2 = v *v; v3 = v2*v; v4 = v3*v; v5 = v4*v; v6 = v5*v; + w2 = w *w; w3 = w2*w; w4 = w3*w; w5 = w4*w; // eq (C10) auto d = 2*w3*(u*v-w)*(u*v-w)*(u*v-w); // eq (C11) - auto C00 = ( -w3*u6 + 3*v*w3*u4 + 3*v4*w*u4 - v6*u3 - 4*w4*u3 - 12*v3*w2*u3 + 16*v2*w3*u2 - + 3*v5*w*u2 - 8*v*w4*u - 3*v4*w2*u + w5 + v3*w3 )/d; - auto C01 = ( -w2*u7 - v2*w*u6 + v4*u5 + 6*v*w2*u5 - 5*w3*u4 - v3*w*u4 - 2*v5*u3 - 6*v2*w2*u3 - + 10*v*w3*u2 + 6*v4*w*u2 - 3*w4*u - 6*v3*w2*u + 2*v2*w3 )/d; - auto C02 = ( w2*u5 + v2*w*u4 - v4*u3 - 4*v*w2*u3 + 4*w3*u2 +3*v3*w*u2 - 3*v2*w2*u + v*w3 )/d; - auto C11 = ( -w*u8 - v2*u7 + 7*v*w*u6 + 4*v3*u5 - 5*w2*u5 - 16*v2*w*u4 - 4*v4*u3 + 16*v*w2*u3 - - 3*w3*u2 + 12*v3*w*u2 - 12*v2*w2*u + 3*v*w3 )/d; - auto C12 = ( w*u6 + v2*u5 - 5*v*w*u4 - 2*v3*u3 + 4*w2*u3 + 6*v2*w*u2 - 6*v*w2*u + w3 )/d; - auto C22 = ( -w*u4 - v2*u3 + 3*v*w*u2 - 3*w2*u )/d; + auto C00 = ( -w3*u6 + 3*v*w3*u4 + 3*v4*w*u4 - v6*u3 - 4*w4*u3 - 12*v3*w2*u3 + 16*v2*w3*u2 + + 3*v5*w*u2 - 8*v*w4*u - 3*v4*w2*u + w5 + v3*w3 )/d; + auto C01 = ( -w2*u7 - v2*w*u6 + v4*u5 + 6*v*w2*u5 - 5*w3*u4 - v3*w*u4 - 2*v5*u3 - 6*v2*w2*u3 + + 10*v*w3*u2 + 6*v4*w*u2 - 3*w4*u - 6*v3*w2*u + 2*v2*w3 )/d; + auto C02 = ( w2*u5 + v2*w*u4 - v4*u3 - 4*v*w2*u3 + 4*w3*u2 +3*v3*w*u2 - 3*v2*w2*u + v*w3 )/d; + auto C11 = ( -w*u8 - v2*u7 + 7*v*w*u6 + 4*v3*u5 - 5*w2*u5 - 16*v2*w*u4 - 4*v4*u3 + 16*v*w2*u3 + - 3*w3*u2 + 12*v3*w*u2 - 12*v2*w2*u + 3*v*w3 )/d; + auto C12 = ( w*u6 + v2*u5 - 5*v*w*u4 - 2*v3*u3 + 4*w2*u3 + 6*v2*w*u2 - 6*v*w2*u + w3 )/d; + auto C22 = ( -w*u4 - v2*u3 + 3*v*w*u2 - 3*w2*u )/d; // These are all used in 
the loop over color entries, and we want to avoid recomputing // these products, which should be broadcast to all sites, 3*3*3*3=81 times. @@ -733,53 +719,46 @@ class Force_HISQ : public Gimpl { Fghost = Zero(); - std::vector shifts = getHISQSupport(); - + std::vector shifts = createHISQStencil(); GeneralLocalStencil gStencil(Ughost.Grid(),shifts); - typedef decltype(gStencil.GetEntry(0,0)) stencilElement; for(int mu=0;mu_offset; - SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset; - SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset; - SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; - SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset; + auto [x_p_mu, x_p_nu, x, x_p_mu_m_nu, x_m_nu] = getHISQStencilEntries(gStencil_v,s,site); - U0 = getLink(U_v[x_p_mu ](nu),SE0); - U1 = getLink(U_v[x_p_nu ](mu),SE1); - U2 = getLink(U_v[x ](nu),SE2); - U3 = getLink(U_v[x_p_mu_m_nu](nu),SE3); - U4 = getLink(U_v[x_m_nu ](mu),SE4); - U5 = getLink(U_v[x_m_nu ](nu),SE4); + U0 = getLink(U_v ,x_p_mu ,nu); + U1 = getLink(U_v ,x_p_nu ,mu); + U2 = getLink(U_v ,x ,nu); + U3 = getLink(U_v ,x_p_mu_m_nu,nu); + U4 = getLink(U_v ,x_m_nu ,mu); + U5 = getLink(U_v ,x_m_nu ,nu); - XY0 = getLink(XY_v[x_p_mu ](nu),SE0); - XY1 = getLink(XY_v[x_p_nu ](mu),SE1); - XY2 = getLink(XY_v[x ](nu),SE2); - XY3 = getLink(XY_v[x_p_mu_m_nu](nu),SE3); - XY4 = getLink(XY_v[x_m_nu ](mu),SE4); - XY5 = getLink(XY_v[x_m_nu ](nu),SE4); + XY0 = getLink(XY_v,x_p_mu ,nu); + XY1 = getLink(XY_v,x_p_nu ,mu); + XY2 = getLink(XY_v,x ,nu); + XY3 = getLink(XY_v,x_p_mu_m_nu,nu); + XY4 = getLink(XY_v,x_m_nu ,mu); + XY5 = getLink(XY_v,x_m_nu ,nu); W = adj(XY2)*U1*adj(U0) + U2 *adj(XY1)*adj(U0) + U2 *U1* XY0 + XY5 *U4* U3 + adj(U5)*adj(XY4)* U3 + adj(U5)*U4*adj(XY3); - setLink(F_v[x](mu), F_v(x)(mu) + hp.fat7_c3*W*vecdt[l]); + setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c3*W*vecdt[l]); } }) } // end mu loop From 1ef9f55af5684f4078a169d326e2c627c4cba81b Mon Sep 17 
00:00:00 2001 From: "D. A. Clarke" Date: Thu, 20 Mar 2025 16:34:44 -0600 Subject: [PATCH 16/32] 3-link works --- Grid/qcd/smearing/HISQSmearing.h | 24 ++++++++++++------------ tests/forces/Test_HISQ_force.cc | 14 +++++++++++--- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 02b4fed6a1..566c8db1c3 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -129,6 +129,7 @@ int HISQStencilIndex(int mu, int nu) { /*! @brief Create the mu-nu plane stencil. We allow mu==nu to make indexing the stencil easier, but these entries will not be used. */ +inline std::vector createHISQStencil() { std::vector shifts; for(int mu=0;mu shifts = createHISQStencil(); GeneralLocalStencil gStencil(Ughost.Grid(),shifts); - + for(int mu=0;mu_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c3*W*vecdt[l]); } }) + + } // end mu loop u_force = Ghost.Extract(Fghost); diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc index 045a3dc307..893e7ed275 100644 --- a/tests/forces/Test_HISQ_force.cc +++ b/tests/forces/Test_HISQ_force.cc @@ -120,6 +120,8 @@ bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, return false; } // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.XY.control"); +// NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.3link.control"); +// return true; } @@ -201,11 +203,17 @@ int main (int argc, char** argv) { NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.ddVU3.control"); pass *= testddUProj(GRID,Umu,Ucontrol); - // Check the inner product (1-link) + // Check the 1-link (inner product) NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.XY.control"); pass *= testForce(GRID, Umu, Ucontrol, - 1, 0 , 0, 0, 0, - 0, 1/16, 0, 0, 0 ); + 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0 ); + + // Check the 3-link + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.3link.control"); + pass *= 
testForce(GRID, Umu, Ucontrol, + 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0 ); if(pass){ Grid_pass("All tests passed."); From 6b635089c6411525533d217529c65b27c87bb8c2 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Sat, 22 Mar 2025 19:49:13 -0600 Subject: [PATCH 17/32] try 5link --- Grid/qcd/smearing/HISQSmearing.h | 261 ++++++++++++++++++++++--------- 1 file changed, 185 insertions(+), 76 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 566c8db1c3..de026b3bb7 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -209,6 +209,105 @@ class Smear_HISQ : public Gimpl { ~Smear_HISQ() {} + + // Intent: OUT--U_3link (sum of left and right staples attached to U) + // U_fat (accmulates the fat smearing) + // IN--U_v (thin links) + // gStencil (HISQ stencil) + // Nsites + // mu + // updateFatLinks (in the force, you only want U_3link_v) + template + void threeLinkStaple(linkWrite U_fat_v, linkWrite U_3link_v, linkRead U_v, stencilRead gStencil_v, + int Nsites, int mu, bool updateFatLinks=true) const { + + SmearingParameters lt = this->_linkTreatment; + typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix U0, U1, U2, U3, U4, U5, W; + for(int nu=0;nu_offset](nu), W); + + // The index operator (x) returns the coalesced read on GPU. The view [] index returns + // a reference to the vector object. The [x](mu) returns a reference to the densely + // packed (contiguous in memory) mu-th element of the vector object. 
+ if(updateFatLinks) setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_3*W); + } + }) + return; + } + + + // Intent: OUT--U_5link (sum of left and right staples attached to U) + // U_fat (accmulates the fat smearing) + // IN--U_v (thin links) + // gStencil (HISQ stencil) + // Nsites + // mu + // updateFatLinks (in the force, you only want U_5link_v) + template + void fiveLinkStaple(linkWrite U_fat_v, linkWrite U_5linkA_v, linkWrite U_5linkB_v, linkWrite U_3link_v, + linkRead U_v, stencilRead gStencil_v, int Nsites, int mu, bool updateFatLinks=true) const { + + SmearingParameters lt = this->_linkTreatment; + typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix U0, U1, U2, U3, U4, U5, W; + int sigmaIndex = 0; + for(int nu=0;nu_offset](rho), W); + } else { + setLink(U_5linkB_v[x->_offset](rho), W); + } + + if(updateFatLinks) setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_5*W); + sigmaIndex++; + } + } + }) + return; + } + + // Intent: OUT--u_smr (smeared links), // u_naik (Naik links), // IN--u_thin (thin links) @@ -240,9 +339,7 @@ class Smear_HISQ : public Gimpl { // This loop handles 3-, 5-, and 7-link constructs, minus Lepage and Naik. for(int mu=0;mu_offset](nu), W); - - // The index operator (x) returns the coalesced read on GPU. The view [] index returns - // a reference to the vector object. The [x](mu) returns a reference to the densely - // packed (contiguous in memory) mu-th element of the vector object. 
- setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_3*W); - } - }) - - accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link - U3matrix U0, U1, U2, U3, U4, U5, W; - int sigmaIndex = 0; - for(int nu=0;nu_offset](rho), W); - } else { - setLink(U_5linkB_v[x->_offset](rho), W); - } + threeLinkStaple(U_fat_v, U_3link_v, U_v, gStencil_v, Nsites, mu); - setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_5*W); - sigmaIndex++; - } - } - }) + fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_fat_v, U_5linkA_v, U_5linkB_v, U_3link_v, U_v, gStencil_v, Nsites, mu); accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link U3matrix U0, U1, U2, U3, U4, U5, W; @@ -720,16 +757,37 @@ class Force_HISQ : public Gimpl { std::vector shifts = createHISQStencil(); GeneralLocalStencil gStencil(Ughost.Grid(),shifts); + GF Ughost_3link(Ughost.Grid()); + GF Ughost_5linkA(Ughost.Grid()); + GF Ughost_5linkB(Ughost.Grid()); + GF dUghost_3link(Ughost.Grid()); + GF dUghost_5linkA(Ughost.Grid()); + GF dUghost_5linkB(Ughost.Grid()); + + Smear_HISQ fat7(grid,hp.fat7_c1,0.,hp.fat7_c3,hp.fat7_c5,hp.fat7_c7,0.); + for(int mu=0;mu_offset](nu), W); setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c3*W*vecdt[l]); } }) + // U_3link_v is being used as a dummy in the first argument. That the last argument + // is false guarantees threeLinkStaple does not interact with its first argument. 
+ fat7.template threeLinkStaple(U_3link_v, U_3link_v, + U_v, gStencil_v, Nsites, mu, false); + + accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE + U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; + int sigmaIndex = 0; + for(int nu=0;nu_offset](rho), W); + } else { + setLink(dU_5linkB_v[x->_offset](rho), W); + } + + setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c5*W*vecdt[l]); + sigmaIndex++; + } + } + }) + + fat7.template fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_5linkA_v, U_5linkA_v, U_5linkB_v, + U_3link_v, U_v, gStencil_v, Nsites, mu, false); + + } // end mu loop u_force = Ghost.Extract(Fghost); From c809930848fffd61a9c6a3d71c98570ad11ab5a8 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Sun, 23 Mar 2025 17:28:50 -0600 Subject: [PATCH 18/32] working 5-link; 7-link ready to test --- Grid/qcd/smearing/HISQSmearing.h | 168 ++++++++++++++++++++----------- tests/forces/Test_HISQ_force.cc | 7 ++ 2 files changed, 119 insertions(+), 56 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index de026b3bb7..580a9a984b 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -143,7 +143,8 @@ std::vector createHISQStencil() { return shifts; } -/*! @brief Retreieve the stencil entries. */ + +/*! @brief Retrieve the stencil entries. */ template accelerator_inline std::tuple getHISQStencilEntries(acc sView, int sIndex, int site) { @@ -165,12 +166,12 @@ class Smear_HISQ : public Gimpl { // Sort out the Gimpl. This handles BCs and part of the precision. 
INHERIT_GIMPL_TYPES(Gimpl); - typedef typename Gimpl::GaugeField GF; - typedef typename Gimpl::GaugeLinkField LF; - typedef typename Gimpl::ComplexField CF; - typedef typename Gimpl::Scalar ComplexScalar; + typedef typename Gimpl::GaugeField GF; + typedef typename Gimpl::GaugeLinkField LF; + typedef typename Gimpl::ComplexField CF; + typedef typename Gimpl::Scalar ComplexScalar; typedef decltype(real(ComplexScalar())) RealScalar; - typedef iColourMatrix ComplexColourMatrix; + typedef iColourMatrix ComplexColourMatrix; RealScalar _Scut; // Cutoff for U(3) projection eigenvalues, set at initialization int _HaloDepth=1; @@ -210,19 +211,19 @@ class Smear_HISQ : public Gimpl { ~Smear_HISQ() {} - // Intent: OUT--U_3link (sum of left and right staples attached to U) + // Intent: OUT--U_3link (sum of left and right 3-staples attached to U) // U_fat (accmulates the fat smearing) // IN--U_v (thin links) // gStencil (HISQ stencil) - // Nsites // mu // updateFatLinks (in the force, you only want U_3link_v) template void threeLinkStaple(linkWrite U_fat_v, linkWrite U_3link_v, linkRead U_v, stencilRead gStencil_v, - int Nsites, int mu, bool updateFatLinks=true) const { + int mu, bool updateFatLinks=true) const { SmearingParameters lt = this->_linkTreatment; typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; + int Nsites = U_v.size(); accelerator_for(site,Nsites,Simd::Nsimd(),{ U3matrix U0, U1, U2, U3, U4, U5, W; @@ -259,19 +260,20 @@ class Smear_HISQ : public Gimpl { } - // Intent: OUT--U_5link (sum of left and right staples attached to U) + // Intent: OUT--U_5link (sum of left and right 5-staples attached to U) // U_fat (accmulates the fat smearing) // IN--U_v (thin links) + // U_3link (sum of left and right 3-staples attached to U) // gStencil (HISQ stencil) - // Nsites // mu // updateFatLinks (in the force, you only want U_5link_v) template void fiveLinkStaple(linkWrite U_fat_v, linkWrite U_5linkA_v, linkWrite U_5linkB_v, linkWrite U_3link_v, - linkRead U_v, 
stencilRead gStencil_v, int Nsites, int mu, bool updateFatLinks=true) const { + linkRead U_v, stencilRead gStencil_v, int mu, bool updateFatLinks=true) const { SmearingParameters lt = this->_linkTreatment; typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; + int Nsites = U_v.size(); accelerator_for(site,Nsites,Simd::Nsimd(),{ U3matrix U0, U1, U2, U3, U4, U5, W; @@ -308,6 +310,55 @@ class Smear_HISQ : public Gimpl { } + // Intent: OUT--U_fat (accmulates the fat smearing) + // IN--U_v (thin links) + // gStencil (HISQ stencil) + // mu + template + void sevenLinkStaple(linkWrite U_fat_v, linkWrite U_5linkA_v, linkWrite U_5linkB_v, linkWrite U_3link_v, + linkRead U_v, stencilRead gStencil_v, int mu) const { + + SmearingParameters lt = this->_linkTreatment; + typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; + int Nsites = U_v.size(); + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix U0, U1, U2, U3, U4, U5, W; + int sigmaIndex = 0; + for(int nu=0;nu_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_7*W); + sigmaIndex++; + } + } + }) + return; + } + + // Intent: OUT--u_smr (smeared links), // u_naik (Naik links), // IN--u_thin (thin links) @@ -348,7 +399,6 @@ class Smear_HISQ : public Gimpl { autoView(U_5linkA_v, Ughost_5linkA, AcceleratorWrite); autoView(U_5linkB_v, Ughost_5linkB, AcceleratorWrite); - int Nsites = U_v.size(); auto gStencil_v = gStencil.View(AcceleratorRead); typedef decltype(getLink(U_v,gStencil.GetEntry(0,0),0)) U3matrix; @@ -356,45 +406,10 @@ class Smear_HISQ : public Gimpl { typedef decltype(U_fat_v) linkWrite; typedef decltype(gStencil_v) stencilRead; - threeLinkStaple(U_fat_v, U_3link_v, U_v, gStencil_v, Nsites, mu); - - fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_fat_v, U_5linkA_v, U_5linkB_v, U_3link_v, U_v, gStencil_v, Nsites, mu); - - accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link - U3matrix U0, U1, U2, U3, U4, U5, W; - int sigmaIndex = 0; - for(int nu=0;nu_offset](mu), 
U_fat_v(x->_offset)(mu) + lt.c_7*W); - sigmaIndex++; - } - } - }) - - } // end mu loop + threeLinkStaple(U_fat_v, U_3link_v, U_v, gStencil_v, mu); + fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_fat_v, U_5linkA_v, U_5linkB_v, U_3link_v, U_v, gStencil_v, mu); + sevenLinkStaple(U_fat_v, U_5linkA_v, U_5linkB_v, U_3link_v, U_v, gStencil_v, mu); + } // c1, c3, c5, c7 construct contributions u_smr = Ghost.Extract(Ughost_fat) + lt.c_1*u_thin; @@ -505,7 +520,7 @@ template class Force_HISQ : public Gimpl { public: - GridCartesian* const _grid; + GridCartesian* const _grid; GridRedBlackCartesian* _gridRB; // Sort out the Gimpl. This handles BCs and part of the precision. @@ -823,7 +838,7 @@ class Force_HISQ : public Gimpl { // U_3link_v is being used as a dummy in the first argument. That the last argument // is false guarantees threeLinkStaple does not interact with its first argument. fat7.template threeLinkStaple(U_3link_v, U_3link_v, - U_v, gStencil_v, Nsites, mu, false); + U_v, gStencil_v, mu, false); accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; @@ -866,9 +881,50 @@ class Force_HISQ : public Gimpl { }) fat7.template fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_5linkA_v, U_5linkA_v, U_5linkB_v, - U_3link_v, U_v, gStencil_v, Nsites, mu, false); + U_3link_v, U_v, gStencil_v, mu, false); - + accelerator_for(site,Nsites,Simd::Nsimd(),{ // 7-LINK DERIVATIVE + U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; + int sigmaIndex = 0; + for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c7*W); + sigmaIndex++; + } + } + }) } // end mu loop diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc index 893e7ed275..e780caaed4 100644 --- a/tests/forces/Test_HISQ_force.cc +++ b/tests/forces/Test_HISQ_force.cc @@ -121,6 +121,7 @@ bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, } // 
NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.XY.control"); // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.3link.control"); +// NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.5link.control"); // return true; } @@ -215,6 +216,12 @@ int main (int argc, char** argv) { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 ); + // Check the 5-link + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.5link.control"); + pass *= testForce(GRID, Umu, Ucontrol, + 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0 ); + if(pass){ Grid_pass("All tests passed."); } else { From 11c8764fd104c325d8ea35c6dae17c7ec3987f03 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Tue, 25 Mar 2025 13:11:34 -0600 Subject: [PATCH 19/32] fix 7-link bug --- Grid/qcd/smearing/HISQSmearing.h | 106 +++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 33 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 580a9a984b..f617b821af 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -359,8 +359,8 @@ class Smear_HISQ : public Gimpl { } - // Intent: OUT--u_smr (smeared links), - // u_naik (Naik links), + // Intent: OUT--u_smr (smeared links) + // u_naik (Naik links) // IN--u_thin (thin links) void smear(GF& u_smr, GF& u_naik, GF& u_thin) const { @@ -699,6 +699,44 @@ class Force_HISQ : public Gimpl { }); }; + + // Intent: OUT--XY (|X> and |Y>) + // l (rat approx and Naik index) + // sep (separation between |X> and |Y>) + void outerProductHISQ(GF& XY, std::vector vecdt, std::vector& vecx, int l, int sep) { + + auto grid = this->_grid; + auto gridRB = this->_gridRB; + + GF tmp(grid); + FF X(gridRB), Y(gridRB), Xnu(gridRB), Ynu(gridRB), FFdag(gridRB); + + XY = Zero(); X = Zero(); Y = Zero(); + + pickCheckerboard(Even,X,vecx[l]); + pickCheckerboard(Odd ,Y,vecx[l]); + + for (int nu = 0; nu < Nd; nu++) { + // InsertForce4D is the thing that computes the outer product. Generically, + // it does this site-wise, i.e. 
A_i[s] B_j[s]. Hence to construct an outer + // product on different sites, we have to shift one of the guys first. Then + // we place into the outer product |X> vecdt, std::vector& vecx, std::vector n_orders_naik) { HISQParameters hp = this->_linkParams; - auto grid = this->_grid; - auto gridRB = this->_gridRB; - - GF XY(grid), tmp(grid); // outer product field - GF u_force(grid); // accumulates the force + auto grid = this->_grid; - FF X(gridRB), Y(gridRB), Xnu(gridRB), Ynu(gridRB), FFdag(gridRB); + GF XY(grid); // outer product field + GF u_force(grid); // accumulates the force momentum = Zero(); @@ -736,39 +771,42 @@ class Force_HISQ : public Gimpl { for (int i=0; i Uv(Nd, grid); +// std::vector XYv(Nd, grid); +// std::vector dVnaik(Nd, grid); +// for (int mu = 0; mu < Nd; mu++) { +// Uv[mu] = PeekIndex(_Umu, mu); +// XYv[mu] = PeekIndex(XY, mu); +// } +// +//// temp = gAcc.getLink(GInd::getSiteMu(up_mu , mu)) * gAcc.getLink(GInd::getSiteMu(up_2mu, mu)) * fAcc.getLink(GInd::getSiteMu(origin, mu)); +//// temp += gAcc.getLink(GInd::getSiteMu(up_mu , mu)) * fAcc.getLink(GInd::getSiteMu(dn_mu , mu)) * gAcc.getLink(GInd::getSiteMu(dn_mu , mu)); +//// temp += fAcc.getLink(GInd::getSiteMu(dn_2mu, mu)) * gAcc.getLink(GInd::getSiteMu(dn_2mu, mu)) * gAcc.getLink(GInd::getSiteMu(dn_mu , mu)); +// for (int mu = 0; mu < Nd; mu++) { +// Vnaik[mu] = lt.c_naik*Gimpl::CovShiftForward(U[mu],mu, +// Gimpl::CovShiftForward(U[mu],mu, +// Gimpl::CovShiftIdentityForward(XYv[mu],mu))); +//// Vnaik[mu] = lt.c_naik*Gimpl::CovShiftForward(U[mu],mu, +//// Gimpl::CovShiftForward(U[mu],mu, +//// Gimpl::CovShiftIdentityForward(U[mu],mu))); +// } + + outerProductHISQ(XY, vecdt, vecx, l, 1); momentum += hp.fat7_c1*XY; - // ------------------------------------------- SMEARING DERIVATIVES + // ------------------------------------------- N-LINK DERIVATIVES PaddedCell Ghost(_HaloDepth,grid); GF Ughost = Ghost.Exchange(_Umu); GF XYghost = Ghost.Exchange(XY); GF Fghost = Ghost.Exchange(u_force); - 
Fghost = Zero(); std::vector shifts = createHISQStencil(); GeneralLocalStencil gStencil(Ughost.Grid(),shifts); @@ -781,6 +819,8 @@ class Force_HISQ : public Gimpl { Smear_HISQ fat7(grid,hp.fat7_c1,0.,hp.fat7_c3,hp.fat7_c5,hp.fat7_c7,0.); + Fghost = Zero(); + for(int mu=0;mu_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c7*W); + setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c7*W*vecdt[l]); sigmaIndex++; } } From d7192fcdf08930d55ea061e165bfc34d3fa42077 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Wed, 26 Mar 2025 18:00:13 -0400 Subject: [PATCH 20/32] try one link again --- Grid/qcd/smearing/HISQSmearing.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index f617b821af..ed32e76626 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -295,6 +295,12 @@ class Smear_HISQ : public Gimpl { W = U2*U1*adj(U0) + adj(U5)*U4*U3; + // Counting 3-link staples: there are three planes attached to the to-be-updated link, + // which corresponds to three (forward+backward) staples. For the 5-link staples, for + // each plane, there are two remaining directions, so that there are six 5-link staples + // altogether. That will not fit in a single GaugeField object, so we use two. You can + // think of sigmaIndex and rho together as being the labels that pick out a particular + // 5-link staple. They therefore should not be interpreted as directions. 
if(sigmaIndex<3) { setLink(U_5linkA_v[x->_offset](rho), W); } else { @@ -711,12 +717,16 @@ class Force_HISQ : public Gimpl { auto gridRB = this->_gridRB; GF tmp(grid); - FF X(gridRB), Y(gridRB), Xnu(gridRB), Ynu(gridRB), FFdag(gridRB); + FF XRB(gridRB), YRB(gridRB); + FF X(grid), Y(grid), Xnu(grid), Ynu(grid), FFdag(grid); XY = Zero(); X = Zero(); Y = Zero(); - pickCheckerboard(Even,X,vecx[l]); - pickCheckerboard(Odd ,Y,vecx[l]); + // WRAP THIS TO SAVE MEMORY AND ENHANCE READABILITY + pickCheckerboard(Even,XRB,vecx[l]); + pickCheckerboard(Odd ,YRB,vecx[l]); + setCheckerboard(X,XRB); + setCheckerboard(Y,YRB); for (int nu = 0; nu < Nd; nu++) { // InsertForce4D is the thing that computes the outer product. Generically, From 892f6dc08523be44a4668661e0b5bea7e86bb107 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Fri, 28 Mar 2025 10:25:34 -0400 Subject: [PATCH 21/32] next attempt at fixing outer product --- Grid/qcd/smearing/HISQSmearing.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index ed32e76626..0355822ddc 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -718,8 +718,15 @@ class Force_HISQ : public Gimpl { GF tmp(grid); FF XRB(gridRB), YRB(gridRB); - FF X(grid), Y(grid), Xnu(grid), Ynu(grid), FFdag(grid); - + FF X(grid), Y(grid), Xnu(grid), Ynu(grid); + + // In ks_imp_rhmc/update_h_rhmc.c around line 84, it multiplies the even-site result X of ratinv by Dslash, + // which puts UX on the adjacent odd sites. + // TRANSLATION: Y=DX on odd, X on even + // Then in generic_ks/fermion_force_hisq_multi_cpu.c, lines 171-193 it shifts the odd site UX over to the + // even site and calculates the outer product. So the shift is needed here to assemble the two terms in + // the outer product on one site, but no multiplication by U is needed, since that was already done. + // TRANSLATION: shift to even, carry out outer product. 
XY = Zero(); X = Zero(); Y = Zero(); // WRAP THIS TO SAVE MEMORY AND ENHANCE READABILITY @@ -734,14 +741,12 @@ class Force_HISQ : public Gimpl { // product on different sites, we have to shift one of the guys first. Then // we place into the outer product |X> Date: Fri, 28 Mar 2025 12:24:53 -0400 Subject: [PATCH 22/32] another attempt to fix outer product --- Grid/qcd/smearing/HISQSmearing.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 0355822ddc..42bf1df570 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -735,6 +735,7 @@ class Force_HISQ : public Gimpl { setCheckerboard(X,XRB); setCheckerboard(Y,YRB); + tmp = Zero(); for (int nu = 0; nu < Nd; nu++) { // InsertForce4D is the thing that computes the outer product. Generically, // it does this site-wise, i.e. A_i[s] B_j[s]. Hence to construct an outer @@ -744,6 +745,7 @@ class Force_HISQ : public Gimpl { Gimpl::InsertForce4D(tmp,Ynu,X,nu); } XY += vecdt[l]*tmp; + tmp = Zero(); for (int nu = 0; nu < Nd; nu++) { Xnu = Cshift(X,nu,sep); Gimpl::InsertForce4D(tmp,Xnu,Y,nu); @@ -984,7 +986,7 @@ class Force_HISQ : public Gimpl { } // end mu loop u_force = Ghost.Extract(Fghost); - momentum += u_force; +// momentum += u_force; l++; } From b094cf7c701699da50c386ed7d02a72fd2999d04 Mon Sep 17 00:00:00 2001 From: "D. A. 
Clarke" Date: Thu, 3 Apr 2025 14:28:30 -0600 Subject: [PATCH 23/32] this should be a working one-link --- Grid/qcd/smearing/HISQSmearing.h | 392 +++++++++++++++---------------- 1 file changed, 191 insertions(+), 201 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 42bf1df570..e7e867b457 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -711,53 +711,30 @@ class Force_HISQ : public Gimpl { // vecx (contains |X> and |Y>) // l (rat approx and Naik index) // sep (separation between |X> and |Y>) - void outerProductHISQ(GF& XY, std::vector vecdt, std::vector& vecx, int l, int sep) { + void outerProductHISQ(GF& XY, std::vector& vecx, int l, int sep) { auto grid = this->_grid; auto gridRB = this->_gridRB; - GF tmp(grid); - FF XRB(gridRB), YRB(gridRB); - FF X(grid), Y(grid), Xnu(grid), Ynu(grid); - - // In ks_imp_rhmc/update_h_rhmc.c around line 84, it multiplies the even-site result X of ratinv by Dslash, - // which puts UX on the adjacent odd sites. - // TRANSLATION: Y=DX on odd, X on even - // Then in generic_ks/fermion_force_hisq_multi_cpu.c, lines 171-193 it shifts the odd site UX over to the - // even site and calculates the outer product. So the shift is needed here to assemble the two terms in - // the outer product on one site, but no multiplication by U is needed, since that was already done. - // TRANSLATION: shift to even, carry out outer product. - XY = Zero(); X = Zero(); Y = Zero(); - - // WRAP THIS TO SAVE MEMORY AND ENHANCE READABILITY + FF X(grid), Y(grid), XRB(gridRB), YRB(gridRB); pickCheckerboard(Even,XRB,vecx[l]); - pickCheckerboard(Odd ,YRB,vecx[l]); setCheckerboard(X,XRB); + pickCheckerboard(Odd ,YRB,vecx[l]); setCheckerboard(Y,YRB); - tmp = Zero(); + LF XYnu(grid), YXnu(grid); + XY = Zero(); XYnu = Zero(); YXnu=Zero(); for (int nu = 0; nu < Nd; nu++) { - // InsertForce4D is the thing that computes the outer product. Generically, - // it does this site-wise, i.e. 
A_i[s] B_j[s]. Hence to construct an outer - // product on different sites, we have to shift one of the guys first. Then - // we place into the outer product |X>(XY,(YXnu-XYnu),nu); } - XY += vecdt[l]*tmp; - tmp = Zero(); - for (int nu = 0; nu < Nd; nu++) { - Xnu = Cshift(X,nu,sep); - Gimpl::InsertForce4D(tmp,Xnu,Y,nu); - } - XY -= vecdt[l]*tmp; // capture (-1)^y in eq (2.6) + return; } // We are calculating the force using the rational approximation. The goal is that we can approximate // (Mdag M)^(-nf/4) = alpha_0 + sum_l alpha_l/(M^dag M + beta_l). Hence the index l runs over the - // order of the rational approximation. The additional complication is that each M depends on the - // fermion mass, and for higher masses, in particular when there are charm quarks, we need to // introduce a different "Naik epsilon" for each M. Hence, we can think of the total application // of this operator as having an index inaik, running over the different Naik epsilons; for each inaik // there is a possibly different order_inaik, then the operator has an index l running up to order_inaik. 
@@ -813,184 +790,197 @@ class Force_HISQ : public Gimpl { //// Gimpl::CovShiftIdentityForward(U[mu],mu))); // } - outerProductHISQ(XY, vecdt, vecx, l, 1); - - momentum += hp.fat7_c1*XY; - - + outerProductHISQ(XY, vecx, l, 1); - // ------------------------------------------- N-LINK DERIVATIVES - PaddedCell Ghost(_HaloDepth,grid); - GF Ughost = Ghost.Exchange(_Umu); - GF XYghost = Ghost.Exchange(XY); - GF Fghost = Ghost.Exchange(u_force); - std::vector shifts = createHISQStencil(); - GeneralLocalStencil gStencil(Ughost.Grid(),shifts); + momentum += hp.fat7_c1*vecdt[l]*XY; - GF Ughost_3link(Ughost.Grid()); - GF Ughost_5linkA(Ughost.Grid()); - GF Ughost_5linkB(Ughost.Grid()); - GF dUghost_3link(Ughost.Grid()); - GF dUghost_5linkA(Ughost.Grid()); - GF dUghost_5linkB(Ughost.Grid()); - Smear_HISQ fat7(grid,hp.fat7_c1,0.,hp.fat7_c3,hp.fat7_c5,hp.fat7_c7,0.); - Fghost = Zero(); - - for(int mu=0;mu_offset](nu), W); - - setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c3*W*vecdt[l]); - } - }) - - // U_3link_v is being used as a dummy in the first argument. That the last argument - // is false guarantees threeLinkStaple does not interact with its first argument. 
- fat7.template threeLinkStaple(U_3link_v, U_3link_v, - U_v, gStencil_v, mu, false); - - accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE - U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; - int sigmaIndex = 0; - for(int nu=0;nu_offset](rho), W); - } else { - setLink(dU_5linkB_v[x->_offset](rho), W); - } - - setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c5*W*vecdt[l]); - sigmaIndex++; - } - } - }) - - fat7.template fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_5linkA_v, U_5linkA_v, U_5linkB_v, - U_3link_v, U_v, gStencil_v, mu, false); - - accelerator_for(site,Nsites,Simd::Nsimd(),{ // 7-LINK DERIVATIVE - U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; - int sigmaIndex = 0; - for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c7*W*vecdt[l]); - sigmaIndex++; - } - } - }) - - } // end mu loop - - u_force = Ghost.Extract(Fghost); +// // ------------------------------------------- N-LINK DERIVATIVES +// PaddedCell Ghost(_HaloDepth,grid); +// GF Ughost = Ghost.Exchange(_Umu); +// GF XYghost = Ghost.Exchange(XY); +// GF Fghost = Ghost.Exchange(u_force); +// std::vector shifts = createHISQStencil(); +// GeneralLocalStencil gStencil(Ughost.Grid(),shifts); +// +// GF Ughost_3link(Ughost.Grid()); +// GF Ughost_5linkA(Ughost.Grid()); +// GF Ughost_5linkB(Ughost.Grid()); +// GF dUghost_3link(Ughost.Grid()); +// GF dUghost_5linkA(Ughost.Grid()); +// GF dUghost_5linkB(Ughost.Grid()); +// +// Smear_HISQ fat7(grid,hp.fat7_c1,0.,hp.fat7_c3,hp.fat7_c5,hp.fat7_c7,0.); +// +// Fghost = Zero(); +// +// for(int mu=0;mu_offset](nu), W); +// +// setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c3*W*vecdt[l]); +// } +// }) +// +// // U_3link_v is being used as a dummy in the first argument. That the last argument +// // is false guarantees threeLinkStaple does not interact with its first argument. 
+// fat7.template threeLinkStaple(U_3link_v, U_3link_v, +// U_v, gStencil_v, mu, false); +// +// accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE +// U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; +// int sigmaIndex = 0; +// for(int nu=0;nu_offset](rho), W); +// } else { +// setLink(dU_5linkB_v[x->_offset](rho), W); +// } +// +// setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c5*W*vecdt[l]); +// sigmaIndex++; +// } +// } +// }) +// +// fat7.template fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_5linkA_v, U_5linkA_v, U_5linkB_v, +// U_3link_v, U_v, gStencil_v, mu, false); +// +// accelerator_for(site,Nsites,Simd::Nsimd(),{ // 7-LINK DERIVATIVE +// U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; +// int sigmaIndex = 0; +// for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c7*W*vecdt[l]); +// sigmaIndex++; +// } +// } +// }) +// +// } // end mu loop +// +// u_force = Ghost.Extract(Fghost); // momentum += u_force; l++; } } + + std::vector Uv(Nd,grid); + std::vector mom(Nd, grid); + std::vector Umom(Nd, grid); + for (int mu = 0; mu < Nd; mu++) { + mom[mu] = PeekIndex(momentum, mu); + Uv[mu] = PeekIndex(_Umu, mu); + Umom[mu] = Uv[mu]*mom[mu]; + } + for (int mu = 0; mu < Nd; mu++) { + PokeIndex(momentum, Umom[mu], mu); + } + } From 7e5449756e8e84313deb13717676df1a5c466055 Mon Sep 17 00:00:00 2001 From: "D. A. 
Clarke" Date: Sun, 13 Apr 2025 16:55:29 -0600 Subject: [PATCH 24/32] working 3-link staple --- Grid/qcd/smearing/HISQSmearing.h | 333 ++++++++++++------------------- tests/forces/Test_HISQ_force.cc | 18 +- 2 files changed, 139 insertions(+), 212 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index e7e867b457..44eb917535 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -717,6 +717,8 @@ class Force_HISQ : public Gimpl { auto gridRB = this->_gridRB; FF X(grid), Y(grid), XRB(gridRB), YRB(gridRB); + X = Zero(); Y=Zero(); XRB=Zero(); YRB=Zero(); + pickCheckerboard(Even,XRB,vecx[l]); setCheckerboard(X,XRB); pickCheckerboard(Odd ,YRB,vecx[l]); @@ -784,7 +786,7 @@ class Force_HISQ : public Gimpl { // for (int mu = 0; mu < Nd; mu++) { // Vnaik[mu] = lt.c_naik*Gimpl::CovShiftForward(U[mu],mu, // Gimpl::CovShiftForward(U[mu],mu, -// Gimpl::CovShiftIdentityForward(XYv[mu],mu))); +// Gimpl::CovShiftIdentityForward(XYv[mu],mu))); //// Vnaik[mu] = lt.c_naik*Gimpl::CovShiftForward(U[mu],mu, //// Gimpl::CovShiftForward(U[mu],mu, //// Gimpl::CovShiftIdentityForward(U[mu],mu))); @@ -792,128 +794,127 @@ class Force_HISQ : public Gimpl { outerProductHISQ(XY, vecx, l, 1); - momentum += hp.fat7_c1*vecdt[l]*XY; - - - -// // ------------------------------------------- N-LINK DERIVATIVES -// PaddedCell Ghost(_HaloDepth,grid); -// GF Ughost = Ghost.Exchange(_Umu); -// GF XYghost = Ghost.Exchange(XY); -// GF Fghost = Ghost.Exchange(u_force); -// std::vector shifts = createHISQStencil(); -// GeneralLocalStencil gStencil(Ughost.Grid(),shifts); -// -// GF Ughost_3link(Ughost.Grid()); -// GF Ughost_5linkA(Ughost.Grid()); -// GF Ughost_5linkB(Ughost.Grid()); -// GF dUghost_3link(Ughost.Grid()); -// GF dUghost_5linkA(Ughost.Grid()); -// GF dUghost_5linkB(Ughost.Grid()); -// -// Smear_HISQ fat7(grid,hp.fat7_c1,0.,hp.fat7_c3,hp.fat7_c5,hp.fat7_c7,0.); -// -// Fghost = Zero(); -// -// for(int mu=0;mu_offset](nu), W); 
-// -// setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c3*W*vecdt[l]); -// } -// }) -// -// // U_3link_v is being used as a dummy in the first argument. That the last argument -// // is false guarantees threeLinkStaple does not interact with its first argument. -// fat7.template threeLinkStaple(U_3link_v, U_3link_v, -// U_v, gStencil_v, mu, false); -// -// accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE -// U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; -// int sigmaIndex = 0; -// for(int nu=0;nu_offset](rho), W); -// } else { -// setLink(dU_5linkB_v[x->_offset](rho), W); -// } -// -// setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c5*W*vecdt[l]); -// sigmaIndex++; -// } -// } -// }) -// + // It's not clear to me whether this should be fat7 or asqtad. + momentum += hp.asqtad_c1*vecdt[l]*XY; + + + // ------------------------------------------- N-LINK DERIVATIVES + PaddedCell Ghost(_HaloDepth,grid); + GF Ughost = Ghost.Exchange(_Wmu); + GF XYghost = Ghost.Exchange(XY); + GF Fghost = Ghost.Exchange(u_force); + std::vector shifts = createHISQStencil(); + GeneralLocalStencil gStencil(Ughost.Grid(),shifts); + + GF Ughost_3link(Ughost.Grid()); + GF Ughost_5linkA(Ughost.Grid()); + GF Ughost_5linkB(Ughost.Grid()); + GF dUghost_3link(Ughost.Grid()); + GF dUghost_5linkA(Ughost.Grid()); + GF dUghost_5linkB(Ughost.Grid()); + + Smear_HISQ asqtad(grid,hp.asqtad_c1,0.,hp.asqtad_c3,hp.asqtad_c5,hp.asqtad_c7,0.); + Fghost = Zero(); + + for(int mu=0;mu_offset](nu), W); + + setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c3*vecdt[l]*adj(W)); + } + }) + + // U_3link_v is being used as a dummy in the first argument. That the last argument + // is false guarantees threeLinkStaple does not interact with its first argument. 
+ asqtad.template threeLinkStaple(U_3link_v, U_3link_v, + U_v, gStencil_v, mu, false); + + accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE + U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; + int sigmaIndex = 0; + for(int nu=0;nu_offset](rho), W); + } else { + setLink(dU_5linkB_v[x->_offset](rho), W); + } + + setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c5*vecdt[l]*adj(W)); + sigmaIndex++; + } + } + }) + // fat7.template fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_5linkA_v, U_5linkA_v, U_5linkB_v, // U_3link_v, U_v, gStencil_v, mu, false); // @@ -959,13 +960,14 @@ class Force_HISQ : public Gimpl { // } // } // }) -// -// } // end mu loop -// -// u_force = Ghost.Extract(Fghost); -// momentum += u_force; + + } // end mu loop + + u_force = Ghost.Extract(Fghost); + momentum += u_force; l++; + } } @@ -984,81 +986,6 @@ class Force_HISQ : public Gimpl { } - void ddV_naik(GF& u_deriv, GF& u_mu, GF& u_force) { - - SmearingParameters lt = this->_linkTreatment; - auto grid = this->_grid; - - PaddedCell Ghost(3,grid); - GF Ughost = Ghost.Exchange(u_mu); - GF Fghost = Ghost.Exchange(u_force); - - GF Ughost_deriv(Ughost.Grid()); - - Ughost_deriv = Zero(); - - std::vector shifts; - for(int mu=0;mu(shifts, shiftSignal::NO_SHIFT); - appendShift(shifts, mu); - appendShift(shifts, mu, mu); - appendShift(shifts, Back(mu)); - appendShift(shifts, Back(mu), Back(mu)); - appendShift(shifts, Back(mu), Back(mu), Back(mu)); - } - - GeneralLocalStencil gStencil(Ughost.Grid(),shifts); - typedef decltype(gStencil.GetEntry(0,0)) stencilElement; - - autoView(U_v , Ughost , AcceleratorRead); - autoView(F_v , Fghost , AcceleratorRead); - autoView(U_deriv_v, Ughost_deriv, AcceleratorWrite); - - typedef decltype(getLink(U_v[0](0),gStencil.GetEntry(0,0))) U3matrix; - - int Nsites = U_v.size(); - auto gStencil_v = gStencil.View(AcceleratorRead); - - accelerator_for(site,Nsites,Simd::Nsimd(),{ - stencilElement SE0, SE1, SE2, SE3, SE4, SE5; - U3matrix 
U0, U1, U2, U3, U4, U5, F0, F1, F2, F3, F4, F5, V; - int s = 0; - for(int mu=0;mu_offset; - SE1 = gStencil_v.GetEntry(s+1,site); int x_p_mu = SE1->_offset; - SE2 = gStencil_v.GetEntry(s+2,site); int x_p_2mu = SE2->_offset; - SE3 = gStencil_v.GetEntry(s+3,site); int x_m_mu = SE3->_offset; - SE4 = gStencil_v.GetEntry(s+4,site); int x_m_2mu = SE4->_offset; - SE5 = gStencil_v.GetEntry(s+5,site); int x_m_3mu = SE5->_offset; - - U0 = getLink(U_v[x ](mu),SE0); - U1 = getLink(U_v[x_p_mu ](mu),SE1); - U2 = getLink(U_v[x_p_2mu](mu),SE2); - U3 = getLink(U_v[x_m_mu ](mu),SE3); - U4 = getLink(U_v[x_m_2mu](mu),SE4); - U5 = getLink(U_v[x_m_3mu](mu),SE5); - - F0 = getLink(F_v[x ](mu),SE0); - F1 = getLink(F_v[x_p_mu ](mu),SE1); - F2 = getLink(F_v[x_p_2mu](mu),SE2); - F3 = getLink(F_v[x_m_mu ](mu),SE3); - F4 = getLink(F_v[x_m_2mu](mu),SE4); - F5 = getLink(F_v[x_m_3mu](mu),SE5); - - // ********Forward******** *******Backward******** - V = (adj(F2)* U1 * U0 )+(adj(U5)*adj(U4)* F3 ) - +( U2 *adj(F1)* U0 )+(adj(U5)* F4 *adj(U3)) - +( U2 * U1 *adj(F0))+( F5 *adj(U4)*adj(U3)); - - setLink(U_deriv_v[x](mu), U_deriv_v(x)(mu) + lt.c_naik*V); - - s += 6; - } - }); - u_deriv = Ghost.Extract(Ughost_deriv); - } - }; diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc index e780caaed4..730af54807 100644 --- a/tests/forces/Test_HISQ_force.cc +++ b/tests/forces/Test_HISQ_force.cc @@ -204,23 +204,23 @@ int main (int argc, char** argv) { NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.ddVU3.control"); pass *= testddUProj(GRID,Umu,Ucontrol); - // Check the 1-link (inner product) + // Check the 1-link (outer product) NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.XY.control"); pass *= testForce(GRID, Umu, Ucontrol, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0 ); + 1, 0, 0, 0, 0 ); // Check the 3-link NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.3link.control"); pass *= testForce(GRID, Umu, Ucontrol, - 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0 ); 
+ 1, 0, 0, 0, 0, + 0, 1, 0, 0, 0 ); - // Check the 5-link - NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.5link.control"); - pass *= testForce(GRID, Umu, Ucontrol, - 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0 ); +// // Check the 5-link +// NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.5link.control"); +// pass *= testForce(GRID, Umu, Ucontrol, +// 0, 0, 1, 0, 0, +// 0, 0, 0, 0, 0 ); if(pass){ Grid_pass("All tests passed."); From ac4e23501c8d6af94159e9df00e1bfd09e20afe8 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Wed, 16 Apr 2025 12:34:22 -0600 Subject: [PATCH 25/32] working LePage derivative --- Grid/qcd/smearing/HISQSmearing.h | 305 +++++++++++++++++++------------ 1 file changed, 187 insertions(+), 118 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 44eb917535..7f25d0bdbd 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -157,6 +157,8 @@ getHISQStencilEntries(acc sView, int sIndex, int site) { } + + /*! @brief Allows for ASQTAD-like smearings. 
*/ template class Smear_HISQ : public Gimpl { @@ -412,9 +414,13 @@ class Smear_HISQ : public Gimpl { typedef decltype(U_fat_v) linkWrite; typedef decltype(gStencil_v) stencilRead; - threeLinkStaple(U_fat_v, U_3link_v, U_v, gStencil_v, mu); - fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_fat_v, U_5linkA_v, U_5linkB_v, U_3link_v, U_v, gStencil_v, mu); - sevenLinkStaple(U_fat_v, U_5linkA_v, U_5linkB_v, U_3link_v, U_v, gStencil_v, mu); + // CODE IN A MORE CAREFUL TEST FOR THIS + if((lt.c_7!=0) || (lt.c_5!=0) || (lt.c_3!=0)) + threeLinkStaple(U_fat_v, U_3link_v, U_v, gStencil_v, mu); + if((lt.c_7!=0) || (lt.c_5!=0)) + fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_fat_v, U_5linkA_v, U_5linkB_v, U_3link_v, U_v, gStencil_v, mu); + if( lt.c_7!=0 ) + sevenLinkStaple(U_fat_v, U_5linkA_v, U_5linkB_v, U_3link_v, U_v, gStencil_v, mu); } // c1, c3, c5, c7 construct contributions @@ -425,32 +431,38 @@ class Smear_HISQ : public Gimpl { std::vector V(Nd, grid); std::vector Vnaik(Nd, grid); for (int mu = 0; mu < Nd; mu++) { - U[mu] = PeekIndex(u_thin, mu); - V[mu] = PeekIndex(u_smr, mu); + U[mu] = PeekIndex(u_thin, mu); + V[mu] = PeekIndex(u_smr, mu); + Vnaik[mu] = Zero(); } for(int mu=0;mu and |Y>) - // l (rat approx and Naik index) - // sep (separation between |X> and |Y>) - void outerProductHISQ(GF& XY, std::vector& vecx, int l, int sep) { + // vecx (contains |X> and |Y>) + // l (rat approx and Naik index) + // sep (separation between |X> and |Y>) + GF outerProductHISQ(std::vector& vecx, int l, int sep) { auto grid = this->_grid; auto gridRB = this->_gridRB; - FF X(grid), Y(grid), XRB(gridRB), YRB(gridRB); - X = Zero(); Y=Zero(); XRB=Zero(); YRB=Zero(); - - pickCheckerboard(Even,XRB,vecx[l]); - setCheckerboard(X,XRB); - pickCheckerboard(Odd ,YRB,vecx[l]); - setCheckerboard(Y,YRB); - + GF XY(grid); + FF X(grid), Y(grid), RB(gridRB); LF XYnu(grid), YXnu(grid); + X = Zero(); Y=Zero(); + + RB=Zero(); + pickCheckerboard(Even,RB,vecx[l]); + setCheckerboard(X,RB); + RB=Zero(); + 
pickCheckerboard(Odd ,RB,vecx[l]); + setCheckerboard(Y,RB); + XY = Zero(); XYnu = Zero(); YXnu=Zero(); for (int nu = 0; nu < Nd; nu++) { YXnu = outerProduct( Cshift(Y,nu,sep) ,X); XYnu = outerProduct( Cshift(X,nu,sep) ,Y); PokeIndex(XY,(YXnu-XYnu),nu); } - return; + return XY; } @@ -766,38 +779,96 @@ class Force_HISQ : public Gimpl { int rat_order = n_orders_naik[inaik]; for (int i=0; i Uv(Nd, grid); -// std::vector XYv(Nd, grid); -// std::vector dVnaik(Nd, grid); -// for (int mu = 0; mu < Nd; mu++) { -// Uv[mu] = PeekIndex(_Umu, mu); -// XYv[mu] = PeekIndex(XY, mu); -// } -// -//// temp = gAcc.getLink(GInd::getSiteMu(up_mu , mu)) * gAcc.getLink(GInd::getSiteMu(up_2mu, mu)) * fAcc.getLink(GInd::getSiteMu(origin, mu)); -//// temp += gAcc.getLink(GInd::getSiteMu(up_mu , mu)) * fAcc.getLink(GInd::getSiteMu(dn_mu , mu)) * gAcc.getLink(GInd::getSiteMu(dn_mu , mu)); -//// temp += fAcc.getLink(GInd::getSiteMu(dn_2mu, mu)) * gAcc.getLink(GInd::getSiteMu(dn_2mu, mu)) * gAcc.getLink(GInd::getSiteMu(dn_mu , mu)); -// for (int mu = 0; mu < Nd; mu++) { -// Vnaik[mu] = lt.c_naik*Gimpl::CovShiftForward(U[mu],mu, -// Gimpl::CovShiftForward(U[mu],mu, -// Gimpl::CovShiftIdentityForward(XYv[mu],mu))); -//// Vnaik[mu] = lt.c_naik*Gimpl::CovShiftForward(U[mu],mu, -//// Gimpl::CovShiftForward(U[mu],mu, -//// Gimpl::CovShiftIdentityForward(U[mu],mu))); -// } - - outerProductHISQ(XY, vecx, l, 1); + XY = outerProductHISQ(vecx, l, 1); // It's not clear to me whether this should be fat7 or asqtad. 
momentum += hp.asqtad_c1*vecdt[l]*XY; + // -------------------------------------------- LEPAGE DERIVATIVE + std::vector Wv(Nd, grid); + std::vector XYdag(Nd, grid); + std::vector dLPdWv(Nd, grid); + for (int mu = 0; mu < Nd; mu++) { + Wv[mu] = PeekIndex(_Wmu, mu); + XYdag[mu] = adj(PeekIndex(XY, mu)); + dLPdWv[mu] = Zero(); + } + for(int mu=0;mu(dLPdW, dLPdWv[mu], mu); + } + + momentum += hp.asqtad_clp*vecdt[l]*dLPdW; + + // ------------------------------------------- N-LINK DERIVATIVES PaddedCell Ghost(_HaloDepth,grid); GF Ughost = Ghost.Exchange(_Wmu); @@ -870,51 +941,53 @@ class Force_HISQ : public Gimpl { } }) - // U_3link_v is being used as a dummy in the first argument. That the last argument - // is false guarantees threeLinkStaple does not interact with its first argument. - asqtad.template threeLinkStaple(U_3link_v, U_3link_v, - U_v, gStencil_v, mu, false); - - accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE - U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; - int sigmaIndex = 0; - for(int nu=0;nu_offset](rho), W); - } else { - setLink(dU_5linkB_v[x->_offset](rho), W); - } - - setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c5*vecdt[l]*adj(W)); - sigmaIndex++; - } - } - }) - +// // U_3link_v is being used as a dummy in the first argument. That the last argument +// // is false guarantees threeLinkStaple does not interact with its first argument. 
+// asqtad.template threeLinkStaple(U_3link_v, U_3link_v, +// U_v, gStencil_v, mu, false); +// +// accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE +// U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; +// int sigmaIndex = 0; +// for(int nu=0;nu_offset](rho), W); +// } else { +// setLink(dU_5linkB_v[x->_offset](rho), W); +// } +// +// setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c5*vecdt[l]*adj(W)); +// sigmaIndex++; +// } +// } +// }) +// // fat7.template fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_5linkA_v, U_5linkA_v, U_5linkB_v, // U_3link_v, U_v, gStencil_v, mu, false); // @@ -971,18 +1044,14 @@ class Force_HISQ : public Gimpl { } } - std::vector Uv(Nd,grid); - std::vector mom(Nd, grid); - std::vector Umom(Nd, grid); - for (int mu = 0; mu < Nd; mu++) { - mom[mu] = PeekIndex(momentum, mu); - Uv[mu] = PeekIndex(_Umu, mu); - Umom[mu] = Uv[mu]*mom[mu]; - } + + + // Close the loop: Multiply on the left by U_mu(x) + LF mom(grid); for (int mu = 0; mu < Nd; mu++) { - PokeIndex(momentum, Umom[mu], mu); + mom = PeekIndex(_Umu, mu) * PeekIndex(momentum, mu); + PokeIndex(momentum, mom, mu); } - } From 2bf2c773968bd17b7d6b7c9d408a95eb584cfc3a Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Thu, 17 Apr 2025 16:29:54 -0600 Subject: [PATCH 26/32] working Naik link --- Grid/qcd/smearing/HISQSmearing.h | 207 +++++++++++++++++-------------- tests/forces/Test_HISQ_force.cc | 7 ++ 2 files changed, 119 insertions(+), 95 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 7f25d0bdbd..b5bb7bc0ef 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -147,7 +147,7 @@ std::vector createHISQStencil() { /*! @brief Retrieve the stencil entries. 
*/ template accelerator_inline std::tuple -getHISQStencilEntries(acc sView, int sIndex, int site) { +get3StaplePoints(acc sView, int sIndex, int site) { GeneralStencilEntry* x_p_mu = sView.GetEntry(sIndex+0,site); GeneralStencilEntry* x_p_nu = sView.GetEntry(sIndex+1,site); GeneralStencilEntry* x = sView.GetEntry(sIndex+2,site); @@ -218,22 +218,20 @@ class Smear_HISQ : public Gimpl { // IN--U_v (thin links) // gStencil (HISQ stencil) // mu - // updateFatLinks (in the force, you only want U_3link_v) template - void threeLinkStaple(linkWrite U_fat_v, linkWrite U_3link_v, linkRead U_v, stencilRead gStencil_v, - int mu, bool updateFatLinks=true) const { + void threeLinkStaple(linkWrite U_fat_v, linkWrite U_3link_v, linkRead U_v, stencilRead gStencil_v, int mu) const { SmearingParameters lt = this->_linkTreatment; typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; int Nsites = U_v.size(); accelerator_for(site,Nsites,Simd::Nsimd(),{ - U3matrix U0, U1, U2, U3, U4, U5, W; + U3matrix U0, U1, U2, U3, U4, U5, res; for(int nu=0;nu_offset](nu), W); + setLink(U_3link_v[x->_offset](nu), res); // The index operator (x) returns the coalesced read on GPU. The view [] index returns // a reference to the vector object. The [x](mu) returns a reference to the densely // packed (contiguous in memory) mu-th element of the vector object. 
- if(updateFatLinks) setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_3*W); + setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_3*res); } }) return; @@ -271,14 +269,14 @@ class Smear_HISQ : public Gimpl { // updateFatLinks (in the force, you only want U_5link_v) template void fiveLinkStaple(linkWrite U_fat_v, linkWrite U_5linkA_v, linkWrite U_5linkB_v, linkWrite U_3link_v, - linkRead U_v, stencilRead gStencil_v, int mu, bool updateFatLinks=true) const { + linkRead U_v, stencilRead gStencil_v, int mu) const { SmearingParameters lt = this->_linkTreatment; typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; int Nsites = U_v.size(); accelerator_for(site,Nsites,Simd::Nsimd(),{ - U3matrix U0, U1, U2, U3, U4, U5, W; + U3matrix U0, U1, U2, U3, U4, U5, res; int sigmaIndex = 0; for(int nu=0;nu_offset](rho), W); + setLink(U_5linkA_v[x->_offset](rho), res); } else { - setLink(U_5linkB_v[x->_offset](rho), W); + setLink(U_5linkB_v[x->_offset](rho), res); } - if(updateFatLinks) setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_5*W); + setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_5*res); sigmaIndex++; } } @@ -323,7 +321,7 @@ class Smear_HISQ : public Gimpl { // gStencil (HISQ stencil) // mu template - void sevenLinkStaple(linkWrite U_fat_v, linkWrite U_5linkA_v, linkWrite U_5linkB_v, linkWrite U_3link_v, + void sevenLinkStaple(linkWrite U_fat_v, linkWrite U_5linkA_v, linkWrite U_5linkB_v, linkRead U_v, stencilRead gStencil_v, int mu) const { SmearingParameters lt = this->_linkTreatment; @@ -331,7 +329,7 @@ class Smear_HISQ : public Gimpl { int Nsites = U_v.size(); accelerator_for(site,Nsites,Simd::Nsimd(),{ - U3matrix U0, U1, U2, U3, U4, U5, W; + U3matrix U0, U1, U2, U3, U4, U5, res; int sigmaIndex = 0; for(int nu=0;nu_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_7*W); + setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_7*res); sigmaIndex++; } } @@ -420,7 +418,7 @@ class Smear_HISQ : public 
Gimpl { if((lt.c_7!=0) || (lt.c_5!=0)) fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_fat_v, U_5linkA_v, U_5linkB_v, U_3link_v, U_v, gStencil_v, mu); if( lt.c_7!=0 ) - sevenLinkStaple(U_fat_v, U_5linkA_v, U_5linkB_v, U_3link_v, U_v, gStencil_v, mu); + sevenLinkStaple(U_fat_v, U_5linkA_v, U_5linkB_v, U_v, gStencil_v, mu); } // c1, c3, c5, c7 construct contributions @@ -779,94 +777,114 @@ class Force_HISQ : public Gimpl { int rat_order = n_orders_naik[inaik]; for (int i=0; i Wv(Nd, grid); + std::vector XYdag(Nd, grid); + std::vector ddW(Nd, grid); + for (int mu = 0; mu < Nd; mu++) { + Wv[mu] = PeekIndex(_Wmu, mu); + XYdag[mu] = PeekIndex(XY, mu); + ddW[mu] = Zero(); + } + for (int mu = 0; mu < Nd; mu++) { + ddW[mu] = Cshift( Wv[mu],mu, 1)*Cshift( Wv[mu],mu, 2)* XYdag[mu] + + Cshift( Wv[mu],mu, 1)*Cshift(XYdag[mu],mu,-1)*Cshift( Wv[mu],mu,-1) + + Cshift(XYdag[mu],mu,-2)*Cshift( Wv[mu],mu,-2)*Cshift( Wv[mu],mu,-1); + } + for (int mu = 0; mu < Nd; mu++) { + PokeIndex(temp, ddW[mu], mu); + } + + momentum += hp.asqtad_cnaik*vecdt[l]*temp; + + + // -------------------------- ONE-LINK DERIVATIVE (OUTER PRODUCT) XY = outerProductHISQ(vecx, l, 1); - // It's not clear to me whether this should be fat7 or asqtad. - momentum += hp.asqtad_c1*vecdt[l]*XY; + momentum += hp.asqtad_c1*vecdt[l]*XY; // It's not clear to me whether this should be fat7 or asqtad. 
// -------------------------------------------- LEPAGE DERIVATIVE - std::vector Wv(Nd, grid); - std::vector XYdag(Nd, grid); - std::vector dLPdWv(Nd, grid); for (int mu = 0; mu < Nd; mu++) { - Wv[mu] = PeekIndex(_Wmu, mu); - XYdag[mu] = adj(PeekIndex(XY, mu)); - dLPdWv[mu] = Zero(); + ddW[mu] = Zero(); + XYdag[mu] = adj(PeekIndex(XY, mu)); } - for(int mu=0;mu(dLPdW, dLPdWv[mu], mu); + PokeIndex(temp, ddW[mu], mu); } - momentum += hp.asqtad_clp*vecdt[l]*dLPdW; + momentum += hp.asqtad_clp*vecdt[l]*temp; // ------------------------------------------- N-LINK DERIVATIVES @@ -877,30 +895,13 @@ class Force_HISQ : public Gimpl { std::vector shifts = createHISQStencil(); GeneralLocalStencil gStencil(Ughost.Grid(),shifts); - GF Ughost_3link(Ughost.Grid()); - GF Ughost_5linkA(Ughost.Grid()); - GF Ughost_5linkB(Ughost.Grid()); - GF dUghost_3link(Ughost.Grid()); - GF dUghost_5linkA(Ughost.Grid()); - GF dUghost_5linkB(Ughost.Grid()); - - Smear_HISQ asqtad(grid,hp.asqtad_c1,0.,hp.asqtad_c3,hp.asqtad_c5,hp.asqtad_c7,0.); Fghost = Zero(); for(int mu=0;mu_offset](nu), W); - setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c3*vecdt[l]*adj(W)); + setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c3*vecdt[l]*adj(res)); } }) -// // U_3link_v is being used as a dummy in the first argument. That the last argument -// // is false guarantees threeLinkStaple does not interact with its first argument. 
-// asqtad.template threeLinkStaple(U_3link_v, U_3link_v, -// U_v, gStencil_v, mu, false); -// + // accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE -// U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; +// U3matrix U0, U1, U2, U3, U4, U5, XY0, XY1, XY2, XY3, XY4, XY5, V0, V1, V2, V3, V4, V5, S0, S1, S2, S3, S4, S5, res; // int sigmaIndex = 0; // for(int nu=0;nu_offset](rho), W); +// setLink(dU_5linkA_v[x->_offset](rho), res); // } else { -// setLink(dU_5linkB_v[x->_offset](rho), W); +// setLink(dU_5linkB_v[x->_offset](rho), res); // } // -// setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c5*vecdt[l]*adj(W)); +// setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c5*vecdt[l]*adj(res)); // sigmaIndex++; // } // } // }) // -// fat7.template fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_5linkA_v, U_5linkA_v, U_5linkB_v, -// U_3link_v, U_v, gStencil_v, mu, false); // // accelerator_for(site,Nsites,Simd::Nsimd(),{ // 7-LINK DERIVATIVE // U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; @@ -1000,7 +1017,7 @@ class Force_HISQ : public Gimpl { // for(int rho=0;rho Date: Sun, 20 Apr 2025 23:33:59 -0600 Subject: [PATCH 27/32] working 5-link derivative --- Grid/qcd/smearing/HISQSmearing.h | 477 +++++++++++++++++------------ Grid/stencil/GeneralLocalStencil.h | 2 +- tests/forces/Test_HISQ_force.cc | 9 +- 3 files changed, 283 insertions(+), 205 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index b5bb7bc0ef..f516142292 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -120,31 +120,63 @@ auto getLink(const link& __restrict__ U, GeneralStencilEntry* x, int mu) { /*! @brief Figure out the stencil index from mu and nu. 
*/ accelerator_inline -int HISQStencilIndex(int mu, int nu) { - // Nshifts depends on how you built the stencil - int Nshifts = 5; - return Nshifts*nu + Nd*Nshifts*mu; +int HISQStencilIndex(int mu, int nu, int rho=0, std::string kind="3STAPLE") { + int res; + if (kind=="3STAPLE") + res = 5*(nu + Nd*mu); + else if (kind=="5STAPLE") + res = 17*(rho + Nd*nu + Nd*Nd*mu); + else + Grid_error("Unknown staple kind",kind); + return res; } -/*! @brief Create the mu-nu plane stencil. We allow mu==nu to make indexing the - stencil easier, but these entries will not be used. */ +/*! @brief Create various stencils needed for HISQ calculations */ inline -std::vector createHISQStencil() { +std::vector createHISQStencil(std::string kind="3STAPLE") { std::vector shifts; - for(int mu=0;mu(shifts,mu); - appendShift(shifts,nu); - appendShift(shifts,shiftSignal::NO_SHIFT); - appendShift(shifts,mu,Back(nu)); - appendShift(shifts,Back(nu)); + // We allow nu=mu and rho=nu, rho=mu to make indexing easier, but these + // entries will not be used. + if (kind=="3STAPLE") { + for(int mu=0;mu(shifts,mu); + appendShift(shifts,nu); + appendShift(shifts,shiftSignal::NO_SHIFT); + appendShift(shifts,mu,Back(nu)); + appendShift(shifts,Back(nu)); + } + } else if (kind=="5STAPLE") { + for(int mu =0;mu (shifts,nu,Back(rho)); + appendShift(shifts,nu); + appendShift(shifts,Back(rho)); + appendShift(shifts,shiftSignal::NO_SHIFT); + appendShift(shifts,rho); + appendShift(shifts,Back(nu),Back(rho)); + appendShift(shifts,Back(nu)); + appendShift(shifts,Back(nu),rho); + appendShift(shifts,mu,nu,Back(rho)); + appendShift(shifts,mu,nu); + appendShift(shifts,mu,Back(rho)); + appendShift(shifts,mu); + appendShift(shifts,mu,rho); + appendShift(shifts,mu,Back(nu),Back(rho)); + appendShift(shifts,mu,Back(nu)); + appendShift(shifts,mu,Back(nu),rho); + appendShift(shifts,nu,rho); + } + } else { + Grid_error("Unknown staple kind",kind); } return shifts; } -/*! @brief Retrieve the stencil entries. */ +/*! 
@brief Retrieve 3-link stencil entries. */ template accelerator_inline std::tuple get3StaplePoints(acc sView, int sIndex, int site) { @@ -157,6 +189,39 @@ get3StaplePoints(acc sView, int sIndex, int site) { } +/*! @brief Retrieve 5-link stencil entries. */ +template accelerator_inline +std::tuple +get5StaplePoints(acc sView, int sIndex, int site) { + GeneralStencilEntry* x_p_nu_m_rho = sView.GetEntry(sIndex+0 ,site); + GeneralStencilEntry* x_p_nu = sView.GetEntry(sIndex+1 ,site); + GeneralStencilEntry* x_m_rho = sView.GetEntry(sIndex+2 ,site); + GeneralStencilEntry* x = sView.GetEntry(sIndex+3 ,site); + GeneralStencilEntry* x_p_rho = sView.GetEntry(sIndex+4 ,site); + GeneralStencilEntry* x_m_nu_m_rho = sView.GetEntry(sIndex+5 ,site); + GeneralStencilEntry* x_m_nu = sView.GetEntry(sIndex+6 ,site); + GeneralStencilEntry* x_m_nu_p_rho = sView.GetEntry(sIndex+7 ,site); + GeneralStencilEntry* x_p_mu_p_nu_m_rho = sView.GetEntry(sIndex+8 ,site); + GeneralStencilEntry* x_p_mu_p_nu = sView.GetEntry(sIndex+9 ,site); + GeneralStencilEntry* x_p_mu_m_rho = sView.GetEntry(sIndex+10,site); + GeneralStencilEntry* x_p_mu = sView.GetEntry(sIndex+11,site); + GeneralStencilEntry* x_p_mu_p_rho = sView.GetEntry(sIndex+12,site); + GeneralStencilEntry* x_p_mu_m_nu_m_rho = sView.GetEntry(sIndex+13,site); + GeneralStencilEntry* x_p_mu_m_nu = sView.GetEntry(sIndex+14,site); + GeneralStencilEntry* x_p_mu_m_nu_p_rho = sView.GetEntry(sIndex+15,site); + GeneralStencilEntry* x_p_nu_p_rho = sView.GetEntry(sIndex+16,site); + return {x_p_nu_m_rho , x_p_nu , x_m_rho , + x , x_p_rho , x_m_nu_m_rho , + x_m_nu , x_m_nu_p_rho , x_p_mu_p_nu_m_rho, + x_p_mu_p_nu , x_p_mu_m_rho , x_p_mu , + x_p_mu_p_rho , x_p_mu_m_nu_m_rho, x_p_mu_m_nu , + x_p_mu_m_nu_p_rho, x_p_nu_p_rho}; +} /*! @brief Allows for ASQTAD-like smearings. 
*/ @@ -215,18 +280,21 @@ class Smear_HISQ : public Gimpl { // Intent: OUT--U_3link (sum of left and right 3-staples attached to U) // U_fat (accmulates the fat smearing) - // IN--U_v (thin links) - // gStencil (HISQ stencil) + // IN--U (thin links) + // gStencil (3-link stencil) // mu - template - void threeLinkStaple(linkWrite U_fat_v, linkWrite U_3link_v, linkRead U_v, stencilRead gStencil_v, int mu) const { + void threeLinkStaple(GF& U_fat, GF& U_3link, GF& U, GeneralLocalStencil gStencil, int mu) const { SmearingParameters lt = this->_linkTreatment; + autoView(U_v , U , AcceleratorRead); + autoView(U_fat_v , U_fat , AcceleratorWrite); + autoView(U_3link_v , U_3link , AcceleratorWrite); + auto gStencil_v = gStencil.View(AcceleratorRead); typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; int Nsites = U_v.size(); accelerator_for(site,Nsites,Simd::Nsimd(),{ - U3matrix U0, U1, U2, U3, U4, U5, res; + U3matrix res; for(int nu=0;nu_offset](nu), res); @@ -260,23 +323,28 @@ class Smear_HISQ : public Gimpl { } - // Intent: OUT--U_5link (sum of left and right 5-staples attached to U) + // Intent: OUT--U_5link (sum of 5-staples attached to U) // U_fat (accmulates the fat smearing) - // IN--U_v (thin links) + // IN--U (thin links) // U_3link (sum of left and right 3-staples attached to U) - // gStencil (HISQ stencil) + // gStencil (3-link stencil) // mu // updateFatLinks (in the force, you only want U_5link_v) - template - void fiveLinkStaple(linkWrite U_fat_v, linkWrite U_5linkA_v, linkWrite U_5linkB_v, linkWrite U_3link_v, - linkRead U_v, stencilRead gStencil_v, int mu) const { + void fiveLinkStaple(GF& U_fat, GF& U_5linkA, GF& U_5linkB, GF& U_3link, + GF& U, GeneralLocalStencil gStencil, int mu) const { SmearingParameters lt = this->_linkTreatment; + autoView(U_v , U , AcceleratorRead); + autoView(U_fat_v , U_fat , AcceleratorWrite); + autoView(U_3link_v , U_3link , AcceleratorWrite); + autoView(U_5linkA_v, U_5linkA, AcceleratorWrite); + 
autoView(U_5linkB_v, U_5linkB, AcceleratorWrite); + auto gStencil_v = gStencil.View(AcceleratorRead); typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; int Nsites = U_v.size(); accelerator_for(site,Nsites,Simd::Nsimd(),{ - U3matrix U0, U1, U2, U3, U4, U5, res; + U3matrix res; int sigmaIndex = 0; for(int nu=0;nu_offset](rho), res); - } else { + else setLink(U_5linkB_v[x->_offset](rho), res); - } setLink(U_fat_v[x->_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_5*res); sigmaIndex++; @@ -317,19 +378,23 @@ class Smear_HISQ : public Gimpl { // Intent: OUT--U_fat (accmulates the fat smearing) - // IN--U_v (thin links) - // gStencil (HISQ stencil) + // IN--U (thin links) + // U_5link (sum of 5-staples attached to U) + // gStencil (3-link stencil) // mu - template - void sevenLinkStaple(linkWrite U_fat_v, linkWrite U_5linkA_v, linkWrite U_5linkB_v, - linkRead U_v, stencilRead gStencil_v, int mu) const { + void sevenLinkStaple(GF& U_fat, GF& U_5linkA, GF& U_5linkB, GF& U, GeneralLocalStencil gStencil, int mu) const { SmearingParameters lt = this->_linkTreatment; + autoView(U_v , U , AcceleratorRead); + autoView(U_fat_v , U_fat , AcceleratorWrite); + autoView(U_5linkA_v, U_5linkA, AcceleratorWrite); + autoView(U_5linkB_v, U_5linkB, AcceleratorWrite); + auto gStencil_v = gStencil.View(AcceleratorRead); typedef decltype(getLink(U_v,gStencil_v.GetEntry(0,0),0)) U3matrix; int Nsites = U_v.size(); accelerator_for(site,Nsites,Simd::Nsimd(),{ - U3matrix U0, U1, U2, U3, U4, U5, res; + U3matrix res; int sigmaIndex = 0; for(int nu=0;nu_offset](mu), U_fat_v(x->_offset)(mu) + lt.c_7*res); sigmaIndex++; } @@ -379,50 +434,31 @@ class Smear_HISQ : public Gimpl { // This is where auxiliary N-link fields and the final smear will be stored. As // implemented, this uses about 25% more memory than necessary. 
- GF Ughost_fat(Ughost.Grid()); - GF Ughost_3link(Ughost.Grid()); - GF Ughost_5linkA(Ughost.Grid()); - GF Ughost_5linkB(Ughost.Grid()); + GF U_fat(Ughost.Grid()); + GF U_3link(Ughost.Grid()); + GF U_5linkA(Ughost.Grid()); + GF U_5linkB(Ughost.Grid()); // mu-nu plane stencil. - std::vector shifts = createHISQStencil(); + std::vector shifts = createHISQStencil("3STAPLE"); // A GeneralLocalStencil has two indices: a site and stencil index GeneralLocalStencil gStencil(Ughost.Grid(),shifts); - // This is where contributions from the smearing get added together - Ughost_fat=Zero(); - - // This loop handles 3-, 5-, and 7-link constructs, minus Lepage and Naik. + // Store sum of 3-, 5-, 7-link contributions in U_fat + U_fat=Zero(); for(int mu=0;mu(U_fat_v, U_3link_v, U_v, gStencil_v, mu); + threeLinkStaple(U_fat, U_3link, Ughost, gStencil, mu); if((lt.c_7!=0) || (lt.c_5!=0)) - fiveLinkStaple< linkRead,linkWrite,stencilRead>(U_fat_v, U_5linkA_v, U_5linkB_v, U_3link_v, U_v, gStencil_v, mu); + fiveLinkStaple( U_fat, U_5linkA, U_5linkB, U_3link, Ughost, gStencil, mu); if( lt.c_7!=0 ) - sevenLinkStaple(U_fat_v, U_5linkA_v, U_5linkB_v, U_v, gStencil_v, mu); + sevenLinkStaple(U_fat, U_5linkA, U_5linkB, Ughost, gStencil, mu); } - // c1, c3, c5, c7 construct contributions - u_smr = Ghost.Extract(Ughost_fat) + lt.c_1*u_thin; + // Add 1-link contribution + u_smr = Ghost.Extract(U_fat) + lt.c_1*u_thin; // Load up U and V std::vectors to access thin and smeared links. 
std::vector U(Nd, grid); @@ -436,25 +472,20 @@ class Smear_HISQ : public Gimpl { for(int mu=0;mu shifts = createHISQStencil(); - GeneralLocalStencil gStencil(Ughost.Grid(),shifts); + std::vector shifts3 = createHISQStencil(); + std::vector shifts5 = createHISQStencil("5STAPLE"); + GeneralLocalStencil gStencil3(Ughost.Grid(),shifts3); + GeneralLocalStencil gStencil5(Ughost.Grid(),shifts5); Fghost = Zero(); @@ -904,109 +937,147 @@ class Force_HISQ : public Gimpl { autoView(F_v , Fghost , AcceleratorWrite); int Nsites = U_v.size(); - auto gStencil_v = gStencil.View(AcceleratorRead); + auto gStencil3_v = gStencil3.View(AcceleratorRead); + auto gStencil5_v = gStencil5.View(AcceleratorRead); - typedef decltype(getLink(U_v,gStencil.GetEntry(0,0),0)) U3matrix; - typedef decltype(U_v) linkRead; - typedef decltype(F_v) linkWrite; - typedef decltype(gStencil_v) stencilRead; + typedef decltype(getLink(U_v,gStencil3.GetEntry(0,0),0)) U3matrix; accelerator_for(site,Nsites,Simd::Nsimd(),{ // 3-LINK DERIVATIVE - U3matrix U0, U1, U2, U3, U4, U5, XY0, XY1, XY2, XY3, XY4, XY5, res; + U3matrix res; for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c3*vecdt[l]*adj(res)); + res = adj(getLink(XY_v,x,nu))* getLink(U_v ,x_p_nu,mu) *adj(getLink(U_v ,x_p_mu,nu)) + + getLink(U_v ,x,nu) *adj(getLink(XY_v,x_p_nu,mu))*adj(getLink(U_v ,x_p_mu,nu)) + + getLink(U_v ,x,nu) * getLink(U_v ,x_p_nu,mu) * getLink(XY_v,x_p_mu,nu) + + + getLink(XY_v,x_m_nu,nu) * getLink(U_v ,x_m_nu,mu) * getLink(U_v ,x_p_mu_m_nu,nu) + + adj(getLink(U_v ,x_m_nu,nu))*adj(getLink(XY_v,x_m_nu,mu))* getLink(U_v ,x_p_mu_m_nu,nu) + + adj(getLink(U_v ,x_m_nu,nu))* getLink(U_v ,x_m_nu,mu) *adj(getLink(XY_v,x_p_mu_m_nu,nu)); + + setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c3*adj(res)); } }) - -// accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE -// U3matrix U0, U1, U2, U3, U4, U5, XY0, XY1, XY2, XY3, XY4, XY5, V0, V1, V2, V3, V4, V5, S0, S1, S2, S3, S4, S5, res; -// int sigmaIndex = 0; -// 
for(int nu=0;nu_offset](rho), res); -// } else { -// setLink(dU_5linkB_v[x->_offset](rho), res); -// } -// -// setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c5*vecdt[l]*adj(res)); -// sigmaIndex++; -// } -// } -// }) -// + accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE + U3matrix res, U0, U1, U2, U3, U4, XY0; + for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c5*res); + } + } + }) + // // accelerator_for(site,Nsites,Simd::Nsimd(),{ // 7-LINK DERIVATIVE // U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; @@ -1054,7 +1125,7 @@ class Force_HISQ : public Gimpl { } // end mu loop u_force = Ghost.Extract(Fghost); - momentum += u_force; + momentum += vecdt[l]*u_force; l++; diff --git a/Grid/stencil/GeneralLocalStencil.h b/Grid/stencil/GeneralLocalStencil.h index edad5fc167..c2bc9111f4 100644 --- a/Grid/stencil/GeneralLocalStencil.h +++ b/Grid/stencil/GeneralLocalStencil.h @@ -204,7 +204,7 @@ template void appendShift(std::vector& shifts, int dir, Args... args) { Coordinate shift(d,0); generalShift(shift, dir, args...); - // push_back creates an element at the end of shifts and + // push_back creates an element at the end of shift and // assigns the data in the argument to it. 
shifts.push_back(shift); } diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc index 8157d303ac..2ef2e48b31 100644 --- a/tests/forces/Test_HISQ_force.cc +++ b/tests/forces/Test_HISQ_force.cc @@ -122,8 +122,9 @@ bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.XY.control"); // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.3link.control"); // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.lp.control"); +// NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.naik.control"); // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.5link.control"); -// return true; + return true; } @@ -223,6 +224,12 @@ int main (int argc, char** argv) { 1, 0, 0, 0, 0, 0, 0, 0, 0, 1 ); + // Check the Naik-link + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.naik.control"); + pass *= testForce(GRID, Umu, Ucontrol, + 1, 0, 0, 0, 1, + 0, 0, 0, 0, 0 ); + // // Check the 5-link // NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.5link.control"); // pass *= testForce(GRID, Umu, Ucontrol, From bf7e1025c2215b42d1df8a9875e365b305b667d5 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Thu, 1 May 2025 19:54:00 -0600 Subject: [PATCH 28/32] new attempt at 7-link --- Grid/qcd/smearing/HISQSmearing.h | 1209 ++++++++++++++++++++++-------- tests/forces/Test_HISQ_force.cc | 18 +- 2 files changed, 889 insertions(+), 338 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index f516142292..b885c32fdf 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -120,12 +120,14 @@ auto getLink(const link& __restrict__ U, GeneralStencilEntry* x, int mu) { /*! @brief Figure out the stencil index from mu and nu. 
*/ accelerator_inline -int HISQStencilIndex(int mu, int nu, int rho=0, std::string kind="3STAPLE") { +int HISQStencilIndex(int mu, int nu, int rho=0, int sig=0, std::string kind="3STAPLE") { int res; if (kind=="3STAPLE") res = 5*(nu + Nd*mu); else if (kind=="5STAPLE") res = 17*(rho + Nd*nu + Nd*Nd*mu); + else if (kind=="7STAPLE") + res = 46*(sig + Nd*rho + Nd*Nd*nu + Nd*Nd*Nd*mu); else Grid_error("Unknown staple kind",kind); return res; @@ -169,6 +171,58 @@ std::vector createHISQStencil(std::string kind="3STAPLE") { appendShift(shifts,mu,Back(nu),rho); appendShift(shifts,nu,rho); } + } else if (kind=="7STAPLE") { + for(int mu =0;mu (shifts,shiftSignal::NO_SHIFT); + appendShift(shifts,mu); + appendShift(shifts,mu,nu); + appendShift(shifts,mu,nu,rho); + appendShift(shifts,mu,nu,rho,Back(sig)); + appendShift(shifts,mu,nu,Back(rho)); + appendShift(shifts,mu,nu,Back(rho),Back(sig)); + appendShift(shifts,mu,nu,Back(sig)); + appendShift(shifts,mu,Back(nu)); + appendShift(shifts,mu,Back(nu),rho); + appendShift(shifts,mu,Back(nu),rho,sig); + appendShift(shifts,mu,Back(nu),rho,Back(sig)); + appendShift(shifts,mu,Back(nu),Back(rho)); + appendShift(shifts,mu,Back(nu),Back(rho),sig); + appendShift(shifts,mu,Back(nu),Back(rho),Back(sig)); + appendShift(shifts,mu,Back(nu),sig); + appendShift(shifts,mu,Back(nu),Back(sig)); + appendShift(shifts,mu,rho); + appendShift(shifts,mu,rho,sig); + appendShift(shifts,mu,rho,Back(sig)); + appendShift(shifts,mu,Back(rho)); + appendShift(shifts,mu,Back(rho),sig); + appendShift(shifts,mu,Back(rho),Back(sig)); + appendShift(shifts,mu,sig); + appendShift(shifts,mu,Back(sig)); + appendShift(shifts,nu); + appendShift(shifts,nu,rho); + appendShift(shifts,nu,rho,sig); + appendShift(shifts,nu,rho,Back(sig)); + appendShift(shifts,nu,Back(rho)); + appendShift(shifts,nu,Back(rho),sig); + appendShift(shifts,nu,Back(rho),Back(sig)); + appendShift(shifts,rho); + appendShift(shifts,rho,Back(nu)); + appendShift(shifts,rho,sig); + 
appendShift(shifts,rho,Back(sig)); + appendShift(shifts,Back(nu)); + appendShift(shifts,Back(nu),rho); + appendShift(shifts,Back(nu),rho,sig); + appendShift(shifts,Back(nu),rho,Back(sig)); + appendShift(shifts,Back(nu),Back(rho)); + appendShift(shifts,Back(nu),Back(rho),sig); + appendShift(shifts,Back(nu),Back(rho),Back(sig)); + appendShift(shifts,Back(rho)); + appendShift(shifts,Back(rho),sig); + appendShift(shifts,Back(rho),Back(sig)); + } } else { Grid_error("Unknown staple kind",kind); } @@ -224,6 +278,82 @@ get5StaplePoints(acc sView, int sIndex, int site) { } +/*! @brief Retrieve 7-link stencil entries. */ +template accelerator_inline +std::tuple +get7StaplePoints(acc sView, int sIndex, int site) { + GeneralStencilEntry* x = sView.GetEntry(sIndex+0 ,site); + GeneralStencilEntry* x_p_mu = sView.GetEntry(sIndex+1 ,site); + GeneralStencilEntry* x_p_mu_p_nu = sView.GetEntry(sIndex+2 ,site); + GeneralStencilEntry* x_p_mu_p_nu_p_rho = sView.GetEntry(sIndex+3 ,site); + GeneralStencilEntry* x_p_mu_p_nu_p_rho_m_sig = sView.GetEntry(sIndex+4 ,site); + GeneralStencilEntry* x_p_mu_p_nu_m_rho = sView.GetEntry(sIndex+5 ,site); + GeneralStencilEntry* x_p_mu_p_nu_m_rho_m_sig = sView.GetEntry(sIndex+6 ,site); + GeneralStencilEntry* x_p_mu_p_nu_m_sig = sView.GetEntry(sIndex+7 ,site); + GeneralStencilEntry* x_p_mu_m_nu = sView.GetEntry(sIndex+8 ,site); + GeneralStencilEntry* x_p_mu_m_nu_p_rho = sView.GetEntry(sIndex+9 ,site); + GeneralStencilEntry* x_p_mu_m_nu_p_rho_p_sig = sView.GetEntry(sIndex+10,site); + GeneralStencilEntry* x_p_mu_m_nu_p_rho_m_sig = sView.GetEntry(sIndex+11,site); + GeneralStencilEntry* x_p_mu_m_nu_m_rho = sView.GetEntry(sIndex+12,site); + GeneralStencilEntry* x_p_mu_m_nu_m_rho_p_sig = sView.GetEntry(sIndex+13,site); + GeneralStencilEntry* x_p_mu_m_nu_m_rho_m_sig = sView.GetEntry(sIndex+14,site); + GeneralStencilEntry* x_p_mu_m_nu_p_sig = sView.GetEntry(sIndex+15,site); + GeneralStencilEntry* x_p_mu_m_nu_m_sig = sView.GetEntry(sIndex+16,site); + 
GeneralStencilEntry* x_p_mu_p_rho = sView.GetEntry(sIndex+17,site); + GeneralStencilEntry* x_p_mu_p_rho_p_sig = sView.GetEntry(sIndex+18,site); + GeneralStencilEntry* x_p_mu_p_rho_m_sig = sView.GetEntry(sIndex+19,site); + GeneralStencilEntry* x_p_mu_m_rho = sView.GetEntry(sIndex+20,site); + GeneralStencilEntry* x_p_mu_m_rho_p_sig = sView.GetEntry(sIndex+21,site); + GeneralStencilEntry* x_p_mu_m_rho_m_sig = sView.GetEntry(sIndex+22,site); + GeneralStencilEntry* x_p_mu_p_sig = sView.GetEntry(sIndex+23,site); + GeneralStencilEntry* x_p_mu_m_sig = sView.GetEntry(sIndex+24,site); + GeneralStencilEntry* x_p_nu = sView.GetEntry(sIndex+25,site); + GeneralStencilEntry* x_p_nu_p_rho = sView.GetEntry(sIndex+26,site); + GeneralStencilEntry* x_p_nu_p_rho_p_sig = sView.GetEntry(sIndex+27,site); + GeneralStencilEntry* x_p_nu_p_rho_m_sig = sView.GetEntry(sIndex+28,site); + GeneralStencilEntry* x_p_nu_m_rho = sView.GetEntry(sIndex+29,site); + GeneralStencilEntry* x_p_nu_m_rho_p_sig = sView.GetEntry(sIndex+30,site); + GeneralStencilEntry* x_p_nu_m_rho_m_sig = sView.GetEntry(sIndex+31,site); + GeneralStencilEntry* x_p_rho = sView.GetEntry(sIndex+32,site); + GeneralStencilEntry* x_p_rho_m_nu = sView.GetEntry(sIndex+33,site); + GeneralStencilEntry* x_p_rho_p_sig = sView.GetEntry(sIndex+34,site); + GeneralStencilEntry* x_p_rho_m_sig = sView.GetEntry(sIndex+35,site); + GeneralStencilEntry* x_m_nu = sView.GetEntry(sIndex+36,site); + GeneralStencilEntry* x_m_nu_p_rho = sView.GetEntry(sIndex+37,site); + GeneralStencilEntry* x_m_nu_p_rho_p_sig = sView.GetEntry(sIndex+38,site); + GeneralStencilEntry* x_m_nu_p_rho_m_sig = sView.GetEntry(sIndex+39,site); + GeneralStencilEntry* x_m_nu_m_rho = sView.GetEntry(sIndex+40,site); + GeneralStencilEntry* x_m_nu_m_rho_p_sig = sView.GetEntry(sIndex+41,site); + GeneralStencilEntry* x_m_nu_m_rho_m_sig = sView.GetEntry(sIndex+42,site); + GeneralStencilEntry* x_m_rho = sView.GetEntry(sIndex+43,site); + GeneralStencilEntry* x_m_rho_p_sig = 
sView.GetEntry(sIndex+44,site); + GeneralStencilEntry* x_m_rho_m_sig = sView.GetEntry(sIndex+45,site); + return {x , x_p_mu , x_p_mu_p_nu , x_p_mu_p_nu_p_rho , + x_p_mu_p_nu_p_rho_m_sig, x_p_mu_p_nu_m_rho , x_p_mu_p_nu_m_rho_m_sig, x_p_mu_p_nu_m_sig , + x_p_mu_m_nu , x_p_mu_m_nu_p_rho , x_p_mu_m_nu_p_rho_p_sig, x_p_mu_m_nu_p_rho_m_sig, + x_p_mu_m_nu_m_rho , x_p_mu_m_nu_m_rho_p_sig, x_p_mu_m_nu_m_rho_m_sig, x_p_mu_m_nu_p_sig , + x_p_mu_m_nu_m_sig , x_p_mu_p_rho , x_p_mu_p_rho_p_sig , x_p_mu_p_rho_m_sig , + x_p_mu_m_rho , x_p_mu_m_rho_p_sig , x_p_mu_m_rho_m_sig , x_p_mu_p_sig , + x_p_mu_m_sig , x_p_nu , x_p_nu_p_rho , x_p_nu_p_rho_p_sig , + x_p_nu_p_rho_m_sig , x_p_nu_m_rho , x_p_nu_m_rho_p_sig , x_p_nu_m_rho_m_sig , + x_p_rho , x_p_rho_m_nu , x_p_rho_p_sig , x_p_rho_m_sig , + x_m_nu , x_m_nu_p_rho , x_m_nu_p_rho_p_sig , x_m_nu_p_rho_m_sig , + x_m_nu_m_rho , x_m_nu_m_rho_p_sig , x_m_nu_m_rho_m_sig , x_m_rho , + x_m_rho_p_sig , x_m_rho_m_sig}; +} + + /*! @brief Allows for ASQTAD-like smearings. */ template class Smear_HISQ : public Gimpl { @@ -750,33 +880,648 @@ class Force_HISQ : public Gimpl { // vecx (contains |X> and |Y>) // l (rat approx and Naik index) // sep (separation between |X> and |Y>) - GF outerProductHISQ(std::vector& vecx, int l, int sep) { + GF outerProductHISQ(std::vector& vecx, std::vector vecdt, std::vector n_orders_naik, int n_naiks, int sep) { auto grid = this->_grid; auto gridRB = this->_gridRB; - GF XY(grid); + GF XY(grid), XY_l(grid); FF X(grid), Y(grid), RB(gridRB); LF XYnu(grid), YXnu(grid); - X = Zero(); Y=Zero(); + + XY = Zero(); + + // These four lines control the loop over rational approximation contributions. As explained in force(), + // l indexes over both Naik epsilon and rational approximation order. 
+ int l = 0; + for (int inaik = 0; inaik < n_naiks; inaik++) { + int rat_order = n_orders_naik[inaik]; + for (int i=0; i(XY,(YXnu-XYnu),nu); - } - return XY; + XY_l = Zero(); XYnu = Zero(); YXnu=Zero(); + for (int nu = 0; nu < Nd; nu++) { + YXnu = outerProduct( Cshift(Y,nu,sep) ,X); + XYnu = outerProduct( Cshift(X,nu,sep) ,Y); + PokeIndex(XY_l,(YXnu-XYnu),nu); + } + XY += vecdt[l]*XY_l; + l++; + } + } + return XY; + } + + + void threeLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil3, Real c3, int mu) const { + + autoView(U_v , Ughost , AcceleratorRead); + autoView(XY_v, XYghost, AcceleratorRead); + autoView(F_v , Fghost , AcceleratorWrite); + int Nsites = U_v.size(); + auto gStencil3_v = gStencil3.View(AcceleratorRead); + typedef decltype(getLink(U_v,gStencil3.GetEntry(0,0),0)) U3matrix; + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix res; + for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + c3*adj(res)); + } + }) } + void fiveLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil5, Real c5, int mu) const { + + autoView(U_v , Ughost , AcceleratorRead); + autoView(XY_v, XYghost, AcceleratorRead); + autoView(F_v , Fghost , AcceleratorWrite); + int Nsites = U_v.size(); + auto gStencil5_v = gStencil5.View(AcceleratorRead); + typedef decltype(getLink(U_v,gStencil5.GetEntry(0,0),0)) U3matrix; + + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix res; + for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + c5*res); + } + } + }) + } + + +// void sevenLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil7, Real c7, int mu) const { +// +// autoView(U_v , Ughost , AcceleratorRead); +// autoView(XY_v, XYghost, AcceleratorRead); +// autoView(F_v , Fghost , AcceleratorWrite); +// int Nsites = U_v.size(); +// auto gStencil7_v = gStencil7.View(AcceleratorRead); +// typedef decltype(getLink(U_v,gStencil7.GetEntry(0,0),0)) U3matrix; +// +// // TODO: After this works, start consolidating some terms. 
This time +// // It should actually work. +// accelerator_for(site,Nsites,Simd::Nsimd(),{ +// U3matrix res, U1; +// for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) - c7*res); +// } +// } +// } +// }) +// } + + // We are calculating the force using the rational approximation. The goal is that we can approximate // (Mdag M)^(-nf/4) = alpha_0 + sum_l alpha_l/(M^dag M + beta_l). Hence the index l runs over the // introduce a different "Naik epsilon" for each M. Hence, we can think of the total application @@ -798,340 +1543,146 @@ class Force_HISQ : public Gimpl { GF XY(grid); // outer product field GF u_force(grid); // accumulates the force + GF temp(grid); momentum = Zero(); - // These four lines control the loop over rational approximation contributions. As explained above, - // l indexes over both Naik epsilon and rational approximation order. - int l = 0; - for (int inaik = 0; inaik < hp.n_naiks; inaik++) { - int rat_order = n_orders_naik[inaik]; - for (int i=0; i Wv(Nd, grid); - std::vector XYdag(Nd, grid); - std::vector ddW(Nd, grid); - for (int mu = 0; mu < Nd; mu++) { - Wv[mu] = PeekIndex(_Wmu, mu); - XYdag[mu] = PeekIndex(XY, mu); - ddW[mu] = Zero(); - } - for (int mu = 0; mu < Nd; mu++) { - ddW[mu] = Cshift( Wv[mu],mu, 1)*Cshift( Wv[mu],mu, 2)* XYdag[mu] - + Cshift( Wv[mu],mu, 1)*Cshift(XYdag[mu],mu,-1)*Cshift( Wv[mu],mu,-1) - + Cshift(XYdag[mu],mu,-2)*Cshift( Wv[mu],mu,-2)*Cshift( Wv[mu],mu,-1); - } - for (int mu = 0; mu < Nd; mu++) { - PokeIndex(temp, ddW[mu], mu); - } - - momentum += hp.asqtad_cnaik*vecdt[l]*temp; + std::vector Wv(Nd, grid); + std::vector XYdag(Nd, grid); + std::vector ddW(Nd, grid); + + // ----------------------------------------- NAIK-LINK DERIVATIVE + if(hp.asqtad_cnaik!=0) { + XY = outerProductHISQ(vecx, vecdt, n_orders_naik, hp.n_naiks, 3); + for (int mu = 0; mu < Nd; mu++) { + Wv[mu] = PeekIndex(_Wmu, mu); + XYdag[mu] = PeekIndex(XY, mu); + ddW[mu] = Zero(); + } + for (int mu = 0; mu < Nd; mu++) { + ddW[mu] = Cshift( Wv[mu],mu, 
1)*Cshift( Wv[mu],mu, 2)* XYdag[mu] + + Cshift( Wv[mu],mu, 1)*Cshift(XYdag[mu],mu,-1)*Cshift( Wv[mu],mu,-1) + + Cshift(XYdag[mu],mu,-2)*Cshift( Wv[mu],mu,-2)*Cshift( Wv[mu],mu,-1); + } + for (int mu = 0; mu < Nd; mu++) { + PokeIndex(temp, ddW[mu], mu); + } + momentum += hp.asqtad_cnaik*temp; + } - // -------------------------- ONE-LINK DERIVATIVE (OUTER PRODUCT) - XY = outerProductHISQ(vecx, l, 1); - momentum += hp.asqtad_c1*vecdt[l]*XY; // It's not clear to me whether this should be fat7 or asqtad. + // -------------------------- ONE-LINK DERIVATIVE (OUTER PRODUCT) + XY = outerProductHISQ(vecx, vecdt, n_orders_naik, hp.n_naiks, 1); + momentum += hp.asqtad_c1*XY; // It's not clear to me whether this should be fat7 or asqtad. - // -------------------------------------------- LEPAGE DERIVATIVE - for (int mu = 0; mu < Nd; mu++) { - ddW[mu] = Zero(); - XYdag[mu] = adj(PeekIndex(XY, mu)); - } + // -------------------------------------------- LEPAGE DERIVATIVE + if(hp.asqtad_clp!=0) { + for (int mu = 0; mu < Nd; mu++) { + Wv[mu] = PeekIndex(_Wmu, mu); + ddW[mu] = Zero(); + XYdag[mu] = adj(PeekIndex(XY, mu)); + } - for (int mu = 0; mu < Nd; mu++) - for (int nu = 0; nu < Nd; nu++) { - if(mu==nu) continue; + for (int mu = 0; mu < Nd; mu++) + for (int nu = 0; nu < Nd; nu++) { + if(mu==nu) continue; - // (forward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftIdentityBackward(XYdag[nu],nu)))))); - // (backward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, - Gimpl::CovShiftBackward(Wv[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftIdentityForward(XYdag[nu],nu)))))); - // (forward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(Wv[nu],nu, + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[nu],nu, + 
Gimpl::CovShiftForward(Wv[mu],mu, Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftBackward(XYdag[nu],nu, - Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); - // (backward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftIdentityBackward(XYdag[nu],nu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(XYdag[nu],nu, - Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); - // (forward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(Wv[nu],nu, - Gimpl::CovShiftForward(Wv[nu],nu, - Gimpl::CovShiftForward(XYdag[mu],mu, - Gimpl::CovShiftBackward(Wv[nu],nu, - Gimpl::CovShiftIdentityBackward(Wv[nu],nu)))))); - // (backward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[nu],nu, - Gimpl::CovShiftBackward(Wv[nu],nu, - Gimpl::CovShiftForward(XYdag[mu],mu, - Gimpl::CovShiftForward(Wv[nu],nu, - Gimpl::CovShiftIdentityForward(Wv[nu],nu)))))); - // (forward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, - Gimpl::CovShiftForward(XYdag[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftIdentityBackward(Wv[nu],nu)))))); - // (backward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftIdentityForward(XYdag[nu],nu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, Gimpl::CovShiftBackward(XYdag[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftIdentityForward(Wv[nu],nu)))))); - // (forward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(XYdag[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[nu],nu, + 
Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(XYdag[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftForward(XYdag[mu],mu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[nu],nu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftForward(XYdag[mu],mu, + Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftIdentityForward(Wv[nu],nu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftForward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftBackward(Wv[nu],nu, - Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); - // (backward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(XYdag[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[nu],nu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftBackward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[nu],nu, - Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); - } - - for (int mu = 0; mu < Nd; mu++) { - PokeIndex(temp, ddW[mu], mu); - } - - momentum += hp.asqtad_clp*vecdt[l]*temp; + Gimpl::CovShiftIdentityForward(Wv[nu],nu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[nu],nu, + 
Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + } + for (int mu = 0; mu < Nd; mu++) { + PokeIndex(temp, ddW[mu], mu); + } - // ------------------------------------------- N-LINK DERIVATIVES - PaddedCell Ghost(_HaloDepth,grid); - GF Ughost = Ghost.Exchange(_Wmu); - GF XYghost = Ghost.Exchange(XY); - GF Fghost = Ghost.Exchange(u_force); - std::vector shifts3 = createHISQStencil(); - std::vector shifts5 = createHISQStencil("5STAPLE"); - GeneralLocalStencil gStencil3(Ughost.Grid(),shifts3); - GeneralLocalStencil gStencil5(Ughost.Grid(),shifts5); + momentum += hp.asqtad_clp*temp; + } - Fghost = Zero(); - for(int mu=0;mu shifts3 = createHISQStencil("3STAPLE"); + std::vector shifts5 = createHISQStencil("5STAPLE"); + std::vector shifts7 = createHISQStencil("7STAPLE"); + GeneralLocalStencil gStencil3(Wghost.Grid(),shifts3); + GeneralLocalStencil gStencil5(Wghost.Grid(),shifts5); + GeneralLocalStencil gStencil7(Wghost.Grid(),shifts7); + + Fghost = Zero(); - autoView(U_v , Ughost , AcceleratorRead); - autoView(XY_v, XYghost, AcceleratorRead); - autoView(F_v , Fghost , AcceleratorWrite); - - int Nsites = U_v.size(); - auto gStencil3_v = gStencil3.View(AcceleratorRead); - auto gStencil5_v = gStencil5.View(AcceleratorRead); - - typedef decltype(getLink(U_v,gStencil3.GetEntry(0,0),0)) U3matrix; - - accelerator_for(site,Nsites,Simd::Nsimd(),{ // 3-LINK DERIVATIVE - U3matrix res; - for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c3*adj(res)); - } - }) - - accelerator_for(site,Nsites,Simd::Nsimd(),{ // 5-LINK DERIVATIVE - U3matrix res, U0, U1, U2, U3, U4, XY0; - for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + hp.asqtad_c5*res); - } - } - }) - -// -// accelerator_for(site,Nsites,Simd::Nsimd(),{ // 7-LINK DERIVATIVE -// U3matrix U0, U1, U2, U3, U4, U5, XY0, V1, XY2, XY3, V4, XY5, W; -// int sigmaIndex = 0; -// for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + hp.fat7_c7*W*vecdt[l]); -// sigmaIndex++; -// } -// } -// }) - - } // end mu loop - - u_force = 
Ghost.Extract(Fghost); - momentum += vecdt[l]*u_force; + for(int mu=0;mu Date: Mon, 12 May 2025 15:23:14 -0600 Subject: [PATCH 29/32] working 7-link (finally) --- Grid/qcd/smearing/HISQSmearing.h | 1047 ++++++++++++++++-------------- 1 file changed, 547 insertions(+), 500 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index b885c32fdf..eb0739660c 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -127,6 +127,7 @@ int HISQStencilIndex(int mu, int nu, int rho=0, int sig=0, std::string kind="3ST else if (kind=="5STAPLE") res = 17*(rho + Nd*nu + Nd*Nd*mu); else if (kind=="7STAPLE") + // seems correct res = 46*(sig + Nd*rho + Nd*Nd*nu + Nd*Nd*Nd*mu); else Grid_error("Unknown staple kind",kind); @@ -176,52 +177,52 @@ std::vector createHISQStencil(std::string kind="3STAPLE") { for(int nu =0;nu (shifts,shiftSignal::NO_SHIFT); - appendShift(shifts,mu); - appendShift(shifts,mu,nu); - appendShift(shifts,mu,nu,rho); - appendShift(shifts,mu,nu,rho,Back(sig)); - appendShift(shifts,mu,nu,Back(rho)); - appendShift(shifts,mu,nu,Back(rho),Back(sig)); - appendShift(shifts,mu,nu,Back(sig)); - appendShift(shifts,mu,Back(nu)); - appendShift(shifts,mu,Back(nu),rho); - appendShift(shifts,mu,Back(nu),rho,sig); - appendShift(shifts,mu,Back(nu),rho,Back(sig)); - appendShift(shifts,mu,Back(nu),Back(rho)); - appendShift(shifts,mu,Back(nu),Back(rho),sig); - appendShift(shifts,mu,Back(nu),Back(rho),Back(sig)); - appendShift(shifts,mu,Back(nu),sig); - appendShift(shifts,mu,Back(nu),Back(sig)); - appendShift(shifts,mu,rho); - appendShift(shifts,mu,rho,sig); - appendShift(shifts,mu,rho,Back(sig)); - appendShift(shifts,mu,Back(rho)); - appendShift(shifts,mu,Back(rho),sig); - appendShift(shifts,mu,Back(rho),Back(sig)); - appendShift(shifts,mu,sig); - appendShift(shifts,mu,Back(sig)); - appendShift(shifts,nu); - appendShift(shifts,nu,rho); - appendShift(shifts,nu,rho,sig); - appendShift(shifts,nu,rho,Back(sig)); - 
appendShift(shifts,nu,Back(rho)); - appendShift(shifts,nu,Back(rho),sig); - appendShift(shifts,nu,Back(rho),Back(sig)); - appendShift(shifts,rho); - appendShift(shifts,rho,Back(nu)); - appendShift(shifts,rho,sig); - appendShift(shifts,rho,Back(sig)); - appendShift(shifts,Back(nu)); - appendShift(shifts,Back(nu),rho); - appendShift(shifts,Back(nu),rho,sig); - appendShift(shifts,Back(nu),rho,Back(sig)); - appendShift(shifts,Back(nu),Back(rho)); - appendShift(shifts,Back(nu),Back(rho),sig); - appendShift(shifts,Back(nu),Back(rho),Back(sig)); - appendShift(shifts,Back(rho)); - appendShift(shifts,Back(rho),sig); - appendShift(shifts,Back(rho),Back(sig)); + appendShift(shifts,shiftSignal::NO_SHIFT);//ok + appendShift(shifts,mu);//ok + appendShift(shifts,mu,nu);//ok + appendShift(shifts,mu,nu,rho);//ok + appendShift(shifts,mu,nu,rho,Back(sig));//ok + appendShift(shifts,mu,nu,Back(rho));//ok + appendShift(shifts,mu,nu,Back(rho),Back(sig));//ok + appendShift(shifts,mu,nu,Back(sig));//ok + appendShift(shifts,mu,Back(nu));//ok + appendShift(shifts,mu,Back(nu),rho);//ok + appendShift(shifts,mu,Back(nu),rho,sig);//ok + appendShift(shifts,mu,Back(nu),rho,Back(sig));//ok + appendShift(shifts,mu,Back(nu),Back(rho));//ok + appendShift(shifts,mu,Back(nu),Back(rho),sig);//ok + appendShift(shifts,mu,Back(nu),Back(rho),Back(sig));//ok + appendShift(shifts,mu,Back(nu),sig);//ok + appendShift(shifts,mu,Back(nu),Back(sig));//ok + appendShift(shifts,mu,rho);//ok + appendShift(shifts,mu,rho,sig);//ok + appendShift(shifts,mu,rho,Back(sig));//ok + appendShift(shifts,mu,Back(rho));//ok + appendShift(shifts,mu,Back(rho),sig);//ok + appendShift(shifts,mu,Back(rho),Back(sig));//ok + appendShift(shifts,mu,sig);//ok + appendShift(shifts,mu,Back(sig));//ok + appendShift(shifts,nu);//ok + appendShift(shifts,nu,rho);//ok + appendShift(shifts,nu,rho,sig);//ok + appendShift(shifts,nu,rho,Back(sig));//ok + appendShift(shifts,nu,Back(rho));//ok + appendShift(shifts,nu,Back(rho),sig);//ok + 
appendShift(shifts,nu,Back(rho),Back(sig));//ok + appendShift(shifts,rho);//ok + appendShift(shifts,rho,Back(nu));//ok + appendShift(shifts,rho,sig);//ok + appendShift(shifts,rho,Back(sig));//ok + appendShift(shifts,Back(nu));//ok + appendShift(shifts,Back(nu),rho);//ok + appendShift(shifts,Back(nu),rho,sig);//ok + appendShift(shifts,Back(nu),rho,Back(sig));//ok + appendShift(shifts,Back(nu),Back(rho));//ok + appendShift(shifts,Back(nu),Back(rho),sig);//ok + appendShift(shifts,Back(nu),Back(rho),Back(sig));//ok + appendShift(shifts,Back(rho));//ok + appendShift(shifts,Back(rho),sig);//ok + appendShift(shifts,Back(rho),Back(sig));//ok } } else { Grid_error("Unknown staple kind",kind); @@ -952,6 +953,7 @@ class Force_HISQ : public Gimpl { } + template void fiveLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil5, Real c5, int mu) const { autoView(U_v , Ughost , AcceleratorRead); @@ -978,6 +980,7 @@ class Force_HISQ : public Gimpl { res = Zero(); +if constexpr(term==0) { res += ( getLink(U_v ,x_p_mu ,rho) * getLink(U_v ,x_p_mu_p_rho ,nu ) * adj(getLink(U_v ,x_p_mu_p_nu ,rho)) @@ -1023,7 +1026,8 @@ class Force_HISQ : public Gimpl { * adj(getLink(U_v ,x_p_mu_m_nu_p_rho,nu )) * getLink(XY_v,x_m_nu_p_rho ,mu ) )*getLink(U_v,x_m_nu_p_rho,nu)*adj(getLink(U_v,x,rho)); - +} +if constexpr(term==1) { res += ( getLink(U_v ,x_p_mu ,nu ) * adj(getLink(XY_v,x_p_mu_p_nu ,rho)) * adj(getLink(U_v ,x_p_mu_p_rho ,nu )) @@ -1063,7 +1067,7 @@ class Force_HISQ : public Gimpl { * adj(getLink(U_v ,x_p_mu_m_nu_m_rho,nu )) * getLink(XY_v,x_m_nu_m_rho ,mu ) )*getLink(U_v,x_m_nu_m_rho,nu)*getLink(U_v,x_m_rho,rho); - +} setLink(F_v[x->_offset](mu), F_v(x->_offset)(mu) + c5*res); } } @@ -1071,455 +1075,484 @@ class Force_HISQ : public Gimpl { } -// void sevenLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil7, Real c7, int mu) const { -// -// autoView(U_v , Ughost , AcceleratorRead); -// autoView(XY_v, XYghost, AcceleratorRead); -// autoView(F_v , 
Fghost , AcceleratorWrite); -// int Nsites = U_v.size(); -// auto gStencil7_v = gStencil7.View(AcceleratorRead); -// typedef decltype(getLink(U_v,gStencil7.GetEntry(0,0),0)) U3matrix; -// -// // TODO: After this works, start consolidating some terms. This time -// // It should actually work. -// accelerator_for(site,Nsites,Simd::Nsimd(),{ -// U3matrix res, U1; -// for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) - c7*res); -// } -// } -// } -// }) -// } + // is the matching correct between sites and shifts? + // double-check xy adjoint thing + template + void sevenLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil7, Real c7, int mu) const { + + autoView(U_v , Ughost , AcceleratorRead); + autoView(XY_v, XYghost, AcceleratorRead); + autoView(F_v , Fghost , AcceleratorWrite); + int Nsites = U_v.size(); + auto gStencil7_v = gStencil7.View(AcceleratorRead); + typedef decltype(getLink(U_v,gStencil7.GetEntry(0,0),0)) U3matrix; + + // TODO: After this works, start consolidating some terms. This time + // It should actually work. + accelerator_for(site,Nsites,Simd::Nsimd(),{ + U3matrix res, U1; + for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + c7*res); + } + } + } + }) + } // We are calculating the force using the rational approximation. 
The goal is that we can approximate @@ -1675,9 +1708,23 @@ class Force_HISQ : public Gimpl { for(int mu=0;mu( Fghost, Wghost, XYghost, gStencil5, hp.asqtad_c5, mu); + if(hp.asqtad_c5!=0) fiveLinkDeriv<1>( Fghost, Wghost, XYghost, gStencil5, hp.asqtad_c5, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<0>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<1>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<2>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<3>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<4>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<5>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<6>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<7>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<8>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<9>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<10>(Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<11>(Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<12>(Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); + if(hp.asqtad_c7!=0) sevenLinkDeriv<13>(Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); } // end mu loop From 896dad959fce6241a0aa649835f49514b2896db5 Mon Sep 17 00:00:00 2001 From: "D. A. 
Clarke" Date: Tue, 13 May 2025 14:37:51 -0600 Subject: [PATCH 30/32] refactor 7-link and add test --- Grid/qcd/smearing/HISQSmearing.h | 542 ++++++++++++------------------- tests/forces/Test_HISQ_force.cc | 15 + 2 files changed, 215 insertions(+), 342 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index eb0739660c..3a7328c3b6 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -688,7 +688,6 @@ class Smear_HISQ : public Gimpl { } }); }; - }; @@ -712,7 +711,7 @@ class Force_HISQ : public Gimpl { typedef iColourMatrix ComplexColourMatrix; RealScalar _Scut=-1; // Cutoff for U(3) projection eigenvalues, set at initialization - int _HaloDepth=1; + int _HaloDepth=1; // Depth of padded cell HISQParameters _linkParams; HISQReunitSVDParameters _reunitParams; @@ -813,9 +812,9 @@ class Force_HISQ : public Gimpl { RealScalar u2, u3, u4, u5, u6, u7, u8, v2 ,v3, v4, v5, v6, w2, w3, w4, w5; - u2 = u *u; u3 = u2*u; u4 = u3*u; u5 = u4*u; u6 = u5*u; u7 = u6*u; u8 = u7*u; - v2 = v *v; v3 = v2*v; v4 = v3*v; v5 = v4*v; v6 = v5*v; - w2 = w *w; w3 = w2*w; w4 = w3*w; w5 = w4*w; + u2 = u*u; u3 = u2*u; u4 = u3*u; u5 = u4*u; u6 = u5*u; u7 = u6*u; u8 = u7*u; + v2 = v*v; v3 = v2*v; v4 = v3*v; v5 = v4*v; v6 = v5*v; + w2 = w*w; w3 = w2*w; w4 = w3*w; w5 = w4*w; // eq (C10) auto d = 2*w3*(u*v-w)*(u*v-w)*(u*v-w); @@ -922,6 +921,12 @@ class Force_HISQ : public Gimpl { } + // Intent: OUT--Fghost (accumulates 3-link derivative contribution) + // IN--Ughost (thin links) + // XYghost (outer product) + // gStencil3 (3-link stencil) + // c3 + // mu void threeLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil3, Real c3, int mu) const { autoView(U_v , Ughost , AcceleratorRead); @@ -953,6 +958,12 @@ class Force_HISQ : public Gimpl { } + // Intent: OUT--Fghost (accumulates 5-link derivative contribution) + // IN--Ughost (thin links) + // XYghost (outer product) + // gStencil5 (5-link stencil) + // c5 + 
// mu template void fiveLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil5, Real c5, int mu) const { @@ -980,6 +991,9 @@ class Force_HISQ : public Gimpl { res = Zero(); + // The idea behind the constexpr syntax is to reduce compile times. These seem to grow + // with increasing kernel size. The template parameter term lets the user choose which + // part of the kernel to compile, and hence constexpr is evaluated at compile time. if constexpr(term==0) { res += ( getLink(U_v ,x_p_mu ,rho) * getLink(U_v ,x_p_mu_p_rho ,nu ) @@ -1075,8 +1089,6 @@ if constexpr(term==1) { } - // is the matching correct between sites and shifts? - // double-check xy adjoint thing template void sevenLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil7, Real c7, int mu) const { @@ -1087,8 +1099,6 @@ if constexpr(term==1) { auto gStencil7_v = gStencil7.View(AcceleratorRead); typedef decltype(getLink(U_v,gStencil7.GetEntry(0,0),0)) U3matrix; - // TODO: After this works, start consolidating some terms. This time - // It should actually work. 
accelerator_for(site,Nsites,Simd::Nsimd(),{ U3matrix res, U1; for(int nu=0;nu_offset](mu), F_v(x->_offset)(mu) + c7*res); } diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc index 8bd7f97eea..006b976bd5 100644 --- a/tests/forces/Test_HISQ_force.cc +++ b/tests/forces/Test_HISQ_force.cc @@ -124,6 +124,8 @@ bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.lp.control"); // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.naik.control"); // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.5link.control"); +// NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.7link.control"); +// NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.level1.control"); // return true; } @@ -236,6 +238,19 @@ int main (int argc, char** argv) { 1, 0, 0, 0, 0, 1, 0, 1, 0, 0 ); + // Check the 7-link + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.7link.control"); + pass *= testForce(GRID, Umu, Ucontrol, + 1, 0, 0, 0, 0, + 1, 0, 0, 1, 0 ); + + // Check level 1 smearing + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.level1.control"); + pass *= testForce(GRID, Umu, Ucontrol, + 1, 0, 0, 0, -1/24., + 1, -1/16., 1/64., -1/384., -1/8. ); + + if(pass){ Grid_pass("All tests passed."); } else { From 9debaf5a7f0b20f148dd2ce481220651ce3bc43a Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Tue, 3 Jun 2025 00:17:20 -0600 Subject: [PATCH 31/32] working fermion forcegit add . 
--- Grid/qcd/smearing/HISQSmearing.h | 219 +++++++++++++++++++------------ 1 file changed, 134 insertions(+), 85 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 3a7328c3b6..72f524b9ab 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -177,52 +177,52 @@ std::vector createHISQStencil(std::string kind="3STAPLE") { for(int nu =0;nu (shifts,shiftSignal::NO_SHIFT);//ok - appendShift(shifts,mu);//ok - appendShift(shifts,mu,nu);//ok - appendShift(shifts,mu,nu,rho);//ok - appendShift(shifts,mu,nu,rho,Back(sig));//ok - appendShift(shifts,mu,nu,Back(rho));//ok - appendShift(shifts,mu,nu,Back(rho),Back(sig));//ok - appendShift(shifts,mu,nu,Back(sig));//ok - appendShift(shifts,mu,Back(nu));//ok - appendShift(shifts,mu,Back(nu),rho);//ok - appendShift(shifts,mu,Back(nu),rho,sig);//ok - appendShift(shifts,mu,Back(nu),rho,Back(sig));//ok - appendShift(shifts,mu,Back(nu),Back(rho));//ok - appendShift(shifts,mu,Back(nu),Back(rho),sig);//ok - appendShift(shifts,mu,Back(nu),Back(rho),Back(sig));//ok - appendShift(shifts,mu,Back(nu),sig);//ok - appendShift(shifts,mu,Back(nu),Back(sig));//ok - appendShift(shifts,mu,rho);//ok - appendShift(shifts,mu,rho,sig);//ok - appendShift(shifts,mu,rho,Back(sig));//ok - appendShift(shifts,mu,Back(rho));//ok - appendShift(shifts,mu,Back(rho),sig);//ok - appendShift(shifts,mu,Back(rho),Back(sig));//ok - appendShift(shifts,mu,sig);//ok - appendShift(shifts,mu,Back(sig));//ok - appendShift(shifts,nu);//ok - appendShift(shifts,nu,rho);//ok - appendShift(shifts,nu,rho,sig);//ok - appendShift(shifts,nu,rho,Back(sig));//ok - appendShift(shifts,nu,Back(rho));//ok - appendShift(shifts,nu,Back(rho),sig);//ok - appendShift(shifts,nu,Back(rho),Back(sig));//ok - appendShift(shifts,rho);//ok - appendShift(shifts,rho,Back(nu));//ok - appendShift(shifts,rho,sig);//ok - appendShift(shifts,rho,Back(sig));//ok - appendShift(shifts,Back(nu));//ok - 
appendShift(shifts,Back(nu),rho);//ok - appendShift(shifts,Back(nu),rho,sig);//ok - appendShift(shifts,Back(nu),rho,Back(sig));//ok - appendShift(shifts,Back(nu),Back(rho));//ok - appendShift(shifts,Back(nu),Back(rho),sig);//ok - appendShift(shifts,Back(nu),Back(rho),Back(sig));//ok - appendShift(shifts,Back(rho));//ok - appendShift(shifts,Back(rho),sig);//ok - appendShift(shifts,Back(rho),Back(sig));//ok + appendShift(shifts,shiftSignal::NO_SHIFT); + appendShift(shifts,mu); + appendShift(shifts,mu,nu); + appendShift(shifts,mu,nu,rho); + appendShift(shifts,mu,nu,rho,Back(sig)); + appendShift(shifts,mu,nu,Back(rho)); + appendShift(shifts,mu,nu,Back(rho),Back(sig)); + appendShift(shifts,mu,nu,Back(sig)); + appendShift(shifts,mu,Back(nu)); + appendShift(shifts,mu,Back(nu),rho); + appendShift(shifts,mu,Back(nu),rho,sig); + appendShift(shifts,mu,Back(nu),rho,Back(sig)); + appendShift(shifts,mu,Back(nu),Back(rho)); + appendShift(shifts,mu,Back(nu),Back(rho),sig); + appendShift(shifts,mu,Back(nu),Back(rho),Back(sig)); + appendShift(shifts,mu,Back(nu),sig); + appendShift(shifts,mu,Back(nu),Back(sig)); + appendShift(shifts,mu,rho); + appendShift(shifts,mu,rho,sig); + appendShift(shifts,mu,rho,Back(sig)); + appendShift(shifts,mu,Back(rho)); + appendShift(shifts,mu,Back(rho),sig); + appendShift(shifts,mu,Back(rho),Back(sig)); + appendShift(shifts,mu,sig); + appendShift(shifts,mu,Back(sig)); + appendShift(shifts,nu); + appendShift(shifts,nu,rho); + appendShift(shifts,nu,rho,sig); + appendShift(shifts,nu,rho,Back(sig)); + appendShift(shifts,nu,Back(rho)); + appendShift(shifts,nu,Back(rho),sig); + appendShift(shifts,nu,Back(rho),Back(sig)); + appendShift(shifts,rho); + appendShift(shifts,rho,Back(nu)); + appendShift(shifts,rho,sig); + appendShift(shifts,rho,Back(sig)); + appendShift(shifts,Back(nu)); + appendShift(shifts,Back(nu),rho); + appendShift(shifts,Back(nu),rho,sig); + appendShift(shifts,Back(nu),rho,Back(sig)); + appendShift(shifts,Back(nu),Back(rho)); + 
appendShift(shifts,Back(nu),Back(rho),sig); + appendShift(shifts,Back(nu),Back(rho),Back(sig)); + appendShift(shifts,Back(rho)); + appendShift(shifts,Back(rho),sig); + appendShift(shifts,Back(rho),Back(sig)); } } else { Grid_error("Unknown staple kind",kind); @@ -923,14 +923,14 @@ class Force_HISQ : public Gimpl { // Intent: OUT--Fghost (accumulates 3-link derivative contribution) // IN--Ughost (thin links) - // XYghost (outer product) + // XYCghost (outer product) // gStencil3 (3-link stencil) // c3 // mu - void threeLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil3, Real c3, int mu) const { + void threeLinkDeriv(GF& Fghost, GF& Ughost, GF& XYCghost, GeneralLocalStencil gStencil3, Real c3, int mu) const { autoView(U_v , Ughost , AcceleratorRead); - autoView(XY_v, XYghost, AcceleratorRead); + autoView(XY_v, XYCghost, AcceleratorRead); autoView(F_v , Fghost , AcceleratorWrite); int Nsites = U_v.size(); auto gStencil3_v = gStencil3.View(AcceleratorRead); @@ -960,15 +960,15 @@ class Force_HISQ : public Gimpl { // Intent: OUT--Fghost (accumulates 5-link derivative contribution) // IN--Ughost (thin links) - // XYghost (outer product) + // XYCghost (outer product) // gStencil5 (5-link stencil) // c5 // mu template - void fiveLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil5, Real c5, int mu) const { + void fiveLinkDeriv(GF& Fghost, GF& Ughost, GF& XYCghost, GeneralLocalStencil gStencil5, Real c5, int mu) const { autoView(U_v , Ughost , AcceleratorRead); - autoView(XY_v, XYghost, AcceleratorRead); + autoView(XY_v, XYCghost, AcceleratorRead); autoView(F_v , Fghost , AcceleratorWrite); int Nsites = U_v.size(); auto gStencil5_v = gStencil5.View(AcceleratorRead); @@ -1090,10 +1090,10 @@ if constexpr(term==1) { template - void sevenLinkDeriv(GF& Fghost, GF& Ughost, GF& XYghost, GeneralLocalStencil gStencil7, Real c7, int mu) const { + void sevenLinkDeriv(GF& Fghost, GF& Ughost, GF& XYCghost, GeneralLocalStencil gStencil7, 
Real c7, int mu) const { autoView(U_v , Ughost , AcceleratorRead); - autoView(XY_v, XYghost, AcceleratorRead); + autoView(XY_v, XYCghost, AcceleratorRead); autoView(F_v , Fghost , AcceleratorWrite); int Nsites = U_v.size(); auto gStencil7_v = gStencil7.View(AcceleratorRead); @@ -1443,6 +1443,7 @@ if constexpr(term==13) { std::vector ddW(Nd, grid); // ----------------------------------------- NAIK-LINK DERIVATIVE + if(hp.asqtad_cnaik!=0) { XY = outerProductHISQ(vecx, vecdt, n_orders_naik, hp.n_naiks, 3); for (int mu = 0; mu < Nd; mu++) { @@ -1464,9 +1465,10 @@ if constexpr(term==13) { // -------------------------- ONE-LINK DERIVATIVE (OUTER PRODUCT) + XY = outerProductHISQ(vecx, vecdt, n_orders_naik, hp.n_naiks, 1); - momentum += hp.asqtad_c1*XY; // It's not clear to me whether this should be fat7 or asqtad. + momentum += hp.asqtad_c1*XY; // -------------------------------------------- LEPAGE DERIVATIVE if(hp.asqtad_clp!=0) { @@ -1550,45 +1552,93 @@ if constexpr(term==13) { } - // ------------------------------------------- N-LINK DERIVATIVES + // ---------------------------------- N-LINK DERIVATIVES (ASQTAD) + PaddedCell Ghost(_HaloDepth,grid); - GF Wghost = Ghost.Exchange(_Wmu); - GF XYghost = Ghost.Exchange(XY); - GF Fghost = Ghost.Exchange(u_force); + + u_force = Zero(); + + GF UWghost = Ghost.Exchange(_Wmu); // Plays role of U or W + GF XYCghost = Ghost.Exchange(XY); // Plays role of XY or chain rule = dW/dV*dX/dW + GF Fghost = Ghost.Exchange(u_force); + std::vector shifts3 = createHISQStencil("3STAPLE"); std::vector shifts5 = createHISQStencil("5STAPLE"); std::vector shifts7 = createHISQStencil("7STAPLE"); - GeneralLocalStencil gStencil3(Wghost.Grid(),shifts3); - GeneralLocalStencil gStencil5(Wghost.Grid(),shifts5); - GeneralLocalStencil gStencil7(Wghost.Grid(),shifts7); - - Fghost = Zero(); + GeneralLocalStencil gStencil3(UWghost.Grid(),shifts3); + GeneralLocalStencil gStencil5(UWghost.Grid(),shifts5); + GeneralLocalStencil gStencil7(UWghost.Grid(),shifts7); 
for(int mu=0;mu( Fghost, Wghost, XYghost, gStencil5, hp.asqtad_c5, mu); - if(hp.asqtad_c5!=0) fiveLinkDeriv<1>( Fghost, Wghost, XYghost, gStencil5, hp.asqtad_c5, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<0>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<1>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<2>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<3>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<4>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<5>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<6>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<7>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<8>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<9>( Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<10>(Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<11>(Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<12>(Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - if(hp.asqtad_c7!=0) sevenLinkDeriv<13>(Fghost, Wghost, XYghost, gStencil7, hp.asqtad_c7, mu); - - } // end mu loop + if(hp.asqtad_c3!=0) { + threeLinkDeriv( Fghost, UWghost, XYCghost, gStencil3, hp.asqtad_c3, mu); + } + if(hp.asqtad_c5!=0) { + fiveLinkDeriv<0>( Fghost, UWghost, XYCghost, gStencil5, hp.asqtad_c5, mu); + fiveLinkDeriv<1>( Fghost, UWghost, XYCghost, gStencil5, hp.asqtad_c5, mu); + } + if(hp.asqtad_c7!=0) { + sevenLinkDeriv<0>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<1>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + 
sevenLinkDeriv<2>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<3>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<4>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<5>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<6>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<7>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<8>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<9>( Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<10>(Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<11>(Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<12>(Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + sevenLinkDeriv<13>(Fghost, UWghost, XYCghost, gStencil7, hp.asqtad_c7, mu); + } + } u_force = Ghost.Extract(Fghost); momentum += u_force; + // ------------------------------------------- U3 PROJ DERIVATIVE + + u_force = Zero(); + ddVprojectU3(u_force, _Vmu, momentum, 5e-5); + momentum = hp.fat7_c1*u_force; + + // ------------------------------------ N-LINK DERIVATIVES (FAT7) + + UWghost = Ghost.Exchange(_Umu); + XYCghost = Ghost.Exchange(u_force); + Fghost = Zero(); + + for(int mu=0;mu( Fghost, UWghost, XYCghost, gStencil5, hp.fat7_c5, mu); + fiveLinkDeriv<1>( Fghost, UWghost, XYCghost, gStencil5, hp.fat7_c5, mu); + } + if(hp.fat7_c7!=0) { + sevenLinkDeriv<0>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<1>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<2>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<3>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<4>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<5>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<6>( Fghost, UWghost, 
XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<7>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<8>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<9>( Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<10>(Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<11>(Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<12>(Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + sevenLinkDeriv<13>(Fghost, UWghost, XYCghost, gStencil7, hp.fat7_c7, mu); + } + } // end mu loop + + if(hp.fat7_c3!=0 || hp.fat7_c5!=0 || hp.fat7_c7!=0) { + momentum += Ghost.Extract(Fghost); + } // Close the loop: Multiply on the left by U_mu(x) LF mom(grid); @@ -1598,7 +1648,6 @@ if constexpr(term==13) { } } - }; From eda96545ebe63824ef49171c77c1bcc69b7cd2e9 Mon Sep 17 00:00:00 2001 From: "D. A. Clarke" Date: Tue, 24 Jun 2025 11:45:19 -0600 Subject: [PATCH 32/32] some cleanup; force test (w/o multiple naik) --- Grid/qcd/smearing/HISQSmearing.h | 272 ++++++++++++++++--------------- tests/forces/Test_HISQ_force.cc | 39 +++-- 2 files changed, 164 insertions(+), 147 deletions(-) diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index 72f524b9ab..a55f8d4a71 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -749,7 +749,7 @@ class Force_HISQ : public Gimpl { // u_force (slot derivative into this force), // delta (force cutoff) // Follow MILC 10.1103/PhysRevD.82.074501 - void ddVprojectU3(GF& u_deriv, GF& u_mu, GF& u_force, RealScalar const delta=5e-5) { + void projU3Deriv(GF& u_deriv, GF& u_mu, GF& u_force, RealScalar const delta=5e-5) { conformable(u_force,u_mu); conformable(u_deriv,u_mu); @@ -898,16 +898,16 @@ class Force_HISQ : public Gimpl { int rat_order = n_orders_naik[inaik]; for (int i=0; i vecdt, std::vector& vecx, std::vector n_orders_naik, int n_naiks, Real cnaik) { + auto grid = this->_grid; + GF 
temp(grid); + std::vector Wv(Nd, grid); + std::vector XYdag(Nd, grid); + std::vector ddW(Nd, grid); + temp = outerProductHISQ(vecx, vecdt, n_orders_naik, n_naiks, 3); + for (int mu = 0; mu < Nd; mu++) { + Wv[mu] = PeekIndex(_Wmu, mu); + XYdag[mu] = PeekIndex(temp, mu); + ddW[mu] = Zero(); + } + for (int mu = 0; mu < Nd; mu++) { + ddW[mu] = Cshift( Wv[mu],mu, 1)*Cshift( Wv[mu],mu, 2)* XYdag[mu] + + Cshift( Wv[mu],mu, 1)*Cshift(XYdag[mu],mu,-1)*Cshift( Wv[mu],mu,-1) + + Cshift(XYdag[mu],mu,-2)*Cshift( Wv[mu],mu,-2)*Cshift( Wv[mu],mu,-1); + } + for (int mu = 0; mu < Nd; mu++) { + PokeIndex(temp, ddW[mu], mu); + } + return cnaik*temp; + } + + + GF lepageLinkDeriv(GF& XY, Real clp) { + + auto grid = this->_grid; + GF temp(grid); + std::vector Wv(Nd, grid); + std::vector XYdag(Nd, grid); + std::vector ddW(Nd, grid); + + for (int mu = 0; mu < Nd; mu++) { + Wv[mu] = PeekIndex(_Wmu, mu); + ddW[mu] = Zero(); + XYdag[mu] = adj(PeekIndex(XY, mu)); + } + + for (int mu = 0; mu < Nd; mu++) + for (int nu = 0; nu < Nd; nu++) { + if(mu==nu) continue; + + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftIdentityBackward(XYdag[nu],nu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftIdentityForward(XYdag[nu],nu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftBackward(XYdag[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(XYdag[nu],nu, + 
Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftForward(XYdag[mu],mu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[nu],nu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftForward(XYdag[mu],mu, + Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftIdentityForward(Wv[nu],nu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftForward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftIdentityBackward(Wv[nu],nu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, + Gimpl::CovShiftBackward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftIdentityForward(Wv[nu],nu)))))); + // (forward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftBackward(Wv[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + // (backward) + ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(XYdag[nu],nu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[mu],mu, + Gimpl::CovShiftForward(Wv[nu],nu, + Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); + } + + for (int mu = 0; mu < Nd; mu++) { + PokeIndex(temp, ddW[mu], mu); + } + + return clp*temp; } @@ -1420,147 +1533,40 @@ if constexpr(term==13) { // there is a possibly different order_inaik, then the operator has an index l running up to order_inaik. // All terms with inaik=0 correspond to epsilon_Naik = 0. // - // Intent: OUT--momentum + // Intent: OUT--u_force // IN--vecdt: Monte Carlo separation vector times alpha_{inaik,0}. // vecx: A vector of fermion fields coming from the MILC code. 
It is organized so that // |X_l> = (Mdag M + beta_l)^-1 |Phi> is on even sites, |Y_l>=D|X_l> is on odd sites. // All the |X_l> for i=0 come first in memory, followed by all the |X_l> with // i=1 in memory, and so on. // n_orders_naik: Indexed by unique naik epsilon. - void force(GF& momentum, std::vector vecdt, std::vector& vecx, std::vector n_orders_naik) { + void force(GF& u_force, std::vector vecdt, std::vector& vecx, std::vector n_orders_naik) { HISQParameters hp = this->_linkParams; auto grid = this->_grid; - GF XY(grid); // outer product field - GF u_force(grid); // accumulates the force - GF temp(grid); - - momentum = Zero(); - - std::vector Wv(Nd, grid); - std::vector XYdag(Nd, grid); - std::vector ddW(Nd, grid); - - // ----------------------------------------- NAIK-LINK DERIVATIVE - - if(hp.asqtad_cnaik!=0) { - XY = outerProductHISQ(vecx, vecdt, n_orders_naik, hp.n_naiks, 3); - for (int mu = 0; mu < Nd; mu++) { - Wv[mu] = PeekIndex(_Wmu, mu); - XYdag[mu] = PeekIndex(XY, mu); - ddW[mu] = Zero(); - } - for (int mu = 0; mu < Nd; mu++) { - ddW[mu] = Cshift( Wv[mu],mu, 1)*Cshift( Wv[mu],mu, 2)* XYdag[mu] - + Cshift( Wv[mu],mu, 1)*Cshift(XYdag[mu],mu,-1)*Cshift( Wv[mu],mu,-1) - + Cshift(XYdag[mu],mu,-2)*Cshift( Wv[mu],mu,-2)*Cshift( Wv[mu],mu,-1); - } - for (int mu = 0; mu < Nd; mu++) { - PokeIndex(temp, ddW[mu], mu); - } - - momentum += hp.asqtad_cnaik*temp; - } - - - // -------------------------- ONE-LINK DERIVATIVE (OUTER PRODUCT) - - XY = outerProductHISQ(vecx, vecdt, n_orders_naik, hp.n_naiks, 1); - - momentum += hp.asqtad_c1*XY; + GF XY(grid); // outer product field + GF temp(grid); // used to accumulate N-link force contributions and projU3Deriv - // -------------------------------------------- LEPAGE DERIVATIVE - if(hp.asqtad_clp!=0) { - for (int mu = 0; mu < Nd; mu++) { - Wv[mu] = PeekIndex(_Wmu, mu); - ddW[mu] = Zero(); - XYdag[mu] = adj(PeekIndex(XY, mu)); - } + u_force = Zero(); - for (int mu = 0; mu < Nd; mu++) - for (int nu = 0; nu < Nd; nu++) { - 
if(mu==nu) continue; + if(hp.asqtad_cnaik!=0) u_force += naikLinkDeriv(vecdt, vecx, n_orders_naik, hp.n_naiks, hp.asqtad_cnaik); - // (forward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftIdentityBackward(XYdag[nu],nu)))))); - // (backward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, - Gimpl::CovShiftBackward(Wv[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftIdentityForward(XYdag[nu],nu)))))); - // (forward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(Wv[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftBackward(XYdag[nu],nu, - Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); - // (backward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(XYdag[nu],nu, - Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); - // (forward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(Wv[nu],nu, - Gimpl::CovShiftForward(Wv[nu],nu, - Gimpl::CovShiftForward(XYdag[mu],mu, - Gimpl::CovShiftBackward(Wv[nu],nu, - Gimpl::CovShiftIdentityBackward(Wv[nu],nu)))))); - // (backward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[nu],nu, - Gimpl::CovShiftBackward(Wv[nu],nu, - Gimpl::CovShiftForward(XYdag[mu],mu, - Gimpl::CovShiftForward(Wv[nu],nu, - Gimpl::CovShiftIdentityForward(Wv[nu],nu)))))); - // (forward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, - Gimpl::CovShiftForward(XYdag[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftIdentityBackward(Wv[nu],nu)))))); - // (backward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(Wv[mu],mu, - Gimpl::CovShiftBackward(XYdag[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - 
Gimpl::CovShiftIdentityForward(Wv[nu],nu)))))); - // (forward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftForward(XYdag[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftBackward(Wv[nu],nu, - Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); - // (backward) - ddW[mu] = ddW[mu] + adj(Gimpl::CovShiftBackward(XYdag[nu],nu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[mu],mu, - Gimpl::CovShiftForward(Wv[nu],nu, - Gimpl::CovShiftIdentityBackward(Wv[mu],mu)))))); - } + XY = outerProductHISQ(vecx, vecdt, n_orders_naik, hp.n_naiks, 1); + u_force += hp.asqtad_c1*XY; - for (int mu = 0; mu < Nd; mu++) { - PokeIndex(temp, ddW[mu], mu); - } - - momentum += hp.asqtad_clp*temp; - } + if(hp.asqtad_clp!=0) u_force += lepageLinkDeriv(XY, hp.asqtad_clp); // ---------------------------------- N-LINK DERIVATIVES (ASQTAD) PaddedCell Ghost(_HaloDepth,grid); - u_force = Zero(); + temp = Zero(); GF UWghost = Ghost.Exchange(_Wmu); // Plays role of U or W GF XYCghost = Ghost.Exchange(XY); // Plays role of XY or chain rule = dW/dV*dX/dW - GF Fghost = Ghost.Exchange(u_force); + GF Fghost = Ghost.Exchange(temp); std::vector shifts3 = createHISQStencil("3STAPLE"); std::vector shifts5 = createHISQStencil("5STAPLE"); @@ -1595,19 +1601,17 @@ if constexpr(term==13) { } } - u_force = Ghost.Extract(Fghost); - momentum += u_force; + u_force += Ghost.Extract(Fghost); // ------------------------------------------- U3 PROJ DERIVATIVE - u_force = Zero(); - ddVprojectU3(u_force, _Vmu, momentum, 5e-5); - momentum = hp.fat7_c1*u_force; + projU3Deriv(temp, _Vmu, u_force, 5e-5); + u_force = hp.fat7_c1*temp; // ------------------------------------ N-LINK DERIVATIVES (FAT7) UWghost = Ghost.Exchange(_Umu); - XYCghost = Ghost.Exchange(u_force); + XYCghost = Ghost.Exchange(temp); Fghost = Zero(); for(int mu=0;mu(_Umu, mu) * PeekIndex(momentum, mu); - PokeIndex(momentum, mom, mu); + force_mu = PeekIndex(_Umu, mu) * PeekIndex(u_force, mu); + 
PokeIndex(u_force, force_mu, mu); } } diff --git a/tests/forces/Test_HISQ_force.cc b/tests/forces/Test_HISQ_force.cc index 006b976bd5..3d06c509b5 100644 --- a/tests/forces/Test_HISQ_force.cc +++ b/tests/forces/Test_HISQ_force.cc @@ -60,7 +60,7 @@ typedef typename GIMPL::FermionField FF; // This is a sort of contrived test situation. The goal is to make sure the fermion force // code is stable against future changes and get an idea how the HISQ force interface works. -bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, +bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, std::string testKind, Real fat7_c1 , Real fat7_c3 , Real fat7_c5 , Real fat7_c7 , Real cnaik, Real asqtad_c1, Real asqtad_c3, Real asqtad_c5, Real asqtad_c7, Real asqtad_clp) { @@ -108,6 +108,10 @@ bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, } } + Grid_log(""); + Grid_log(" TEST "+testKind); + Grid_log(""); + hisq_force.force(Umom,vecdt,vecx,n_orders_naik); LGF diff(&GRID); diff = Ucontrol-Umom; @@ -125,7 +129,8 @@ bool testForce(GridCartesian& GRID, LGF Umu, LGF Ucontrol, // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.naik.control"); // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.5link.control"); // NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.7link.control"); -// NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.level1.control"); +// NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.level2.control"); +// NerscIO::writeConfiguration(Umom,"nersc.l8t4b3360.Umom.level12.control"); // return true; } @@ -160,8 +165,12 @@ bool testddUProj(GridCartesian& GRID, LGF Umu, LGF Ucontrol) { Force_HISQ hisq_force(&GRID, hisq_param, Wmu, Vmu, Umu, hisq_reunit_svd); + Grid_log(""); + Grid_log(" TEST DERIVATIVE U3 PROJECTION"); + Grid_log(""); + LGF diff(&GRID); - hisq_force.ddVprojectU3(Uforce, Umu, Umu, 5e-5); + hisq_force.projU3Deriv(Uforce, Umu, Umu, 5e-5); diff = Ucontrol-Uforce; auto absDiff = norm2(diff)/norm2(Ucontrol); if (absDiff < 
1e-30) { @@ -210,46 +219,52 @@ int main (int argc, char** argv) { // Check the 1-link (outer product) NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.XY.control"); - pass *= testForce(GRID, Umu, Ucontrol, + pass *= testForce(GRID, Umu, Ucontrol, "1-link", 1, 0, 0, 0, 0, 1, 0, 0, 0, 0 ); // Check the 3-link NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.3link.control"); - pass *= testForce(GRID, Umu, Ucontrol, + pass *= testForce(GRID, Umu, Ucontrol, "3-link", 1, 0, 0, 0, 0, 1, 1, 0, 0, 0 ); // Check the LePage-link NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.lp.control"); - pass *= testForce(GRID, Umu, Ucontrol, + pass *= testForce(GRID, Umu, Ucontrol, "LePage", 1, 0, 0, 0, 0, 1, 0, 0, 0, 1 ); // Check the Naik-link NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.naik.control"); - pass *= testForce(GRID, Umu, Ucontrol, + pass *= testForce(GRID, Umu, Ucontrol, "Naik", 1, 0, 0, 0, 1, 1, 0, 0, 0, 0 ); // Check the 5-link NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.5link.control"); - pass *= testForce(GRID, Umu, Ucontrol, + pass *= testForce(GRID, Umu, Ucontrol, "5-link", 1, 0, 0, 0, 0, 1, 0, 1, 0, 0 ); // Check the 7-link NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.7link.control"); - pass *= testForce(GRID, Umu, Ucontrol, + pass *= testForce(GRID, Umu, Ucontrol, "7-link", 1, 0, 0, 0, 0, 1, 0, 0, 1, 0 ); - // Check level 1 smearing - NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.level1.control"); - pass *= testForce(GRID, Umu, Ucontrol, + // Check level 2 smearing + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.level2.control"); + pass *= testForce(GRID, Umu, Ucontrol, "level 2", 1, 0, 0, 0, -1/24., 1, -1/16., 1/64., -1/384., -1/8. 
); + // Check level 1+2 smearing + NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.Umom.level12.control"); + pass *= testForce(GRID, Umu, Ucontrol, "level 1+2", + 1/8., -1/16., 1/64., -1/384., -1/24., + 1 , -1/16., 1/64., -1/384., -1/8. ); + if(pass){ Grid_pass("All tests passed.");