42 const char *funcname =
"Average_Plaquette";
50 __managed__
double hgs = 0; __managed__
double hgt = 0;
51 cuAverage_Plaquette(&hgs, &hgt, ut[0], ut[1], iu,dimGrid,dimBlock);
53 double hgs = 0;
double hgt = 0;
54 for(
int mu=1;mu<
ndim;mu++)
55 for(
int nu=0;nu<mu;nu++)
58#pragma omp parallel
for simd reduction(+:hgs,hgt)
59 for(
int i=0;i<
kvol;i++){
64 case(
ndim-1): hgt -= creal(Sigma[0]);
67 default: hgs -= creal(Sigma[0]);
75 *avplaqs=-hgs/(3.0*
gvol); *avplaqt=-hgt/(
gvol*3.0);
79 printf(
"hgs=%e hgt=%e hg=%e\n", hgs, hgt, *hg);
84#pragma omp declare simd
102 const char *funcname =
"SU2plaq";
103 int uidm = iu[mu+
ndim*i];
115 Sigma[0]=ut[0][i*
ndim+mu]*ut[0][uidm*
ndim+nu]-ut[1][i*
ndim+mu]*conj(ut[1][uidm*
ndim+nu]);
116 Sigma[1]=ut[0][i*
ndim+mu]*ut[1][uidm*
ndim+nu]+ut[1][i*
ndim+mu]*conj(ut[0][uidm*
ndim+nu]);
118 int uidn = iu[nu+
ndim*i];
119 Complex_f a11=Sigma[0]*conj(ut[0][uidn*
ndim+mu])+Sigma[1]*conj(ut[1][uidn*
ndim+mu]);
122 Sigma[0]=a11*conj(ut[0][i*
ndim+nu])+a12*conj(ut[1][i*
ndim+nu]);
123 Sigma[1]=-a11*ut[1][i*
ndim+nu]+a12*ut[0][i*
ndim+mu];
139 const char *funcname =
"Polyakov";
143 cuPolyakov(Sigma,ut,dimGrid,dimBlock);
150 cblas_ccopy(
kvol3, ut[0]+3,
ndim, Sigma[0], 1);
151 cblas_ccopy(
kvol3, ut[1]+3,
ndim, Sigma[1], 1);
153 for(
int i=0; i<
kvol3; i++){
154 Sigma[0][i]=ut[0][i*
ndim+3];
155 Sigma[1][i]=ut[1][i*
ndim+3];
173 for(
int it=1;it<
ksizet;it++)
174#pragma omp parallel
for simd
175 for(
int i=0;i<
kvol3;i++){
178 int indexu=it*
kvol3+i;
179 Complex_f a11=Sigma[0][i]*ut[0][indexu*
ndim+3]-Sigma[1][i]*conj(ut[1][indexu*
ndim+3]);
181 Sigma[1][i]=Sigma[0][i]*ut[1][indexu*
ndim+3]+Sigma[1][i]*conj(ut[0][indexu*
ndim+3]);
194#error Par_tmul is not yet implimented in CUDA as Sigma[1] is device only memory
197 printf(
"Multiplying with MPI\n");
199 Par_tmul(Sigma[0], Sigma[1]);
210#pragma omp parallel for simd reduction(+:poly)
211 for(
int i=0;i<
kvol3;i++)
212 poly+=creal(Sigma[0][i]);
227 unsigned int *iu,
unsigned int *
id,
int i,
int mu,
int nu,
short leaf){
246 char *funcname=
"Leaf";
248 unsigned int didm,didn,uidn,uidm;
252 SU2plaq(u11t,u12t,Sigma11,Sigma12,iu,i,mu,nu);
256 didm =
id[mu+
ndim*i]; uidn = iu[nu+
ndim*i];
259 int uin_didm=
id[mu+
ndim*uidn];
260 *Sigma11=u11t[i*
ndim+nu]*conj(u11t[uin_didm*
ndim+mu])+u12t[i*
ndim+nu]*conj(u12t[uin_didm*
ndim+mu]);
261 *Sigma12=-u11t[i*
ndim+nu]*conj(u12t[uin_didm*
ndim+mu])+u12t[i*
ndim+nu]*u11t[uin_didm*
ndim+mu];
264 a11=*Sigma11*conj(u11t[didm*
ndim+nu])+*Sigma12*conj(u12t[didm*
ndim+nu]);
265 a12=-*Sigma11*u12t[didm*
ndim+nu]+*Sigma12*u11t[didm*
ndim+nu];
268 *Sigma11=a11*u11t[didm*
ndim+mu]-a12*conj(u12t[didm*
ndim+mu]);
269 *Sigma12=a11*u12t[didm*
ndim+mu]+a12*conj(u11t[didm*
ndim+mu]);
274 uidm = iu[mu+
ndim*i]; didn =
id[nu+
ndim*i];
276 *Sigma11=conj(u11t[didn*
ndim+nu])*u11t[didn*
ndim+mu]+conj(u12t[didn*
ndim+nu])*u12t[didn*
ndim+mu];
277 *Sigma12=conj(u11t[didn*
ndim+nu])*u12t[didn*
ndim+mu]-u12t[didn*
ndim+mu]*conj(u11t[didn*
ndim+nu]);
281 int uim_didn=
id[nu+
ndim*uidm];
282 a11=*Sigma11*u11t[uim_didn*
ndim+nu]-*Sigma12*conj(u12t[uim_didn*
ndim+nu]);
283 a12=*Sigma11*u12t[uim_didn*
ndim+nu]+*Sigma12*conj(u11t[uim_didn*
ndim+nu]);
286 *Sigma11=a11*conj(u11t[i*
ndim+mu])+a12*conj(u12t[i*
ndim+mu]);
287 *Sigma12=-a11*u12t[i*
ndim+mu]+a12*u11t[i*
ndim+mu];
291 didm =
id[mu+
ndim*i]; didn =
id[nu+
ndim*i];
293 int dim_didn=
id[nu+
ndim*didm];
294 *Sigma11=conj(u11t[didm*
ndim+mu])*conj(u11t[dim_didn*
ndim+nu])+conj(u12t[didm*
ndim+mu])*conj(u12t[dim_didn*
ndim+nu]);
296 Sigma11=a11*conj(u11t[i*
ndim+nu])+a12*conj(u12t[i*
ndim+nu]);
299 return creal(Sigma11);
double Polyakov(Complex_f *ut[2])
Calculate the Polyakov loop (no prizes for guessing that one...)
int SU2plaq(Complex_f *ut[2], Complex_f Sigma[2], unsigned int *iu, int i, int mu, int nu)
Calculates the plaquette at site i in the direction.
int Average_Plaquette(double *hg, double *avplaqs, double *avplaqt, Complex_f *ut[2], unsigned int *iu, float beta)
Calculates the gauge action using new (how new?) lookup table.
int Par_dsum(double *dval)
Performs a reduction on a double dval to get a sum which is then distributed to all ranks.
int * pcoord
The processor grid.
#define AVX
Alignment of arrays. 64 for AVX-512, 32 for AVX/AVX2. 16 for SSE. Since AVX is standard on modern x86...
#define gvol3
Lattice spatial volume.
#define ksizet
Sublattice t extent.
#define kvol
Sublattice volume.
#define gvol
Lattice volume.
#define kvol3
Sublattice spatial volume.
#define Complex_f
Single precision complex number.
Function declarations for most of the routines.