30 const char *funcname =
"Average_Plaquette";
38 __managed__
double hgs = 0; __managed__
double hgt = 0;
39 cuAverage_Plaquette(&hgs, &hgt, u11t, u12t, iu,dimGrid,dimBlock);
41 double hgs = 0;
double hgt = 0;
42 for(
int mu=1;mu<
ndim;mu++)
43 for(
int nu=0;nu<mu;nu++)
46#pragma omp parallel
for simd aligned(u11t,u12t,iu:
AVX) reduction(+:hgs,hgt)
47 for(
int i=0;i<
kvol;i++){
54 default: hgs -=
SU2plaq(u11t,u12t,iu,i,mu,nu);
62 *avplaqs=-hgs/(3.0*
gvol); *avplaqt=-hgt/(
gvol*3.0);
66 printf(
"hgs=%e hgt=%e hg=%e\n", hgs, hgt, *hg);
121 const char *funcname =
"Polyakov";
125 cudaGetDevice(&device);
127 cudaMallocManaged((
void **)&Sigma11,
kvol3*
sizeof(
Complex_f),cudaMemAttachGlobal);
129 cudaMallocManaged((
void **)&Sigma12,
kvol3*
sizeof(
Complex_f),cudaMemAttachGlobal);
131 cudaMallocAsync((
void **)&Sigma12,
kvol3*
sizeof(
Complex_f),streams[0]);
140 cublasCcopy(cublas_handle,
kvol3, (cuComplex *)(u11t+3),
ndim, (cuComplex *)Sigma11, 1);
141 cublasCcopy(cublas_handle,
kvol3, (cuComplex *)(u12t+3),
ndim, (cuComplex *)Sigma12, 1);
142#elif defined USE_BLAS
143 cblas_ccopy(
kvol3, u11t+3,
ndim, Sigma11, 1);
144 cblas_ccopy(
kvol3, u12t+3,
ndim, Sigma12, 1);
146 for(
int i=0; i<
kvol3; i++){
147 Sigma11[i]=u11t[i*
ndim+3];
148 Sigma12[i]=u12t[i*
ndim+3];
166 cudaDeviceSynchronise();
167 cuPolyakov(Sigma11,Sigma12,u11t,u12t,dimGrid,dimBlock);
168 cudaMemPrefetchAsync(Sigma11,
kvol3*
sizeof(
Complex_f),cudaCpuDeviceId,NULL);
171 for(
int it=1;it<
ksizet;it++)
172#pragma omp parallel
for simd aligned(u11t,u12t,Sigma11,Sigma12:
AVX)
173 for(
int i=0;i<
kvol3;i++){
176 int indexu=it*
kvol3+i;
177 Complex_f a11=Sigma11[i]*u11t[indexu*
ndim+3]-Sigma12[i]*conj(u12t[indexu*
ndim+3]);
179 Sigma12[i]=Sigma11[i]*u12t[indexu*
ndim+3]+Sigma12[i]*conj(u11t[indexu*
ndim+3]);
191#error Par_tmul is not yet implimented in CUDA as Sigma12 is device only memory
194 printf(
"Multiplying with MPI\n");
196 Par_tmul(Sigma11, Sigma12);
208 cudaDeviceSynchronise();
209#pragma omp parallel for simd reduction(+:poly)
211#pragma omp parallel for simd reduction(+:poly) aligned(Sigma11:AVX)
213 for(
int i=0;i<
kvol3;i++)
214 poly+=creal(Sigma11[i]);
220 cudaFreeAsync(Sigma12,streams[0]);
223 free(Sigma11); free(Sigma12);
int Average_Plaquette(double *hg, double *avplaqs, double *avplaqt, Complex_f *u11t, Complex_f *u12t, unsigned int *iu, float beta)
Calculates the gauge action using the new (how new?) lookup table.
float SU2plaq(Complex_f *u11t, Complex_f *u12t, unsigned int *iu, int i, int mu, int nu)
Calculates the plaquette at site i in the mu-nu direction.