For the early phases of this translation, I'm going to try to copy the original format as much as possible and keep things in one file. Hopefully this will change as we work through the methods, so that we end up with a more logical structure.
Another vestige of the FORTRAN code that will be retained here is the frequent flattening of arrays. But while FORTRAN allows you to write array(i,j) as array(i+M*j), where M is the number of rows, C resorts to pointer arithmetic.
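For instance, a minimal sketch of the convention (the names here are illustrative, not from the code):

	#include <stdlib.h>

	/* Element (i,j) of an M-row matrix stored flat in column-major order,
	 * matching the FORTRAN array(i,j) = array(i+M*j) convention. */
	static inline double get_elem(const double *array, int M, int i, int j){
		return array[i + M*j];
	}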
One change I will try to make is the introduction of error codes (nothing to do with the Irish postal service). These can be found in the file errorcode.h and can help with debugging.
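As a sketch of the idea (only OPENERROR appears in the listing below; the guard and the value here are assumptions):

	/* errorcode.h (illustrative sketch) */
	#ifndef ERRORCODE_H
	#define ERRORCODE_H
	/* Exited/aborted with when fopen() fails in main(); the value is made up here */
	#define OPENERROR 10101
	#endif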
Lastly, the comment style for the start of a function is based on doxygen. It should consist of a description of the function, a list of parameters with a brief explanation, and lastly what is returned by the function (on success or failure).
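A hypothetical function documented in that style (the function itself is made up for illustration):

	/**
	 * @brief Flattens a 2D array index into a 1D one
	 *
	 * @param i: Row index
	 * @param j: Column index
	 * @param M: Number of rows
	 *
	 * @return The flattened index i+M*j
	 */
	int Flatten(int i, int j, int M);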
The input file format is like the table below, with values separated by whitespace. The default values here are straight from the FORTRAN. Note that the bottom line labelling each input is ignored.
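The table itself did not survive here, but the field order can be read off the fscanf call in the listing, and the defaults from the code; a reconstructed example input file would be:

	0.004 1.7 0.1780 0.0 0.0 0.0 0.0 250 10 1 5 0
	dt    beta akappa ajq athq fmu delb stepl ntraj istart icheck iread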
Changing the value of istart in the input parameter file gives us the following start options, quoted from the FORTRAN comments:

istart < 0: Start from tape. In FORTRAN?!? How old was this code? (deprecated, replaced with iread)
istart = 0: Ordered/cold start. For some reason this leaves the trial fields as zero in the FORTRAN code?
int main(int argc, char *argv[]){
	const char *funcname = "main";

#if(nproc>1)
	MPI_Comm_rank(comm, &rank);
	MPI_Comm_size(comm, &size);
#endif
	float beta = 1.7f;
	float akappa = 0.1780f;
#ifdef __NVCC__
	__managed__
#endif
	float fmu = 0.0f; int iread = 0; int istart = 1;
	int iprint = 1;
	int icheck = 5;
	int ibound = -1;
#ifdef USE_MATH_DEFINES
	const double tpi = 2*M_PI;
#else
	const double tpi = 2*acos(-1.0);
#endif
	float dt=0.004; float ajq = 0.0; float delb=0;
	float athq = 0.0; int stepl = 250; int ntraj = 10;

	if(!rank){	/* only the master rank reads the input file (guard reconstructed) */
		FILE *midout;
		const char *filename = (argc!=2) ? "midout" : argv[1];
		const char *fileop = "r";
		if( !(midout = fopen(filename, fileop) ) ){
			fprintf(stderr, "Error %i in %s: Failed to open file %s for %s.\nExiting\n\n",\
					OPENERROR, funcname, filename, fileop);
#if(nproc>1)
			MPI_Abort(comm,OPENERROR);
#else
			exit(OPENERROR);
#endif
		}
		fscanf(midout, "%f %f %f %f %f %f %f %d %d %d %d %d", &dt, &beta, &akappa,\
				&ajq, &athq, &fmu, &delb, &stepl, &ntraj, &istart, &icheck, &iread);
		fclose(midout);
		assert(stepl>0); assert(ntraj>0); assert(istart>=0); assert(icheck>0); assert(iread>=0);
	}

#if(nproc>1)
	/* ... (broadcast the input parameters to the other ranks) ... */
#endif
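	/* Complex diquark source: magnitude ajq, phase angle athq */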
	jqq = ajq*cexp(athq*I);

#ifdef __NVCC__
	cublasCreate(&cublas_handle);
	blockInit(nx, ny, nz, nt, &dimBlock, &dimGrid);

	int device=-1;
	cudaGetDevice(&device);
	cudaDeviceGetDefaultMemPool(&mempool, device);
	/* ... */
	cudaMemPoolSetAttribute(mempool, cudaMemPoolAttrReleaseThreshold, &threshold);
#endif
#ifdef _DEBUG
	printf("jqq=%f+(%f)I\n",creal(jqq),cimag(jqq));
#endif
#ifdef _DEBUG
	/* ... */
#else
	/* ... */
#endif

	double *dk[2], *pp;
	float *dk_f[2];

	unsigned int *iu, *id;
#ifdef __NVCC__
	cudaMallocManaged((void**)&iu,ndim*kvol*sizeof(int),cudaMemAttachGlobal);
	cudaMallocManaged((void**)&id,ndim*kvol*sizeof(int),cudaMemAttachGlobal);

	cudaMallocManaged(&dk[0],(kvol+halo)*sizeof(double),cudaMemAttachGlobal);
	cudaMallocManaged(&dk[1],(kvol+halo)*sizeof(double),cudaMemAttachGlobal);
#ifdef _DEBUG
	cudaMallocManaged(&dk_f[0],(kvol+halo)*sizeof(float),cudaMemAttachGlobal);
	cudaMallocManaged(&dk_f[1],(kvol+halo)*sizeof(float),cudaMemAttachGlobal);
#else
	cudaMalloc(&dk_f[0],(kvol+halo)*sizeof(float));
	cudaMalloc(&dk_f[1],(kvol+halo)*sizeof(float));
#endif

	int *gamin;
	cudaMallocManaged(&gamin,4*4*sizeof(int),cudaMemAttachGlobal);
	cudaMallocManaged(&gamval,5*4*sizeof(Complex),cudaMemAttachGlobal);
#ifdef _DEBUG
	cudaMallocManaged(&gamval_f,5*4*sizeof(Complex_f),cudaMemAttachGlobal);
#else
	cudaMalloc(&gamval_f,5*4*sizeof(Complex_f));
#endif
	cudaMallocManaged(&u[0],ndim*kvol*sizeof(Complex),cudaMemAttachGlobal);
	cudaMallocManaged(&u[1],ndim*kvol*sizeof(Complex),cudaMemAttachGlobal);
#ifdef _DEBUG
	/* ... */
#else
	/* ... */
#endif
#else
	id = (unsigned int*)aligned_alloc(AVX,ndim*kvol*sizeof(int));
	iu = (unsigned int*)aligned_alloc(AVX,ndim*kvol*sizeof(int));

	int *gamin = (int *)aligned_alloc(AVX,4*4*sizeof(int));
	/* ... */

	dk[0] = (double *)aligned_alloc(AVX,(kvol+halo)*sizeof(double));
	dk[1] = (double *)aligned_alloc(AVX,(kvol+halo)*sizeof(double));
	dk_f[0] = (float *)aligned_alloc(AVX,(kvol+halo)*sizeof(float));
	dk_f[1] = (float *)aligned_alloc(AVX,(kvol+halo)*sizeof(float));
	/* ... */
#endif
	Init(istart,ibound,iread,beta,fmu,akappa,ajq,u,ut,ut_f,gamval,gamval_f,gamin,dk,dk_f,iu,id);
#ifdef __NVCC__
	Init_CUDA(ut[0],ut[1],gamval,gamval_f,gamin,dk[0],dk[1],iu,id);
#endif

	/* ... */
#ifdef __NVCC__
	/* ... */
	cudaDeviceSynchronise();
#endif
#ifdef DIAGNOSTIC
	double ancg_diag=0;
	Diagnostics(istart, u[0], u[1], ut[0], ut[1], ut_f[0], ut_f[1], iu, id, hu, hd, dk[0], dk[1],\
			dk_f[0], dk_f[1], gamin, gamval, gamval_f, jqq, akappa, beta, ancg_diag);
#endif

	/* ... */
#ifdef _DEBUG
	if(!rank) printf("Initial Polyakov loop evaluated as %e\n", poly);
#endif
	double hg, avplaqs, avplaqt;

	double traj = stepl*dt;
	double proby = 2.5/stepl;
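	/* Encode the run parameters into the output file suffix, e.g. beta=1.7,
	 * akappa=0.1780, fmu=0, ajq=0 give b170k1780mu0000j000, followed by the
	 * lattice extents s<nx>t<nt> */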
	int buffer; char buff2[7];

	buffer = (int)round(100*beta);
	sprintf(buff2,"b%03d",buffer);
	strcat(suffix,buff2);

	buffer = (int)round(10000*akappa);
	sprintf(buff2,"k%04d",buffer);
	strcat(suffix,buff2);

	buffer = (int)round(1000*fmu);
	sprintf(buff2,"mu%04d",buffer);
	strcat(suffix,buff2);

	buffer = (int)round(1000*ajq);
	sprintf(buff2,"j%03d",buffer);
	strcat(suffix,buff2);

	sprintf(buff2,"s%02d",nx);
	strcat(suffix,buff2);

	sprintf(buff2,"t%02d",nt);
	strcat(suffix,buff2);

	char outname[FILELEN] = "Output."; const char *outop="a";
	strcat(outname,suffix);
	FILE *output;
	if(!rank){	/* only the master writes to the output file (guard reconstructed) */
		if(!(output=fopen(outname, outop) )){
			fprintf(stderr,"Error %i in %s: Failed to open file %s for %s.\nExiting\n\n",OPENERROR,funcname,outname,outop);
#if(nproc>1)
			MPI_Abort(comm,OPENERROR);
#else
			exit(OPENERROR);
#endif
		}
		printf("hg = %e, <Ps> = %e, <Pt> = %e, <Poly> = %e\n", hg, avplaqs, avplaqt, poly);
		fprintf(output, "ksize = %i ksizet = %i Nf = %i Halo = %i\nTime step dt = %e Trajectory length = %e\n"\
				"No. of Trajectories = %i β = %e\nκ = %e μ = %e\nDiquark source = %e Diquark phase angle = %e\n"\
				"Stopping Residuals: Guidance: %e Acceptance: %e, Estimator: %e\nSeed = %ld\n",
				ksize, ksizet, nf, halo, dt, traj, ntraj, beta, akappa, fmu, ajq, athq, rescgg, rescga, respbp, seed);
#ifdef _DEBUG
		printf("ksize = %i ksizet = %i Nf = %i Halo = %i\nTime step dt = %e Trajectory length = %e\n"\
				"No. of Trajectories = %i β = %e\nκ = %e μ = %e\nDiquark source = %e Diquark phase angle = %e\n"\
				"Stopping Residuals: Guidance: %e Acceptance: %e, Estimator: %e\nSeed = %ld\n",
				ksize, ksizet, nf, halo, dt, traj, ntraj, beta, akappa, fmu, ajq, athq, rescgg, rescga, respbp, seed);
#endif
	}

	double actiona = 0.0; double vel2a = 0.0; double pbpa = 0.0; double endenfa = 0.0; double denfa = 0.0;
	double e_dH=0; double e_dH_e=0;
	double yav = 0.0; double yyav = 0.0;
	int naccp = 0; int ipbp = 0; int itot = 0;

	double pbp;
	double *dSdpi;
#ifdef __NVCC__
#ifdef _DEBUG
	cudaMallocManaged(&dSdpi,kmom*sizeof(double),cudaMemAttachGlobal);
#else
	cudaMalloc(&dSdpi,kmom*sizeof(double));
#endif
	cudaMallocManaged(&pp,kmom*sizeof(double),cudaMemAttachGlobal);
	cudaDeviceSynchronise();
#else
	dSdpi = aligned_alloc(AVX,kmom*sizeof(double));
	pp = aligned_alloc(AVX,kmom*sizeof(double));
#endif
#if (defined SA3AT)
	double start_time=0;
	if(!rank){	/* timing recorded on the master rank (guard reconstructed) */
#if(nproc>1)
		start_time = MPI_Wtime();
#else
		start_time = omp_get_wtime();
#endif
	}
#endif
	double action;
	double ancg,ancgh,totancg=0,totancgh=0;
	for(int itraj = iread+1; itraj <= ntraj+iread; itraj++){
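		/* One HMC trajectory: draw pseudofermions and momenta, integrate the
		 * molecular dynamics with the chosen integrator, then Metropolis accept/reject */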
		ancg = 0; ancgh = 0;
#ifdef _DEBUG
		printf("Starting itraj %i\n", itraj);
#endif
		for(int na=0; na<nf; na++){
#ifdef __NVCC__
			/* ... */
#ifdef _DEBUG
			cudaMallocManaged(&R1_f,kferm*sizeof(Complex_f),cudaMemAttachGlobal);
#else
			/* ... */
#endif
#else
			/* ... */
#endif

#if (defined __NVCC__ && defined _DEBUG)
			cudaMemPrefetchAsync(R1_f,kferm*sizeof(Complex_f),device,streams[1]);
#endif
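			/* Gaussian noise for the pseudofermion heatbath: sigma=1/sqrt(2) per
			 * real component gives unit-variance complex numbers */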
#if (defined(USE_RAN2)||defined(__RANLUX__)||!defined(__INTEL_MKL__))
			/* ... */
#else
			vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 2*kferm, R, 0, 1/sqrt(2));
#endif
#ifdef __NVCC__
			/* ... */
			cudaDeviceSynchronise();
#endif
			Dslashd_f(R1_f,R,ut_f[0],ut_f[1],iu,id,gamval_f,gamin,dk_f,jqq,akappa);
#ifdef __NVCC__
			cudaFree(R);
			cuComplex_convert(R1_f,R1,kferm,false,dimBlock,dimGrid);
#ifdef _DEBUG
			cudaFree(R1_f);
#else
			cudaFreeAsync(R1_f,NULL);
#endif
			cudaDeviceSynchronise();
#else
			free(R);
#pragma omp simd aligned(R1_f,R1:AVX)
			for(int i=0;i<kferm;i++)
				R1[i] = (Complex)R1_f[i];	/* promote to double precision (reconstructed) */
			free(R1_f);
#endif
			UpDownPart(na, X0, R1);
		}

#ifdef __NVCC__
		cudaMemcpyAsync(ut[0], u[0], ndim*kvol*sizeof(Complex),cudaMemcpyHostToDevice,NULL);
		cudaMemcpyAsync(ut[1], u[1], ndim*kvol*sizeof(Complex),cudaMemcpyHostToDevice,NULL);
		/* ... */
#else
		/* ... */
#endif
#if (defined(USE_RAN2)||defined(__RANLUX__)||!defined(__INTEL_MKL__))
		/* ... */
#else
		vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, kmom, pp, 0, 1);
#endif

#ifdef __NVCC__
		cudaMemPrefetchAsync(pp,kmom*sizeof(double),device,streams[1]);
#endif
		double H0, S0;
		Hamilton(&H0,&S0,rescga,pp,X0,X1,Phi,ut_f,iu,id,gamval_f,gamin,dk_f,jqq,akappa,beta,&ancgh,itraj);
#ifdef _DEBUG
		if(!rank) printf("H0: %e S0: %e\n", H0, S0);
#endif
		if(itraj==1)
			action = S0/gvol;	/* initial action per site (reconstructed) */

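		/* Molecular dynamics: exactly one integrator is chosen at compile time via
		 * INT_LPFR (leapfrog), INT_OMF2 (2nd order OMF) or INT_OMF4 (4th order OMF) */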
#if (defined INT_LPFR && defined INT_OMF2) || (defined INT_LPFR && defined INT_OMF4) || (defined INT_OMF2 && defined INT_OMF4)
#error "Only one integrator may be defined"
#elif defined INT_LPFR
		Leapfrog(ut,ut_f,X0,X1,Phi,dk,dk_f,dSdpi,pp,iu,id,gamval,gamval_f,gamin,jqq,beta,akappa,stepl,dt,&ancg,&itot,proby);
#elif defined INT_OMF2
		OMF2(ut,ut_f,X0,X1,Phi,dk,dk_f,dSdpi,pp,iu,id,gamval,gamval_f,gamin,jqq,beta,akappa,stepl,dt,&ancg,&itot,proby);
#elif defined INT_OMF4
		OMF4(ut,ut_f,X0,X1,Phi,dk,dk_f,dSdpi,pp,iu,id,gamval,gamval_f,gamin,jqq,beta,akappa,stepl,dt,&ancg,&itot,proby);
#else
#error "No integrator defined. Please define one of {INT_LPFR, INT_OMF2, INT_OMF4}"
#endif

		totancg+=ancg;

		double H1, S1;
		Hamilton(&H1,&S1,rescga,pp,X0,X1,Phi,ut_f,iu,id,gamval_f,gamin,dk_f,jqq,akappa,beta,&ancgh,itraj);
		ancgh/=2.0;
		totancgh+=ancgh;
#ifdef _DEBUG
		printf("H0-H1=%f-%f",H0,H1);
#endif
		double dH = H0 - H1;
#ifdef _DEBUG
		printf("=%f\n",dH);
#endif
		double dS = S0 - S1;
		if(!rank){	/* guard reconstructed: output is only open on the master rank */
			fprintf(output, "dH = %e dS = %e\n", dH, dS);
#ifdef _DEBUG
			printf("dH = %e dS = %e\n", dH, dS);
#endif
		}
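		/* Accumulate <dH> and <exp(dH)>=<exp(-deltaH)>; the latter should average
		 * to 1 if the molecular dynamics is reversible and area-preserving */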
		e_dH+=dH; e_dH_e+=dH*dH;
		double y = exp(dH);
		yav+=y;
		yyav+=y*y;

		bool acc;
		/* Metropolis accept/reject: y=exp(dH)>=1 guarantees acceptance; Par_granf()
		 * shares one uniform draw across all ranks (test line reconstructed) */
		if(Par_granf()<=y){
			printf("New configuration accepted on trajectory %i.\n", itraj);
			/* ... */
#ifdef __NVCC__
			/* ... */
#endif
			naccp++;
			action = S1/gvol;	/* action per site of the accepted configuration (reconstructed) */
			acc=true;
		}
		else{
			printf("New configuration rejected on trajectory %i.\n", itraj);
			acc=false;
		}
		actiona+=action;
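		/* |pp|^2: with unit-variance Gaussian momenta each of the kmom components
		 * averages to 1 (equipartition), making this a cheap sanity check */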
		double vel2=0.0;
#ifdef __NVCC__
		cublasDnrm2(cublas_handle,kmom, pp, 1,&vel2);
		vel2*=vel2;
#elif defined USE_BLAS
		vel2 = cblas_dnrm2(kmom, pp, 1);
		vel2*=vel2;
#else
#pragma unroll
		for(int i=0; i<kmom; i++)
			vel2+=pp[i]*pp[i];
#endif
#if(nproc>1)
		Par_dsum(&vel2);	/* global sum (reconstructed from the Par_dsum brief below) */
#endif
		/* ... */

		if(itraj%iprint==0){
			if(!acc){
#ifdef __NVCC__
				cudaMemcpyAsync(ut[0], u[0], ndim*kvol*sizeof(Complex),cudaMemcpyDefault,streams[0]);
				cudaMemcpyAsync(ut[1], u[1], ndim*kvol*sizeof(Complex),cudaMemcpyDefault,streams[1]);
				cudaDeviceSynchronise();
				/* ... */
#else
				/* ... */
#endif
			}
#ifdef _DEBUG
			printf("Starting measurements\n");
#endif
			int itercg=0;
			double endenf, denf;

			int measure_check=0;
			measure_check = Measure(&pbp,&endenf,&denf,&qq,&qbqb,respbp,&itercg,ut,ut_f,iu,id,\
					gamval,gamval_f,gamin,dk,dk_f,jqq,akappa,Phi,R1);
#ifdef _DEBUG
			printf("Finished measurements\n");
#endif
			pbpa+=pbp; endenfa+=endenf; denfa+=denf; ipbp++;

#if (nproc>=4)
			/* ... */
#else
#pragma omp parallel for
			for(int i=0; i<4; i++)
				switch(i)
#endif
				{
					case(0):
						/* ... */
						fprintf(output, "Measure (CG) %i Update (CG) %.3f Hamiltonian (CG) %.3f\n", itercg, ancg, ancgh);
						fflush(output);
						break;
					case(1):
						{
							FILE *fortout;
							char fortname[FILELEN] = "fermi.";
							strcat(fortname,suffix);
							const char *fortop = (itraj==1) ? "w" : "a";
							if(!(fortout=fopen(fortname, fortop) )){
								fprintf(stderr, "Error %i in %s: Failed to open file %s for %s.\nExiting\n\n",\
										OPENERROR, funcname, fortname, fortop);
#if(nproc>1)
								MPI_Abort(comm,OPENERROR);
#else
								exit(OPENERROR);
#endif
							}
							if(itraj==1)
								fprintf(fortout, "pbp\tendenf\tdenf\n");
							if(measure_check)
								fprintf(fortout, "%e\t%e\t%e\n", NAN, NAN, NAN);
							else
								fprintf(fortout, "%e\t%e\t%e\n", pbp, endenf, denf);
							fclose(fortout);
							break;
						}
					case(2):
						{
							FILE *fortout;
							char fortname[FILELEN] = "bose.";
							strcat(fortname,suffix);
							const char *fortop = (itraj==1) ? "w" : "a";
							if(!(fortout=fopen(fortname, fortop) )){
								fprintf(stderr, "Error %i in %s: Failed to open file %s for %s.\nExiting\n\n",\
										OPENERROR, funcname, fortname, fortop);
							}
							if(itraj==1)
								fprintf(fortout, "avplaqs\tavplaqt\tpoly\n");
							fprintf(fortout, "%e\t%e\t%e\n", avplaqs, avplaqt, poly);
							fclose(fortout);
							break;
						}
					case(3):
						{
							FILE *fortout;
							char fortname[FILELEN] = "diq.";
							strcat(fortname,suffix);
							const char *fortop = (itraj==1) ? "w" : "a";
							if(!(fortout=fopen(fortname, fortop) )){
								fprintf(stderr, "Error %i in %s: Failed to open file %s for %s.\nExiting\n\n",\
										OPENERROR, funcname, fortname, fortop);
#if(nproc>1)
								MPI_Abort(comm,OPENERROR);
#else
								exit(OPENERROR);
#endif
							}
							if(itraj==1)
								fprintf(fortout, "Re(qq)\n");
							if(measure_check)
								fprintf(fortout, "%e\n", NAN);
							else
								fprintf(fortout, "%e\n", creal(qq));
							fclose(fortout);
							break;
						}
					default: break;
				}
		}
		if(itraj%icheck==0){
			Par_swrite(itraj,icheck,beta,fmu,akappa,ajq,u[0],u[1]);
		}
		if(!rank)	/* guard reconstructed: output is only open on the master rank */
			fflush(output);
	}
#if (defined SA3AT)
	double elapsed = 0;
	if(!rank){	/* timing recorded on the master rank (guard reconstructed) */
#if(nproc>1)
		elapsed = MPI_Wtime()-start_time;
#else
		elapsed = omp_get_wtime()-start_time;
#endif
	}
#endif

#ifdef __NVCC__
	cudaFree(dk[0]); cudaFree(dk[1]); cudaFree(R1); cudaFree(dSdpi); cudaFree(pp);
	cudaFree(Phi); cudaFree(ut[0]); cudaFree(ut[1]);
	cudaFree(X0); cudaFree(X1); cudaFree(u[0]); cudaFree(u[1]);
	cudaFree(id); cudaFree(iu);
	cudaFree(dk_f[0]); cudaFree(dk_f[1]); cudaFree(ut_f[0]); cudaFree(ut_f[1]);
	cudaFree(gamin); cudaFree(gamval); cudaFree(gamval_f);
	cublasDestroy(cublas_handle);
#else
	free(dk[0]); free(dk[1]); free(R1); free(dSdpi); free(pp);
	free(Phi); free(ut[0]); free(ut[1]);
	free(X0); free(X1); free(u[0]); free(u[1]);
	free(id); free(iu);
	free(dk_f[0]); free(dk_f[1]); free(ut_f[0]); free(ut_f[1]);
	free(gamin); free(gamval); free(gamval_f);
#endif
#ifdef __RANLUX__
	gsl_rng_free(ranlux_instd);
#elif (defined __INTEL_MKL__ && !defined USE_RAN2)
	vslDeleteStream(&stream);
#endif
#if (defined SA3AT)
	if(!rank){	/* guard reconstructed */
		FILE *sa3at = fopen("Bench_times.csv", "a");
#ifdef __NVCC__
		char version[256];
		int cuversion; cudaRuntimeGetVersion(&cuversion);
		sprintf(version,"CUDA %d\tBlock: (%d,%d,%d)\tGrid: (%d,%d,%d)\n%s\n",cuversion,\
				dimBlock.x,dimBlock.y,dimBlock.z,dimGrid.x,dimGrid.y,dimGrid.z,__VERSION__);
#else
		char *version=__VERSION__;
#endif
		fprintf(sa3at, "%s\nβ%0.3f κ:%0.4f μ:%0.4f j:%0.3f s:%i t:%i kvol:%ld\n"
				"npx:%i npt:%i nthread:%i ncore:%i time:%f traj_time:%f\n\n",\
				version,beta,akappa,fmu,ajq,nx,nt,kvol,npx,npt,nthreads,npx*npy*npz*npt*nthreads,elapsed,elapsed/ntraj);
		fclose(sa3at);
	}
#endif
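	/* Convert running sums into means; errors are standard errors of the mean,
	 * err = sqrt((<x*x> - <x>*<x>)/(n-1)) */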
	actiona/=ntraj; vel2a/=ntraj; pbpa/=ipbp; endenfa/=ipbp; denfa/=ipbp;
	totancg/=ntraj; totancgh/=ntraj;
	e_dH/=ntraj; e_dH_e=sqrt((e_dH_e/ntraj-e_dH*e_dH)/(ntraj-1));
	yav/=ntraj; yyav=sqrt((yyav/ntraj - yav*yav)/(ntraj-1));
	float traj_cost=totancg/dt;
	double atraj=dt*itot/ntraj;

	if(!rank){	/* guard reconstructed: output is only open on the master rank */
		fprintf(output, "Averages for the last %i trajectories\n"\
				"Number of acceptances: %i\tAverage Trajectory Length = %e\n"\
				"<dH>=%e+/-%e\t<exp(dH)>=%e+/-%e\tTrajectory cost=N_cg/dt =%e\n"\
				"Average number of congrad iter guidance: %.3f acceptance %.3f\n"\
				"psibarpsi = %e\n"\
				"Mean Square Velocity = %e\tAction Per Site = %e\n"\
				"Energy Density = %e\tNumber Density %e\n\n\n",\
				ntraj, naccp, atraj, e_dH,e_dH_e, yav, yyav, traj_cost, totancg, totancgh, pbpa, vel2a, actiona, endenfa, denfa);
		fclose(output);
	}
#if(nproc>1)
	MPI_Barrier(comm);
	MPI_Finalise();	/* reconstructed from the MPI_Finalise() macro documented below */
#endif
	fflush(stdout);
	return 0;
}
unsigned int * h1u
Up halo starting element.
unsigned int * h1d
Down halo starting element.
unsigned int * halosize
Array containing the size of the halo in each direction.
unsigned int * hd
Down halo indices.
unsigned int * hu
Up halo indices.
int OMF2(Complex *ut[2], Complex_f *ut_f[2], Complex *X0, Complex *X1, Complex *Phi, double *dk[2], float *dk_f[2], double *dSdpi, double *pp, int *iu, int *id, Complex *gamval, Complex_f *gamval_f, int *gamin, Complex jqq, float beta, float akappa, int stepl, float dt, double *ancg, int *itot, float proby)
OMF second order five step integrator.
int Leapfrog(Complex *ut[2], Complex_f *ut_f[2], Complex *X0, Complex *X1, Complex *Phi, double *dk[2], float *dk_f[2], double *dSdpi, double *pp, int *iu, int *id, Complex *gamval, Complex_f *gamval_f, int *gamin, Complex jqq, float beta, float akappa, int stepl, float dt, double *ancg, int *itot, float proby)
Leapfrog integrator. Each trajectory step takes the form p->p+dt/2, u->u+dt, p->p+dt/2; a schematic of one step is sketched after this list.
int OMF4(Complex *ut[2], Complex_f *ut_f[2], Complex *X0, Complex *X1, Complex *Phi, double *dk[2], float *dk_f[2], double *dSdpi, double *pp, int *iu, int *id, Complex *gamval, Complex_f *gamval_f, int *gamin, Complex jqq, float beta, float akappa, int stepl, float dt, double *ancg, int *itot, float proby)
OMF fourth order eleven step integrator.
int Dslashd_f(Complex_f *phi, Complex_f *r, Complex_f *u11t, Complex_f *u12t, unsigned int *iu, unsigned int *id, Complex_f *gamval, int *gamin, float *dk[2], Complex_f jqq, float akappa)
Evaluates Φ = M†r in single precision.
int Par_swrite(const int itraj, const int icheck, const float beta, const float fmu, const float akappa, const Complex_f ajq, Complex *u11, Complex *u12)
Copies u11 and u12 into arrays without halos which then get written to output.
int Par_fcopy(float *fval)
Broadcasts a float to the other processes.
int size
The number of MPI ranks in total.
int Par_begin(int argc, char *argv[])
Initialises the MPI configuration.
int Par_icopy(int *ival)
Broadcasts an integer to the other processes.
int Trial_Exchange(Complex *ut[2], Complex_f *ut_f[2])
Exchanges the trial fields.
#define MPI_Finalise()
Avoid any accidents with US/UK spelling.
int Par_dsum(double *dval)
Performs a reduction on a double dval to get a sum which is then distributed to all ranks.
int * pcoord
The processor grid.
#define M_PI
The value of π, if not defined elsewhere.
int Gauss_c(Complex_f *ps, unsigned int n, const Complex_f mu, const float sigma)
Generates a vector of normally distributed random single precision complex numbers using the Box-Muller method.
int Gauss_d(double *ps, unsigned int n, const double mu, const double sigma)
Generates a vector of normally distributed random double precision numbers using the Box-Muller method.
double Par_granf()
Generates a random double which is then sent to the other ranks.
#define AVX
Alignment of arrays. 64 for AVX-512, 32 for AVX/AVX2, 16 for SSE. Since AVX is standard on modern x86 machines, it is the default.
#define rescgg
Conjugate gradient residue for update.
#define nt
Lattice temporal extent. This also corresponds to the inverse temperature.
#define kmom
sublattice momentum sites
#define nx
Lattice x extent.
#define ngorkov
Gor'kov indices.
#define ksizet
Sublattice t extent.
#define kferm2Halo
Dirac lattice and halo.
#define npx
Processor grid x extent. This must be a divisor of nx.
#define nadj
adjacent spatial indices
#define kvol
Sublattice volume.
#define Complex
Double precision complex number.
#define kferm
sublattice size including Gor'kov indices
#define rescga
Conjugate gradient residue for acceptance.
#define npz
Processor grid z extent.
#define nthreads
Number of threads for OpenMP, which can be overwritten at runtime.
#define ksize
Sublattice spatial extent for a cubic lattice.
#define nf
Fermion flavours (double it)
#define respbp
Conjugate gradient residue for the ⟨ψ̄ψ⟩ measurement.
#define gvol
Lattice volume.
#define FILELEN
Default file name length.
#define halo
Total Halo size.
#define Complex_f
Single precision complex number.
#define npy
Processor grid y extent.
#define kferm2
sublattice size including Dirac indices
#define npt
Processor grid t extent.
#define kfermHalo
Gor'kov lattice and halo.
#define nz
Lattice z extent. We normally use cubic lattices so this is the same as nx.
#define ny
Lattice y extent. We normally use cubic lattices so this is the same as nx.
double Polyakov(Complex_f *ut[2])
Calculate the Polyakov loop (no prizes for guessing that one...)
int Average_Plaquette(double *hg, double *avplaqs, double *avplaqt, Complex_f *ut[2], unsigned int *iu, float beta)
Calculates the gauge action using the new (how new?) lookup table.
int Measure(double *pbp, double *endenf, double *denf, Complex *qq, Complex *qbqb, double res, int *itercg, Complex *ut[2], Complex_f *ut_f[2], unsigned int *iu, unsigned int *id, Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk[2], float *dk_f[2], Complex_f jqq, float akappa, Complex *Phi, Complex *R1)
Calculate fermion expectation values via a noisy estimator.
int Init(int istart, int ibound, int iread, float beta, float fmu, float akappa, Complex_f ajq, Complex *u[2], Complex *ut[2], Complex_f *ut_f[2], Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk[2], float *dk_f[2], unsigned int *iu, unsigned int *id)
Initialises the system.
int Hamilton(double *h, double *s, double res2, double *pp, Complex *X0, Complex *X1, Complex *Phi, Complex_f *ut[2], unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk[2], Complex_f jqq, float akappa, float beta, double *ancgh, int traj)
Calculate the Hamiltonian.
int Reunitarise(Complex *ut[2])
Reunitarises u11t and u12t as in conj(u11t[i])*u11t[i]+conj(u12t[i])*u12t[i]=1.
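Schematically, the leapfrog step quoted in the Leapfrog brief above is (written additively, with F the force accumulated in dSdpi; take this as a sketch of the ordering rather than the exact link update used in the code):

\[
p_{1/2} = p_0 + \tfrac{dt}{2}F(u_0), \qquad u_1 = u_0 + dt\,p_{1/2}, \qquad p_1 = p_{1/2} + \tfrac{dt}{2}F(u_1)
\]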