su2hmc
su2hmc.c File Reference

An eclectic collection of functions used in the HMC. More...

#include <assert.h>
#include <coord.h>
#include <matrices.h>
#include <par_mpi.h>
#include <random.h>
#include <string.h>
#include <su2hmc.h>

Go to the source code of this file.

Functions

int Init (int istart, int ibound, int iread, float beta, float fmu, float akappa, Complex_f ajq, Complex *u[2], Complex *ut[2], Complex_f *ut_f[2], Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk[2], float *dk_f[2], unsigned int *iu, unsigned int *id)
 Initialises the system.
 
int Hamilton (double *h, double *s, double res2, double *pp, Complex *X0, Complex *X1, Complex *Phi, Complex_f *ut[2], unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk[2], Complex_f jqq, float akappa, float beta, double *ancgh, int traj)
 Calculate the Hamiltonian.
 
int C_gather (Complex_f *x, Complex_f *y, int n, unsigned int *table, unsigned int mu)
 Extracts all the single precision gauge links in the \(\mu\) direction only.
 
int Z_gather (Complex *x, Complex *y, int n, unsigned int *table, unsigned int mu)
 Extracts all the double precision gauge links in the \(\mu\) direction only.
 
int Fill_Small_Phi (int na, Complex *smallPhi, Complex *Phi)
 
int UpDownPart (const int na, Complex *X0, Complex *R1)
 

Detailed Description

An eclectic collection of functions used in the HMC.

Definition in file su2hmc.c.

Function Documentation

◆ C_gather()

int C_gather ( Complex_f * x,
Complex_f * y,
int n,
unsigned int * table,
unsigned int mu )
inline

Extracts all the single precision gauge links in the \(\mu\) direction only.

Parameters
x	The output
y	The gauge field for a particular colour
n	Number of sites in the gauge field. This is typically kvol
table	Table containing information on nearest neighbours. Usually id or iu
mu	Direction we're interested in extracting
Returns
Zero on success, integer error code otherwise

Definition at line 357 of file su2hmc.c.

358{
359 const char *funcname = "C_gather";
360 //FORTRAN had a second parameter m giving the size of y (kvol+halo) normally
361 //Pointers mean that's not an issue for us so I'm leaving it out
362#ifdef __NVCC__
363 cuC_gather(x,y,n,table,mu,dimBlock,dimGrid);
364#else
365#pragma omp parallel for simd aligned (x,y,table:AVX)
366 for(int i=0; i<n; i++)
367 x[i]=y[table[i*ndim+mu]*ndim+mu];
368#endif
369 return 0;
370}
#define ndim
Dimensions.
Definition sizes.h:179

References Complex_f, and ndim.
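The gather indexes y by site and direction, so x[i] picks up the \(\mu\)-direction link at the neighbour of site i given by table. A minimal usage sketch, assuming a single-colour trial field u11t_f of length (kvol+halo)*ndim (the buffer names here are illustrative, not from this file):

	// Sketch: gather the time-direction (mu=3) link at each site's down
	// neighbour, as given by the id table, into a contiguous buffer.
	Complex_f *X = aligned_alloc(AVX, kvol*sizeof(Complex_f));
	C_gather(X, u11t_f, kvol, id, 3);
	// X[i] == u11t_f[id[i*ndim+3]*ndim+3]
	free(X);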


◆ Fill_Small_Phi()

int Fill_Small_Phi ( int na,
Complex * smallPhi,
Complex * Phi )
inline

Copies necessary (2*4*kvol) elements of Phi into a vector variable

Parameters
na	Flavour index
smallPhi	The partitioned output
Phi	The pseudofermion field
Returns
Zero on success, integer error code otherwise

Definition at line 385 of file su2hmc.c.

386{
387 /*Copies necessary (2*4*kvol) elements of Phi into a vector variable
388 *
389 * Globals:
390 * =======
391 * Phi: The source array
392 *
393 * Parameters:
394 * ==========
395 * int na: flavour index
396 * Complex *smallPhi: The target array
397 *
398 * Returns:
399 * =======
400 * Zero on success, integer error code otherwise
401 */
402 const char *funcname = "Fill_Small_Phi";
403 //BIG and small phi index
404#ifdef __NVCC__
405 cuFill_Small_Phi(na,smallPhi,Phi,dimBlock,dimGrid);
406#else
407#pragma omp parallel for simd aligned(smallPhi,Phi:AVX) collapse(3)
408 for(int i = 0; i<kvol;i++)
409 for(int idirac = 0; idirac<ndirac; idirac++)
410 for(int ic= 0; ic<nc; ic++)
411 // PHI_index=i*16+j*2+k;
412 smallPhi[(i*ndirac+idirac)*nc+ic]=Phi[((na*kvol+i)*ngorkov+idirac)*nc+ic];
413#endif
414 return 0;
415}
#define nc
Colours.
Definition sizes.h:173
#define ngorkov
Gor'kov indices.
Definition sizes.h:181
#define kvol
Sublattice volume.
Definition sizes.h:154
#define ndirac
Dirac indices.
Definition sizes.h:177

References Complex, kvol, nc, ndirac, and ngorkov.
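Hamilton() uses this to strip the lower Gor'kov components before each solve: Phi carries ngorkov (twice ndirac) spinor components per site, while the up/down-partitioned solver vectors carry only ndirac. A minimal sketch of the call pattern, mirroring its use in Hamilton():

	// Sketch: copy flavour na=0's first ndirac Gor'kov components of Phi
	// into a kferm2-sized work array, e.g. as a right-hand side for Congradq().
	Complex *smallPhi = aligned_alloc(AVX, kferm2*sizeof(Complex));
	Fill_Small_Phi(0, smallPhi, Phi);
	// ... solve, take dot products, etc. ...
	free(smallPhi);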


◆ Hamilton()

int Hamilton ( double * h,
double * s,
double res2,
double * pp,
Complex * X0,
Complex * X1,
Complex * Phi,
Complex_f * ut[2],
unsigned int * iu,
unsigned int * id,
Complex_f * gamval_f,
int * gamin,
float * dk[2],
Complex_f jqq,
float akappa,
float beta,
double * ancgh,
int traj )

Calculate the Hamiltonian.

Parameters
h	Hamiltonian
s	Action
res2	Limit for conjugate gradient
pp	Momentum field
X0	Up/down partitioned pseudofermion field
X1	Holder for the partitioned fermion field, then the conjugate gradient output
Phi	Pseudofermion field
ut	Gauge fields (single precision)
iu,id	Lattice indices
gamval_f	Single precision gamma matrices rescaled by kappa
gamin	Gamma indices
dk	\(\left(1+\gamma_0\right)e^{-\mu}\) and \(\left(1-\gamma_0\right)e^\mu\) float
jqq	Diquark source
akappa	Hopping parameter
beta	Inverse gauge coupling
ancgh	Conjugate gradient iterations counter
traj	Calling trajectory for error reporting
Returns
Zero on success. Integer Error code otherwise.

Definition at line 245 of file su2hmc.c.

247 {
248 /*
249 * @brief Calculate the Hamiltonian
250 *
251 *
252 * @param h: Hamiltonian
253 * @param s: Action
254 * @param res2: Limit for conjugate gradient
 255 * @param pp: Momentum field
 256 * @param X0: Up/down partitioned pseudofermion field
 257 * @param X1: Holder for the partitioned fermion field, then the CG output
 258 * @param Phi: Pseudofermion field
 259 * @param ut[0],ut[1]: Gauge fields
260 * @param iu,id: Lattice indices
261 * @param gamval_f: Gamma matrices
262 * @param gamin: Gamma indices
 263 * @param dk[0]: $(1+\gamma_0)e^{-\mu}$ float
 264 * @param dk[1]: $(1-\gamma_0)e^{\mu}$ float
265 * @param jqq: Diquark source
266 * @param akappa: Hopping parameter
267 * @param beta: Inverse gauge coupling
268 * @param ancgh: Conjugate gradient iterations counter
269 *
270 * @return Zero on success. Integer Error code otherwise.
271 */
272 const char *funcname = "Hamilton";
273 //Iterate over momentum terms.
274#ifdef __NVCC__
275 double hp;
276 int device=-1;
277 cudaGetDevice(&device);
278 cudaMemPrefetchAsync(pp,kmom*sizeof(double),device,NULL);
279 cublasDnrm2(cublas_handle, kmom, pp, 1,&hp);
280 hp*=hp;
281#elif defined USE_BLAS
282 double hp = cblas_dnrm2(kmom, pp, 1);
283 hp*=hp;
284#else
285 double hp=0;
286 for(int i = 0; i<kmom; i++)
287 hp+=pp[i]*pp[i];
288#endif
289 hp*=0.5;
290 double avplaqs, avplaqt;
291 double hg = 0;
292 //avplaq? isn't seen again here.
293 Average_Plaquette(&hg,&avplaqs,&avplaqt,ut,iu,beta);
294
295 double hf = 0; int itercg = 0;
296#ifdef __NVCC__
297 Complex *smallPhi;
298 cudaMallocAsync((void **)&smallPhi,kferm2*sizeof(Complex),NULL);
299#else
300 Complex *smallPhi = aligned_alloc(AVX,kferm2*sizeof(Complex));
301#endif
302 //Iterating over flavours
303 for(int na=0;na<nf;na++){
304#ifdef __NVCC__
305#ifdef _DEBUG
306 cudaDeviceSynchronise();
307#endif
308 cudaMemcpyAsync(X1,X0+na*kferm2,kferm2*sizeof(Complex),cudaMemcpyDeviceToDevice,streams[0]);
309#ifdef _DEBUG
310 cudaDeviceSynchronise();
311#endif
312#else
313 memcpy(X1,X0+na*kferm2,kferm2*sizeof(Complex));
314#endif
315 Fill_Small_Phi(na, smallPhi, Phi);
316 if(Congradq(na,res2,X1,smallPhi,ut,iu,id,gamval_f,gamin,dk,jqq,akappa,&itercg))
317 fprintf(stderr,"Trajectory %d\n", traj);
318
319 *ancgh+=itercg;
320#ifdef __NVCC__
321 cudaMemcpyAsync(X0+na*kferm2,X1,kferm2*sizeof(Complex),cudaMemcpyDeviceToDevice,streams[0]);
322#else
323 memcpy(X0+na*kferm2,X1,kferm2*sizeof(Complex));
324#endif
325 Fill_Small_Phi(na, smallPhi,Phi);
326#ifdef __NVCC__
327 Complex dot;
328 cublasZdotc(cublas_handle,kferm2,(cuDoubleComplex *)smallPhi,1,(cuDoubleComplex *) X1,1,(cuDoubleComplex *) &dot);
329 hf+=creal(dot);
330#elif defined USE_BLAS
331 Complex dot;
332 cblas_zdotc_sub(kferm2, smallPhi, 1, X1, 1, &dot);
333 hf+=creal(dot);
334#else
335 //It is a dot product of the flattened arrays, could use
336 //a module to convert index to coordinate array...
337 for(int j=0;j<kferm2;j++)
338 hf+=creal(conj(smallPhi[j])*X1[j]);
339#endif
340 }
341#ifdef __NVCC__
342 cudaFreeAsync(smallPhi,NULL);
343#else
344 free(smallPhi);
345#endif
346 //hg was summed over inside of Average_Plaquette.
347#if(nproc>1)
348 Par_dsum(&hp); Par_dsum(&hf);
349#endif
350 *s=hg+hf; *h=(*s)+hp;
351#ifdef _DEBUG
352 if(!rank)
353 printf("hg=%.5e; hf=%.5e; hp=%.5e; h=%.5e\n", hg, hf, hp, *h);
354#endif
355 return 0;
356}
int rank
The MPI rank.
Definition par_mpi.c:22
int Par_dsum(double *dval)
Performs a reduction on a double dval to get a sum which is then distributed to all ranks.
#define AVX
Alignment of arrays. 64 for AVX-512, 32 for AVX/AVX2. 16 for SSE. Since AVX is standard on modern x86...
Definition sizes.h:268
#define kmom
sublattice momentum sites
Definition sizes.h:184
#define Complex
Double precision complex number.
Definition sizes.h:58
#define nf
Fermion flavours (double it)
Definition sizes.h:151
#define kferm2
sublattice size including Dirac indices
Definition sizes.h:188
int Fill_Small_Phi(int na, Complex *smallPhi, Complex *Phi)
Definition su2hmc.c:385
int Average_Plaquette(double *hg, double *avplaqs, double *avplaqt, Complex_f *ut[2], unsigned int *iu, float beta)
Calculates the gauge action using new (how new?) lookup table.
Definition bosonic.c:20
int Congradq(int na, double res, Complex *X1, Complex *r, Complex_f *ut[2], unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk[2], Complex_f jqq, float akappa, int *itercg)
Matrix Inversion via Conjugate Gradient (up/down flavour partitioning). Solves \(\left(M^\dagger M\right)X_1=\Phi\); implements up/down partitioning.
Definition congrad.c:7

References Average_Plaquette(), AVX, Complex, Complex_f, Congradq(), Fill_Small_Phi(), kferm2, kmom, nf, Par_dsum(), and rank.
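Putting the pieces together, the routine assembles

\(H = \frac{1}{2}\sum_i p_i^2 + S_g + \sum_{n_a}\mathrm{Re}\,\Phi_{n_a}^\dagger\left(M^\dagger M\right)^{-1}\Phi_{n_a}\)

where the momentum term comes from the norm of pp, \(S_g\) from Average_Plaquette(), and the fermionic term from the Congradq() inversion. A minimal call sketch at the end of a trajectory (the surrounding variables, including the name itraj, are assumed to be set up as elsewhere in the HMC):

	// Sketch: accumulate the Hamiltonian and action after a trajectory.
	double h = 0, s = 0, ancgh = 0;
	Hamilton(&h, &s, res2, pp, X0, X1, Phi, ut_f, iu, id,
	         gamval_f, gamin, dk_f, jqq, akappa, beta, &ancgh, itraj);
	// h enters the Metropolis accept/reject step; s is the action alone.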


◆ Init()

int Init ( int istart,
int ibound,
int iread,
float beta,
float fmu,
float akappa,
Complex_f ajq,
Complex * u[2],
Complex * ut[2],
Complex_f * ut_f[2],
Complex * gamval,
Complex_f * gamval_f,
int * gamin,
double * dk[2],
float * dk_f[2],
unsigned int * iu,
unsigned int * id )

Initialises the system.

Parameters
istart	Zero for a cold start, positive for a hot start, negative for no initialisation
ibound	Periodic (1) or antiperiodic (-1) temporal boundary conditions
iread	Configuration number to read from file; zero to skip
beta	Inverse gauge coupling
fmu	Chemical potential
akappa	Hopping parameter
ajq	Diquark source
u	Gauge fields
ut	Trial gauge fields
ut_f	Trial gauge fields (single precision)
dk	\(\left(1+\gamma_0\right)e^{-\mu}\) and \(\left(1-\gamma_0\right)e^\mu\)
dk_f	\(\left(1+\gamma_0\right)e^{-\mu}\) and \(\left(1-\gamma_0\right)e^\mu\) float
iu,id	Up and down halo indices
gamin	Gamma matrix indices
gamval	Double precision gamma matrices rescaled by kappa
gamval_f	Single precision gamma matrices rescaled by kappa
Returns
Zero on success, integer error code otherwise

Definition at line 19 of file su2hmc.c.

24 {
25 /*
26 * Initialises the system
27 *
28 * Calls:
29 * ======
30 * Addrc, Check_addr, ran2, DHalo_swap_dir, Par_sread, Par_ranset, Reunitarise
31 *
32 * Globals:
33 * =======
34 * Complex gamval: Gamma Matrices
35 * Complex_f gamval_f: Float Gamma matrices:
36 *
37 * Parameters:
38 * ==========
 39 * int istart: Zero for cold, positive for hot, negative for none
 40 * int ibound: Periodic boundary conditions
 41 * int iread: Read configuration from file
 42 * float beta: Inverse gauge coupling
43 * float fmu: Chemical potential
44 * float akappa: Hopping parameter
45 * Complex_f ajq: Diquark source
46 * Complex *u[0]: First colour field
47 * Complex *u[1]: Second colour field
48 * Complex *ut[0]: First colour trial field
49 * Complex *ut[1]: Second colour trial field
50 * Complex_f *ut_f[0]: First float trial field
51 * Complex_f *ut_f[1]: Second float trial field
52 * double *dk[0] $exp(-\mu)$
53 * double *dk[1]: $exp(\mu)$
54 * float *dk_f[0]: $exp(-\mu)$ float
55 * float *dk_f[1]: $exp(\mu)$ float
56 * unsigned int *iu: Up halo indices
57 * unsigned int *id: Down halo indices
58 *
59 * Returns:
60 * =======
61 * Zero on success, integer error code otherwise
62 */
63 const char *funcname = "Init";
64
65#ifdef _OPENMP
66 omp_set_num_threads(nthreads);
67#ifdef __INTEL_MKL__
68 mkl_set_num_threads(nthreads);
69#endif
70#endif
71 //First things first, calculate a few constants for coordinates
72 Addrc(iu, id);
73 //And confirm they're legit
76#ifdef _DEBUG
77 printf("Checked addresses\n");
78#endif
79 double chem1=exp(fmu); double chem2 = 1/chem1;
80 //CUDA this. Only limit will be the bus speed
81#pragma omp parallel for simd //aligned(dk[0],dk[1]:AVX)
82 for(int i = 0; i<kvol; i++){
83 dk[1][i]=akappa*chem1;
84 dk[0][i]=akappa*chem2;
85 }
86 //Anti periodic Boundary Conditions. Flip the terms at the edge of the time
87 //direction
88 if(ibound == -1 && pcoord[3+ndim*rank]==npt-1){
89#ifdef _DEBUG
90 printf("Implementing antiperiodic boundary conditions on rank %i\n", rank);
91#endif
92#pragma omp parallel for simd //aligned(dk[0],dk[1]:AVX)
93 for(int k= kvol-1; k>=kvol-kvol3; k--){
94 //int k = kvol - kvol3 + i;
95 dk[1][k]*=-1;
96 dk[0][k]*=-1;
97 }
98 }
99 //These are constant so swap the halos when initialising and be done with it
100 //May need to add a synchronisation statement here first
101#if(npt>1)
102 DHalo_swap_dir(dk[1], 1, 3, UP);
103 DHalo_swap_dir(dk[0], 1, 3, UP);
104#endif
105 //Float versions
106#ifdef __NVCC__
107 cuReal_convert(dk_f[1],dk[1],kvol+halo,true,dimBlock,dimGrid);
108 cuReal_convert(dk_f[0],dk[0],kvol+halo,true,dimBlock,dimGrid);
109#else
110#pragma omp parallel for simd //aligned(dk[0],dk[1],dk_f[0],dk_f[1]:AVX)
111 for(int i=0;i<kvol+halo;i++){
112 dk_f[1][i]=(float)dk[1][i];
113 dk_f[0][i]=(float)dk[0][i];
114 }
115#endif
116 //What row of each dirac/sigma matrix contains the entry acting on element i of the spinor
117 int __attribute__((aligned(AVX))) gamin_t[4][4] = {{3,2,1,0},{3,2,1,0},{2,3,0,1},{2,3,0,1}};
118 //Gamma Matrices in Chiral Representation
119 //See Appendix 8.1.2 of Montvay and Munster
120 //_t is for temp. We copy these into the real gamvals later
121#ifdef __NVCC__
122 cudaMemcpy(gamin,gamin_t,4*4*sizeof(int),cudaMemcpyDefault);
123#else
124 memcpy(gamin,gamin_t,4*4*sizeof(int));
125#endif
126 //Each row of the dirac matrix contains only one non-zero entry, so that's all we encode here
127 Complex __attribute__((aligned(AVX))) gamval_t[5][4] = {{-I,-I,I,I},{-1,1,1,-1},{-I,I,I,-I},{1,1,1,1},{1,1,-1,-1}};
128 //Each gamma matrix is rescaled by akappa by flattening the gamval array
129#if defined USE_BLAS
130 //Don't cuBLAS this. It is small and won't saturate the GPU. Let the CPU handle
131 //it and just copy it later
132 cblas_zdscal(5*4, akappa, gamval_t, 1);
133#else
 134#pragma omp parallel for simd collapse(2) aligned(gamval_t:AVX)
135 for(int i=0;i<5;i++)
136 for(int j=0;j<4;j++)
137 gamval_t[i][j]*=akappa;
138#endif
139
140
141#ifdef __NVCC__
142 cudaMemcpy(gamval,gamval_t,5*4*sizeof(Complex),cudaMemcpyDefault);
143 cuComplex_convert(gamval_f,gamval,20,true,dimBlockOne,dimGridOne);
144#else
145 memcpy(gamval,gamval_t,5*4*sizeof(Complex));
146 for(int i=0;i<5*4;i++)
147 gamval_f[i]=(Complex_f)gamval[i];
148#endif
149
150#ifdef __CLOVER__
151 int __attribute__((aligned(AVX))) sigin_t[7][4] = {{0,1,2,3},{0,1,2,3},{1,0,3,2},{1,0,3,2},{1,0,3,2},{1,0,3,2},{0,1,2,3}};
152 //The sigma matrices are the commutators of the gamma matrices. These are antisymmetric when you swap the indices
153 //Zero is the identical index case.
154 //1 is sigma_1,2
155 //2 is sigma_1,3
156 //3 is sigma_1,4
157 //4 is sigma_2,3
158 //5 is sigma_2,4
159 //6 is sigma_3,4
 160 //TODO: Do we multiply by 1/2 and kappa here or not? I say yes to be consistent with gamma. It means the factor of 2
 161 //in the sigma matrices gets dropped here
162 Complex __attribute__((aligned(AVX))) sigval_t[7][4] = {{0,0,0,0},{-1,1,-1,1},{-I,I,-I,I},{1,1,-1,-1},{-1,-1,-1,-1},{-I,I,I,-I},{1,-1,-1,1}};
163#if defined USE_BLAS
 164 cblas_zdscal(6*4, akappa, sigval_t[1], 1); //skip the zero (identical index) row
165#else
 166#pragma omp parallel for simd collapse(2) aligned(sigval_t:AVX)
167 for(int i=1;i<7;i++)
168 for(int j=0;j<4;j++)
169 sigval_t[i][j]*=akappa;
170#endif
171
172#ifdef __NVCC__
173 cudaMemcpy(sigval,sigval_t,7*4*sizeof(Complex),cudaMemcpyDefault);
174 cuComplex_convert(sigval_f,sigval,28,true,dimBlockOne,dimGridOne);
175#else
176 memcpy(sigval,sigval_t,7*4*sizeof(Complex));
177 for(int i=0;i<7*4;i++)
178 sigval_f[i]=(Complex_f)sigval[i];
179#endif
180#endif
181
182 if(iread){
183 if(!rank) printf("Calling Par_sread() for configuration: %i\n", iread);
184 Par_sread(iread, beta, fmu, akappa, ajq,u[0],u[1],ut[0],ut[1]);
185 Par_ranset(&seed,iread);
186 }
187 else{
188 Par_ranset(&seed,iread);
189 if(istart==0){
190 //Initialise a cold start to zero
191 //memset is safe to use here because zero is zero
192#pragma omp parallel for simd //aligned(ut[0]:AVX)
193 //Leave it to the GPU?
194 for(int i=0; i<kvol*ndim;i++){
195 ut[0][i]=1; ut[1][i]=0;
196 }
197 }
198 else if(istart>0){
199 //Ideally, we can use gsl_ranlux as the PRNG
200#ifdef __RANLUX__
201 for(int i=0; i<kvol*ndim;i++){
202 ut[0][i]=2*(gsl_rng_uniform(ranlux_instd)-0.5+I*(gsl_rng_uniform(ranlux_instd)-0.5));
203 ut[1][i]=2*(gsl_rng_uniform(ranlux_instd)-0.5+I*(gsl_rng_uniform(ranlux_instd)-0.5));
204 }
205 //If not, the Intel Vectorise Mersenne Twister
206#elif (defined __INTEL_MKL__&&!defined USE_RAN2)
207 //Good news, casting works for using a double to create random complex numbers
208 vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, stream, 2*ndim*kvol, ut[0], -1, 1);
209 vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, stream, 2*ndim*kvol, ut[1], -1, 1);
210 //Last resort, Numerical Recipes' Ran2
211#else
212 for(int i=0; i<kvol*ndim;i++){
213 ut[0][i]=2*(ran2(&seed)-0.5+I*(ran2(&seed)-0.5));
214 ut[1][i]=2*(ran2(&seed)-0.5+I*(ran2(&seed)-0.5));
215 }
216#endif
217 }
218 else
219 fprintf(stderr,"Warning %i in %s: Gauge fields are not initialised.\n", NOINIT, funcname);
220
221#ifdef __NVCC__
222 int device=-1;
223 cudaGetDevice(&device);
224 cudaMemPrefetchAsync(ut[0], ndim*kvol*sizeof(Complex),device,streams[0]);
225 cudaMemPrefetchAsync(ut[1], ndim*kvol*sizeof(Complex),device,streams[1]);
226#endif
227 //Send trials to accelerator for reunitarisation
228 Reunitarise(ut);
229 //Get trials back
230#ifdef __NVCC__
231 cudaMemcpyAsync(u[0],ut[0],ndim*kvol*sizeof(Complex),cudaMemcpyDefault,streams[0]);
232 cudaMemPrefetchAsync(u[0], ndim*kvol*sizeof(Complex),device,streams[0]);
233 cudaMemcpyAsync(u[1],ut[1],ndim*kvol*sizeof(Complex),cudaMemcpyDefault,streams[1]);
234 cudaMemPrefetchAsync(u[1], ndim*kvol*sizeof(Complex),device,streams[1]);
235#else
236 memcpy(u[0], ut[0], ndim*kvol*sizeof(Complex));
237 memcpy(u[1], ut[1], ndim*kvol*sizeof(Complex));
238#endif
239 }
240#ifdef _DEBUG
241 printf("Initialisation Complete\n");
242#endif
243 return 0;
244}
int Check_addr(unsigned int *table, int lns, int lnt, int imin, int imax)
Definition coord.c:334
int Addrc(unsigned int *iu, unsigned int *id)
Loads the addresses required during the update.
Definition coord.c:16
#define UP
Flag for send up.
Definition par_mpi.h:37
int Par_sread(const int iread, const float beta, const float fmu, const float akappa, const Complex_f ajq, Complex *u11, Complex *u12, Complex *u11t, Complex *u12t)
Reads and assigns the gauges from file.
Definition par_mpi.c:127
int DHalo_swap_dir(double *d, int ncpt, int idir, int layer)
Swaps the halos along the axis given by idir in the direction given by layer.
int * pcoord
The processor grid.
Definition par_mpi.c:19
int Par_ranset(long *seed, int iread)
Uses the rank to get a new seed. Copying from the FORTRAN description here c create new seeds in rang...
Definition random.c:135
double ran2(long *idum)
Generates uniformly distributed random double between zero and one as described in numerical recipes....
Definition random.c:440
long seed
RAN2 seed.
Definition random.c:27
#define ksizet
Sublattice t extent.
Definition sizes.h:149
#define nthreads
Number of threads for OpenMP, which can be overwritten at runtime.
Definition sizes.h:135
#define ksize
Sublattice spatial extent for a cubic lattice.
Definition sizes.h:146
#define kvol3
Sublattice spatial volume.
Definition sizes.h:156
#define halo
Total Halo size.
Definition sizes.h:222
#define Complex_f
Single precision complex number.
Definition sizes.h:56
#define npt
Processor grid t extent.
Definition sizes.h:124
int Reunitarise(Complex *ut[2])
Reunitarises u11t and u12t as in conj(u11t[i])*u11t[i]+conj(u12t[i])*u12t[i]=1.
Definition matrices.c:1012

References Addrc(), AVX, Check_addr(), Complex, Complex_f, DHalo_swap_dir(), halo, ksize, ksizet, kvol, kvol3, ndim, npt, nthreads, Par_ranset(), Par_sread(), pcoord, ran2(), rank, Reunitarise(), seed, and UP.
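A minimal sketch of how the routine might be called at the start of a run; allocation of the field arrays (ndim*(kvol+halo) links per colour is assumed here) must already have happened:

	// Sketch: cold start (istart=0), antiperiodic time boundary (ibound=-1),
	// no configuration read from file (iread=0).
	Init(0, -1, 0, beta, fmu, akappa, ajq,
	     u, ut, ut_f, gamval, gamval_f, gamin, dk, dk_f, iu, id);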


◆ UpDownPart()

int UpDownPart ( const int na,
Complex * X0,
Complex * R1 )
inline
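
Copies the up/down-partitioned components of R1 into flavour na of X0: for each site, only the first ndirac of the ngorkov Gor'kov indices are kept, matching the layout used by the Congradq() solver vectors.

Parameters
na	Flavour index
X0	Up/down partitioned output
R1	Gor'kov-indexed source field
Returns
Zero on success, integer error code otherwise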

Definition at line 416 of file su2hmc.c.

416 {
417#ifdef __NVCC__
418 cuUpDownPart(na,X0,R1,dimBlock,dimGrid);
419 cudaDeviceSynchronise();
420#else
421 //The only reason this was removed from the original function is for diagnostics
422#pragma omp parallel for simd collapse(2) aligned(X0,R1:AVX)
423 for(int i=0; i<kvol; i++)
424 for(int idirac = 0; idirac < ndirac; idirac++){
425 X0[((na*kvol+i)*ndirac+idirac)*nc]=R1[(i*ngorkov+idirac)*nc];
426 X0[((na*kvol+i)*ndirac+idirac)*nc+1]=R1[(i*ngorkov+idirac)*nc+1];
427 }
428#endif
429 return 0;
430}

◆ Z_gather()

int Z_gather ( Complex * x,
Complex * y,
int n,
unsigned int * table,
unsigned int mu )
inline

Extracts all the double precision gauge links in the \(\mu\) direction only.

Parameters
x	The output
y	The gauge field for a particular colour
n	Number of sites in the gauge field. This is typically kvol
table	Table containing information on nearest neighbours. Usually id or iu
mu	Direction we're interested in extracting
Returns
Zero on success, integer error code otherwise

Definition at line 371 of file su2hmc.c.

372{
373 const char *funcname = "Z_gather";
374 //FORTRAN had a second parameter m giving the size of y (kvol+halo) normally
375 //Pointers mean that's not an issue for us so I'm leaving it out
376#ifdef __NVCC__
377 cuZ_gather(x,y,n,table,mu,dimBlock,dimGrid);
378#else
379#pragma omp parallel for simd aligned (x,y,table:AVX)
380 for(int i=0; i<n; i++)
381 x[i]=y[table[i*ndim+mu]*ndim+mu];
382#endif
383 return 0;
384}

References Complex, and ndim.
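
Usage mirrors the sketch under C_gather() above, with double precision (Complex) buffers in place of Complex_f.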