su2hmc
Loading...
Searching...
No Matches
bosonic.c
Go to the documentation of this file.
1
6#include <par_mpi.h>
7#include <su2hmc.h>
8
19
20int Average_Plaquette(double *hg, double *avplaqs, double *avplaqt, Complex_f *ut[2], unsigned int *iu, float beta){
21 /*
22 * Calculates the gauge action using new (how new?) lookup table
23 * Follows a routine called qedplaq in some QED3 code
24 *
25 * Parameters:
26 * =========
27 * hg Gauge component of Hamilton
28 * avplaqs Average spacial Plaquette
29 * avplaqt Average Temporal Plaquette
30 * u11t,u12t The trial fields
31 * iu Upper halo indices
32 * beta Inverse gauge coupling
33 *
34 * Calls:
35 * ======
36 * Par_dsum()
37 *
38 * Return:
39 * ======
40 * Zero on success, integer error code otherwise
41 */
42 const char *funcname = "Average_Plaquette";
43 /*There was a halo exchange here but moved it outside
44 The FORTRAN code used several consecutive loops to get the plaquette
45 Instead we'll just make the arrays variables and do everything in one loop
46 Should work since in the FORTRAN Sigma11[i] only depends on i components for example
47 Since the ν loop doesn't get called for μ=0 we'll start at μ=1
48 */
49#ifdef __NVCC__
50 __managed__ double hgs = 0; __managed__ double hgt = 0;
51 cuAverage_Plaquette(&hgs, &hgt, ut[0], ut[1], iu,dimGrid,dimBlock);
52#else
53 double hgs = 0; double hgt = 0;
54 for(int mu=1;mu<ndim;mu++)
55 for(int nu=0;nu<mu;nu++)
56 //Don't merge into a single loop. Makes vectorisation easier?
57 //Or merge into a single loop and dispense with the a arrays?
58#pragma omp parallel for simd reduction(+:hgs,hgt)
59 for(int i=0;i<kvol;i++){
60 Complex_f Sigma[2];
61 SU2plaq(ut,Sigma,iu,i,mu,nu);
62 switch(mu){
63 //Time component
64 case(ndim-1): hgt -= creal(Sigma[0]);
65 break;
66 //Space component
67 default: hgs -= creal(Sigma[0]);
68 break;
69 }
70 }
71#endif
72#if(nproc>1)
73 Par_dsum(&hgs); Par_dsum(&hgt);
74#endif
75 *avplaqs=-hgs/(3.0*gvol); *avplaqt=-hgt/(gvol*3.0);
76 *hg=(hgs+hgt)*beta;
77#ifdef _DEBUG
78 if(!rank)
79 printf("hgs=%e hgt=%e hg=%e\n", hgs, hgt, *hg);
80#endif
81 return 0;
82}
83#ifndef __NVCC__
84#pragma omp declare simd
85inline int SU2plaq(Complex_f *ut[2], Complex_f Sigma[2], unsigned int *iu, int i, int mu, int nu){
102 const char *funcname = "SU2plaq";
103 int uidm = iu[mu+ndim*i];
104 /***
105 * Let's take a quick moment to compare this to the analysis code.
106 * The analysis code stores the gauge field as a 4 component real valued vector, whereas the produciton code
107 * used two complex numbers.
108 *
109 * Analysis code: u=(Re(u11),Im(u12),Re(u12),Im(u11))
110 * Production code: u11=u[0]+I*u[3] u12=u[2]+I*u[1]
111 *
112 * This applies to the Sigmas and a's below too
113 */
114
115 Sigma[0]=ut[0][i*ndim+mu]*ut[0][uidm*ndim+nu]-ut[1][i*ndim+mu]*conj(ut[1][uidm*ndim+nu]);
116 Sigma[1]=ut[0][i*ndim+mu]*ut[1][uidm*ndim+nu]+ut[1][i*ndim+mu]*conj(ut[0][uidm*ndim+nu]);
117
118 int uidn = iu[nu+ndim*i];
119 Complex_f a11=Sigma[0]*conj(ut[0][uidn*ndim+mu])+Sigma[1]*conj(ut[1][uidn*ndim+mu]);
120 Complex_f a12=-Sigma[0]*ut[1][uidn*ndim+mu]+Sigma[1]*ut[0][uidn*ndim+mu];
121
122 Sigma[0]=a11*conj(ut[0][i*ndim+nu])+a12*conj(ut[1][i*ndim+nu]);
123 Sigma[1]=-a11*ut[1][i*ndim+nu]+a12*ut[0][i*ndim+mu];
124 return 0;
125}
126#endif
127double Polyakov(Complex_f *ut[2]){
139 const char *funcname = "Polyakov";
140 double poly = 0;
141 Complex_f *Sigma[2];
142#ifdef __NVCC__
143 cuPolyakov(Sigma,ut,dimGrid,dimBlock);
144#else
145 Sigma[0] = (Complex_f *)aligned_alloc(AVX,kvol3*sizeof(Complex_f));
146 Sigma[1] = (Complex_f *)aligned_alloc(AVX,kvol3*sizeof(Complex_f));
147
148 //Extract the time component from each site and save in corresponding Sigma
149#ifdef USE_BLAS
150 cblas_ccopy(kvol3, ut[0]+3, ndim, Sigma[0], 1);
151 cblas_ccopy(kvol3, ut[1]+3, ndim, Sigma[1], 1);
152#else
153 for(int i=0; i<kvol3; i++){
154 Sigma[0][i]=ut[0][i*ndim+3];
155 Sigma[1][i]=ut[1][i*ndim+3];
156 }
157#endif
158 /* Some Fortran commentary
159 Changed this routine.
160 ut[0] and ut[1] now defined as normal ie (kvol+halo,4).
161 Copy of Sigma[0] and Sigma[1] is changed so that it copies
162 in blocks of ksizet.
163 Variable indexu also used to select correct element of ut[0] and ut[1]
164 in loop 10 below.
165
166 Change the order of multiplication so that it can
167 be done in parallel. Start at t=1 and go up to t=T:
168 previously started at t+T and looped back to 1, 2, ... T-1
169 Buffers
170 There is a dependency. Can only parallelise the inner loop
171 */
172#pragma unroll
173 for(int it=1;it<ksizet;it++)
174#pragma omp parallel for simd
175 for(int i=0;i<kvol3;i++){
176 //Seems a bit more efficient to increment indexu instead of reassigning
177 //it every single loop
178 int indexu=it*kvol3+i;
179 Complex_f a11=Sigma[0][i]*ut[0][indexu*ndim+3]-Sigma[1][i]*conj(ut[1][indexu*ndim+3]);
180 //Instead of having to store a second buffer just assign it directly
181 Sigma[1][i]=Sigma[0][i]*ut[1][indexu*ndim+3]+Sigma[1][i]*conj(ut[0][indexu*ndim+3]);
182 Sigma[0][i]=a11;
183 }
184 free(Sigma[1]);
185#endif
186
187 //Multiply this partial loop with the contributions of the other cores in the
188 //Time-like dimension
189 //
190 //Par_tmul does nothing if there is only a single processor in the time direction. So we only compile
191 //its call if it is required
192#if (npt>1)
193#ifdef __NVCC_
194#error Par_tmul is not yet implimented in CUDA as Sigma[1] is device only memory
195#endif
196#ifdef _DEBUG
197 printf("Multiplying with MPI\n");
198#endif
199 Par_tmul(Sigma[0], Sigma[1]);
200 //end of #if(npt>1)
201#endif
202 /*Now all cores have the value for the complete Polyakov line at all spacial sites
203 We need to globally sum over spacial processors but not across time as these
204 are duplicates. So we zero the value for all but t=0
205 This is (according to the FORTRAN code) a bit of a hack
206 I will expand on this hack and completely avoid any work
207 for this case rather than calculating everything just to set it to zero
208 */
209 if(!pcoord[3+rank*ndim])
210#pragma omp parallel for simd reduction(+:poly)
211 for(int i=0;i<kvol3;i++)
212 poly+=creal(Sigma[0][i]);
213#ifdef __NVCC__
214 cudaFree(Sigma[0]);
215#else
216 free(Sigma[0]);
217#endif
218
219#if(nproc>1)
220 Par_dsum(&poly);
221#endif
222 poly/=gvol3;
223 return poly;
224}
225#ifdef _CLOVER
226int Leaf(Complex_f *u11t, Complex_f *u12t, Complex_f *Sigma11, Complex_f *Sigma12,
227 unsigned int *iu, unsigned int *id, int i, int mu, int nu, short leaf){
246 char *funcname="Leaf";
247 Complex_f a11,a12;
248 unsigned int didm,didn,uidn,uidm;
249 switch(leaf){
250 case(0):
251 //Both positive is just a standard plaquette
252 SU2plaq(u11t,u12t,Sigma11,Sigma12,iu,i,mu,nu);
253 return 0;
254 case(1):
255 //μ<0 and ν>=0
256 didm = id[mu+ndim*i]; uidn = iu[nu+ndim*i];
257 //U_ν(x)*U_-μ(x+ν)=U_ν(x)*U^† _μ(x-μ+ν)
258 //Awkward index here unfortunately. Seems safer than trying to find -μ
259 int uin_didm=id[mu+ndim*uidn];
260 *Sigma11=u11t[i*ndim+nu]*conj(u11t[uin_didm*ndim+mu])+u12t[i*ndim+nu]*conj(u12t[uin_didm*ndim+mu]);
261 *Sigma12=-u11t[i*ndim+nu]*conj(u12t[uin_didm*ndim+mu])+u12t[i*ndim+nu]*u11t[uin_didm*ndim+mu];
262
263 //(U_ν(x)*U_-μ(x+ν))*U_-ν(x-μ+ν)=(U_ν(x)*U^† _μ(x-μ+ν))*U^†_ν(x-μ)
264 a11=*Sigma11*conj(u11t[didm*ndim+nu])+*Sigma12*conj(u12t[didm*ndim+nu]);
265 a12=-*Sigma11*u12t[didm*ndim+nu]+*Sigma12*u11t[didm*ndim+nu];
266
267 //((U_ν(x)*U_-μ(x+ν))*U_-ν(x-μ+ν))*U_μ(x-μ)=((U_ν(x)*U^† _μ(x-μ_ν))*U^† _ν(x-μ))*U_μ(x-μ)
268 *Sigma11=a11*u11t[didm*ndim+mu]-a12*conj(u12t[didm*ndim+mu]);
269 *Sigma12=a11*u12t[didm*ndim+mu]+a12*conj(u11t[didm*ndim+mu]);
270 return 0;
271 case(2):
272 //μ>=0 and ν<0
273 //TODO: Figure out down site index
274 uidm = iu[mu+ndim*i]; didn = id[nu+ndim*i];
275 //U_-ν(x)*U_μ(x-ν)=U^†_ν(x-ν)*U_μ(x-ν)
276 *Sigma11=conj(u11t[didn*ndim+nu])*u11t[didn*ndim+mu]+conj(u12t[didn*ndim+nu])*u12t[didn*ndim+mu];
277 *Sigma12=conj(u11t[didn*ndim+nu])*u12t[didn*ndim+mu]-u12t[didn*ndim+mu]*conj(u11t[didn*ndim+nu]);
278
279 //(U_-ν(x)*U_μ(x-ν))*U_ν(x+μ-ν)=(U^†_ν(x-ν)*U_μ(x-ν))*U_ν(x+μ-ν)
280 //Another awkward index
281 int uim_didn=id[nu+ndim*uidm];
282 a11=*Sigma11*u11t[uim_didn*ndim+nu]-*Sigma12*conj(u12t[uim_didn*ndim+nu]);
283 a12=*Sigma11*u12t[uim_didn*ndim+nu]+*Sigma12*conj(u11t[uim_didn*ndim+nu]);
284
285 //((U_-ν(x)*U_μ(x-ν))*U_ν(x+μ-ν))*U_-μ(x+μ)=(U^†_ν(x-ν)*U_μ(x-ν))*U_ν(x+μ-ν)
286 *Sigma11=a11*conj(u11t[i*ndim+mu])+a12*conj(u12t[i*ndim+mu]);
287 *Sigma12=-a11*u12t[i*ndim+mu]+a12*u11t[i*ndim+mu];
288 return 0;
289 case(3):
290 //μ<0 and ν<0
291 didm = id[mu+ndim*i]; didn = id[nu+ndim*i];
292 //U_-μ(x)*U_-ν(x-μ)=U^†_μ(x-μ)*U^†_ν(x-μ-ν)
293 int dim_didn=id[nu+ndim*didm];
294 *Sigma11=conj(u11t[didm*ndim+mu])*conj(u11t[dim_didn*ndim+nu])+conj(u12t[didm*ndim+mu])*conj(u12t[dim_didn*ndim+nu]);
295
296 Sigma11=a11*conj(u11t[i*ndim+nu])+a12*conj(u12t[i*ndim+nu]);
297 // Sigma12[i]=-a11[i]*u12t[i*ndim+nu]+a12*u11t[i*ndim+mu];
298 // Not needed in final result as it traces out
299 return creal(Sigma11);
300 }
301#endif
double Polyakov(Complex_f *ut[2])
Calculate the Polyakov loop (no prizes for guessing that one...)
Definition bosonic.c:127
int SU2plaq(Complex_f *ut[2], Complex_f Sigma[2], unsigned int *iu, int i, int mu, int nu)
Calculates the plaquette at site i in the direction.
Definition bosonic.c:85
int Average_Plaquette(double *hg, double *avplaqs, double *avplaqt, Complex_f *ut[2], unsigned int *iu, float beta)
Calculates the gauge action using new (how new?) lookup table.
Definition bosonic.c:20
MPI headers.
int rank
The MPI rank.
Definition par_mpi.c:22
int Par_dsum(double *dval)
Performs a reduction on a double dval to get a sum which is then distributed to all ranks.
int * pcoord
The processor grid.
Definition par_mpi.c:19
#define AVX
Alignment of arrays. 64 for AVX-512, 32 for AVX/AVX2. 16 for SSE. Since AVX is standard on modern x86...
Definition sizes.h:268
#define gvol3
Lattice spatial volume.
Definition sizes.h:94
#define ksizet
Sublattice t extent.
Definition sizes.h:149
#define kvol
Sublattice volume.
Definition sizes.h:154
#define gvol
Lattice volume.
Definition sizes.h:92
#define kvol3
Sublattice spatial volume.
Definition sizes.h:156
#define Complex_f
Single precision complex number.
Definition sizes.h:56
#define ndim
Dimensions.
Definition sizes.h:179
Function declarations for most of the routines.