su2hmc
Loading...
Searching...
No Matches
su2hmc.h
Go to the documentation of this file.
1
5#ifndef SU2HEAD
6#define SU2HEAD
7//ARM Based machines. BLAS routines should work with other libraries, so we can set a compiler
8//flag to sort them out. But the PRNG routines etc. are MKL exclusive
9#include <errorcodes.h>
10#include <integrate.h>
11#ifdef __INTEL_MKL__
12#define USE_BLAS
13#include <mkl.h>
14#elif defined GSL_BLAS
15#define USE_BLAS
16#include <gsl/gsl_cblas.h>
17#elif defined AMD_BLAS
18#define USE_BLAS
19#include <cblas.h>
20#endif
21#include <sizes.h>
22#ifdef __cplusplus
23#include <cstdio>
24#include <cstdlib>
25#include <ctime>
26#else
27#include <stdio.h>
28#include <stdlib.h>
29#include <time.h>
30#endif
31
32//Definitions:
33//###########
34#ifdef _DEBUGCG
35#define _DEBUG
36#endif
37//Function Declarations:
38//#####################
39#if (defined __cplusplus)
40extern "C"
41{
42#endif
67 int Force(double *dSdpi, int iflag, double res1, Complex *X0, Complex *X1, Complex *Phi,Complex *ut[2],\
68 Complex_f *ut_f[2],unsigned int *iu,unsigned int *id,Complex *gamval,Complex_f *gamval_f,\
69 int *gamin,double *dk[2], float *dk_f[2],Complex_f jqq, float akappa,float beta,double *ancg);
80 int Gauge_force(double *dSdpi, Complex_f *ut[2],unsigned int *iu,unsigned int *id, float beta);
103 int Init(int istart, int ibound, int iread, float beta, float fmu, float akappa, Complex_f ajq,\
104 Complex *u[2], Complex *ut[2], Complex_f *ut_f[2], Complex *gamval, Complex_f *gamval_f,int *gamin,
105#ifdef __CLOVER__
106 Complex *sigval, Complex_f *sigval_f, int *sigin,
107#endif
108 double *dk[2], float *dk_f[2], unsigned int *iu, unsigned int *id);
132 int Hamilton(double *h,double *s,double res2,double *pp,Complex *X0,Complex *X1,Complex *Phi,\
133 Complex_f *ut[2],unsigned int *iu,unsigned int *id, Complex_f *gamval_f,int *gamin,\
134 float *dk[2],Complex_f jqq,float akappa,float beta,double *ancgh,int traj);
157 int Congradq(int na,double res,Complex *X1,Complex *r,Complex_f *ut[2],unsigned int *iu,unsigned int *id,\
158 Complex_f *gamval_f,int *gamin,float *dk[2],Complex_f jqq,float akappa,int *itercg);
182 int Congradp(int na,double res,Complex *Phi,Complex *xi,Complex_f *ut[2],unsigned int *iu,unsigned int *id,\
183 Complex_f *gamval,int *gamin,float *dk[2],Complex_f jqq,float akappa,int *itercg);
218 int Measure(double *pbp, double *endenf, double *denf, Complex *qq, Complex *qbqb, double res, int *itercg,\
219 Complex *ut[2], Complex_f *ut_f[2], unsigned int *iu, unsigned int *id,\
220 Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk[2],\
221 float *dk_f[2], Complex_f jqq, float akappa, Complex *Phi, Complex *R1);
237 int Average_Plaquette(double *hg, double *avplaqs, double *avplaqt, Complex_f *ut[2],unsigned int *iu, float beta);
252#ifndef __NVCC__
253 int SU2plaq(Complex_f *ut[2], Complex_f Sigma[2], unsigned int *iu, int i, int mu, int nu);
254#endif
264 double Polyakov(Complex_f *ut[2]);
265 //Inline functions
277 int C_gather(Complex_f *x, Complex_f *y, int n, unsigned int *table, unsigned int mu);
289 int Z_gather(Complex *x, Complex *y, int n, unsigned int *table, unsigned int mu);
299 int Fill_Small_Phi(int na, Complex *smallPhi, Complex *Phi);
300 /*
301 * @brief Up/Down partitioning of the pseudofermion field
302 *
303 * @param na: Flavour index
304 * @param X0: Partitioned field
305 * @param R1: Full pseudofermion field
306 *
307 * @return Zero on success, integer error code otherwise
308 */
309 int UpDownPart(const int na, Complex *X0, Complex *R1);
321 int Reunitarise(Complex *ut[2]);
322 //CUDA Declarations:
323 //#################
324#ifdef __NVCC__
325 //Not a function. An array of concurrent GPU streams to keep it busy
326 extern cudaStream_t streams[ndirac*ndim*nadj];
327 //Calling Functions:
328 //=================
329 void cuAverage_Plaquette(double *hgs, double *hgt, Complex_f *u11t, Complex_f *u12t, unsigned int *iu,dim3 dimGrid, dim3 dimBlock);
330 void cuPolyakov(Complex_f *Sigma[2], Complex_f *ut[2],dim3 dimGrid, dim3 dimBlock);
331 void cuGauge_force(Complex_f *ut[2],double *dSdpi,float beta,unsigned int *iu,unsigned int *id,dim3 dimGrid, dim3 dimBlock);
332 void cuPlus_staple(int mu, int nu, unsigned int *iu, Complex_f *Sigma11, Complex_f *Sigma12, Complex_f *u11t, Complex_f *u12t,\
333 dim3 dimGrid, dim3 dimBlock);
334 void cuMinus_staple(int mu, int nu, unsigned int *iu, unsigned int *id, Complex_f *Sigma11, Complex_f *Sigma12,\
335 Complex_f *u11sh, Complex_f *u12sh,Complex_f *u11t, Complex_f*u12t, dim3 dimGrid, dim3 dimBlock);
336 void cuForce(double *dSdpi, Complex_f *ut[2], Complex_f *X1, Complex_f *X2, \
337 Complex_f *gamval,float *dk[2],unsigned int *iu,int *gamin,\
338 float akappa, dim3 dimGrid, dim3 dimBlock);
339 //cuInit was taken already by CUDA (unsurprisingly)
340 void Init_CUDA(Complex *u11t, Complex *u12t,Complex *gamval, Complex_f *gamval_f, int *gamin, double*dk4m,\
341 double *dk4p, unsigned int *iu, unsigned int *id);
342 void cuFill_Small_Phi(int na, Complex *smallPhi, Complex *Phi,dim3 dimBlock, dim3 dimGrid);
343 void cuC_gather(Complex_f *x, Complex_f *y, int n, unsigned int *table, unsigned int mu,dim3 dimBlock, dim3 dimGrid);
344 void cuZ_gather(Complex *x, Complex *y, int n, unsigned int *table, unsigned int mu,dim3 dimBlock, dim3 dimGrid);
345 void cuComplex_convert(Complex_f *a, Complex *b, int len, bool ftod, dim3 dimBlock, dim3 dimGrid);
346 void cuReal_convert(float *a, double *b, int len, bool ftod, dim3 dimBlock, dim3 dimGrid);
347 void cuUpDownPart(int na, Complex *X0, Complex *R1,dim3 dimBlock, dim3 dimGrid);
348 void cuReunitarise(Complex *u11t, Complex *u12t,dim3 dimGrid, dim3 dimBlock);
349 //And a little something to set the CUDA grid and block sizes
350 void blockInit(int x, int y, int z, int t, dim3 *dimBlock, dim3 *dimGrid);
351#endif
352#if (defined __cplusplus)
353}
354#endif
355//CUDA Kernels:
356//============
357#ifdef __CUDACC__
358//__global__ void cuForce(double *dSdpi, Complex *u11t, Complex *u12t, Complex *X1, Complex *X2, Complex *gamval,\
359// double *dk4m, double *dk4p, unsigned int *iu, int *gamin,float akappa);
360__global__ void Plus_staple(int mu, int nu,unsigned int *iu, Complex_f *Sigma11, Complex_f *Sigma12,\
361 Complex_f *u11t, Complex_f *u12t);
362__global__ void Minus_staple(int mu, int nu,unsigned int *iu,unsigned int *id, Complex_f *Sigma11, Complex_f *Sigma12,\
363 Complex_f *u11sh, Complex_f *u12sh, Complex_f *u11t, Complex_f *u12t);
364__global__ void cuGaugeForce(int mu, Complex_f *Sigma11, Complex_f *Sigma12,double* dSdpi,Complex_f *u11t, Complex_f *u12t,\
365 float beta);
366__global__ void cuAverage_Plaquette(float *hgs_d, float *hgt_d, Complex_f *u11t, Complex_f *u12t, unsigned int *iu);
367__global__ void cuPolyakov(Complex_f *Sigma11, Complex_f * Sigma12, Complex_f *u11t, Complex_f *u12t);
368__device__ void cuSU2plaq(Complex_f *u11t, Complex_f *u12t, Complex_f *Sigma11, Complex_f *Sigma12, unsigned int *iu, int i, int mu, int nu);
369//Force Kernels. We've taken each nadj index and the spatial/temporal components and created a separate kernel for each
370//CPU code just has these as a huge blob that the vectoriser can't handle. May be worth splitting it there too?
371//It might not be a bad idea to make a seperate header for all these kernels...
372__global__ void cuForce_s(double *dSdpi, Complex_f *u11t, Complex_f *u12t, Complex_f *X1, Complex_f *X2, Complex_f *gamval,
373 unsigned int *iu, int *gamin,float akappa, int mu);
374__global__ void cuForce_t(double *dSdpi, Complex_f *u11t, Complex_f *u12t,Complex_f *X1, Complex_f *X2, Complex_f *gamval,\
375 float *dk4m, float *dk4p, unsigned int *iu, int *gamin,float akappa);
376__global__ void cuFill_Small_Phi(int na, Complex *smallPhi, Complex *Phi);
377__global__ void cuC_gather(Complex_f *x, Complex_f *y, int n, unsigned int *table, unsigned int mu);
378__global__ void cuZ_gather(Complex *x, Complex *y, int n, unsigned int *table, unsigned int mu);
379__global__ void cuComplex_convert(Complex_f *a, Complex *b, int len, bool dtof);
380__global__ void cuReal_convert(float *a, double *b, int len, bool dtof);
381__global__ void cuUpDownPart(int na, Complex *X0, Complex *R1);
382__global__ void cuReunitarise(Complex *u11t, Complex *u12t);
383#endif
384#endif
This header is intended to be a useful reference for error codes and their meanings.
Integrators for the HMC.
Defines the constants of the code and other parameters for loop dimensions. Each subroutine includes ...
#define nadj
adjacent spatial indices
Definition sizes.h:175
#define Complex
Double precision complex number.
Definition sizes.h:58
#define ndirac
Dirac indices.
Definition sizes.h:177
#define Complex_f
Single precision complex number.
Definition sizes.h:56
#define ndim
Dimensions.
Definition sizes.h:179
double Polyakov(Complex_f *ut[2])
Calculate the Polyakov loop (no prizes for guessing that one...)
Definition bosonic.c:127
int SU2plaq(Complex_f *ut[2], Complex_f Sigma[2], unsigned int *iu, int i, int mu, int nu)
Calculates the plaquette at site i in the direction.
Definition bosonic.c:85
int Z_gather(Complex *x, Complex *y, int n, unsigned int *table, unsigned int mu)
Extracts all the double precision gauge links in the direction only.
Definition su2hmc.c:371
int Average_Plaquette(double *hg, double *avplaqs, double *avplaqt, Complex_f *ut[2], unsigned int *iu, float beta)
Calculates the gauge action using new (how new?) lookup table.
Definition bosonic.c:20
int Congradq(int na, double res, Complex *X1, Complex *r, Complex_f *ut[2], unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk[2], Complex_f jqq, float akappa, int *itercg)
Matrix Inversion via Conjugate Gradient (up/down flavour partitioning). Solves Implements up/down pa...
Definition congrad.c:7
int Measure(double *pbp, double *endenf, double *denf, Complex *qq, Complex *qbqb, double res, int *itercg, Complex *ut[2], Complex_f *ut_f[2], unsigned int *iu, unsigned int *id, Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk[2], float *dk_f[2], Complex_f jqq, float akappa, Complex *Phi, Complex *R1)
Calculate fermion expectation values via a noisy estimator.
Definition fermionic.c:8
int Init(int istart, int ibound, int iread, float beta, float fmu, float akappa, Complex_f ajq, Complex *u[2], Complex *ut[2], Complex_f *ut_f[2], Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk[2], float *dk_f[2], unsigned int *iu, unsigned int *id)
Initialises the system.
Definition su2hmc.c:19
int Force(double *dSdpi, int iflag, double res1, Complex *X0, Complex *X1, Complex *Phi, Complex *ut[2], Complex_f *ut_f[2], unsigned int *iu, unsigned int *id, Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk[2], float *dk_f[2], Complex_f jqq, float akappa, float beta, double *ancg)
Calculates the force at each intermediate time.
Definition force.c:94
int Congradp(int na, double res, Complex *Phi, Complex *xi, Complex_f *ut[2], unsigned int *iu, unsigned int *id, Complex_f *gamval, int *gamin, float *dk[2], Complex_f jqq, float akappa, int *itercg)
Matrix Inversion via Conjugate Gradient (no up/down flavour partitioning). Solves The matrix multipl...
Definition congrad.c:250
int Hamilton(double *h, double *s, double res2, double *pp, Complex *X0, Complex *X1, Complex *Phi, Complex_f *ut[2], unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk[2], Complex_f jqq, float akappa, float beta, double *ancgh, int traj)
Calculate the Hamiltonian.
Definition su2hmc.c:245
int Reunitarise(Complex *ut[2])
Reunitarises u11t and u12t as in conj(u11t[i])*u11t[i]+conj(u12t[i])*u12t[i]=1.
Definition matrices.c:1012
int Gauge_force(double *dSdpi, Complex_f *ut[2], unsigned int *iu, unsigned int *id, float beta)
Calculates the gauge force due to the Wilson Action at each intermediate time.
Definition force.c:6
int Fill_Small_Phi(int na, Complex *smallPhi, Complex *Phi)
Definition su2hmc.c:385
int C_gather(Complex_f *x, Complex_f *y, int n, unsigned int *table, unsigned int mu)
Extracts all the single precision gauge links in the direction only.
Definition su2hmc.c:357