su2hmc
Loading...
Searching...
No Matches
su2hmc.h
Go to the documentation of this file.
1
5#ifndef SU2HEAD
6#define SU2HEAD
7//ARM Based machines. BLAS routines should work with other libraries, so we can set a compiler
8//flag to sort them out. But the PRNG routines etc. are MKL exclusive
9#include <errorcodes.h>
10#include <integrate.h>
11#ifdef __INTEL_MKL__
12#define USE_BLAS
13#include <mkl.h>
14#elif defined GSL_BLAS
15#define USE_BLAS
16#include <gsl/gsl_cblas.h>
17#elif defined AMD_BLAS
18#define USE_BLAS
19#include <cblas.h>
20#endif
21#include <sizes.h>
22#ifdef __cplusplus
23#include <cstdio>
24#include <cstdlib>
25#include <ctime>
26#else
27#include <stdio.h>
28#include <stdlib.h>
29#include <time.h>
30#endif
31
32//Definitions:
33//###########
34#ifdef _DEBUGCG
35#define _DEBUG
36#endif
37//Function Declarations:
38//#####################
39#if (defined __cplusplus)
40extern "C"
41{
42#endif
69 int Force(double *dSdpi, int iflag, double res1, Complex *X0, Complex *X1, Complex *Phi,Complex *u11t, Complex *u12t,\
70 Complex_f *u11t_f,Complex_f *u12t_f,unsigned int *iu,unsigned int *id,Complex *gamval,Complex_f *gamval_f,\
71 int *gamin,double *dk4m, double *dk4p, float *dk4m_f,float *dk4p_f,Complex_f jqq,\
72 float akappa,float beta,double *ancg);
83 int Gauge_force(double *dSdpi,Complex_f *u11t, Complex_f *u12t, unsigned int *iu, unsigned int *id, float beta);
108 int Init(int istart, int ibound, int iread, float beta, float fmu, float akappa, Complex_f ajq,\
109 Complex *u11, Complex *u12, Complex *u11t, Complex *u12t, Complex_f *u11t_f, Complex_f *u12t_f,\
110 Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk4m, double *dk4p, float *dk4m_f, float *dk4p_f,\
111 unsigned int *iu, unsigned int *id);
137 int Hamilton(double *h, double *s, double res2, double *pp, Complex *X0, Complex *X1, Complex *Phi,\
138 Complex *u11t, Complex *u12t, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int * iu, unsigned int *id,\
139 Complex_f *gamval_f, int *gamin, float *dk4m_f, float * dk4p_f, Complex_f jqq,\
140 float akappa, float beta,double *ancgh,int traj);
165 int Congradq(int na,double res,Complex *X1,Complex *r,Complex_f *u11t_f,Complex_f *u12t_f,unsigned int *iu,unsigned int *id,\
166 Complex_f *gamval_f,int *gamin,float *dk4m_f,float *dk4p_f,Complex_f jqq,float akappa,int *itercg);
190 int Congradp(int na,double res,Complex *Phi,Complex *xi,Complex_f *u11t,Complex_f *u12t,unsigned int *iu,unsigned int *id,\
191 Complex_f *gamval,int *gamin,float *dk4m,float *dk4p,Complex_f jqq,float akappa,int *itercg);
228 int Measure(double *pbp, double *endenf, double *denf, Complex *qq, Complex *qbqb, double res, int *itercg,\
229 Complex *u11t, Complex *u12t, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id,\
230 Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk4m, double *dk4p,\
231 float *dk4m_f, float *dk4p_f, Complex_f jqq, float akappa, Complex *Phi, Complex *R1);
247 int Average_Plaquette(double *hg, double *avplaqs, double *avplaqt, Complex_f *u11t, Complex_f *u12t,\
248 unsigned int *iu, float beta);
249#if (!defined __NVCC__ && !defined __HIPCC__)
263 float SU2plaq(Complex_f *u11t, Complex_f *u12t, unsigned int *iu, int i, int mu, int nu);
264#endif
274 double Polyakov(Complex_f *u11t, Complex_f *u12t);
275 //Inline functions
287 int C_gather(Complex_f *x, Complex_f *y, int n, unsigned int *table, unsigned int mu);
299 int Z_gather(Complex *x, Complex *y, int n, unsigned int *table, unsigned int mu);
309 int Fill_Small_Phi(int na, Complex *smallPhi, Complex *Phi);
310 /**
311 * @brief Up/Down partitioning of the pseudofermion field
312 *
313 * @param na: Flavour index
314 * @param X0: Partitioned field
315 * @param R1: Full pseudofermion field
316 *
317 * @return Zero on success, integer error code otherwise
318 */
319 int UpDownPart(const int na, Complex *X0, Complex *R1);
331 int Reunitarise(Complex *u11t, Complex *u12t);
332 //CUDA Declarations:
333 //#################
334#ifdef __NVCC__
335 //Not a function. An array of concurrent GPU streams to keep it busy
336 extern cudaStream_t streams[ndirac*ndim*nadj];
337 //Calling Functions:
338 //=================
339 void cuAverage_Plaquette(double *hgs, double *hgt, Complex_f *u11t, Complex_f *u12t, unsigned int *iu,dim3 dimGrid, dim3 dimBlock);
340 void cuPolyakov(Complex_f *Sigma11, Complex_f * Sigma12, Complex_f *u11t, Complex_f *u12t,dim3 dimGrid, dim3 dimBlock);
341 void cuGauge_force(int mu,Complex_f *Sigma11, Complex_f *Sigma12, Complex_f *u11t,Complex_f *u12t,double *dSdpi,float beta,\
342 dim3 dimGrid, dim3 dimBlock);
343 void cuPlus_staple(int mu, int nu, unsigned int *iu, Complex_f *Sigma11, Complex_f *Sigma12, Complex_f *u11t, Complex_f *u12t,\
344 dim3 dimGrid, dim3 dimBlock);
345 void cuMinus_staple(int mu, int nu, unsigned int *iu, unsigned int *id, Complex_f *Sigma11, Complex_f *Sigma12,\
346 Complex_f *u11sh, Complex_f *u12sh,Complex_f *u11t, Complex_f*u12t, dim3 dimGrid, dim3 dimBlock);
347 void cuForce(double *dSdpi, Complex_f *u11t, Complex_f *u12t, Complex_f *X1, Complex_f *X2, \
348 Complex_f *gamval,float *dk4m, float *dk4p,unsigned int *iu,int *gamin,\
349 float akappa, dim3 dimGrid, dim3 dimBlock);
350 //cuInit was taken already by CUDA (unsurprisingly)
351 void Init_CUDA(Complex *u11t, Complex *u12t,Complex *gamval, Complex_f *gamval_f, int *gamin, double*dk4m,\
352 double *dk4p, unsigned int *iu, unsigned int *id);
//NOTE(review): the six wrappers below take (dimBlock, dimGrid), whereas the other cu* calling functions in this
//header take (dimGrid, dimBlock). Both are dim3, so a swapped call compiles silently - confirm against the definitions.
//NOTE(review): the convert wrappers name their flag ftod but the matching __global__ kernels name it dtof - confirm
//which direction a true value selects.
353 void cuFill_Small_Phi(int na, Complex *smallPhi, Complex *Phi,dim3 dimBlock, dim3 dimGrid);
354 void cuC_gather(Complex_f *x, Complex_f *y, int n, unsigned int *table, unsigned int mu,dim3 dimBlock, dim3 dimGrid);
355 void cuZ_gather(Complex *x, Complex *y, int n, unsigned int *table, unsigned int mu,dim3 dimBlock, dim3 dimGrid);
356 void cuComplex_convert(Complex_f *a, Complex *b, int len, bool ftod, dim3 dimBlock, dim3 dimGrid);
357 void cuReal_convert(float *a, double *b, int len, bool ftod, dim3 dimBlock, dim3 dimGrid);
358 void cuUpDownPart(int na, Complex *X0, Complex *R1,dim3 dimBlock, dim3 dimGrid);
359 void cuReunitarise(Complex *u11t, Complex *u12t,dim3 dimGrid, dim3 dimBlock);
360 //And a little something to set the CUDA grid and block sizes
361 void blockInit(int x, int y, int z, int t, dim3 *dimBlock, dim3 *dimGrid);
362#endif
363#if (defined __cplusplus)
364}
365#endif
366//CUDA Kernels:
367//============
368#ifdef __CUDACC__
369//__global__ void cuForce(double *dSdpi, Complex *u11t, Complex *u12t, Complex *X1, Complex *X2, Complex *gamval,\
370// double *dk4m, double *dk4p, unsigned int *iu, int *gamin,float akappa);
371__global__ void Plus_staple(int mu, int nu,unsigned int *iu, Complex_f *Sigma11, Complex_f *Sigma12,\
372 Complex_f *u11t, Complex_f *u12t);
373__global__ void Minus_staple(int mu, int nu,unsigned int *iu,unsigned int *id, Complex_f *Sigma11, Complex_f *Sigma12,\
374 Complex_f *u11sh, Complex_f *u12sh, Complex_f *u11t, Complex_f *u12t);
375__global__ void cuGaugeForce(int mu, Complex_f *Sigma11, Complex_f *Sigma12,double* dSdpi,Complex_f *u11t, Complex_f *u12t,\
376 float beta);
377__global__ void cuAverage_Plaquette(float *hgs_d, float *hgt_d, Complex_f *u11t, Complex_f *u12t, unsigned int *iu);
378__global__ void cuPolyakov(Complex_f *Sigma11, Complex_f * Sigma12, Complex_f *u11t, Complex_f *u12t);
379__device__ float SU2plaq(Complex_f *u11t, Complex_f *u12t, unsigned int *iu, int i, int mu, int nu);
380//Force Kernels. We've taken each nadj index and the spatial/temporal components and created a separate kernel for each
381//CPU code just has these as a huge blob that the vectoriser can't handle. May be worth splitting it there too?
382//It might not be a bad idea to make a separate header for all these kernels...
383__global__ void cuForce_s(double *dSdpi, Complex_f *u11t, Complex_f *u12t, Complex_f *X1, Complex_f *X2, Complex_f *gamval,
384 unsigned int *iu, int *gamin,float akappa, int mu);
385__global__ void cuForce_t(double *dSdpi, Complex_f *u11t, Complex_f *u12t, Complex_f *X1, Complex_f *X2, Complex_f *gamval,\
386 float *dk4m, float *dk4p, unsigned int *iu, int *gamin,float akappa);
387__global__ void cuFill_Small_Phi(int na, Complex *smallPhi, Complex *Phi);
388__global__ void cuC_gather(Complex_f *x, Complex_f *y, int n, unsigned int *table, unsigned int mu);
389__global__ void cuZ_gather(Complex *x, Complex *y, int n, unsigned int *table, unsigned int mu);
390__global__ void cuComplex_convert(Complex_f *a, Complex *b, int len, bool dtof);
391__global__ void cuReal_convert(float *a, double *b, int len, bool dtof);
392__global__ void cuUpDownPart(int na, Complex *X0, Complex *R1);
393__global__ void cuReunitarise(Complex *u11t, Complex *u12t);
394#endif
395#endif
This header is intended to be a useful reference for error codes and their meanings.
Integrators for the HMC.
Defines the constants of the code and other parameters for loop dimensions. Each subroutine includes ...
#define nadj
adjacent spatial indices
Definition sizes.h:175
#define Complex
Double precision complex number.
Definition sizes.h:58
#define ndirac
Dirac indices.
Definition sizes.h:177
#define Complex_f
Single precision complex number.
Definition sizes.h:56
#define ndim
Dimensions.
Definition sizes.h:179
int Init(int istart, int ibound, int iread, float beta, float fmu, float akappa, Complex_f ajq, Complex *u11, Complex *u12, Complex *u11t, Complex *u12t, Complex_f *u11t_f, Complex_f *u12t_f, Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk4m, double *dk4p, float *dk4m_f, float *dk4p_f, unsigned int *iu, unsigned int *id)
Initialises the system.
Definition su2hmc.c:19
int Z_gather(Complex *x, Complex *y, int n, unsigned int *table, unsigned int mu)
Extracts all the double precision gauge links in the $\mu$ direction only.
Definition su2hmc.c:335
int Measure(double *pbp, double *endenf, double *denf, Complex *qq, Complex *qbqb, double res, int *itercg, Complex *u11t, Complex *u12t, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk4m, double *dk4p, float *dk4m_f, float *dk4p_f, Complex_f jqq, float akappa, Complex *Phi, Complex *R1)
Calculate fermion expectation values via a noisy estimator.
Definition fermionic.c:8
int Force(double *dSdpi, int iflag, double res1, Complex *X0, Complex *X1, Complex *Phi, Complex *u11t, Complex *u12t, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex *gamval, Complex_f *gamval_f, int *gamin, double *dk4m, double *dk4p, float *dk4m_f, float *dk4p_f, Complex_f jqq, float akappa, float beta, double *ancg)
Calculates the force at each intermediate time.
Definition force.c:131
int Gauge_force(double *dSdpi, Complex_f *u11t, Complex_f *u12t, unsigned int *iu, unsigned int *id, float beta)
Calculates the gauge force due to the Wilson Action at each intermediate time.
Definition force.c:6
int Congradq(int na, double res, Complex *X1, Complex *r, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk4m_f, float *dk4p_f, Complex_f jqq, float akappa, int *itercg)
Matrix Inversion via Conjugate Gradient (up/down flavour partitioning). Solves $(M^\dagger M)x=\Phi$. Implements up/down pa...
Definition congrad.c:7
int Hamilton(double *h, double *s, double res2, double *pp, Complex *X0, Complex *X1, Complex *Phi, Complex *u11t, Complex *u12t, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk4m_f, float *dk4p_f, Complex_f jqq, float akappa, float beta, double *ancgh, int traj)
Calculate the Hamiltonian.
Definition su2hmc.c:208
int Average_Plaquette(double *hg, double *avplaqs, double *avplaqt, Complex_f *u11t, Complex_f *u12t, unsigned int *iu, float beta)
Calculates the gauge action using new (how new?) lookup table.
Definition bosonic.c:8
float SU2plaq(Complex_f *u11t, Complex_f *u12t, unsigned int *iu, int i, int mu, int nu)
Calculates the plaquette at site i in the $\mu\nu$ direction.
Definition bosonic.c:72
int Reunitarise(Complex *u11t, Complex *u12t)
Reunitarises u11t and u12t as in conj(u11t[i])*u11t[i]+conj(u12t[i])*u12t[i]=1.
Definition matrices.c:904
double Polyakov(Complex_f *u11t, Complex_f *u12t)
Calculate the Polyakov loop (no prizes for guessing that one...)
Definition bosonic.c:105
int Fill_Small_Phi(int na, Complex *smallPhi, Complex *Phi)
Definition su2hmc.c:349
int C_gather(Complex_f *x, Complex_f *y, int n, unsigned int *table, unsigned int mu)
Extracts all the single precision gauge links in the $\mu$ direction only.
Definition su2hmc.c:321
int Congradp(int na, double res, Complex *Phi, Complex *xi, Complex_f *u11t, Complex_f *u12t, unsigned int *iu, unsigned int *id, Complex_f *gamval, int *gamin, float *dk4m, float *dk4p, Complex_f jqq, float akappa, int *itercg)
Matrix Inversion via Conjugate Gradient (no up/down flavour partitioning). Solves $(M^\dagger M)x=\Phi$. The matrix multipl...
Definition congrad.c:262