19 Complex *gamval,
int *gamin,
double *dk4m,
double *dk4p,
Complex_f jqq,
float akappa){
40 const char *funcname =
"Dslash";
49 cuDslash(phi,r,u11t,u12t,iu,
id,gamval,gamin,dk4m,dk4p,jqq,akappa,dimGrid,dimBlock);
52#pragma omp parallel for
53 for(
int i=0;i<
kvol;i++){
54#pragma omp simd aligned(phi,r,gamval:AVX)
55 for(
int idirac = 0; idirac<
ndirac; idirac++){
58 a_1=conj(jqq)*gamval[4*
ndirac+idirac];
60 a_2=-jqq*gamval[4*
ndirac+idirac];
69 for(
int mu = 0; mu <3; mu++){
70 int did=
id[mu+
ndim*i];
int uid = iu[mu+
ndim*i];
71#pragma omp simd aligned(phi,r,u11t,u12t,gamval:AVX)
72 for(
int igorkov=0; igorkov<
ngorkov; igorkov++){
75 int igork1 = (igorkov<4) ? gamin[mu*
ndirac+idirac] : gamin[mu*
ndirac+idirac]+4;
104 int did=
id[3+
ndim*i];
int uid = iu[3+
ndim*i];
106#pragma omp simd aligned(phi,r,u11t,u12t,dk4m,dk4p:AVX)
107 for(
int igorkov=0; igorkov<4; igorkov++){
108 int igorkovPP=igorkov+4;
110 int igork1 = gamin[3*
ndirac+igorkov];
int igork1PP = igork1+4;
141 Complex *gamval,
int *gamin,
double *dk4m,
double *dk4p,
Complex_f jqq,
float akappa){
162 const char *funcname =
"Dslashd";
170 cuDslashd(phi,r,u11t,u12t,iu,
id,gamval,gamin,dk4m,dk4p,jqq,akappa,dimGrid,dimBlock);
173#pragma omp parallel for
174 for(
int i=0;i<
kvol;i++){
175#pragma omp simd aligned(phi,r,gamval:AVX)
177 for(
int idirac = 0; idirac<
ndirac; idirac++){
178 int igork = idirac+4;
181 a_1=-conj(jqq)*gamval[4*
ndirac+idirac];
182 a_2=jqq*gamval[4*
ndirac+idirac];
191 for(
int mu = 0; mu <3; mu++){
192 int did=
id[mu+
ndim*i];
int uid = iu[mu+
ndim*i];
193#pragma omp simd aligned(phi,r,u11t,u12t,gamval:AVX)
194 for(
int igorkov=0; igorkov<
ngorkov; igorkov++){
196 int idirac=igorkov%4;
197 int igork1 = (igorkov<4) ? gamin[mu*
ndirac+idirac] : gamin[mu*
ndirac+idirac]+4;
206 -gamval[mu*
ndirac+idirac]*
217 -gamval[mu*
ndirac+idirac]*
229 int did=
id[3+
ndim*i];
int uid = iu[3+
ndim*i];
231#pragma omp simd aligned(phi,r,u11t,u12t,dk4m,dk4p:AVX)
232 for(
int igorkov=0; igorkov<4; igorkov++){
234 int igork1 = gamin[3*
ndirac+igorkov];
248 int igorkovPP=igorkov+4;
249 int igork1PP = igork1+4;
268 Complex *gamval,
int *gamin,
double *dk4m,
double *dk4p,
float akappa){
288 const char *funcname =
"Hdslash";
297 cuHdslash(phi,r,u11t,u12t,iu,
id,gamval,gamin,dk4m,dk4p,akappa,dimGrid,dimBlock);
300#pragma omp parallel for
301 for(
int i=0;i<
kvol;i++){
303 for(
int mu = 0; mu <3; mu++){
304 int did=
id[mu+
ndim*i];
int uid = iu[mu+
ndim*i];
305#pragma omp simd aligned(phi,r,u11t,u12t,gamval:AVX)
306 for(
int idirac=0; idirac<
ndirac; idirac++){
308 int igork1 = gamin[mu*
ndirac+idirac];
335 int did=
id[3+
ndim*i];
int uid = iu[3+
ndim*i];
337#pragma omp simd aligned(phi,r,u11t,u12t,dk4m,dk4p:AVX)
338 for(
int idirac=0; idirac<
ndirac; idirac++){
339 int igork1 = gamin[3*
ndirac+idirac];
359 Complex *gamval,
int *gamin,
double *dk4m,
double *dk4p,
float akappa){
379 const char *funcname =
"Hdslashd";
393 cuHdslashd(phi,r,u11t,u12t,iu,
id,gamval,gamin,dk4m,dk4p,akappa,dimGrid,dimBlock);
397#pragma omp parallel for
398 for(
int i=0;i<
kvol;i++){
400 for(
int mu = 0; mu <
ndim-1; mu++){
401 int did=
id[mu+
ndim*i];
int uid = iu[mu+
ndim*i];
402#pragma omp simd aligned(phi,r,u11t,u12t,gamval:AVX)
403 for(
int idirac=0; idirac<
ndirac; idirac++){
405 int igork1 = gamin[mu*
ndirac+idirac];
416 -gamval[mu*
ndirac+idirac]*
423 -akappa*(-conj(u12t[i*
ndim+mu])*r[(uid*
ndirac+idirac)*
nc]
427 -gamval[mu*
ndirac+idirac]*
436 int did=
id[3+
ndim*i];
int uid = iu[3+
ndim*i];
438#pragma omp simd aligned(phi,r,u11t,u12t,dk4m,dk4p:AVX)
439 for(
int idirac=0; idirac<
ndirac; idirac++){
440 int igork1 = gamin[3*
ndirac+idirac];
463 Complex_f *gamval_f,
int *gamin,
float *dk4m_f,
float *dk4p_f,
Complex_f jqq,
float akappa){
484 const char *funcname =
"Dslash_f";
493 cuDslash_f(phi,r,u11t_f,u12t_f,iu,
id,gamval_f,gamin,dk4m_f,dk4p_f,jqq,akappa,dimGrid,dimBlock);
496#pragma omp parallel for
497 for(
int i=0;i<
kvol;i++){
498#pragma omp simd aligned(phi,r,gamval_f:AVX)
499 for(
int idirac = 0; idirac<
ndirac; idirac++){
500 int igork = idirac+4;
502 a_1=conj(jqq)*gamval_f[4*
ndirac+idirac];
504 a_2=-jqq*gamval_f[4*
ndirac+idirac];
513 for(
int mu = 0; mu <3; mu++){
514 int did=
id[mu+
ndim*i];
int uid = iu[mu+
ndim*i];
515#pragma omp simd aligned(phi,r,u11t_f,u12t_f,gamval_f,gamin:AVX)
516 for(
int igorkov=0; igorkov<
ngorkov; igorkov++){
518 int idirac=igorkov%4;
519 int igork1 = (igorkov<4) ? gamin[mu*
ndirac+idirac] : gamin[mu*
ndirac+idirac]+4;
548 int did=
id[3+
ndim*i];
int uid = iu[3+
ndim*i];
550#pragma omp simd aligned(phi,r,u11t_f,u12t_f,dk4m_f,dk4p_f,gamin:AVX)
551 for(
int igorkov=0; igorkov<4; igorkov++){
552 int igorkovPP=igorkov+4;
554 int igork1 = gamin[3*
ndirac+igorkov];
int igork1PP = igork1+4;
585 Complex_f *gamval_f,
int *gamin,
float *dk4m_f,
float *dk4p_f,
Complex_f jqq,
float akappa){
606 const char *funcname =
"Dslashd_f";
614 cuDslashd_f(phi,r,u11t_f,u12t_f,iu,
id,gamval_f,gamin,dk4m_f,dk4p_f,jqq,akappa,dimGrid,dimBlock);
617#pragma omp parallel for
618 for(
int i=0;i<
kvol;i++){
619#pragma omp simd aligned(phi,r,gamval_f:AVX)
621 for(
int idirac = 0; idirac<
ndirac; idirac++){
622 int igork = idirac+4;
625 a_1=-conj(jqq)*gamval_f[4*
ndirac+idirac];
626 a_2=jqq*gamval_f[4*
ndirac+idirac];
635 for(
int mu = 0; mu <3; mu++){
636 int did=
id[mu+
ndim*i];
int uid = iu[mu+
ndim*i];
637#pragma omp simd aligned(phi,r,u11t_f,u12t_f,gamval_f:AVX)
638 for(
int igorkov=0; igorkov<
ngorkov; igorkov++){
640 int idirac=igorkov%4;
641 int igork1 = (igorkov<4) ? gamin[mu*
ndirac+idirac] : gamin[mu*
ndirac+idirac]+4;
650 -gamval_f[mu*
ndirac+idirac]*
657 -akappa*(-conj(u12t_f[i*
ndim+mu])*r[(uid*
ngorkov+igorkov)*
nc]
661 -gamval_f[mu*
ndirac+idirac]*
673 int did=
id[3+
ndim*i];
int uid = iu[3+
ndim*i];
675#pragma omp simd aligned(phi,r,u11t_f,u12t_f,dk4m_f,dk4p_f:AVX)
676 for(
int igorkov=0; igorkov<4; igorkov++){
678 int igork1 = gamin[3*
ndirac+igorkov];
692 int igorkovPP=igorkov+4;
693 int igork1PP = igork1+4;
712 Complex_f *gamval_f,
int *gamin,
float *dk4m_f,
float *dk4p_f,
float akappa){
732 const char *funcname =
"Hdslash_f";
738 cuHdslash_f(phi,r,u11t_f,u12t_f,iu,
id,gamval_f,gamin,dk4m_f,dk4p_f,akappa,dimGrid,dimBlock);
742#pragma omp parallel for
743 for(
int i=0;i<
kvol;i++){
746 for(
int mu = 0; mu <3; mu++){
747 int did=
id[mu+
ndim*i];
int uid = iu[mu+
ndim*i];
748#pragma omp simd aligned(phi,r,u11t_f,u12t_f,gamval_f:AVX)
749 for(
int idirac=0; idirac<
ndirac; idirac++){
751 int igork1 = gamin[mu*
ndirac+idirac];
779 int did=
id[3+
ndim*i];
int uid = iu[3+
ndim*i];
781#pragma omp simd aligned(phi,r,u11t_f,u12t_f,dk4m_f,dk4p_f:AVX)
782 for(
int idirac=0; idirac<
ndirac; idirac++){
783 int igork1 = gamin[3*
ndirac+idirac];
803 Complex_f *gamval_f,
int *gamin,
float *dk4m_f,
float *dk4p_f,
float akappa){
823 const char *funcname =
"Hdslashd_f";
837 cuHdslashd_f(phi,r,u11t_f,u12t_f,iu,
id,gamval_f,gamin,dk4m_f,dk4p_f,akappa,dimGrid,dimBlock);
841#pragma omp parallel for
842 for(
int i=0;i<
kvol;i++){
844 for(
int mu = 0; mu <
ndim-1; mu++){
845 int did=
id[mu+
ndim*i];
int uid = iu[mu+
ndim*i];
846#pragma omp simd aligned(phi,r,u11t_f,u12t_f,gamval_f:AVX)
847 for(
int idirac=0; idirac<
ndirac; idirac++){
849 int igork1 = gamin[mu*
ndirac+idirac];
860 -gamval_f[mu*
ndirac+idirac]*
867 -akappa*(-conjf(u12t_f[i*
ndim+mu])*r[(uid*
ndirac+idirac)*
nc]
871 -gamval_f[mu*
ndirac+idirac]*
880 int did=
id[3+
ndim*i];
int uid = iu[3+
ndim*i];
882#pragma omp simd aligned(phi,r,u11t_f,u12t_f,gamval_f:AVX)
883 for(
int idirac=0; idirac<
ndirac; idirac++){
884 int igork1 = gamin[3*
ndirac+idirac];
918 const char *funcname =
"Reunitarise";
920 cuReunitarise(u11t,u12t,dimGrid,dimBlock);
922#pragma omp parallel for simd aligned(u11t,u12t:AVX)
926 double anorm=sqrt(conj(u11t[i])*u11t[i]+conj(u12t[i])*u12t[i]);
int Hdslash_f(Complex_f *phi, Complex_f *r, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk4m_f, float *dk4p_f, float akappa)
Evaluates $\Phi = M r$ in single precision.
int Dslash(Complex *phi, Complex *r, Complex *u11t, Complex *u12t, unsigned int *iu, unsigned int *id, Complex *gamval, int *gamin, double *dk4m, double *dk4p, Complex_f jqq, float akappa)
Evaluates $\Phi = M r$ in double precision.
int Dslashd(Complex *phi, Complex *r, Complex *u11t, Complex *u12t, unsigned int *iu, unsigned int *id, Complex *gamval, int *gamin, double *dk4m, double *dk4p, Complex_f jqq, float akappa)
Evaluates $\Phi = M^\dagger r$ in double precision.
int Hdslashd_f(Complex_f *phi, Complex_f *r, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk4m_f, float *dk4p_f, float akappa)
Evaluates $\Phi = M^\dagger r$ in single precision.
int Reunitarise(Complex *u11t, Complex *u12t)
Reunitarises u11t and u12t so that conj(u11t[i])*u11t[i]+conj(u12t[i])*u12t[i]=1 for every element i.
int Hdslash(Complex *phi, Complex *r, Complex *u11t, Complex *u12t, unsigned int *iu, unsigned int *id, Complex *gamval, int *gamin, double *dk4m, double *dk4p, float akappa)
Evaluates $\Phi = M r$ in double precision.
int Dslashd_f(Complex_f *phi, Complex_f *r, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk4m_f, float *dk4p_f, Complex_f jqq, float akappa)
Evaluates $\Phi = M^\dagger r$ in single precision.
int Dslash_f(Complex_f *phi, Complex_f *r, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk4m_f, float *dk4p_f, Complex_f jqq, float akappa)
Evaluates $\Phi = M r$ in single precision.
int Hdslashd(Complex *phi, Complex *r, Complex *u11t, Complex *u12t, unsigned int *iu, unsigned int *id, Complex *gamval, int *gamin, double *dk4m, double *dk4p, float akappa)
Evaluates $\Phi = M^\dagger r$ in double precision.
Matrix multiplication and related declarations.
int CHalo_swap_all(Complex_f *c, int ncpt)
Calls the functions to send data to both the up and down halos.
int ZHalo_swap_all(Complex *z, int ncpt)
Calls the functions to send data to both the up and down halos.
#define ngorkov
Gor'kov indices.
#define kvol
Sublattice volume.
#define Complex
Double precision complex number.
#define kferm
Sublattice size including Gor'kov indices.
#define ndirac
Dirac indices.
#define Complex_f
Single precision complex number.
#define kferm2
Sublattice size including Dirac indices.