su2hmc
Loading...
Searching...
No Matches
matrices.c File Reference

Matrix multiplication and related routines. More...

#include <assert.h>
#include <complex.h>
#include <matrices.h>
#include <string.h>
Include dependency graph for matrices.c:

Go to the source code of this file.

Functions

int Dslash (Complex *phi, Complex *r, Complex *u11t, Complex *u12t, unsigned int *iu, unsigned int *id, Complex *gamval, int *gamin, double *dk4m, double *dk4p, Complex_f jqq, float akappa)
 Evaluates \(\Phi=M r\) in double precision.
 
int Dslashd (Complex *phi, Complex *r, Complex *u11t, Complex *u12t, unsigned int *iu, unsigned int *id, Complex *gamval, int *gamin, double *dk4m, double *dk4p, Complex_f jqq, float akappa)
 Evaluates \(\Phi=M^\dagger r\) in double precision.
 
int Hdslash (Complex *phi, Complex *r, Complex *u11t, Complex *u12t, unsigned int *iu, unsigned int *id, Complex *gamval, int *gamin, double *dk4m, double *dk4p, float akappa)
 Evaluates \(\Phi=M r\) in double precision.
 
int Hdslashd (Complex *phi, Complex *r, Complex *u11t, Complex *u12t, unsigned int *iu, unsigned int *id, Complex *gamval, int *gamin, double *dk4m, double *dk4p, float akappa)
 Evaluates \(\Phi=M^\dagger r\) in double precision.
 
int Dslash_f (Complex_f *phi, Complex_f *r, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk4m_f, float *dk4p_f, Complex_f jqq, float akappa)
 Evaluates \(\Phi=M r\) in single precision.
 
int Dslashd_f (Complex_f *phi, Complex_f *r, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk4m_f, float *dk4p_f, Complex_f jqq, float akappa)
 Evaluates \(\Phi=M^\dagger r\) in single precision.
 
int Hdslash_f (Complex_f *phi, Complex_f *r, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk4m_f, float *dk4p_f, float akappa)
 Evaluates \(\Phi=M r\) in single precision.
 
int Hdslashd_f (Complex_f *phi, Complex_f *r, Complex_f *u11t_f, Complex_f *u12t_f, unsigned int *iu, unsigned int *id, Complex_f *gamval_f, int *gamin, float *dk4m_f, float *dk4p_f, float akappa)
 Evaluates \(\Phi=M^\dagger r\) in single precision.
 
int Reunitarise (Complex *u11t, Complex *u12t)
 Reunitarises u11t and u12t as in conj(u11t[i])*u11t[i]+conj(u12t[i])*u12t[i]=1.
 

Detailed Description

Matrix multiplication and related routines.

There are two four matrix mutiplication routines, and each had a double and single (_f) version The Hdslash? routines are called when acting on half of the fermions (up/down flavour partitioning) The Dslash routines act on everything

Any routine ending in a d is the daggered multiplication

Definition in file matrices.c.

Function Documentation

◆ Dslash()

int Dslash ( Complex * phi,
Complex * r,
Complex * u11t,
Complex * u12t,
unsigned int * iu,
unsigned int * id,
Complex * gamval,
int * gamin,
double * dk4m,
double * dk4p,
Complex_f jqq,
float akappa )

Evaluates \(\Phi=M r\) in double precision.

Parameters
phiThe product
rThe array being acted on by M
u11tFirst colour trial field
u12tSecond colour trial field
iuUpper halo indices
idLower halo indices
gamvalGamma matrices rescaled by kappa
gaminIndices for dirac terms
dk4m\(\left(1+\gamma_0\right)e^{-\mu}\)
dk4p\(\left(1-\gamma_0\right)e^\mu\)
jqqDiquark source
akappaHopping parameter
Returns
Zero on success, integer error code otherwise

Definition at line 18 of file matrices.c.

19 {
20 /*
21 * @brief Evaluates @f(\Phi=M r@f) in double precision.
22 *
23 * @param phi: The product
24 * @param r: The array being acted on by M
25 * @param u11t: First colour trial field
26 * @param u12t: Second colour trial field
27 * @param iu: Upper halo indices
28 * @param id: Lower halo indices
29 * @param gamval: Gamma matrices
30 * @param gamin: Indices for dirac terms
31 * @param dk4m:
32 * @param dk4p:
33 * @param jqq: Diquark source
34 * @param akappa: Hopping parameter
35 *
36 * @see ZHalo_swap_all (MPI only)
37 *
38 * @return Zero on success, integer error code otherwise
39 */
40 const char *funcname = "Dslash";
41 //Get the halos in order
42#if(nproc>1)
43 ZHalo_swap_all(r, 16);
44#endif
45
46 //Mass term
47 //Diquark Term (antihermitian)
48#ifdef __NVCC__
49 cuDslash(phi,r,u11t,u12t,iu,id,gamval,gamin,dk4m,dk4p,jqq,akappa,dimGrid,dimBlock);
50#else
51 memcpy(phi, r, kferm*sizeof(Complex));
52#pragma omp parallel for
53 for(int i=0;i<kvol;i++){
54#pragma omp simd aligned(phi,r,gamval:AVX)
55 for(int idirac = 0; idirac<ndirac; idirac++){
56 int igork = idirac+4;
57 Complex a_1, a_2;
58 a_1=conj(jqq)*gamval[4*ndirac+idirac];
59 //We subtract a_2, hence the minus
60 a_2=-jqq*gamval[4*ndirac+idirac];
61 phi[(i*ngorkov+idirac)*nc]+=a_1*r[(i*ngorkov+igork)*nc+0];
62 phi[(i*ngorkov+idirac)*nc+1]+=a_1*r[(i*ngorkov+igork)*nc+1];
63 phi[(i*ngorkov+igork)*nc+0]+=a_2*r[(i*ngorkov+idirac)*nc];
64 phi[(i*ngorkov+igork)*nc+1]+=a_2*r[(i*ngorkov+idirac)*nc+1];
65 }
66
67 //Spacelike terms. Here's hoping I haven't put time as the zeroth component somewhere!
68#ifndef NO_SPACE
69 for(int mu = 0; mu <3; mu++){
70 int did=id[mu+ndim*i]; int uid = iu[mu+ndim*i];
71#pragma omp simd aligned(phi,r,u11t,u12t,gamval:AVX)
72 for(int igorkov=0; igorkov<ngorkov; igorkov++){
73 //FORTRAN had mod((igorkov-1),4)+1 to prevent issues with non-zero indexing in the dirac term.
74 int idirac=igorkov%4;
75 int igork1 = (igorkov<4) ? gamin[mu*ndirac+idirac] : gamin[mu*ndirac+idirac]+4;
76 //Can manually vectorise with a pragma?
77 //Wilson + Dirac term in that order. Definitely easier
78 //to read when split into different loops, but should be faster this way
79 phi[(i*ngorkov+igorkov)*nc]+=-akappa*(u11t[i*ndim+mu]*r[(uid*ngorkov+igorkov)*nc]+\
80 u12t[i*ndim+mu]*r[(uid*ngorkov+igorkov)*nc+1]+\
81 conj(u11t[did*ndim+mu])*r[(did*ngorkov+igorkov)*nc]-\
82 u12t[did*ndim+mu]*r[(did*ngorkov+igorkov)*nc+1])+\
83 //Dirac term. Reminder! gamval was rescaled by kappa when we defined it
84 gamval[mu*ndirac+idirac]*(u11t[i*ndim+mu]*r[(uid*ngorkov+igork1)*nc]+\
85 u12t[i*ndim+mu]*r[(uid*ngorkov+igork1)*nc+1]-\
86 conj(u11t[did*ndim+mu])*r[(did*ngorkov+igork1)*nc]+\
87 u12t[did*ndim+mu]*r[(did*ngorkov+igork1)*nc+1]);
88
89 phi[(i*ngorkov+igorkov)*nc+1]+=-akappa*(-conj(u12t[i*ndim+mu])*r[(uid*ngorkov+igorkov)*nc]+\
90 conj(u11t[i*ndim+mu])*r[(uid*ngorkov+igorkov)*nc+1]+\
91 conj(u12t[did*ndim+mu])*r[(did*ngorkov+igorkov)*nc]+\
92 u11t[did*ndim+mu]*r[(did*ngorkov+igorkov)*nc+1])+\
93 //Dirac term
94 gamval[mu*ndirac+idirac]*(-conj(u12t[i*ndim+mu])*r[(uid*ngorkov+igork1)*nc]+\
95 conj(u11t[i*ndim+mu])*r[(uid*ngorkov+igork1)*nc+1]-\
96 conj(u12t[did*ndim+mu])*r[(did*ngorkov+igork1)*nc]-\
97 u11t[did*ndim+mu]*r[(did*ngorkov+igork1)*nc+1]);
98 }
99 }
100 //Timelike terms next. These run from igorkov=0..3 and 4..7 with slightly different rules for each
101 //We can fit it into a single loop by declaring igorkovPP=igorkov+4 instead of looping igorkov=4..7 separately
102 //Note that for the igorkov 4..7 loop idirac=igorkov-4, so we don't need to declare idiracPP separately
103#endif
104 int did=id[3+ndim*i]; int uid = iu[3+ndim*i];
105#ifndef NO_TIME
106#pragma omp simd aligned(phi,r,u11t,u12t,dk4m,dk4p:AVX)
107 for(int igorkov=0; igorkov<4; igorkov++){
108 int igorkovPP=igorkov+4; //idirac = igorkov; It is a bit redundant but I'll mention it as that's how
109 //the FORTRAN code did it.
110 int igork1 = gamin[3*ndirac+igorkov]; int igork1PP = igork1+4;
111
112 //Factorising for performance, we get dk4?*u1?*(+/-r_wilson -/+ r_dirac)
113 phi[(i*ngorkov+igorkov)*nc]+=
114 -dk4p[i]*(u11t[i*ndim+3]*(r[(uid*ngorkov+igorkov)*nc]-r[(uid*ngorkov+igork1)*nc])
115 +u12t[i*ndim+3]*(r[(uid*ngorkov+igorkov)*nc+1]-r[(uid*ngorkov+igork1)*nc+1]))
116 -dk4m[did]*(conj(u11t[did*ndim+3])*(r[(did*ngorkov+igorkov)*nc]+r[(did*ngorkov+igork1)*nc])
117 -u12t[did*ndim+3] *(r[(did*ngorkov+igorkov)*nc+1]+r[(did*ngorkov+igork1)*nc+1]));
118 phi[(i*ngorkov+igorkov)*nc+1]+=
119 -dk4p[i]*(-conj(u12t[i*ndim+3])*(r[(uid*ngorkov+igorkov)*nc]-r[(uid*ngorkov+igork1)*nc])
120 +conj(u11t[i*ndim+3])*(r[(uid*ngorkov+igorkov)*nc+1]-r[(uid*ngorkov+igork1)*nc+1]))
121 -dk4m[did]*(conj(u12t[did*ndim+3])*(r[(did*ngorkov+igorkov)*nc]+r[(did*ngorkov+igork1)*nc])
122 +u11t[did*ndim+3] *(r[(did*ngorkov+igorkov)*nc+1]+r[(did*ngorkov+igork1)*nc+1]));
123
124 //And the +4 terms. Note that dk4p and dk4m swap positions compared to the above
125 phi[(i*ngorkov+igorkovPP)*nc]+=-dk4m[i]*(u11t[i*ndim+3]*(r[(uid*ngorkov+igorkovPP)*nc]-r[(uid*ngorkov+igork1PP)*nc])+\
126 u12t[i*ndim+3]*(r[(uid*ngorkov+igorkovPP)*nc+1]-r[(uid*ngorkov+igork1PP)*nc+1]))-\
127 dk4p[did]*(conj(u11t[did*ndim+3])*(r[(did*ngorkov+igorkovPP)*nc]+r[(did*ngorkov+igork1PP)*nc])-\
128 u12t[did*ndim+3]*(r[(did*ngorkov+igorkovPP)*nc+1]+r[(did*ngorkov+igork1PP)*nc+1]));
129
130 phi[(i*ngorkov+igorkovPP)*nc+1]+=-dk4m[i]*(conj(-u12t[i*ndim+3])*(r[(uid*ngorkov+igorkovPP)*nc]-r[(uid*ngorkov+igork1PP)*nc])+\
131 conj(u11t[i*ndim+3])*(r[(uid*ngorkov+igorkovPP)*nc+1]-r[(uid*ngorkov+igork1PP)*nc+1]))-\
132 dk4p[did]*(conj(u12t[did*ndim+3])*(r[(did*ngorkov+igorkovPP)*nc]+r[(did*ngorkov+igork1PP)*nc])+\
133 u11t[did*ndim+3]*(r[(did*ngorkov+igorkovPP)*nc+1]+r[(did*ngorkov+igork1PP)*nc+1]));
134 }
135#endif
136 }
137#endif
138 return 0;
139}
int ZHalo_swap_all(Complex *z, int ncpt)
Calls the functions to send data to both the up and down halos.
#define nc
Colours.
Definition sizes.h:173
#define ngorkov
Gor'kov indices.
Definition sizes.h:181
#define kvol
Sublattice volume.
Definition sizes.h:154
#define Complex
Double precision complex number.
Definition sizes.h:58
#define kferm
sublattice size including Gor'kov indices
Definition sizes.h:186
#define ndirac
Dirac indices.
Definition sizes.h:177
#define ndim
Dimensions.
Definition sizes.h:179

References Complex, kferm, kvol, nc, ndim, ndirac, ngorkov, and ZHalo_swap_all().

Here is the call graph for this function:

◆ Dslash_f()

int Dslash_f ( Complex_f * phi,
Complex_f * r,
Complex_f * u11t,
Complex_f * u12t,
unsigned int * iu,
unsigned int * id,
Complex_f * gamval,
int * gamin,
float * dk4m,
float * dk4p,
Complex_f jqq,
float akappa )

Evaluates \(\Phi=M r\) in single precision.

Parameters
phiThe product
rThe array being acted on by M
u11tFirst colour trial field
u12tSecond colour trial field
iuUpper halo indices
idLower halo indices
gamvalGamma matrices rescaled by kappa
gaminIndices for dirac terms
dk4m\(\left(1+\gamma_0\right)e^{-\mu}\)
dk4p\(\left(1-\gamma_0\right)e^\mu\)
jqqDiquark source
akappaHopping parameter
Returns
Zero on success, integer error code otherwise

Definition at line 462 of file matrices.c.

463 {
464 /*
465 * @brief Evaluates @f(\Phi=M r@f) in single precision.
466 *
467 * @param phi: The product
468 * @param r: The array being acted on by M
469 * @param u11t_f: First colour trial field
470 * @param u12t_f: Second colour trial field
471 * @param iu: Upper halo indices
472 * @param id: Lower halo indices
473 * @param gamval_f: Gamma matrices
474 * @param gamin: Indices for dirac terms
475 * @param dk4m_f:
476 * @param dk4p_f:
477 * @param jqq: Diquark source
478 * @param akappa: Hopping parameter
479 *
480 * @see CHalo_swap_all (MPI only)
481 *
482 * @return Zero on success, integer error code otherwise
483 */
484 const char *funcname = "Dslash_f";
485 //Get the halos in order
486#if(nproc>1)
487 CHalo_swap_all(r, 16);
488#endif
489
490 //Mass term
491 //Diquark Term (antihermitian)
492#ifdef __NVCC__
493 cuDslash_f(phi,r,u11t_f,u12t_f,iu,id,gamval_f,gamin,dk4m_f,dk4p_f,jqq,akappa,dimGrid,dimBlock);
494#else
495 memcpy(phi, r, kferm*sizeof(Complex_f));
496#pragma omp parallel for
497 for(int i=0;i<kvol;i++){
498#pragma omp simd aligned(phi,r,gamval_f:AVX)
499 for(int idirac = 0; idirac<ndirac; idirac++){
500 int igork = idirac+4;
501 Complex_f a_1, a_2;
502 a_1=conj(jqq)*gamval_f[4*ndirac+idirac];
503 //We subtract a_2, hence the minus
504 a_2=-jqq*gamval_f[4*ndirac+idirac];
505 phi[(i*ngorkov+idirac)*nc]+=a_1*r[(i*ngorkov+igork)*nc+0];
506 phi[(i*ngorkov+idirac)*nc+1]+=a_1*r[(i*ngorkov+igork)*nc+1];
507 phi[(i*ngorkov+igork)*nc+0]+=a_2*r[(i*ngorkov+idirac)*nc];
508 phi[(i*ngorkov+igork)*nc+1]+=a_2*r[(i*ngorkov+idirac)*nc+1];
509 }
510
511 //Spacelike terms. Here's hoping I haven't put time as the zeroth component somewhere!
512#ifndef NO_SPACE
513 for(int mu = 0; mu <3; mu++){
514 int did=id[mu+ndim*i]; int uid = iu[mu+ndim*i];
515#pragma omp simd aligned(phi,r,u11t_f,u12t_f,gamval_f,gamin:AVX)
516 for(int igorkov=0; igorkov<ngorkov; igorkov++){
517 //FORTRAN had mod((igorkov-1),4)+1 to prevent issues with non-zero indexing in the dirac term.
518 int idirac=igorkov%4;
519 int igork1 = (igorkov<4) ? gamin[mu*ndirac+idirac] : gamin[mu*ndirac+idirac]+4;
520 //Can manually vectorise with a pragma?
521 //Wilson + Dirac term in that order. Definitely easier
522 //to read when split into different loops, but should be faster this way
523 phi[(i*ngorkov+igorkov)*nc]+=-akappa*(u11t_f[i*ndim+mu]*r[(uid*ngorkov+igorkov)*nc]+\
524 u12t_f[i*ndim+mu]*r[(uid*ngorkov+igorkov)*nc+1]+\
525 conj(u11t_f[did*ndim+mu])*r[(did*ngorkov+igorkov)*nc]-\
526 u12t_f[did*ndim+mu]*r[(did*ngorkov+igorkov)*nc+1])+\
527 //Dirac term. Reminder! gamval was rescaled by kappa when we defined it
528 gamval_f[mu*ndirac+idirac]*(u11t_f[i*ndim+mu]*r[(uid*ngorkov+igork1)*nc]+\
529 u12t_f[i*ndim+mu]*r[(uid*ngorkov+igork1)*nc+1]-\
530 conj(u11t_f[did*ndim+mu])*r[(did*ngorkov+igork1)*nc]+\
531 u12t_f[did*ndim+mu]*r[(did*ngorkov+igork1)*nc+1]);
532
533 phi[(i*ngorkov+igorkov)*nc+1]+=-akappa*(-conj(u12t_f[i*ndim+mu])*r[(uid*ngorkov+igorkov)*nc]+\
534 conj(u11t_f[i*ndim+mu])*r[(uid*ngorkov+igorkov)*nc+1]+\
535 conj(u12t_f[did*ndim+mu])*r[(did*ngorkov+igorkov)*nc]+\
536 u11t_f[did*ndim+mu]*r[(did*ngorkov+igorkov)*nc+1])+\
537 //Dirac term
538 gamval_f[mu*ndirac+idirac]*(-conj(u12t_f[i*ndim+mu])*r[(uid*ngorkov+igork1)*nc]+\
539 conj(u11t_f[i*ndim+mu])*r[(uid*ngorkov+igork1)*nc+1]-\
540 conj(u12t_f[did*ndim+mu])*r[(did*ngorkov+igork1)*nc]-\
541 u11t_f[did*ndim+mu]*r[(did*ngorkov+igork1)*nc+1]);
542 }
543 }
544 //Timelike terms next. These run from igorkov=0..3 and 4..7 with slightly different rules for each
545 //We can fit it into a single loop by declaring igorkovPP=igorkov+4 instead of looping igorkov=4..7 separately
546 //Note that for the igorkov 4..7 loop idirac=igorkov-4, so we don't need to declare idiracPP separately
547#endif
548 int did=id[3+ndim*i]; int uid = iu[3+ndim*i];
549#ifndef NO_TIME
550#pragma omp simd aligned(phi,r,u11t_f,u12t_f,dk4m_f,dk4p_f,gamin:AVX)
551 for(int igorkov=0; igorkov<4; igorkov++){
552 int igorkovPP=igorkov+4; //idirac = igorkov; It is a bit redundant but I'll mention it as that's how
553 //the FORTRAN code did it.
554 int igork1 = gamin[3*ndirac+igorkov]; int igork1PP = igork1+4;
555
556 //Factorising for performance, we get dk4?*u1?*(+/-r_wilson -/+ r_dirac)
557 phi[(i*ngorkov+igorkov)*nc]+=
558 -dk4p_f[i]*(u11t_f[i*ndim+3]*(r[(uid*ngorkov+igorkov)*nc]-r[(uid*ngorkov+igork1)*nc])
559 +u12t_f[i*ndim+3]*(r[(uid*ngorkov+igorkov)*nc+1]-r[(uid*ngorkov+igork1)*nc+1]))
560 -dk4m_f[did]*(conj(u11t_f[did*ndim+3])*(r[(did*ngorkov+igorkov)*nc]+r[(did*ngorkov+igork1)*nc])
561 -u12t_f[did*ndim+3] *(r[(did*ngorkov+igorkov)*nc+1]+r[(did*ngorkov+igork1)*nc+1]));
562 phi[(i*ngorkov+igorkov)*nc+1]+=
563 -dk4p_f[i]*(-conj(u12t_f[i*ndim+3])*(r[(uid*ngorkov+igorkov)*nc]-r[(uid*ngorkov+igork1)*nc])
564 +conj(u11t_f[i*ndim+3])*(r[(uid*ngorkov+igorkov)*nc+1]-r[(uid*ngorkov+igork1)*nc+1]))
565 -dk4m_f[did]*(conj(u12t_f[did*ndim+3])*(r[(did*ngorkov+igorkov)*nc]+r[(did*ngorkov+igork1)*nc])
566 +u11t_f[did*ndim+3] *(r[(did*ngorkov+igorkov)*nc+1]+r[(did*ngorkov+igork1)*nc+1]));
567
568 //And the +4 terms. Note that dk4p_f and dk4m_f swap positions compared to the above
569 phi[(i*ngorkov+igorkovPP)*nc]+=-dk4m_f[i]*(u11t_f[i*ndim+3]*(r[(uid*ngorkov+igorkovPP)*nc]-r[(uid*ngorkov+igork1PP)*nc])
570 +u12t_f[i*ndim+3]*(r[(uid*ngorkov+igorkovPP)*nc+1]-r[(uid*ngorkov+igork1PP)*nc+1]))
571 -dk4p_f[did]*(conj(u11t_f[did*ndim+3])*(r[(did*ngorkov+igorkovPP)*nc]+r[(did*ngorkov+igork1PP)*nc])
572 -u12t_f[did*ndim+3]*(r[(did*ngorkov+igorkovPP)*nc+1]+r[(did*ngorkov+igork1PP)*nc+1]));
573
574 phi[(i*ngorkov+igorkovPP)*nc+1]+=-dk4m_f[i]*(conj(-u12t_f[i*ndim+3])*(r[(uid*ngorkov+igorkovPP)*nc]-r[(uid*ngorkov+igork1PP)*nc])
575 +conj(u11t_f[i*ndim+3])*(r[(uid*ngorkov+igorkovPP)*nc+1]-r[(uid*ngorkov+igork1PP)*nc+1]))
576 -dk4p_f[did]*(conj(u12t_f[did*ndim+3])*(r[(did*ngorkov+igorkovPP)*nc]+r[(did*ngorkov+igork1PP)*nc])
577 +u11t_f[did*ndim+3]*(r[(did*ngorkov+igorkovPP)*nc+1]+r[(did*ngorkov+igork1PP)*nc+1]));
578 }
579#endif
580 }
581#endif
582 return 0;
583}
int CHalo_swap_all(Complex_f *c, int ncpt)
Calls the functions to send data to both the up and down halos.
#define Complex_f
Single precision complex number.
Definition sizes.h:56

References CHalo_swap_all(), Complex_f, kferm, kvol, nc, ndim, ndirac, and ngorkov.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Dslashd()

int Dslashd ( Complex * phi,
Complex * r,
Complex * u11t,
Complex * u12t,
unsigned int * iu,
unsigned int * id,
Complex * gamval,
int * gamin,
double * dk4m,
double * dk4p,
Complex_f jqq,
float akappa )

Evaluates \(\Phi=M^\dagger r\) in double precision.

Parameters
phiThe product
rThe array being acted on by M
u11tFirst colour trial field
u12tSecond colour trial field
iuUpper halo indices
idLower halo indices
gamvalGamma matrices rescaled by kappa
gaminIndices for dirac terms
dk4m\(\left(1+\gamma_0\right)e^{-\mu}\)
dk4p\(\left(1-\gamma_0\right)e^\mu\)
jqqDiquark source
akappaHopping parameter
Returns
Zero on success, integer error code otherwise

Definition at line 140 of file matrices.c.

141 {
142 /*
143 * @brief Evaluates @f(\Phi=M^\dagger r@f) in double precision.
144 *
145 * @param phi: The product
146 * @param r: The array being acted on by M
147 * @param u11t: First colour trial field
148 * @param u12t: Second colour trial field
149 * @param iu: Upper halo indices
150 * @param id: Lower halo indices
151 * @param gamval: Gamma matrices
152 * @param gamin: Indices for dirac terms
153 * @param dk4m:
154 * @param dk4p:
155 * @param jqq: Diquark source
156 * @param akappa: Hopping parameter
157 *
158 * @see ZHalo_swap_all (MPI only)
159 *
160 * @return Zero on success, integer error code otherwise
161 */
162 const char *funcname = "Dslashd";
163 //Get the halos in order
164#if(nproc>1)
165 ZHalo_swap_all(r, 16);
166#endif
167
168 //Mass term
169#ifdef __NVCC__
170 cuDslashd(phi,r,u11t,u12t,iu,id,gamval,gamin,dk4m,dk4p,jqq,akappa,dimGrid,dimBlock);
171#else
172 memcpy(phi, r, kferm*sizeof(Complex));
173#pragma omp parallel for
174 for(int i=0;i<kvol;i++){
175#pragma omp simd aligned(phi,r,gamval:AVX)
176 //Diquark Term (antihermitian) The signs of a_1 and a_2 below flip under dagger
177 for(int idirac = 0; idirac<ndirac; idirac++){
178 int igork = idirac+4;
179 Complex a_1, a_2;
180 //We subtract a_1, hence the minus
181 a_1=-conj(jqq)*gamval[4*ndirac+idirac];
182 a_2=jqq*gamval[4*ndirac+idirac];
183 phi[(i*ngorkov+idirac)*nc]+=a_1*r[(i*ngorkov+igork)*nc];
184 phi[(i*ngorkov+idirac)*nc+1]+=a_1*r[(i*ngorkov+igork)*nc+1];
185 phi[(i*ngorkov+igork)*nc]+=a_2*r[(i*ngorkov+idirac)*nc];
186 phi[(i*ngorkov+igork)*nc+1]+=a_2*r[(i*ngorkov+idirac)*nc+1];
187 }
188
189 //Spacelike terms. Here's hoping I haven't put time as the zeroth component somewhere!
190#ifndef NO_SPACE
191 for(int mu = 0; mu <3; mu++){
192 int did=id[mu+ndim*i]; int uid = iu[mu+ndim*i];
193#pragma omp simd aligned(phi,r,u11t,u12t,gamval:AVX)
194 for(int igorkov=0; igorkov<ngorkov; igorkov++){
195 //FORTRAN had mod((igorkov-1),4)+1 to prevent issues with non-zero indexing.
196 int idirac=igorkov%4;
197 int igork1 = (igorkov<4) ? gamin[mu*ndirac+idirac] : gamin[mu*ndirac+idirac]+4;
198 //Wilson + Dirac term in that order. Definitely easier
199 //to read when split into different loops, but should be faster this way
200 //Reminder! gamval was rescaled by kappa when we defined it
201 phi[(i*ngorkov+igorkov)*nc]+=
202 -akappa*( u11t[i*ndim+mu]*r[(uid*ngorkov+igorkov)*nc]
203 +u12t[i*ndim+mu]*r[(uid*ngorkov+igorkov)*nc+1]
204 +conj(u11t[did*ndim+mu])*r[(did*ngorkov+igorkov)*nc]
205 -u12t[did*ndim+mu] *r[(did*ngorkov+igorkov)*nc+1])
206 -gamval[mu*ndirac+idirac]*
207 ( u11t[i*ndim+mu]*r[(uid*ngorkov+igork1)*nc]
208 +u12t[i*ndim+mu]*r[(uid*ngorkov+igork1)*nc+1]
209 -conj(u11t[did*ndim+mu])*r[(did*ngorkov+igork1)*nc]
210 +u12t[did*ndim+mu] *r[(did*ngorkov+igork1)*nc+1]);
211
212 phi[(i*ngorkov+igorkov)*nc+1]+=
213 -akappa*(-conj(u12t[i*ndim+mu])*r[(uid*ngorkov+igorkov)*nc]
214 +conj(u11t[i*ndim+mu])*r[(uid*ngorkov+igorkov)*nc+1]
215 +conj(u12t[did*ndim+mu])*r[(did*ngorkov+igorkov)*nc]
216 +u11t[did*ndim+mu] *r[(did*ngorkov+igorkov)*nc+1])
217 -gamval[mu*ndirac+idirac]*
218 (-conj(u12t[i*ndim+mu])*r[(uid*ngorkov+igork1)*nc]
219 +conj(u11t[i*ndim+mu])*r[(uid*ngorkov+igork1)*nc+1]
220 -conj(u12t[did*ndim+mu])*r[(did*ngorkov+igork1)*nc]
221 -u11t[did*ndim+mu] *r[(did*ngorkov+igork1)*nc+1]);
222 }
223 }
224#endif
225 //Timelike terms next. These run from igorkov=0..3 and 4..7 with slightly different rules for each
226 //We can fit it into a single loop by declaring igorkovPP=igorkov+4 instead of looping igorkov=4..7 separately
227 //Note that for the igorkov 4..7 loop idirac=igorkov-4, so we don't need to declare idiracPP separately
228 //Under dagger, dk4p and dk4m get swapped and the dirac component flips sign.
229 int did=id[3+ndim*i]; int uid = iu[3+ndim*i];
230#ifndef NO_TIME
231#pragma omp simd aligned(phi,r,u11t,u12t,dk4m,dk4p:AVX)
232 for(int igorkov=0; igorkov<4; igorkov++){
233 //the FORTRAN code did it.
234 int igork1 = gamin[3*ndirac+igorkov];
235 //Factorising for performance, we get dk4?*u1?*(+/-r_wilson -/+ r_dirac)
236 phi[(i*ngorkov+igorkov)*nc]+=
237 -dk4m[i]*(u11t[i*ndim+3]*(r[(uid*ngorkov+igorkov)*nc]+r[(uid*ngorkov+igork1)*nc])
238 +u12t[i*ndim+3]*(r[(uid*ngorkov+igorkov)*nc+1]+r[(uid*ngorkov+igork1)*nc+1]))
239 -dk4p[did]*(conj(u11t[did*ndim+3])*(r[(did*ngorkov+igorkov)*nc]-r[(did*ngorkov+igork1)*nc])
240 -u12t[did*ndim+3] *(r[(did*ngorkov+igorkov)*nc+1]-r[(did*ngorkov+igork1)*nc+1]));
241 phi[(i*ngorkov+igorkov)*nc+1]+=
242 -dk4m[i]*(-conj(u12t[i*ndim+3])*(r[(uid*ngorkov+igorkov)*nc]+r[(uid*ngorkov+igork1)*nc])
243 +conj(u11t[i*ndim+3])*(r[(uid*ngorkov+igorkov)*nc+1]+r[(uid*ngorkov+igork1)*nc+1]))
244 -dk4p[did]*(conj(u12t[did*ndim+3])*(r[(did*ngorkov+igorkov)*nc]-r[(did*ngorkov+igork1)*nc])
245 +u11t[did*ndim+3] *(r[(did*ngorkov+igorkov)*nc+1]-r[(did*ngorkov+igork1)*nc+1]));
246
247
248 int igorkovPP=igorkov+4; //idirac = igorkov; It is a bit redundant but I'll mention it as that's how
249 int igork1PP = igork1+4;
250 //And the +4 terms. Note that dk4p and dk4m swap positions compared to the above
251 phi[(i*ngorkov+igorkovPP)*nc]+=-dk4p[i]*(u11t[i*ndim+3]*(r[(uid*ngorkov+igorkovPP)*nc]+r[(uid*ngorkov+igork1PP)*nc])+\
252 u12t[i*ndim+3]*(r[(uid*ngorkov+igorkovPP)*nc+1]+r[(uid*ngorkov+igork1PP)*nc+1]))-\
253 dk4m[did]*(conj(u11t[did*ndim+3])*(r[(did*ngorkov+igorkovPP)*nc]-r[(did*ngorkov+igork1PP)*nc])-\
254 u12t[did*ndim+3]*(r[(did*ngorkov+igorkovPP)*nc+1]-r[(did*ngorkov+igork1PP)*nc+1]));
255
256 phi[(i*ngorkov+igorkovPP)*nc+1]+=dk4p[i]*(conj(u12t[i*ndim+3])*(r[(uid*ngorkov+igorkovPP)*nc]+r[(uid*ngorkov+igork1PP)*nc])-\
257 conj(u11t[i*ndim+3])*(r[(uid*ngorkov+igorkovPP)*nc+1]+r[(uid*ngorkov+igork1PP)*nc+1]))-\
258 dk4m[did]*(conj(u12t[did*ndim+3])*(r[(did*ngorkov+igorkovPP)*nc]-r[(did*ngorkov+igork1PP)*nc])+
259 u11t[did*ndim+3]*(r[(did*ngorkov+igorkovPP)*nc+1]-r[(did*ngorkov+igork1PP)*nc+1]));
260
261 }
262#endif
263 }
264#endif
265 return 0;
266}

References Complex, kferm, kvol, nc, ndim, ndirac, ngorkov, and ZHalo_swap_all().

Here is the call graph for this function:

◆ Dslashd_f()

int Dslashd_f ( Complex_f * phi,
Complex_f * r,
Complex_f * u11t,
Complex_f * u12t,
unsigned int * iu,
unsigned int * id,
Complex_f * gamval,
int * gamin,
float * dk4m,
float * dk4p,
Complex_f jqq,
float akappa )

Evaluates \(\Phi=M^\dagger r\) in single precision.

Parameters
phiThe product
rThe array being acted on by M
u11tFirst colour trial field
u12tSecond colour trial field
iuUpper halo indices
idLower halo indices
gamvalGamma matrices rescaled by kappa
gaminIndices for dirac terms
dk4m\(\left(1+\gamma_0\right)e^{-\mu}\)
dk4p\(\left(1-\gamma_0\right)e^\mu\)
jqqDiquark source
akappaHopping parameter
Returns
Zero on success, integer error code otherwise

Definition at line 584 of file matrices.c.

585 {
586 /*
587 * @brief Evaluates @f(\Phi=M^\dagger r@f) in single precision.
588 *
589 * @param phi: The product
590 * @param r: The array being acted on by M
591 * @param u11t_f: First colour trial field
592 * @param u12t_f: Second colour trial field
593 * @param iu: Upper halo indices
594 * @param id: Lower halo indices
595 * @param gamval_f: Gamma matrices
596 * @param gamin: Indices for dirac terms
597 * @param dk4m_f:
598 * @param dk4p_f:
599 * @param jqq: Diquark source
600 * @param akappa: Hopping parameter
601 *
602 * @see CHalo_swap_all (MPI only)
603 *
604 * @return Zero on success, integer error code otherwise
605 */
606 const char *funcname = "Dslashd_f";
607 //Get the halos in order
608#if(nproc>1)
609 CHalo_swap_all(r, 16);
610#endif
611
612 //Mass term
613#ifdef __NVCC__
614 cuDslashd_f(phi,r,u11t_f,u12t_f,iu,id,gamval_f,gamin,dk4m_f,dk4p_f,jqq,akappa,dimGrid,dimBlock);
615#else
616 memcpy(phi, r, kferm*sizeof(Complex_f));
617#pragma omp parallel for
618 for(int i=0;i<kvol;i++){
619#pragma omp simd aligned(phi,r,gamval_f:AVX)
620 //Diquark Term (antihermitian) The signs of a_1 and a_2 below flip under dagger
621 for(int idirac = 0; idirac<ndirac; idirac++){
622 int igork = idirac+4;
623 Complex_f a_1, a_2;
624 //We subtract a_1, hence the minus
625 a_1=-conj(jqq)*gamval_f[4*ndirac+idirac];
626 a_2=jqq*gamval_f[4*ndirac+idirac];
627 phi[(i*ngorkov+idirac)*nc]+=a_1*r[(i*ngorkov+igork)*nc];
628 phi[(i*ngorkov+idirac)*nc+1]+=a_1*r[(i*ngorkov+igork)*nc+1];
629 phi[(i*ngorkov+igork)*nc]+=a_2*r[(i*ngorkov+idirac)*nc];
630 phi[(i*ngorkov+igork)*nc+1]+=a_2*r[(i*ngorkov+idirac)*nc+1];
631 }
632
633 //Spacelike terms. Here's hoping I haven't put time as the zeroth component somewhere!
634#ifndef NO_SPACE
635 for(int mu = 0; mu <3; mu++){
636 int did=id[mu+ndim*i]; int uid = iu[mu+ndim*i];
637#pragma omp simd aligned(phi,r,u11t_f,u12t_f,gamval_f:AVX)
638 for(int igorkov=0; igorkov<ngorkov; igorkov++){
639 //FORTRAN had mod((igorkov-1),4)+1 to prevent issues with non-zero indexing.
640 int idirac=igorkov%4;
641 int igork1 = (igorkov<4) ? gamin[mu*ndirac+idirac] : gamin[mu*ndirac+idirac]+4;
642 //Wilson + Dirac term in that order. Definitely easier
643 //to read when split into different loops, but should be faster this way
644 //Reminder! gamval was rescaled by kappa when we defined it
645 phi[(i*ngorkov+igorkov)*nc]+=
646 -akappa*( u11t_f[i*ndim+mu]*r[(uid*ngorkov+igorkov)*nc]
647 +u12t_f[i*ndim+mu]*r[(uid*ngorkov+igorkov)*nc+1]
648 +conj(u11t_f[did*ndim+mu])*r[(did*ngorkov+igorkov)*nc]
649 -u12t_f[did*ndim+mu] *r[(did*ngorkov+igorkov)*nc+1])
650 -gamval_f[mu*ndirac+idirac]*
651 ( u11t_f[i*ndim+mu]*r[(uid*ngorkov+igork1)*nc]
652 +u12t_f[i*ndim+mu]*r[(uid*ngorkov+igork1)*nc+1]
653 -conj(u11t_f[did*ndim+mu])*r[(did*ngorkov+igork1)*nc]
654 +u12t_f[did*ndim+mu] *r[(did*ngorkov+igork1)*nc+1]);
655
656 phi[(i*ngorkov+igorkov)*nc+1]+=
657 -akappa*(-conj(u12t_f[i*ndim+mu])*r[(uid*ngorkov+igorkov)*nc]
658 +conj(u11t_f[i*ndim+mu])*r[(uid*ngorkov+igorkov)*nc+1]
659 +conj(u12t_f[did*ndim+mu])*r[(did*ngorkov+igorkov)*nc]
660 +u11t_f[did*ndim+mu] *r[(did*ngorkov+igorkov)*nc+1])
661 -gamval_f[mu*ndirac+idirac]*
662 (-conj(u12t_f[i*ndim+mu])*r[(uid*ngorkov+igork1)*nc]
663 +conj(u11t_f[i*ndim+mu])*r[(uid*ngorkov+igork1)*nc+1]
664 -conj(u12t_f[did*ndim+mu])*r[(did*ngorkov+igork1)*nc]
665 -u11t_f[did*ndim+mu] *r[(did*ngorkov+igork1)*nc+1]);
666 }
667 }
668#endif
669 //Timelike terms next. These run from igorkov=0..3 and 4..7 with slightly different rules for each
670 //We can fit it into a single loop by declaring igorkovPP=igorkov+4 instead of looping igorkov=4..7 separately
671 //Note that for the igorkov 4..7 loop idirac=igorkov-4, so we don't need to declare idiracPP separately
672 //Under dagger, dk4p_f and dk4m_f get swapped and the dirac component flips sign.
673 int did=id[3+ndim*i]; int uid = iu[3+ndim*i];
674#ifndef NO_TIME
675#pragma omp simd aligned(phi,r,u11t_f,u12t_f,dk4m_f,dk4p_f:AVX)
676 for(int igorkov=0; igorkov<4; igorkov++){
677 //the FORTRAN code did it.
678 int igork1 = gamin[3*ndirac+igorkov];
679 //Factorising for performance, we get dk4?*u1?*(+/-r_wilson -/+ r_dirac)
680 phi[(i*ngorkov+igorkov)*nc]+=
681 -dk4m_f[i]*(u11t_f[i*ndim+3]*(r[(uid*ngorkov+igorkov)*nc]+r[(uid*ngorkov+igork1)*nc])
682 +u12t_f[i*ndim+3]*(r[(uid*ngorkov+igorkov)*nc+1]+r[(uid*ngorkov+igork1)*nc+1]))
683 -dk4p_f[did]*(conj(u11t_f[did*ndim+3])*(r[(did*ngorkov+igorkov)*nc]-r[(did*ngorkov+igork1)*nc])
684 -u12t_f[did*ndim+3] *(r[(did*ngorkov+igorkov)*nc+1]-r[(did*ngorkov+igork1)*nc+1]));
685 phi[(i*ngorkov+igorkov)*nc+1]+=
686 -dk4m_f[i]*(-conj(u12t_f[i*ndim+3])*(r[(uid*ngorkov+igorkov)*nc]+r[(uid*ngorkov+igork1)*nc])
687 +conj(u11t_f[i*ndim+3])*(r[(uid*ngorkov+igorkov)*nc+1]+r[(uid*ngorkov+igork1)*nc+1]))
688 -dk4p_f[did]*(conj(u12t_f[did*ndim+3])*(r[(did*ngorkov+igorkov)*nc]-r[(did*ngorkov+igork1)*nc])
689 +u11t_f[did*ndim+3] *(r[(did*ngorkov+igorkov)*nc+1]-r[(did*ngorkov+igork1)*nc+1]));
690
691
692 int igorkovPP=igorkov+4; //idirac = igorkov; It is a bit redundant but I'll mention it as that's how
693 int igork1PP = igork1+4;
694 //And the +4 terms. Note that dk4p_f and dk4m_f swap positions compared to the above
695 phi[(i*ngorkov+igorkovPP)*nc]+=-dk4p_f[i]*(u11t_f[i*ndim+3]*(r[(uid*ngorkov+igorkovPP)*nc]+r[(uid*ngorkov+igork1PP)*nc])
696 +u12t_f[i*ndim+3]*(r[(uid*ngorkov+igorkovPP)*nc+1]+r[(uid*ngorkov+igork1PP)*nc+1]))
697 -dk4m_f[did]*(conj(u11t_f[did*ndim+3])*(r[(did*ngorkov+igorkovPP)*nc]-r[(did*ngorkov+igork1PP)*nc])
698 -u12t_f[did*ndim+3]*(r[(did*ngorkov+igorkovPP)*nc+1]-r[(did*ngorkov+igork1PP)*nc+1]));
699
700 phi[(i*ngorkov+igorkovPP)*nc+1]+=dk4p_f[i]*(conj(u12t_f[i*ndim+3])*(r[(uid*ngorkov+igorkovPP)*nc]+r[(uid*ngorkov+igork1PP)*nc])
701 -conj(u11t_f[i*ndim+3])*(r[(uid*ngorkov+igorkovPP)*nc+1]+r[(uid*ngorkov+igork1PP)*nc+1]))
702 -dk4m_f[did]*(conj(u12t_f[did*ndim+3])*(r[(did*ngorkov+igorkovPP)*nc]-r[(did*ngorkov+igork1PP)*nc])
703 +u11t_f[did*ndim+3]*(r[(did*ngorkov+igorkovPP)*nc+1]-r[(did*ngorkov+igork1PP)*nc+1]));
704
705 }
706#endif
707 }
708#endif
709 return 0;
710}

References CHalo_swap_all(), Complex_f, kferm, kvol, nc, ndim, ndirac, and ngorkov.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Hdslash()

int Hdslash ( Complex * phi,
Complex * r,
Complex * u11t,
Complex * u12t,
unsigned int * iu,
unsigned int * id,
Complex * gamval,
int * gamin,
double * dk4m,
double * dk4p,
float akappa )

Evaluates \(\Phi=M r\) in double precision.

Parameters
phiThe product
rThe array being acted on by M
u11tFirst colour trial field
u12tSecond colour trial field
iuUpper halo indices
idLower halo indices
gamvalGamma matrices rescaled by kappa
gaminIndices for dirac terms
dk4m\(\left(1+\gamma_0\right)e^{-\mu}\)
dk4p\(\left(1-\gamma_0\right)e^\mu\)
akappaHopping parameter
Returns
Zero on success, integer error code otherwise

Definition at line 267 of file matrices.c.

268 {
269 /*
270 * @brief Evaluates @f(\Phi=M r@f) in double precision
271 *
272 * @param phi: The product
273 * @param r: The array being acted on by M
274 * @param u11t: First colour trial field
275 * @param u12t: Second colour trial field
276 * @param iu: Upper halo indices
277 * @param id: Lower halo indices
278 * @param gamval: Gamma matrices
279 * @param gamin: Indices for dirac terms
280 * @param dk4m:
281 * @param dk4p:
282 * @param akappa: Hopping parameter
283 *
284 * @see ZHalo_swap_all (MPI only)
285 *
286 * @return Zero on success, integer error code otherwise
287 */
288 const char *funcname = "Hdslash";
289 //Get the halos in order
290#if(nproc>1)
291 ZHalo_swap_all(r, 8);
292#endif
293
294 //Mass term
295 //Spacelike term
296#ifdef __NVCC__
297 cuHdslash(phi,r,u11t,u12t,iu,id,gamval,gamin,dk4m,dk4p,akappa,dimGrid,dimBlock);
298#else
299 memcpy(phi, r, kferm2*sizeof(Complex));
300#pragma omp parallel for
301 for(int i=0;i<kvol;i++){
302#ifndef NO_SPACE
303 for(int mu = 0; mu <3; mu++){
304 int did=id[mu+ndim*i]; int uid = iu[mu+ndim*i];
305#pragma omp simd aligned(phi,r,u11t,u12t,gamval:AVX)
306 for(int idirac=0; idirac<ndirac; idirac++){
307 //FORTRAN had mod((idirac-1),4)+1 to prevent issues with non-zero indexing.
308 int igork1 = gamin[mu*ndirac+idirac];
309 //Can manually vectorise with a pragma?
310 //Wilson + Dirac term in that order. Definitely easier
311 //to read when split into different loops, but should be faster this way
312 phi[(i*ndirac+idirac)*nc]+=-akappa*(u11t[i*ndim+mu]*r[(uid*ndirac+idirac)*nc]+\
313 u12t[i*ndim+mu]*r[(uid*ndirac+idirac)*nc+1]+\
314 conj(u11t[did*ndim+mu])*r[(did*ndirac+idirac)*nc]-\
315 u12t[did*ndim+mu]*r[(did*ndirac+idirac)*nc+1])+\
316 //Dirac term
317 gamval[mu*ndirac+idirac]*(u11t[i*ndim+mu]*r[(uid*ndirac+igork1)*nc]+\
318 u12t[i*ndim+mu]*r[(uid*ndirac+igork1)*nc+1]-\
319 conj(u11t[did*ndim+mu])*r[(did*ndirac+igork1)*nc]+\
320 u12t[did*ndim+mu]*r[(did*ndirac+igork1)*nc+1]);
321
322 phi[(i*ndirac+idirac)*nc+1]+=-akappa*(-conj(u12t[i*ndim+mu])*r[(uid*ndirac+idirac)*nc]+\
323 conj(u11t[i*ndim+mu])*r[(uid*ndirac+idirac)*nc+1]+\
324 conj(u12t[did*ndim+mu])*r[(did*ndirac+idirac)*nc]+\
325 u11t[did*ndim+mu]*r[(did*ndirac+idirac)*nc+1])+\
326 //Dirac term
327 gamval[mu*ndirac+idirac]*(-conj(u12t[i*ndim+mu])*r[(uid*ndirac+igork1)*nc]+\
328 conj(u11t[i*ndim+mu])*r[(uid*ndirac+igork1)*nc+1]-\
329 conj(u12t[did*ndim+mu])*r[(did*ndirac+igork1)*nc]-\
330 u11t[did*ndim+mu]*r[(did*ndirac+igork1)*nc+1]);
331 }
332 }
333#endif
334 //Timelike terms
335 int did=id[3+ndim*i]; int uid = iu[3+ndim*i];
336#ifndef NO_TIME
337#pragma omp simd aligned(phi,r,u11t,u12t,dk4m,dk4p:AVX)
338 for(int idirac=0; idirac<ndirac; idirac++){
339 int igork1 = gamin[3*ndirac+idirac];
340 //Factorising for performance, we get dk4?*u1?*(+/-r_wilson -/+ r_dirac)
341 //Reminder! gamval was rescaled by kappa when we defined it
342 phi[(i*ndirac+idirac)*nc]+=
343 -dk4p[i]*(u11t[i*ndim+3]*(r[(uid*ndirac+idirac)*nc]-r[(uid*ndirac+igork1)*nc])
344 +u12t[i*ndim+3]*(r[(uid*ndirac+idirac)*nc+1]-r[(uid*ndirac+igork1)*nc+1]))
345 -dk4m[did]*(conj(u11t[did*ndim+3])*(r[(did*ndirac+idirac)*nc]+r[(did*ndirac+igork1)*nc])
346 -u12t[did*ndim+3] *(r[(did*ndirac+idirac)*nc+1]+r[(did*ndirac+igork1)*nc+1]));
347 phi[(i*ndirac+idirac)*nc+1]+=
348 -dk4p[i]*(-conj(u12t[i*ndim+3])*(r[(uid*ndirac+idirac)*nc]-r[(uid*ndirac+igork1)*nc])
349 +conj(u11t[i*ndim+3])*(r[(uid*ndirac+idirac)*nc+1]-r[(uid*ndirac+igork1)*nc+1]))
350 -dk4m[did]*(conj(u12t[did*ndim+3])*(r[(did*ndirac+idirac)*nc]+r[(did*ndirac+igork1)*nc])
351 +u11t[did*ndim+3] *(r[(did*ndirac+idirac)*nc+1]+r[(did*ndirac+igork1)*nc+1]));
352 }
353#endif
354 }
355#endif
356 return 0;
357}
#define kferm2
sublattice size including Dirac indices
Definition sizes.h:188

References Complex, kferm2, kvol, nc, ndim, ndirac, and ZHalo_swap_all().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Hdslash_f()

int Hdslash_f ( Complex_f * phi,
Complex_f * r,
Complex_f * u11t,
Complex_f * u12t,
unsigned int * iu,
unsigned int * id,
Complex_f * gamval,
int * gamin,
float * dk4m,
float * dk4p,
float akappa )

Evaluates \(\Phi=M r\) in single precision.

Parameters
phiThe product
rThe array being acted on by M
u11tFirst colour trial field
u12tSecond colour trial field
iuUpper halo indices
idLower halo indices
gamvalGamma matrices rescaled by kappa
gaminIndices for dirac terms
dk4m\(\left(1+\gamma_0\right)e^{-\mu}\)
dk4p\(\left(1-\gamma_0\right)e^\mu\)
akappaHopping parameter
Returns
Zero on success, integer error code otherwise

Definition at line 711 of file matrices.c.

712 {
713 /*
714 * @brief Evaluates @f(\Phi=M r@f) in single precision.
715 *
716 * @param phi: The product
717 * @param r: The array being acted on by M
718 * @param u11t_f: First colour trial field
719 * @param u12t_f: Second colour trial field
720 * @param iu: Upper halo indices
721 * @param id: Lower halo indices
722 * @param gamval_f: Gamma matrices
723 * @param gamin: Indices for dirac terms
724 * @param dk4m_f:
725 * @param dk4p_f:
726 * @param akappa: Hopping parameter
727 *
728 * @see CHalo_swap_all (MPI only)
729 *
730 * @return Zero on success, integer error code otherwise
731 */
732 const char *funcname = "Hdslash_f";
733 //Get the halos in order
734#if(nproc>1)
735 CHalo_swap_all(r, 8);
736#endif
737#ifdef __NVCC__
738 cuHdslash_f(phi,r,u11t_f,u12t_f,iu,id,gamval_f,gamin,dk4m_f,dk4p_f,akappa,dimGrid,dimBlock);
739#else
740 //Mass term
741 memcpy(phi, r, kferm2*sizeof(Complex_f));
742#pragma omp parallel for
743 for(int i=0;i<kvol;i++){
744 //Spacelike term
745#ifndef NO_SPACE
746 for(int mu = 0; mu <3; mu++){
747 int did=id[mu+ndim*i]; int uid = iu[mu+ndim*i];
748#pragma omp simd aligned(phi,r,u11t_f,u12t_f,gamval_f:AVX)
749 for(int idirac=0; idirac<ndirac; idirac++){
750 //FORTRAN had mod((idirac-1),4)+1 to prevent issues with non-zero indexing.
751 int igork1 = gamin[mu*ndirac+idirac];
752 //Can manually vectorise with a pragma?
753 //Wilson + Dirac term in that order. Definitely easier
754 //to read when split into different loops, but should be faster this way
755 phi[(i*ndirac+idirac)*nc]+=-akappa*(u11t_f[i*ndim+mu]*r[(uid*ndirac+idirac)*nc]+\
756 u12t_f[i*ndim+mu]*r[(uid*ndirac+idirac)*nc+1]+\
757 conjf(u11t_f[did*ndim+mu])*r[(did*ndirac+idirac)*nc]-\
758 u12t_f[did*ndim+mu]*r[(did*ndirac+idirac)*nc+1]);
759 //Dirac term
760 //Reminder! gamval was rescaled by kappa when we defined it
761 phi[(i*ndirac+idirac)*nc]+=gamval_f[mu*ndirac+idirac]*(u11t_f[i*ndim+mu]*r[(uid*ndirac+igork1)*nc]+\
762 u12t_f[i*ndim+mu]*r[(uid*ndirac+igork1)*nc+1]-\
763 conjf(u11t_f[did*ndim+mu])*r[(did*ndirac+igork1)*nc]+\
764 u12t_f[did*ndim+mu]*r[(did*ndirac+igork1)*nc+1]);
765
766 phi[(i*ndirac+idirac)*nc+1]+=-akappa*(-conjf(u12t_f[i*ndim+mu])*r[(uid*ndirac+idirac)*nc]+\
767 conjf(u11t_f[i*ndim+mu])*r[(uid*ndirac+idirac)*nc+1]+\
768 conjf(u12t_f[did*ndim+mu])*r[(did*ndirac+idirac)*nc]+\
769 u11t_f[did*ndim+mu]*r[(did*ndirac+idirac)*nc+1]);
770 //Dirac term
771 phi[(i*ndirac+idirac)*nc+1]+=gamval_f[mu*ndirac+idirac]*(-conjf(u12t_f[i*ndim+mu])*r[(uid*ndirac+igork1)*nc]+\
772 conjf(u11t_f[i*ndim+mu])*r[(uid*ndirac+igork1)*nc+1]-\
773 conjf(u12t_f[did*ndim+mu])*r[(did*ndirac+igork1)*nc]-\
774 u11t_f[did*ndim+mu]*r[(did*ndirac+igork1)*nc+1]);
775 }
776 }
777#endif
778 //Timelike terms
779 int did=id[3+ndim*i]; int uid = iu[3+ndim*i];
780#ifndef NO_TIME
781#pragma omp simd aligned(phi,r,u11t_f,u12t_f,dk4m_f,dk4p_f:AVX)
782 for(int idirac=0; idirac<ndirac; idirac++){
783 int igork1 = gamin[3*ndirac+idirac];
784 //Factorising for performance, we get dk4?*(float)u1?*(+/-r_wilson -/+ r_dirac)
785 phi[(i*ndirac+idirac)*nc]+=
786 -dk4p_f[i]*(u11t_f[i*ndim+3]*(r[(uid*ndirac+idirac)*nc]-r[(uid*ndirac+igork1)*nc])
787 +u12t_f[i*ndim+3]*(r[(uid*ndirac+idirac)*nc+1]-r[(uid*ndirac+igork1)*nc+1]))
788 -dk4m_f[did]*(conjf(u11t_f[did*ndim+3])*(r[(did*ndirac+idirac)*nc]+r[(did*ndirac+igork1)*nc])
789 -u12t_f[did*ndim+3] *(r[(did*ndirac+idirac)*nc+1]+r[(did*ndirac+igork1)*nc+1]));
790
791 phi[(i*ndirac+idirac)*nc+1]+=
792 -dk4p_f[i]*(-conjf(u12t_f[i*ndim+3])*(r[(uid*ndirac+idirac)*nc]-r[(uid*ndirac+igork1)*nc])
793 +conjf(u11t_f[i*ndim+3])*(r[(uid*ndirac+idirac)*nc+1]-r[(uid*ndirac+igork1)*nc+1]))
794 -dk4m_f[did]*(conjf(u12t_f[did*ndim+3])*(r[(did*ndirac+idirac)*nc]+r[(did*ndirac+igork1)*nc])
795 +u11t_f[did*ndim+3] *(r[(did*ndirac+idirac)*nc+1]+r[(did*ndirac+igork1)*nc+1]));
796 }
797#endif
798 }
799#endif
800 return 0;
801}

References CHalo_swap_all(), Complex_f, kferm2, kvol, nc, ndim, and ndirac.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Hdslashd()

int Hdslashd ( Complex * phi,
Complex * r,
Complex * u11t,
Complex * u12t,
unsigned int * iu,
unsigned int * id,
Complex * gamval,
int * gamin,
double * dk4m,
double * dk4p,
float akappa )

Evaluates \(\Phi=M^\dagger r\) in double precision.

Parameters
phiThe product
rThe array being acted on by M
u11tFirst colour trial field
u12tSecond colour trial field
iuUpper halo indices
idLower halo indices
gamvalGamma matrices rescaled by kappa
gaminIndices for dirac terms
dk4m\(\left(1+\gamma_0\right)e^{-\mu}\)
dk4p\(\left(1-\gamma_0\right)e^\mu\)
akappaHopping parameter
Returns
Zero on success, integer error code otherwise

Definition at line 358 of file matrices.c.

359 {
360 /*
361 * @brief Evaluates @f(\Phi=M^\dagger r@f) in double precision
362 *
363 * @param phi: The product
364 * @param r: The array being acted on by M
365 * @param u11t: First colour trial field
366 * @param u12t: Second colour trial field
367 * @param iu: Upper halo indices
368 * @param id: Lower halo indices
369 * @param gamval: Gamma matrices
370 * @param gamin: Indices for dirac terms
371 * @param dk4m:
372 * @param dk4p:
373 * @param akappa: Hopping parameter
374 *
375 * @see ZHalo_swap_all (MPI only)
376 *
377 * @return Zero on success, integer error code otherwise
378 */
379 const char *funcname = "Hdslashd";
380 //Get the halos in order. Because C is row major, we need to extract the correct
381 //terms for each halo first. Changing the indices was considered but that caused
382 //issues with the BLAS routines.
383#if(nproc>1)
384 ZHalo_swap_all(r, 8);
385#endif
386
387 //Looks like flipping the array ordering for C has meant a lot
388 //of for loops. Sense we're jumping around quite a bit the cache is probably getting refreshed
389 //anyways so memory access patterns mightn't be as big of an limiting factor here anyway
390
391 //Mass term
392#ifdef __NVCC__
393 cuHdslashd(phi,r,u11t,u12t,iu,id,gamval,gamin,dk4m,dk4p,akappa,dimGrid,dimBlock);
394#else
395 memcpy(phi, r, kferm2*sizeof(Complex));
396 //Spacelike term
397#pragma omp parallel for
398 for(int i=0;i<kvol;i++){
399#ifndef NO_SPACE
400 for(int mu = 0; mu <ndim-1; mu++){
401 int did=id[mu+ndim*i]; int uid = iu[mu+ndim*i];
402#pragma omp simd aligned(phi,r,u11t,u12t,gamval:AVX)
403 for(int idirac=0; idirac<ndirac; idirac++){
404 //FORTRAN had mod((idirac-1),4)+1 to prevent issues with non-zero indexing.
405 int igork1 = gamin[mu*ndirac+idirac];
406 //Can manually vectorise with a pragma?
407 //Wilson + Dirac term in that order. Definitely easier
408 //to read when split into different loops, but should be faster this way
409
410 //Reminder! gamval was rescaled by kappa when we defined it
411 phi[(i*ndirac+idirac)*nc]+=
412 -akappa*(u11t[i*ndim+mu]*r[(uid*ndirac+idirac)*nc]
413 +u12t[i*ndim+mu]*r[(uid*ndirac+idirac)*nc+1]
414 +conj(u11t[did*ndim+mu])*r[(did*ndirac+idirac)*nc]
415 -u12t[did*ndim+mu] *r[(did*ndirac+idirac)*nc+1])
416 -gamval[mu*ndirac+idirac]*
417 ( u11t[i*ndim+mu]*r[(uid*ndirac+igork1)*nc]
418 +u12t[i*ndim+mu]*r[(uid*ndirac+igork1)*nc+1]
419 -conj(u11t[did*ndim+mu])*r[(did*ndirac+igork1)*nc]
420 +u12t[did*ndim+mu] *r[(did*ndirac+igork1)*nc+1]);
421
422 phi[(i*ndirac+idirac)*nc+1]+=
423 -akappa*(-conj(u12t[i*ndim+mu])*r[(uid*ndirac+idirac)*nc]
424 +conj(u11t[i*ndim+mu])*r[(uid*ndirac+idirac)*nc+1]
425 +conj(u12t[did*ndim+mu])*r[(did*ndirac+idirac)*nc]
426 +u11t[did*ndim+mu] *r[(did*ndirac+idirac)*nc+1])
427 -gamval[mu*ndirac+idirac]*
428 (-conj(u12t[i*ndim+mu])*r[(uid*ndirac+igork1)*nc]
429 +conj(u11t[i*ndim+mu])*r[(uid*ndirac+igork1)*nc+1]
430 -conj(u12t[did*ndim+mu])*r[(did*ndirac+igork1)*nc]
431 -u11t[did*ndim+mu] *r[(did*ndirac+igork1)*nc+1]);
432 }
433 }
434#endif
435 //Timelike terms
436 int did=id[3+ndim*i]; int uid = iu[3+ndim*i];
437#ifndef NO_TIME
438#pragma omp simd aligned(phi,r,u11t,u12t,dk4m,dk4p:AVX)
439 for(int idirac=0; idirac<ndirac; idirac++){
440 int igork1 = gamin[3*ndirac+idirac];
441 //Factorising for performance, we get dk4?*u1?*(+/-r_wilson -/+ r_dirac)
442 //dk4m and dk4p swap under dagger
443 phi[(i*ndirac+idirac)*nc]+=
444 -dk4m[i]*(u11t[i*ndim+3]*(r[(uid*ndirac+idirac)*nc]+r[(uid*ndirac+igork1)*nc])
445 +u12t[i*ndim+3]*(r[(uid*ndirac+idirac)*nc+1]+r[(uid*ndirac+igork1)*nc+1]))
446 -dk4p[did]*(conj(u11t[did*ndim+3])*(r[(did*ndirac+idirac)*nc]-r[(did*ndirac+igork1)*nc])
447 -u12t[did*ndim+3] *(r[(did*ndirac+idirac)*nc+1]-r[(did*ndirac+igork1)*nc+1]));
448
449 phi[(i*ndirac+idirac)*nc+1]+=
450 -dk4m[i]*(-conj(u12t[i*ndim+3])*(r[(uid*ndirac+idirac)*nc]+r[(uid*ndirac+igork1)*nc])
451 +conj(u11t[i*ndim+3])*(r[(uid*ndirac+idirac)*nc+1]+r[(uid*ndirac+igork1)*nc+1]))
452 -dk4p[did]*(conj(u12t[did*ndim+3])*(r[(did*ndirac+idirac)*nc]-r[(did*ndirac+igork1)*nc])
453 +u11t[did*ndim+3] *(r[(did*ndirac+idirac)*nc+1]-r[(did*ndirac+igork1)*nc+1]));
454 }
455#endif
456 }
457#endif
458 return 0;
459}

References Complex, kferm2, kvol, nc, ndim, ndirac, and ZHalo_swap_all().

Here is the call graph for this function:

◆ Hdslashd_f()

int Hdslashd_f ( Complex_f * phi,
Complex_f * r,
Complex_f * u11t,
Complex_f * u12t,
unsigned int * iu,
unsigned int * id,
Complex_f * gamval,
int * gamin,
float * dk4m,
float * dk4p,
float akappa )

Evaluates \(\Phi=M^\dagger r\) in single precision.

Parameters
phiThe product
rThe array being acted on by M
u11tFirst colour trial field
u12tSecond colour trial field
iuUpper halo indices
idLower halo indices
gamvalGamma matrices rescaled by kappa
gaminIndices for dirac terms
dk4m\(\left(1+\gamma_0\right)e^{-\mu}\)
dk4p\(\left(1-\gamma_0\right)e^\mu\)
akappaHopping parameter
Returns
Zero on success, integer error code otherwise

Definition at line 802 of file matrices.c.

803 {
804 /*
805 * @brief Evaluates @f(\Phi=M^\dagger r@f) in single precision
806 *
807 * @param phi: The product
808 * @param r: The array being acted on by M
809 * @param u11t_f: First colour trial field
810 * @param u12t_f: Second colour trial field
811 * @param iu: Upper halo indices
812 * @param id: Lower halo indices
813 * @param gamval_f: Gamma matrices
814 * @param gamin: Indices for dirac terms
815 * @param dk4m_f:
816 * @param dk4p_f:
817 * @param akappa: Hopping parameter
818 *
819 * @see CHalo_swap_all (MPI only)
820 *
821 * @return Zero on success, integer error code otherwise
822 */
823 const char *funcname = "Hdslashd_f";
824 //Get the halos in order. Because C is row major, we need to extract the correct
825 //terms for each halo first. Changing the indices was considered but that caused
826 //issues with the BLAS routines.
827#if(nproc>1)
828 CHalo_swap_all(r, 8);
829#endif
830
831 //Looks like flipping the array ordering for C has meant a lot
832 //of for loops. Sense we're jumping around quite a bit the cache is probably getting refreshed
833 //anyways so memory access patterns mightn't be as big of an limiting factor here anyway
834
835 //Mass term
836#ifdef __NVCC__
837 cuHdslashd_f(phi,r,u11t_f,u12t_f,iu,id,gamval_f,gamin,dk4m_f,dk4p_f,akappa,dimGrid,dimBlock);
838#else
839 memcpy(phi, r, kferm2*sizeof(Complex_f));
840 //Spacelike term
841#pragma omp parallel for
842 for(int i=0;i<kvol;i++){
843#ifndef NO_SPACE
844 for(int mu = 0; mu <ndim-1; mu++){
845 int did=id[mu+ndim*i]; int uid = iu[mu+ndim*i];
846#pragma omp simd aligned(phi,r,u11t_f,u12t_f,gamval_f:AVX)
847 for(int idirac=0; idirac<ndirac; idirac++){
848 //FORTRAN had mod((idirac-1),4)+1 to prevent issues with non-zero indexing.
849 int igork1 = gamin[mu*ndirac+idirac];
850 //Can manually vectorise with a pragma?
851 //Wilson + Dirac term in that order. Definitely easier
852 //to read when split into different loops, but should be faster this way
853
854 //Reminder! gamval was rescaled by kappa when we defined it
855 phi[(i*ndirac+idirac)*nc]+=
856 -akappa*(u11t_f[i*ndim+mu]*r[(uid*ndirac+idirac)*nc]
857 +u12t_f[i*ndim+mu]*r[(uid*ndirac+idirac)*nc+1]
858 +conjf(u11t_f[did*ndim+mu])*r[(did*ndirac+idirac)*nc]
859 -u12t_f[did*ndim+mu] *r[(did*ndirac+idirac)*nc+1])
860 -gamval_f[mu*ndirac+idirac]*
861 ( u11t_f[i*ndim+mu]*r[(uid*ndirac+igork1)*nc]
862 +u12t_f[i*ndim+mu]*r[(uid*ndirac+igork1)*nc+1]
863 -conjf(u11t_f[did*ndim+mu])*r[(did*ndirac+igork1)*nc]
864 +u12t_f[did*ndim+mu] *r[(did*ndirac+igork1)*nc+1]);
865
866 phi[(i*ndirac+idirac)*nc+1]+=
867 -akappa*(-conjf(u12t_f[i*ndim+mu])*r[(uid*ndirac+idirac)*nc]
868 +conjf(u11t_f[i*ndim+mu])*r[(uid*ndirac+idirac)*nc+1]
869 +conjf(u12t_f[did*ndim+mu])*r[(did*ndirac+idirac)*nc]
870 +u11t_f[did*ndim+mu] *r[(did*ndirac+idirac)*nc+1])
871 -gamval_f[mu*ndirac+idirac]*
872 (-conjf(u12t_f[i*ndim+mu])*r[(uid*ndirac+igork1)*nc]
873 +conjf(u11t_f[i*ndim+mu])*r[(uid*ndirac+igork1)*nc+1]
874 -conjf(u12t_f[did*ndim+mu])*r[(did*ndirac+igork1)*nc]
875 -u11t_f[did*ndim+mu] *r[(did*ndirac+igork1)*nc+1]);
876 }
877 }
878#endif
879 //Timelike terms
880 int did=id[3+ndim*i]; int uid = iu[3+ndim*i];
881#ifndef NO_TIME
882#pragma omp simd aligned(phi,r,u11t_f,u12t_f,gamval_f:AVX)
883 for(int idirac=0; idirac<ndirac; idirac++){
884 int igork1 = gamin[3*ndirac+idirac];
885 //Factorising for performance, we get (float)dk4?*(float)u1?*(+/-r_wilson -/+ r_dirac)
886 //(float)dk4m and dk4p_f swap under dagger
887 phi[(i*ndirac+idirac)*nc]+=
888 -dk4m_f[i]*(u11t_f[i*ndim+3]*(r[(uid*ndirac+idirac)*nc]+r[(uid*ndirac+igork1)*nc])
889 +u12t_f[i*ndim+3]*(r[(uid*ndirac+idirac)*nc+1]+r[(uid*ndirac+igork1)*nc+1]))
890 -dk4p_f[did]*(conjf(u11t_f[did*ndim+3])*(r[(did*ndirac+idirac)*nc]-r[(did*ndirac+igork1)*nc])
891 -u12t_f[did*ndim+3] *(r[(did*ndirac+idirac)*nc+1]-r[(did*ndirac+igork1)*nc+1]));
892
893 phi[(i*ndirac+idirac)*nc+1]+=
894 -dk4m_f[i]*(-conjf(u12t_f[i*ndim+3])*(r[(uid*ndirac+idirac)*nc]+r[(uid*ndirac+igork1)*nc])
895 +conjf(u11t_f[i*ndim+3])*(r[(uid*ndirac+idirac)*nc+1]+r[(uid*ndirac+igork1)*nc+1]))
896 -dk4p_f[did]*(conjf(u12t_f[did*ndim+3])*(r[(did*ndirac+idirac)*nc]-r[(did*ndirac+igork1)*nc])
897 +u11t_f[did*ndim+3] *(r[(did*ndirac+idirac)*nc+1]-r[(did*ndirac+igork1)*nc+1]));
898 }
899#endif
900 }
901#endif
902 return 0;
903}

References CHalo_swap_all(), Complex_f, kferm2, kvol, nc, ndim, and ndirac.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Reunitarise()

int Reunitarise ( Complex * u11t,
Complex * u12t )
inline

Reunitarises u11t and u12t as in conj(u11t[i])*u11t[i]+conj(u12t[i])*u12t[i]=1.

If you're looking at the FORTRAN code be careful. There are two header files for the /trial/ header. One with u11 u12 (which was included here originally) and the other with u11t and u12t.

Parameters
u11t,u12tTrial fields to be reunitarised
Returns
Zero on success, integer error code otherwise

Definition at line 904 of file matrices.c.

904 {
905 /*
906 * @brief Reunitarises u11t and u12t as in conj(u11t[i])*u11t[i]+conj(u12t[i])*u12t[i]=1
907 *
908 * If you're looking at the FORTRAN code be careful. There are two header files
909 * for the /trial/ header. One with u11 u12 (which was included here originally)
910 * and the other with u11t and u12t.
911 *
912 * @see cuReunitarise (CUDA Wrapper)
913 *
914 * @param u11t, u12t Trial fields to be reunitarised
915 *
916 * @return Zero on success, integer error code otherwise
917 */
918 const char *funcname = "Reunitarise";
919#ifdef __NVCC__
920 cuReunitarise(u11t,u12t,dimGrid,dimBlock);
921#else
922#pragma omp parallel for simd aligned(u11t,u12t:AVX)
923 for(int i=0; i<kvol*ndim; i++){
924 //Declaring anorm inside the loop will hopefully let the compiler know it
925 //is safe to vectorise aggressively
926 double anorm=sqrt(conj(u11t[i])*u11t[i]+conj(u12t[i])*u12t[i]);
927 // Exception handling code. May be faster to leave out as the exit prevents vectorisation.
928 // if(anorm==0){
929 // fprintf(stderr, "Error %i in %s on rank %i: anorm = 0 for μ=%i and i=%i.\nExiting...\n\n",
930 // DIVZERO, funcname, rank, mu, i);
931 // MPI_Finalise();
932 // exit(DIVZERO);
933 // }
934 u11t[i]/=anorm;
935 u12t[i]/=anorm;
936 }
937#endif
938 return 0;
939}

References kvol, and ndim.

Here is the caller graph for this function: