su2hmc
Loading...
Searching...
No Matches
par_mpi.h
Go to the documentation of this file.
1
6#ifndef PAR_MPI
7#define PAR_MPI
8#include <coord.h>
9#include <errorcodes.h>
10#if (nproc >1)
11#include <mpi.h>
12#endif
13#ifdef _OPENMP
14#include <omp.h>
15#endif
16//#include <random.h>
17#include <sizes.h>
18#ifdef __cplusplus
19#include <cstdio>
20#include <cstdlib>
21#include <cstring>
22#else
23#include <stdbool.h>
24#include <stdio.h>
25#include <stdlib.h>
26#include <string.h>
27#endif
28
30#define MPI_Finalise() MPI_Finalize()
31
32//Definitions
33//==========
35#define DOWN 0
37#define UP 1
38
40#define masterproc 0
41
43#define tag 0
44//#define _STAT_SIZE_ sizeof(MPI_Status)
45//Variables
46//=========
47//Up/Down arrays
49extern int __attribute__((aligned(AVX))) pu[ndim];
51extern int __attribute__((aligned(AVX))) pd[ndim];
52
53//MPI Stuff
54#if (nproc >1)
56extern MPI_Comm comm ;
58extern MPI_Request request;
59#endif
60
62extern int *pcoord;
64extern int __attribute__((aligned(AVX))) pstart[ndim][nproc];
66extern int __attribute__((aligned(AVX))) pstop[ndim][nproc];
68extern int rank;
70extern int size;
71//The common keyword from fortran is largely redundant here as everything
72//is already global scope.
73
74/*common /par/ pu, pd, procid, comm,
75 1 gsize, lsize, pcoord, pstart, pstop,
76 1 ismaster, masterproc
77 */
78
79#ifdef __cplusplus
80extern "C"
81{
82#endif
83 //Function Declarations
84 //=====================
93 int Par_begin(int argc, char *argv[]);
107 int Par_sread(const int iread, const float beta, const float fmu, const float akappa, const Complex_f ajq,\
108 Complex *u11, Complex *u12, Complex *u11t, Complex *u12t);
124 int Par_swrite(const int itraj, const int icheck, const float beta, const float fmu, const float akappa,\
125 const Complex_f ajq, Complex *u11, Complex *u12);
126 //Shortcuts for reductions and broadcasts. These should be inlined
137 int Par_isum(int *ival);
148 int Par_dsum(double *dval);
159 int Par_fsum(float *dval);
170 int Par_csum(Complex_f *cval);
181 int Par_zsum(Complex *zval);
189 int Par_icopy(int *ival);
197 int Par_dcopy(double *dval);
205 int Par_fcopy(float *fval);
213 int Par_ccopy(Complex *cval);
221 int Par_zcopy(Complex *zval);
222 //Halo Manipulation
231 int ZHalo_swap_all(Complex *z, int ncpt);
243 int ZHalo_swap_dir(Complex *z, int ncpt, int idir, int layer);
252 int CHalo_swap_all(Complex_f *c, int ncpt);
264 int CHalo_swap_dir(Complex_f *c, int ncpt, int idir, int layer);
273 int DHalo_swap_all(double *d, int ncpt);
285 int DHalo_swap_dir(double *d, int ncpt, int idir, int layer);
299 int Trial_Exchange(Complex *u11t, Complex *u12t, Complex_f *u11t_f, Complex_f *u12t_f);
300 //If we have more than two processors on the time axis, there's an extra step in the Polyakov loop calculation
301#if(npt>1)
309 int Par_tmul(Complex_f *z11, Complex_f *z12);
310#endif
311#ifdef __cplusplus
312}
313#endif
314#endif
Header for routines related to lattice sites.
This header is intended to be a useful reference for error codes and their meanings.
int CHalo_swap_all(Complex_f *c, int ncpt)
Calls the functions to send data to both the up and down halos.
int Par_swrite(const int itraj, const int icheck, const float beta, const float fmu, const float akappa, const Complex_f ajq, Complex *u11, Complex *u12)
Copies u11 and u12 into arrays without halos which then get written to output.
Definition par_mpi.c:341
int __RANLUX__ pstart[ndim][nproc]
The initial lattice site on each sublattice in a given direction.
int Par_fcopy(float *fval)
Broadcasts a float to the other processes.
int ZHalo_swap_all(Complex *z, int ncpt)
Calls the functions to send data to both the up and down halos.
int size
The number of MPI ranks in total.
Definition par_mpi.c:22
int Par_fsum(float *dval)
Performs a reduction on a float dval to get a sum which is then distributed to all ranks.
int Par_sread(const int iread, const float beta, const float fmu, const float akappa, const Complex_f ajq, Complex *u11, Complex *u12, Complex *u11t, Complex *u12t)
Reads and assigns the gauges from file.
Definition par_mpi.c:127
int Par_dcopy(double *dval)
Broadcasts a double to the other processes.
int rank
The MPI rank.
Definition par_mpi.c:22
int Par_begin(int argc, char *argv[])
Initialises the MPI configuration.
Definition par_mpi.c:25
int Par_isum(int *ival)
Performs a reduction on an integer ival to get a sum which is then distributed to all ranks.
int Par_icopy(int *ival)
Broadcasts an integer to the other processes.
int ZHalo_swap_dir(Complex *z, int ncpt, int idir, int layer)
Swaps the halos along the axis given by idir in the direction given by layer.
int Par_ccopy(Complex *cval)
Broadcasts a complex float to the other processes.
int Par_zcopy(Complex *zval)
Broadcasts a complex double to the other processes.
int DHalo_swap_all(double *d, int ncpt)
Calls the functions to send data to both the up and down halos.
int __RANLUX__ pu[ndim]
Processors in the up direction.
Definition par_mpi.c:23
int Trial_Exchange(Complex *u11t, Complex *u12t, Complex_f *u11t_f, Complex_f *u12t_f)
Exchanges the trial fields.
Definition par_mpi.c:1178
int CHalo_swap_dir(Complex_f *c, int ncpt, int idir, int layer)
Swaps the halos along the axis given by idir in the direction given by layer.
int __RANLUX__ pstop[ndim][nproc]
The final lattice site on each sublattice in a given direction.
Definition par_mpi.c:21
int Par_csum(Complex_f *cval)
Performs a reduction on a complex float cval to get a sum which is then distributed to all ranks.
int Par_zsum(Complex *zval)
Performs a reduction on a complex double zval to get a sum which is then distributed to all ranks.
int __RANLUX__ pd[ndim]
Processors in the down direction.
Definition par_mpi.c:24
int DHalo_swap_dir(double *d, int ncpt, int idir, int layer)
Swaps the halos along the axis given by idir in the direction given by layer.
int Par_dsum(double *dval)
Performs a reduction on a double dval to get a sum which is then distributed to all ranks.
int * pcoord
The processor grid.
Definition par_mpi.c:19
Defines the constants of the code and other parameters for loop dimensions. Each subroutine includes ...
#define AVX
Alignment of arrays. 64 for AVX-512, 32 for AVX/AVX2. 16 for SSE. Since AVX is standard on modern x86...
Definition sizes.h:268
#define nproc
Number of processors for MPI.
Definition sizes.h:132
#define Complex
Double precision complex number.
Definition sizes.h:58
#define Complex_f
Single precision complex number.
Definition sizes.h:56
#define ndim
Dimensions.
Definition sizes.h:179