su2hmc/par__mpi_8h_source.html

#ifndef  PAR_MPI

#define  PAR_MPI

#include <coord.h>

#include <errorcodes.h>

// Pull in MPI only when the build uses more than one process.
// NOTE(review): nproc is documented as being defined in sizes.h, which this header
// includes only further down. An identifier that is not a defined macro evaluates to 0
// inside #if, so this test relies on nproc already being visible through an earlier
// include -- confirm that coord.h or errorcodes.h provides it.
#if (nproc >1)

#include <mpi.h>

#endif

#ifdef _OPENMP

#include <omp.h>

#endif

//#include  <random.h>

#include <sizes.h>

#ifdef __cplusplus

#include <cstdio>

#include <cstdlib>

#include <cstring>

#else

#include <stdbool.h>

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#endif


// Alternative spelling: MPI_Finalise() expands to a call to MPI_Finalize().
#define MPI_Finalise() MPI_Finalize()


//Definitions

//==========

// Direction labels.
// NOTE(review): presumably the values passed as `layer` to the *Halo_swap_dir routines -- confirm.
#define  DOWN  0

#define  UP    1


// NOTE(review): presumably the rank that acts as the master process -- confirm against par_mpi.c.
#define masterproc 0


// NOTE(review): presumably the MPI message tag -- confirm against par_mpi.c.
// Being a lowercase object-like macro, it replaces every identifier spelled `tag`
// in any code that includes this header.
#define tag   0

//#define _STAT_SIZE_  sizeof(MPI_Status)

//Variables

//=========

//Up/Down arrays
// All of these are declarations only; the tooltips place the definitions in par_mpi.c.
// The arrays are aligned to AVX bytes (AVX is the array-alignment macro from sizes.h).

// Processors in the up direction, one entry per dimension.
extern int __attribute__((aligned(AVX))) pu[ndim];

// Processors in the down direction, one entry per dimension.
extern int __attribute__((aligned(AVX))) pd[ndim];


//MPI Stuff
// The MPI handles are only declared when the build uses more than one process,
// matching the conditional inclusion of mpi.h.

#if (nproc >1)

// MPI communicator handle.
extern MPI_Comm comm ;

// MPI request handle.
extern MPI_Request request;

#endif


// The processor grid.
extern int *pcoord;

// The initial lattice site on each sublattice in a given direction (ndim x nproc entries).
extern int  __attribute__((aligned(AVX))) pstart[ndim][nproc];

// The final lattice site on each sublattice in a given direction (ndim x nproc entries).
extern int  __attribute__((aligned(AVX))) pstop[ndim][nproc];

// The MPI rank.
extern int rank;

// The number of MPI ranks in total.
extern int size;

//The common keyword from fortran is largely redundant here as everything

//is already global scope.


/*common /par/ pu, pd, procid, comm,

  1             gsize, lsize, pcoord, pstart, pstop,

  1             ismaster, masterproc

 */


#ifdef __cplusplus

extern "C"

{

#endif

   //Function Declarations

   //=====================

   /** Initialises the MPI configuration. */
   int Par_begin(int argc, char *argv[]);

   /** Reads and assigns the gauges from file. */
   int Par_sread(const int iread, const float beta, const float fmu, const float akappa, const Complex_f ajq,\

         Complex *u11, Complex *u12, Complex *u11t, Complex *u12t);

   /** Copies u11 and u12 into arrays without halos which then get written to output. */
   int Par_swrite(const int itraj, const int icheck, const float beta, const float fmu, const float akappa,\

         const Complex_f ajq, Complex *u11, Complex *u12);

   //Shortcuts for reductions and broadcasts. These should be inlined
   // The *sum routines reduce a single value to a sum that is then distributed to all ranks;
   // the *copy routines broadcast a single value to the other processes.

   /** Sums the integer ival across ranks and distributes the result to all ranks. */
   int Par_isum(int *ival);

   /** Sums the double dval across ranks and distributes the result to all ranks. */
   int Par_dsum(double *dval);

   /** Sums the float dval across ranks and distributes the result to all ranks. */
   int Par_fsum(float *dval);

   /** Sums the complex float cval across ranks and distributes the result to all ranks. */
   int Par_csum(Complex_f *cval);

   /** Sums the complex double zval across ranks and distributes the result to all ranks. */
   int Par_zsum(Complex *zval);

   /** Broadcasts an integer to the other processes. */
   int Par_icopy(int *ival);

   /** Broadcasts a double to the other processes. */
   int Par_dcopy(double *dval);

   /** Broadcasts a float to the other processes. */
   int Par_fcopy(float *fval);

   /** Broadcasts a complex float to the other processes. */
   // NOTE(review): described as broadcasting a complex float, yet the parameter is Complex *
   // (the double precision complex type) rather than Complex_f * as in Par_csum -- confirm
   // against the definition in par_mpi.c before relying on either.
   int Par_ccopy(Complex *cval);

   /** Broadcasts a complex double to the other processes. */
   int Par_zcopy(Complex *zval);

   //Halo Manipulation
   // The Z/C/D prefix selects the element type: Complex, Complex_f and double respectively.
   // *_swap_all calls the functions to send data to both the up and down halos;
   // *_swap_dir swaps the halos along the axis given by idir in the direction given by layer.

   /** Sends Complex data to both the up and down halos. */
   int ZHalo_swap_all(Complex *z, int ncpt);

   /** Swaps the Complex halos along axis idir in the direction given by layer. */
   int ZHalo_swap_dir(Complex *z, int ncpt, int idir, int layer);

   /** Sends Complex_f data to both the up and down halos. */
   int CHalo_swap_all(Complex_f *c, int ncpt);

   /** Swaps the Complex_f halos along axis idir in the direction given by layer. */
   int CHalo_swap_dir(Complex_f *c, int ncpt, int idir, int layer);

   /** Sends double data to both the up and down halos. */
   int DHalo_swap_all(double *d, int ncpt);

   /** Swaps the double halos along axis idir in the direction given by layer. */
   int DHalo_swap_dir(double *d, int ncpt, int idir, int layer);

   /** Exchanges the trial fields. */
   int Trial_Exchange(Complex *u11t, Complex *u12t, Complex_f *u11t_f, Complex_f *u12t_f);

   //If we have more than one processor on the time axis (the guard below is npt>1), there's an extra
   //step in the Polyakov loop calculation. Par_tmul is only declared in that case.

#if(npt>1)

   int Par_tmul(Complex_f *z11, Complex_f *z12);

#endif

#ifdef __cplusplus

}

#endif

#endif

coord.h
Header for routines related to lattice sites.

errorcodes.h
This header is intended to be a useful reference for error codes and their meanings.

CHalo_swap_all
int CHalo_swap_all(Complex_f *c, int ncpt)
Calls the functions to send data to both the up and down halos.

Par_swrite
int Par_swrite(const int itraj, const int icheck, const float beta, const float fmu, const float akappa, const Complex_f ajq, Complex *u11, Complex *u12)
Copies u11 and u12 into arrays without halos which then get written to output.
Definition par_mpi.c:341

pstart
int __RANLUX__ pstart[ndim][nproc]
The initial lattice site on each sublattice in a given direction.

Par_fcopy
int Par_fcopy(float *fval)
Broadcasts a float to the other processes.

ZHalo_swap_all
int ZHalo_swap_all(Complex *z, int ncpt)
Calls the functions to send data to both the up and down halos.

size
int size
The number of MPI ranks in total.
Definition par_mpi.c:22

Par_fsum
int Par_fsum(float *dval)
Performs a reduction on a float dval to get a sum which is then distributed to all ranks.

Par_sread
int Par_sread(const int iread, const float beta, const float fmu, const float akappa, const Complex_f ajq, Complex *u11, Complex *u12, Complex *u11t, Complex *u12t)
Reads and assigns the gauges from file.
Definition par_mpi.c:127

Par_dcopy
int Par_dcopy(double *dval)
Broadcasts a double to the other processes.

rank
int rank
The MPI rank.
Definition par_mpi.c:22

Par_begin
int Par_begin(int argc, char *argv[])
Initialises the MPI configuration.
Definition par_mpi.c:25

Par_isum
int Par_isum(int *ival)
Performs a reduction on an integer ival to get a sum which is then distributed to all ranks.

Par_icopy
int Par_icopy(int *ival)
Broadcasts an integer to the other processes.

ZHalo_swap_dir
int ZHalo_swap_dir(Complex *z, int ncpt, int idir, int layer)
Swaps the halos along the axis given by idir in the direction given by layer.

Par_ccopy
int Par_ccopy(Complex *cval)
Broadcasts a complex float to the other processes.

Par_zcopy
int Par_zcopy(Complex *zval)
Broadcasts a complex double to the other processes.

DHalo_swap_all
int DHalo_swap_all(double *d, int ncpt)
Calls the functions to send data to both the up and down halos.

pu
int __RANLUX__ pu[ndim]
Processors in the up direction.
Definition par_mpi.c:23

Trial_Exchange
int Trial_Exchange(Complex *u11t, Complex *u12t, Complex_f *u11t_f, Complex_f *u12t_f)
Exchanges the trial fields.
Definition par_mpi.c:1178

CHalo_swap_dir
int CHalo_swap_dir(Complex_f *c, int ncpt, int idir, int layer)
Swaps the halos along the axis given by idir in the direction given by layer.

pstop
int __RANLUX__ pstop[ndim][nproc]
The final lattice site on each sublattice in a given direction.
Definition par_mpi.c:21

Par_csum
int Par_csum(Complex_f *cval)
Performs a reduction on a complex float cval to get a sum which is then distributed to all ranks.

Par_zsum
int Par_zsum(Complex *zval)
Performs a reduction on a complex double zval to get a sum which is then distributed to all ranks.

pd
int __RANLUX__ pd[ndim]
Processors in the down direction.
Definition par_mpi.c:24

DHalo_swap_dir
int DHalo_swap_dir(double *d, int ncpt, int idir, int layer)
Swaps the halos along the axis given by idir in the direction given by layer.

Par_dsum
int Par_dsum(double *dval)
Performs a reduction on a double dval to get a sum which is then distributed to all ranks.

pcoord
int * pcoord
The processor grid.
Definition par_mpi.c:19

sizes.h
Defines the constants of the code and other parameters for loop dimensions. Each subroutine includes ...

AVX
#define AVX
Alignment of arrays. 64 for AVX-512, 32 for AVX/AVX2. 16 for SSE. Since AVX is standard on modern x86...
Definition sizes.h:268

nproc
#define nproc
Number of processors for MPI.
Definition sizes.h:132

Complex
#define Complex
Double precision complex number.
Definition sizes.h:58

Complex_f
#define Complex_f
Single precision complex number.
Definition sizes.h:56

ndim
#define ndim
Dimensions.
Definition sizes.h:179