su2hmc
Loading...
Searching...
No Matches
sizes.h
Go to the documentation of this file.
1
27#ifndef SIZES
28#define SIZES
29#ifdef __INTEL_MKL__
30#define USE_BLAS
31#include <mkl.h>
32#elif defined GSL_BLAS
33#define USE_BLAS
34#include <gsl/gsl_cblas.h>
35#elif defined AMD_BLAS
36#define USE_BLAS
37#include <cblas.h>
38#endif
39#ifdef __NVCC__
40#include <cuda.h>
41#include <cuda_runtime_api.h>
42#include <cublas_v2.h>
43extern cublasHandle_t cublas_handle;
44extern cublasStatus_t cublas_status;
45extern cudaMemPool_t mempool;
47#define cudaDeviceSynchronise() cudaDeviceSynchronize()
48#endif
49#ifdef __CUDACC__
50#include <thrust_complex.h>
51#include <thrust/reduce.h>
52#include <thrust/device_vector.h>
53#else
54#include <complex.h>
56#define Complex_f float complex
58#define Complex double complex
59#endif
60
62#define FILELEN 64
63// Common block definition for parallel variables
64
66#define nx 8
67#if(nx<1)
68#error "nx is expected it to be greater than or equal to 1"
69#endif
70
71// Keep original restriction of single spatial extent
72
74#define ny nx
75#if(ny<1)
76#error "ny is expected it to be greater than or equal to 1"
77#endif
78
80#define nz nx
81#if(nz<1)
82#error "nz is expected it to be greater than or equal to 1"
83#endif
84
86#define nt 16
87#if(nt<1)
88#error "nt is expected it to be greater than or equal to 1"
89#endif
90
92#define gvol (nx*ny*nz*nt)
94#define gvol3 (nx*ny*nz)
95
97#define npx 1
98#if(npx<1)
99#error "npx is expected it to be greater than or equal to 1"
100#elif(nx%npx!=0)
101#error "npx should be a divisor of nx"
102#endif
103
104// Initially restrict to npz = npy = npx
105// This allows us to have a single ksize variable
106
108#define npy npx
109#if(npy<1)
110#error "npy is expected it to be greater than or equal to 1"
111#elif(ny%npy!=0)
112#error "npy should be a divisor of ny"
113#endif
114
116#define npz npx
117#if(npz<1)
118#error "npz is expected it to be greater than or equal to 1"
119#elif(nz%npz!=0)
120#error "npz should be a divisor of nz"
121#endif
122
124#define npt 1
125#if(npt<1)
126#error "npt is expected it to be greater than or equal to 1"
127#elif(nt%npt!=0)
128#error "npt should be a divisor of nt"
129#endif
130
132#define nproc (npx*npy*npz*npt)
133
135#define nthreads 16
136
137// Existing parameter definitions.
139#define ksizex (nx/npx)
141#define ksizey (ny/npy)
143#define ksizez (nz/npz)
144
146#define ksize ksizex
147
149#define ksizet (nt/npt)
151#define nf 1
152
154#define kvol (ksizet*ksizez*ksizey*ksizex)
156#define kvol3 (ksizez*ksizey*ksizex)
157
158// integer, parameter :: niterc=2*gvol
159// #define niterc 2*gvol
160// jis: hard limit to avoid runaway trajectories
161#if (nx*ny*nz*nt<=16384)
163#define niterc gvol
164#elif (nx>=(3*nt)/2)
166#define niterc gvol3
167#else
169#define niterc (gvol/4)
170#endif
171// Constants for dimensions.
173#define nc 2
175#define nadj 3
177#define ndirac 4
179#define ndim 4
181#define ngorkov 8
182
184#define kmom (ndim*nadj*kvol)
186#define kferm (nc*ngorkov*kvol)
188#define kferm2 (nc*ndirac*kvol)
198#if(npx>1)
199#define halox (ksizey*ksizez*ksizet)
200#else
201#define halox 0
202#endif
204#if(npy>1)
205#define haloy (ksizex*ksizez*ksizet)
206#else
207#define haloy 0
208#endif
210#if(npz>1)
211#define haloz (ksizex*ksizey*ksizet)
212#else
213#define haloz 0
214#endif
216#if(npt>1)
217#define halot (ksizex*ksizey*ksizez)
218#else
219#define halot 0
220#endif
222#define halo (2*(halox+haloy+haloz+halot))
223
225#define kfermHalo (nc*ngorkov*(kvol+halo))
227#define kferm2Halo (nc*ndirac*(kvol+halo))
229#define kmomHalo (ndim*nadj*(kvol+halo))
230
232// These all used to be multipled by kferm or kferm2 at the start of Congradq or Congradp
233// On 20240516 in Room 2.19 of the Lloyd building of Trinity we copped that doing so means that the residue is larger
234// if running on a smaller number of cores. In the extreme GPU case the subvolume is the entire volume so the residue
235// can be several orders of magnitude larger than in the smallest sublattice case.
236// Instead, we rescale all the default residues here by sqrt(kferm) or sqrt(kferm2). No matter what size sublattice
237// we use now, the residue will match that of a 2^3X4 sublattice used in the earlier FORTRAN runs
238#define respbp 3.2E-5
240#define rescgg 2.26E-5
242#define rescga 2.26E-8
243
244
245#ifdef __AVX512F__
246#ifdef __unix__
247#warning AVX512 detected
248#elif (defined WIN32||_WIN32)
249#pragma message("AVX512 detected")
250#endif
252#define AVX 64
253#elif defined __AVX__
254#ifdef __unix__
255#warning AVX or AVX2 detected
256#elif (defined WIN32||_WIN32)
257#pragma message("AVX or AVX2 detected")
258#endif
260#define AVX 32
261#else
262#ifdef __unix__
263#warning No AVX detected, assuming SSE is present
264#elif (defined WIN32||_WIN32)
265#pragma message("No AVX detected, assuming SSE is present")
266#endif
268#define AVX 16
269#endif
270
271#ifdef __NVCC__
272/*
273 * @section gridblock Grids and Blocks
274 *
275 * Threads are grouped together to form warps of 32 threads
276 * best to keep the block dimension (ksizex*ksizey) multiples of 32,
277 * usually between 128 and 256
278 * Note that from Volta/Turing each SM (group of processors)
279 * is smaller than on previous generations of GPUs
280 */
281extern dim3 dimBlock;// =dim3(nx,ny,nz);
282extern dim3 dimGrid;// =dim3(nt,1,1);
283 //For copying over gamval
284extern dim3 dimBlockOne;// =dim3(nx,ny,nz);
285extern dim3 dimGridOne;// =dim3(nt,1,1);
286#define USE_BLAS
287#endif
288#endif