su2hmc
Loading...
Searching...
No Matches
sizes.h
Go to the documentation of this file.
1
27
#ifndef SIZES
28
#define SIZES
29
#ifdef __INTEL_MKL__
30
#define USE_BLAS
31
#include <mkl.h>
32
#elif defined GSL_BLAS
33
#define USE_BLAS
34
#include <gsl/gsl_cblas.h>
35
#elif defined AMD_BLAS
36
#define USE_BLAS
37
#include <cblas.h>
38
#endif
39
#ifdef __NVCC__
40
#include <cuda.h>
41
#include <cuda_runtime_api.h>
42
#include <cublas_v2.h>
43
extern
cublasHandle_t cublas_handle;
44
extern
cublasStatus_t cublas_status;
45
extern
cudaMemPool_t mempool;
47
#define cudaDeviceSynchronise() cudaDeviceSynchronize()
48
#endif
49
#ifdef __CUDACC__
50
#include <thrust_complex.h>
51
#include <thrust/reduce.h>
52
#include <thrust/device_vector.h>
53
#else
54
#include <complex.h>
56
#define Complex_f float complex
58
#define Complex double complex
59
#endif
60
62
#define FILELEN 64
63
// Common block definition for parallel variables
64
66
#define nx 8
67
#if(nx<1)
68
#error "nx is expected it to be greater than or equal to 1"
69
#endif
70
71
// Keep original restriction of single spatial extent
72
74
#define ny nx
75
#if(ny<1)
76
#error "ny is expected it to be greater than or equal to 1"
77
#endif
78
80
#define nz nx
81
#if(nz<1)
82
#error "nz is expected it to be greater than or equal to 1"
83
#endif
84
86
#define nt 16
87
#if(nt<1)
88
#error "nt is expected it to be greater than or equal to 1"
89
#endif
90
92
#define gvol (nx*ny*nz*nt)
94
#define gvol3 (nx*ny*nz)
95
97
#define npx 1
98
#if(npx<1)
99
#error "npx is expected it to be greater than or equal to 1"
100
#elif(nx%npx!=0)
101
#error "npx should be a divisor of nx"
102
#endif
103
104
// Initially restrict to npz = npy = npx
105
// This allows us to have a single ksize variable
106
108
#define npy npx
109
#if(npy<1)
110
#error "npy is expected it to be greater than or equal to 1"
111
#elif(ny%npy!=0)
112
#error "npy should be a divisor of ny"
113
#endif
114
116
#define npz npx
117
#if(npz<1)
118
#error "npz is expected it to be greater than or equal to 1"
119
#elif(nz%npz!=0)
120
#error "npz should be a divisor of nz"
121
#endif
122
124
#define npt 1
125
#if(npt<1)
126
#error "npt is expected it to be greater than or equal to 1"
127
#elif(nt%npt!=0)
128
#error "npt should be a divisor of nt"
129
#endif
130
132
#define nproc (npx*npy*npz*npt)
133
135
#define nthreads 16
136
137
// Existing parameter definitions.
139
#define ksizex (nx/npx)
141
#define ksizey (ny/npy)
143
#define ksizez (nz/npz)
144
146
#define ksize ksizex
147
149
#define ksizet (nt/npt)
151
#define nf 1
152
154
#define kvol (ksizet*ksizez*ksizey*ksizex)
156
#define kvol3 (ksizez*ksizey*ksizex)
157
158
// integer, parameter :: niterc=2*gvol
159
// #define niterc 2*gvol
160
// jis: hard limit to avoid runaway trajectories
161
#if (nx*ny*nz*nt<=16384)
163
#define niterc gvol
164
#elif (nx>=(3*nt)/2)
166
#define niterc gvol3
167
#else
169
#define niterc (gvol/4)
170
#endif
171
// Constants for dimensions.
173
#define nc 2
175
#define nadj 3
177
#define ndirac 4
179
#define ndim 4
181
#define ngorkov 8
182
184
#define kmom (ndim*nadj*kvol)
186
#define kferm (nc*ngorkov*kvol)
188
#define kferm2 (nc*ndirac*kvol)
198
#if(npx>1)
199
#define halox (ksizey*ksizez*ksizet)
200
#else
201
#define halox 0
202
#endif
204
#if(npy>1)
205
#define haloy (ksizex*ksizez*ksizet)
206
#else
207
#define haloy 0
208
#endif
210
#if(npz>1)
211
#define haloz (ksizex*ksizey*ksizet)
212
#else
213
#define haloz 0
214
#endif
216
#if(npt>1)
217
#define halot (ksizex*ksizey*ksizez)
218
#else
219
#define halot 0
220
#endif
222
#define halo (2*(halox+haloy+haloz+halot))
223
225
#define kfermHalo (nc*ngorkov*(kvol+halo))
227
#define kferm2Halo (nc*ndirac*(kvol+halo))
229
#define kmomHalo (ndim*nadj*(kvol+halo))
230
232
// These all used to be multipled by kferm or kferm2 at the start of Congradq or Congradp
233
// On 20240516 in Room 2.19 of the Lloyd building of Trinity we copped that doing so means that the residue is larger
234
// if running on a smaller number of cores. In the extreme GPU case the subvolume is the entire volume so the residue
235
// can be several orders of magnitude larger than in the smallest sublattice case.
236
// Instead, we rescale all the default residues here by sqrt(kferm) or sqrt(kferm2). No matter what size sublattice
237
// we use now, the residue will match that of a 2^3X4 sublattice used in the earlier FORTRAN runs
238
#define respbp 3.2E-5
240
#define rescgg 2.26E-5
242
#define rescga 2.26E-8
243
244
245
#ifdef __AVX512F__
246
#ifdef __unix__
247
#warning AVX512 detected
248
#elif (defined WIN32||_WIN32)
249
#pragma message("AVX512 detected")
250
#endif
252
#define AVX 64
253
#elif defined __AVX__
254
#ifdef __unix__
255
#warning AVX or AVX2 detected
256
#elif (defined WIN32||_WIN32)
257
#pragma message("AVX or AVX2 detected")
258
#endif
260
#define AVX 32
261
#else
262
#ifdef __unix__
263
#warning No AVX detected, assuming SSE is present
264
#elif (defined WIN32||_WIN32)
265
#pragma message("No AVX detected, assuming SSE is present")
266
#endif
268
#define AVX 16
269
#endif
270
271
#ifdef __NVCC__
272
/*
273
* @section gridblock Grids and Blocks
274
*
275
* Threads are grouped together to form warps of 32 threads
276
* best to keep the block dimension (ksizex*ksizey) multiples of 32,
277
* usually between 128 and 256
278
* Note that from Volta/Turing each SM (group of processors)
279
* is smaller than on previous generations of GPUs
280
*/
281
extern
dim3 dimBlock;
// =dim3(nx,ny,nz);
282
extern
dim3 dimGrid;
// =dim3(nt,1,1);
283
//For copying over gamval
284
extern
dim3 dimBlockOne;
// =dim3(nx,ny,nz);
285
extern
dim3 dimGridOne;
// =dim3(nt,1,1);
286
#define USE_BLAS
287
#endif
288
#endif
INCLUDE
sizes.h
Generated by
1.12.0