su2hmc
Loading...
Searching...
No Matches
coord.c
Go to the documentation of this file.
1
5#include <coord.h>
6#include <errorcodes.h>
7#ifdef __OPENMP
8#include <omp.h>
9#endif
10#include <par_mpi.h>
11#include <sizes.h>
12#include <stdlib.h>
13#include <stdio.h>
14
15unsigned int *hu, *hd, *h1u, *h1d, *halosize;
16int Addrc(unsigned int *iu, unsigned int *id){
17 /*
18 * Loads the addresses required during the update
19 *
20 * Globals (Only referenced by the CPU):
21 * ======
22 * hu, hd, h1u, h1d, h2u, h2d, halosize
23 *
24 * Parameters (Used for CPU and GPU):
25 * =========
26 * unsigned int *iu: Upper halo indices
27 * unsigned int *id: Lower halo indices
28 *
29 * Returns:
30 * ========
31 * Zero on success, integer error code otherwise
32 */
33 const char *funcname = "Addrc";
34 //Rather than having 8 ih variables I'm going to use a 2x4 array
35 //down is 0, up is 1
36 int ih[2][4] = {{-1,-1,-1,-1},{-1,-1,-1,-1}};
37 hd = (unsigned int*)aligned_alloc(AVX,ndim*halo*sizeof(int));
38 hu = (unsigned int*)aligned_alloc(AVX,ndim*halo*sizeof(int));
39 h1u = (unsigned int*)aligned_alloc(AVX,ndim*sizeof(int));
40 h1d = (unsigned int*)aligned_alloc(AVX,ndim*sizeof(int));
41 halosize= (unsigned int*)aligned_alloc(AVX,ndim*sizeof(int));
42
43 //Do the lookups appropriate for over indexing into halos
44 //order is down, up for each x y z t
45 //
46 // Since the h2? terms are related to the h1? terms I've dropped them in the C Version
47 // saving about 4 billionths of a second in the process
48 //
49 //Need to watch these +/- 1 at the end. Is that a FORTRAN thing or a program thing?
50 //The only time I see h1d called that +1 term gets cancelled by a -1 so I'm going
51 //to omit it here at my own peril. (Turned out I was right)
52 h1d[0]=kvol; h1u[0]=h1d[0]+halox;
53 halosize[0]=halox;
54
55 h1d[1]=h1u[0]+halox; h1u[1]=h1d[1]+haloy;
56 halosize[1]=haloy;
57
58 h1d[2]=h1u[1]+haloy; h1u[2]=h1d[2]+haloz;
59 halosize[2]=haloz;
60
61 h1d[3]=h1u[2]+haloz; h1u[3]=h1d[3]+halot;
62 halosize[3]=halot;
63
64 //Time for the nitty-gritty
65 /*
66 * Variables are:
67 *
68 * h1d(mu) = starting point in tail of down halo in direction mu
69 * h2d(mu) = finishing point in tail of down halo in direction mu
70 *
71 * h1u(mu) = starting point in tail of up halo in direction mu
72 * h2u(mu) = finishing point in tail of up halo in direction mu
73 *
74 * hd(i,mu) = index in core of point that should be packed into the
75 * ith location of the down halo in direction mu
76 *
77 * hu(i,mu) = index in core of point that should be packed into the
78 * ith location of the up halo in direction mu
79 *
80 * Note that hd and hu should be used for PACKING before SENDING
81 *
82 * Unpacking would be done with a loop over ALL the core sites with
83 * reference to normal dn/up lookups, ie we DO NOT have a list of
84 * where in the halo the core point i should go
85 *
86 * Halo points are ordered "as they come" in the linear loop over
87 * core sites
88 */
89 int iaddr, ic;
90 //if using ic++ inside the loop instead
91 // ic=-1;
92#ifdef _DEBUG
93 printf("ksizex = %i, ksizet=%i\n", ksizex, ksizet);
94#endif
95 //The loop order here matters as it affects the value of ic corresponding to each entry
96 for(int jt=0;jt<ksizet;jt++)
97 for(int jz=0;jz<ksizez;jz++)
98 for(int jy=0;jy<ksizey;jy++)
99 for(int jx=0;jx<ksizex;jx++){
100 //First value of ic is zero as planned.
101 //ic++;
102 ic=((jt*ksizez+jz)*ksizey+jy)*ksizex+jx;
103 //jx!=0 is logically equivalent to if(jx)
104 //If we're inside the sublattice, take the down nearest neightbour from inside the sublattice
105 if(jx)
106 iaddr = ia(jx-1,jy,jz,jt);
107 //Else if we're at the "down" edge, the down nearest neighbour is in the halo
108 else{
109 ih[0][0]++;
110#if npx>1
111 if(ih[0][0]>= halo){
112 fprintf(stderr, "Error %i in %s: Index ih[%i][%i]=%i is larger than the halo size %i."\
113 "\nExiting...\n\n", HALOLIM, funcname, 0, 0, ih[0][0], halo);
114#if(nproc>1)
115 MPI_Abort(comm,HALOLIM);
116#else
117 exit(HALOLIM);
118#endif
119 }
120 hd[0+ndim*ih[0][0]]=ic;
121 iaddr=h1d[0]+ih[0][0];
122#elif npx==1
123 iaddr = ia(jx-1,jy,jz,jt);
124#endif
125 }
126 id[0+ndim*ic]=iaddr;
127
128 if(jx<ksize-1)
129 iaddr = ia(jx+1,jy,jz,jt);
130 else{
131 ih[1][0]++;
132#if npx>1
133 if(ih[1][0]>= halo){
134 fprintf(stderr, "Error %i in %s: Index ih[%i][%i]=%i is larger than the halo size %i."
135 "\nExiting...\n\n", HALOLIM, funcname, 1, 0, ih[1][0], halo);
136#if(nproc>1)
137 MPI_Abort(comm,HALOLIM);
138#else
139 exit(HALOLIM);
140#endif
141 }
142 hu[0+ndim*ih[1][0]]=ic;
143 iaddr=ih[1][0]+h1u[0];
144#elif npx==1
145 iaddr = ia(jx+1,jy,jz,jt);
146#endif
147 }
148 iu[0+ndim*ic]=iaddr;
149
150 if(jy)
151 iaddr = ia(jx,jy-1,jz,jt);
152 else{
153 ih[0][1]++;
154#if npy>1
155 if(ih[0][1]>= halo){
156 fprintf(stderr, "Error %i in %s: Index ih[%i][%i]=%i is larger than the halo size %i."\
157 "\nExiting...\n\n", HALOLIM, funcname, 0, 1, ih[0][1], halo);
158#if(nproc>1)
159 MPI_Abort(comm,HALOLIM);
160#else
161 exit(HALOLIM);
162#endif
163 }
164 hd[1+ndim*ih[0][1]]=ic;
165 iaddr=h1d[1]+ih[0][1];
166#elif npy==1
167 iaddr = ia(jx,jy-1,jz,jt);
168#endif
169 }
170 id[1+ndim*ic]=iaddr;
171
172 if(jy<ksize-1)
173 iaddr = ia(jx,jy+1,jz,jt);
174 else{
175 ih[1][1]++;
176#if npy>1
177 if(ih[1][1]>= halo){
178 fprintf(stderr, "Error %i in %s: Index ih[%i][%i]=%i is larger than the halo size %i."
179 "\nExiting...\n\n", HALOLIM, funcname, 1, 1, ih[1][1], halo);
180#if(nproc>1)
181 MPI_Abort(comm,HALOLIM);
182#else
183 exit(HALOLIM);
184#endif
185 }
186 hu[1+ndim*ih[1][1]]=ic;
187 iaddr=ih[1][1]+h1u[1];
188#elif npy==1
189 iaddr = ia(jx,jy+1,jz,jt);
190#endif
191 }
192 iu[1+ndim*ic]=iaddr;
193
194 if(jz)
195 iaddr = ia(jx,jy,jz-1,jt);
196 else{
197 ih[0][2]++;
198#if npz>1
199 if(ih[0][2]>= halo){
200 fprintf(stderr, "Error %i in %s: Index ih[%i][%i]=%i is larger than the halo size %i."\
201 "\nExiting...\n\n", HALOLIM, funcname, 0, 2, ih[0][2], halo);
202#if(nproc>1)
203 MPI_Abort(comm,HALOLIM);
204#else
205 exit(HALOLIM);
206#endif
207 }
208 hd[2+ndim*ih[0][2]]=ic;
209 iaddr=h1d[2]+ih[0][2];
210#elif npz==1
211 iaddr = ia(jx,jy,jz-1,jt);
212#endif
213 }
214 id[2+ndim*ic]=iaddr;
215
216 if(jz<ksize-1)
217 iaddr = ia(jx,jy,jz+1,jt);
218 else{
219 ih[1][2]++;
220#if npz>1
221 if(ih[1][2]>= halo){
222 fprintf(stderr, "Error %i in %s: Index ih[%i][%i]=%i is larger than the halo size %i."
223 "\nExiting...\n\n", HALOLIM, funcname, 1, 2, ih[1][2], halo);
224#if(nproc>1)
225 MPI_Abort(comm,HALOLIM);
226#else
227 exit(HALOLIM);
228#endif
229 }
230 hu[2+ndim*ih[1][2]]=ic;
231 iaddr=ih[1][2]+h1u[2];
232#elif npz==1
233 iaddr = ia(jx,jy,jz+1,jt);
234#endif
235 }
236 iu[2+ndim*ic]=iaddr;
237
238 if(jt)
239 iaddr = ia(jx,jy,jz,jt-1);
240 else{
241 ih[0][3]++;
242#if npt>1
243 if(ih[0][3]>= halo){
244 fprintf(stderr, "Error %i in %s: Index ih[%i][%i]=%i is larger than the halo size %i."\
245 "\nExiting...\n\n", HALOLIM, funcname, 0, 3, ih[0][3], halo);
246#if(nproc>1)
247 MPI_Abort(comm,HALOLIM);
248#else
249 exit(HALOLIM);
250#endif
251 }
252 hd[3+ndim*ih[0][3]]=ic;
253 iaddr=h1d[3]+ih[0][3];
254#elif npt==1
255 iaddr = ia(jx,jy,jz,jt-1);
256#endif
257 }
258 id[3+ndim*ic]=iaddr;
259
260 if(jt<ksizet-1)
261 iaddr = ia(jx,jy,jz,jt+1);
262 else{
263 ih[1][3]++;
264#if npt>1
265 if(ih[1][3]>= halo){
266 fprintf(stderr, "Error %i in %s: Index ih[%i][%i]=%i is larger than the halo size %i."
267 "\nExiting...\n\n", HALOLIM, funcname, 1, 3, ih[1][3], halo);
268#if(nproc>1)
269 MPI_Abort(comm,HALOLIM);
270#else
271 exit(HALOLIM);
272#endif
273 }
274 hu[3+ndim*ih[1][3]]=ic;
275 iaddr=ih[1][3]+h1u[3];
276#elif npt==1
277 iaddr = ia(jx,jy,jz,jt+1);
278#endif
279 }
280 iu[3+ndim*ic]=iaddr;
281 }
282 //Print iu and id for diagnostics
283#ifdef _DEBUG
284#pragma omp parallel sections
285 {
286#pragma omp section
287 {
288 FILE *id_out = fopen("id_out", "w");
289 for(int i=0;i<kvol;i++)
290 fprintf(id_out,"%i\t%i\t%i\t%i\n",id[i*ndim],id[i*ndim+1],id[i*ndim+2],id[i*ndim+3]);
291 fclose(id_out);
292 }
293#pragma omp section
294 {
295 FILE *iu_out = fopen("iu_out", "w");
296 for(int i=0;i<kvol;i++)
297 fprintf(iu_out,"%i\t%i\t%i\t%i\n",iu[i*ndim],iu[i*ndim+1],iu[i*ndim+2],iu[i*ndim+3]);
298 fclose(iu_out);
299
300 }
301
302 }
303#endif
304 return 0;
305}
306//No point making this parallel because Addrc is serial and the only thing that calls ia
307inline int ia(int x, int y, int z, int t){
308 /*
309 * Described as a 21st Century address calculator, it gets the memory
310 * address of an array entry.
311 *
312 * Parameters:
313 * ==========
314 * int x, y, z, t. The coordinates
315 *
316 * Returns:
317 * =======
318 * An integer corresponding to the position of the entry in a flattened
319 * row-major array
320 *
321 * Future... Switch for Row and column major, and zero or one indexing
322 */
323 const char *funcname = "ia";
324 //We need to ensure that the indices aren't out of bounds using while loops
325 while(x<0) x+=ksizex; while(x>=ksizex) x-= ksizex;
326 while(y<0) y+=ksizey; while(y>=ksizey) y-= ksizey;
327 while(z<0) z+=ksizez; while(z>=ksizez) z-= ksizez;
328 while(t<0) t+=ksizet; while(t>=ksizet) t-= ksizet;
329
330 //And flattening.
331 //return t+ksizet*(z+ksizez*(y+ksizey*x));
332 return ((t*ksizez+z)*ksizey+y)*ksizex+x;
333}
334int Check_addr(unsigned int *table, int lns, int lnt, int imin, int imax){
335 /* Checks that the addresses are within bounds before an update
336 *
337 * Parameters:
338 * ==========
339 * int *table: Pointer to the table in question
340 * int lns: Size of each spacial dimension
341 * int lnt: Size of the time dimension
342 * int imin: Lower bound for element of the table
343 * int imax: Upper bound for an element of the table
344 *
345 * Returns:
346 * =======
347 * Zero on success, integer error code otherwise.
348 */
349 const char *funcname = "Check_addr";
350 //Get the total number of elements in each dimension of the table
351 int ntable = lns*lns*lns*lnt;
352 int iaddr;
353 //Collapsing two for loops together
354 for(int j=0; j<ntable*ndim; j++){
355 iaddr = table[j];
356 if((iaddr<imin) || (iaddr>= imax)){
357 fprintf(stderr, "Error %i in %s: %i is out of the bounds of (%i,%i)\n"\
358 "for a table of size %i^3 *%i.\nExiting...\n\n",\
359 BOUNDERROR,funcname,iaddr,imin,imax,lns,lnt);
360#if(nproc>1)
361 MPI_Abort(comm,BOUNDERROR);
362#else
363 exit(BOUNDERROR);
364#endif
365 }
366 }
367 return 0;
368}
369inline int Index2lcoord(int index, int *coord){
370 /* Converts the index of a point in memory to the equivalent point
371 * in the 4 dimensional array, where the time index is the last
372 * coordinate in the array
373 *
374 * This is a rather nuanced function, as C and Fortran are rather
375 * different in how they store arrays. C starts with index 0 and
376 * Fortran (by default) starts with index 1
377 *
378 * Also C and Fortran store data in the opposite memory order so
379 * be careful when calling this function!
380 *
381 * Parameters:
382 * ==========
383 * int index: The index of the point as stored linearly in computer
384 * memory
385 * int *coord: The 4-array for the coordinates. The first three spots
386 * are for the time index.
387 *
388 * Returns:
389 * ========
390 * Zero on success. Integer Error code otherwise
391 */
392
393 const char *funcname = "Index2lcoord";
394 //A divide and conquer approach. Going from the deepest coordinate
395 //to the least deep coordinate, we take the modulo of the index by
396 //the length of that axis to get the coordinate, and then divide
397 //the index by the length of that coordinate to set up for the
398 //next coordinate. This works since int/int gives an int.
399 coord[3] = index%ksizet; index/=ksizet;
400 coord[2] = index%ksizez; index/=ksizez;
401 coord[1] = index%ksizey; index/=ksizey;
402 coord[0] = index; //No need to divide by nt since were done.
403
404 return 0;
405}
406inline int Index2gcoord(int index, int *coord){
407 /* Converts the index of a point in memory to the equivalent point
408 * in the 4 dimensional array, where the time index is the last
409 * coordinate in the array
410 *
411 * This is a rather nuanced function, as C and Fortran are rather
412 * different in how they store arrays. C starts with index 0 and
413 * Fortran (by default) starts with index 1
414 *
415 * Also C and Fortran store data in the opposite memory order so
416 * be careful when calling this function!
417 *
418 * Parameters:
419 * ==========
420 * int index: The index of the point as stored linearly in computer
421 * memory
422 * int *coord: The 4-array for the coordinates. The first three spots
423 * are for the time index.
424 *
425 * Returns:
426 * ========
427 * Zero on success. Integer Error code otherwise
428 */
429
430 const char *funcname = "Index2gcoord";
431 //A divide and conquer approach. Going from the deepest coordinate
432 //to the least deep coordinate, we take the modulo of the index by
433 //the length of that axis to get the coordinate, and then divide
434 //the index by the length of that coordinate to set up for the
435 //next coordinate. This works since int/int gives an int.
436 coord[3] = index%nt; index/=nt;
437 coord[2] = index%nz; index/=nz;
438 coord[1] = index%ny; index/=ny;
439 coord[0] = index; //No need to divide by nt since were done.
440
441 return 0;
442}
443inline int Coord2lindex(int ix, int iy, int iz, int it){
444 /* Converts the coordinates of a local lattice point to its index in the
445 * computer memory
446 *
447 * This is a rather nuanced function, as C and Fortran are rather
448 * different in how they store arrays. C starts with index 0 and
449 * Fortran (by default) starts with index 1
450 *
451 * Also C and Fortran store data in the opposite memory order so
452 * be careful when calling this function!
453 * Parameters:
454 * ==========
455 * int i?: The coordinate being converted
456 *
457 * Returns:
458 * ========
459 * int index: The position of the point
460 */
461 const char *funcname = "Coord2gindex";
462
463 //I've factorised this function compared to its original
464 //implementation to reduce the number of multiplications
465 //and hopefully improve performance
466 //int index = coord[3]+ksizez*(coord[2]+ksizey*(coord[1]+ksizex*coord[0]));
467 return it+ksizet*(iz+ksizez*(iy+ksizey*ix));
468}
469inline int Coord2gindex(int ix, int iy, int iz, int it){
470 /* Converts the coordinates of a point in the global gauge field
471 * to its flattened index in the computer memory
472 *
473 * This is a rather nuanced function, as C and Fortran are rather
474 * different in how they store arrays. C starts with index 0 and
475 * Fortran (by default) starts with index 1
476 *
477 * Also C and Fortran store data in the opposite memory order so
478 * be careful when calling this function!
479 * Parameters:
480 * ==========
481 * int *coord: The pointer to the 4-vector being considered
482 *
483 * Returns:
484 * ========
485 * int index: The position of the point
486 */
487 const char *funcname = "Coord2gindex";
488
489 //I've factorised this function compared to its original
490 //implementation to reduce the number of multiplications
491 //and hopefully improve performance
492 // return it+nt*(iz+nz*(iy+ny*ix));
493 return ix+nx*(iy+ny*(iz+nz*it));
494}
495int Testlcoord(int cap){
496 /* Tests if the coordinate transformation functions are working
497 * Going to expand a little on the original here and do the following
498 * 1. Convert from int to lcoord (the original code)
499 * And the planned additional features
500 * 2. Convert from lcoord to int (new, function doesn't exist in the original
501 * If we get the same value we started with then we're probably doing
502 * something right.
503 *
504 * Parameters:
505 * ===========
506 * int cap: The max value the index can take on. Should be the size of the array
507 *
508 * Calls:
509 * =====
510 * Index2lcoord, Coord2lindex
511 *
512 * Returns:
513 * ========
514 * Zero on success, integer error code otherwise.
515 */
516 const char *funcname = "Testlcoord";
517 //The storage array for the coordinates, and the index and its test value.
518 int coord[4], index, index2;
519 for(index =0; index<cap; index++){
520 Index2lcoord(index, coord);
521 printf("Coordinates for %i are (x,y,z,t):[%i,%i,%i,%i].\n", index,\
522 coord[0], coord[1], coord[2], coord[3]);
523 //index2 = Coord2lindex(coord);
524 if(!(index==index2)){
525 fprintf(stderr, "Error %i in %s: Converted index %i does not match "
526 "original index %i.\nExiting...\n\n",\
527 INDTOCOORD, funcname, index2, index);
528#if(nproc>1)
529 MPI_Abort(comm,INDTOCOORD);
530#else
531 exit(INDTOCOORD);
532#endif
533 }
534 }
535 return 0;
536}
537int Testgcoord(int cap){
538 /* This is completely new and missing from the original code.
539 * We test the coordinate conversion functions by doing the following
540 * 1. Convert from int to gcoord (new)
541 * 2. Convert from gcoord to int (also new) and compare to input.
542 * If we get the same value we started with then we're probably doing
543 * something right
544 *
545 * The code is basically the same as the previous function with different
546 * magic numbers.
547 *
548 * Parameters:
549 * ===========
550 * int cap: The max value the index can take on. Should be the size of our array
551 *
552 * Calls:
553 * ======
554 * Index2gcoord, Coord2gindex
555 *
556 * Returns:
557 * ========
558 * Zero on success, integer error code otherwise
559 */
560 const char *funcname = "Testgcoord";
561 int coord[4], index, index2;
562#pragma omp parallel for private(coord, index, index2)
563 for(index=0; index<cap; index++){
564 Index2gcoord(index, coord);
565#pragma omp critical
566 printf("Coordinates for %i are (x,y,z,t):[%i,%i,%i,%i].\n", index,\
567 coord[0], coord[1], coord[2], coord[3]);
568 //index2 = Coord2gindex(coord);
569 if(!(index==index2)){
570 fprintf(stderr, "Error %i in %s: Converted index %i does not match "\
571 "original index %i.\nExiting...\n\n",\
572 INDTOCOORD, funcname, index2, index);
573#if(nproc>1)
574 MPI_Abort(comm,INDTOCOORD);
575#else
576 exit(INDTOCOORD);
577#endif
578 }
579 }
580 return 0;
581}
unsigned int * halosize
Array containing the size of the halo in each direction.
Definition coord.c:15
int Coord2gindex(int ix, int iy, int iz, int it)
Converts the coordinates of a global lattice point to its index in the computer memory.
Definition coord.c:469
int Check_addr(unsigned int *table, int lns, int lnt, int imin, int imax)
Definition coord.c:334
int Addrc(unsigned int *iu, unsigned int *id)
Loads the addresses required during the update.
Definition coord.c:16
int ia(int x, int y, int z, int t)
Described as a 21st Century address calculator, it gets the memory address of an array entry.
Definition coord.c:307
unsigned int * h1u
Up halo starting element.
Definition coord.c:15
int Testlcoord(int cap)
Tests if the local coordinate transformation functions are working.
Definition coord.c:495
unsigned int * hd
Down halo indices.
Definition coord.c:15
int Index2lcoord(int index, int *coord)
Converts the index of a point in memory to the equivalent point in the 4 dimensional array,...
Definition coord.c:369
int Index2gcoord(int index, int *coord)
Converts the index of a point in memory to the equivalent point in the 4 dimensional array,...
Definition coord.c:406
unsigned int * h1d
Down halo starting element.
Definition coord.c:15
int Testgcoord(int cap)
This is completely new and missing from the original code.
Definition coord.c:537
unsigned int * hu
Up halo indices.
Definition coord.c:15
int Coord2lindex(int ix, int iy, int iz, int it)
Converts the coordinates of a local lattice point to its index in the computer memory.
Definition coord.c:443
Header for routines related to lattice sites.
This header is intended to be a useful reference for error codes and their meanings.
MPI headers.
Defines the constants of the code and other parameters for loop dimensions. Each subroutine includes ...
#define ksizex
Sublattice x extent.
Definition sizes.h:139
#define AVX
Alignment of arrays. 64 for AVX-512, 32 for AVX/AVX2. 16 for SSE. Since AVX is standard on modern x86...
Definition sizes.h:268
#define nt
Lattice temporal extent. This also corresponds to the inverse temperature.
Definition sizes.h:86
#define nx
Lattice x extent.
Definition sizes.h:66
#define halot
t Halo size
Definition sizes.h:219
#define ksizet
Sublattice t extent.
Definition sizes.h:149
#define halox
x Halo size
Definition sizes.h:201
#define kvol
Sublattice volume.
Definition sizes.h:154
#define haloz
z Halo size
Definition sizes.h:213
#define haloy
y Halo size
Definition sizes.h:207
#define ksize
Sublattice spatial extent for a cubic lattice.
Definition sizes.h:146
#define halo
Total Halo size.
Definition sizes.h:222
#define ksizez
Sublattice z extent.
Definition sizes.h:143
#define ndim
Dimensions.
Definition sizes.h:179
#define ksizey
Sublattice y extent.
Definition sizes.h:141
#define nz
Lattice z extent. We normally use cubic lattices so this is the same as nx.
Definition sizes.h:80
#define ny
Lattice y extent. We normally use cubic lattices so this is the same as nx.
Definition sizes.h:74