/*    UPC 1.3 atomics tester
 *    Written by Dan Bonachea
 *    Copyright 2013, The Regents of the University of California
 *    This code is under BSD license: https://upc.lbl.gov/download/dist/LICENSE.TXT
 */

/* This file is a macro template, instantiated once per type.
 *
 * The including file must define ATOMIC_TEST, T (the element type under test)
 * and TM (a token naming that type; it is pasted into identifiers such as
 * thval_<TM>, test_<TM> and UPC_<TM>).  The CATINT / CATFLOAT / CATPTS
 * conditionals select the code path for the type's category, and CAT is
 * pasted into OPS_<CAT> / OPL_<CAT> to pick the operation set.
 * Helper macros used below but not defined here (ASSERT*, SRAND, W, seed,
 * ERROR_STREAM, ...) are expected to come from the including file.
 */
#ifndef ATOMIC_TEST
#error This file is not meant to be compiled directly
#endif
#ifndef T
#error missing define T
#endif
#ifndef TM
#error missing define TM
#endif

/* Two-level stringify / token-paste helpers so that macro arguments are
 * expanded before being stringified or concatenated. */
#ifndef _STRINGIFY
#define _STRINGIFY_HELPER(x) #x
#define _STRINGIFY(x) _STRINGIFY_HELPER(x)
#endif
#ifndef _CONCAT
#define _CONCAT_HELPER(a,b) a ## b
#define _CONCAT(a,b) _CONCAT_HELPER(a,b)
#endif

/* NOTE: MIN and MAX evaluate one of their arguments twice; only pass
 * side-effect-free expressions. */
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif

/* MAX_ACTIVE: max number of threads that can be active using THVAL for this type */
#undef MAX_ACTIVE
#if CATINT // ensure at least one bit per active thread
#define MAX_ACTIVE (8*sizeof(T)-1) // bits we can freely twiddle without overflow
#elif CATFLOAT
#define MAX_ACTIVE (sizeof(T) >= 8 ? 50 : 21) // be conservative with float types
#elif CATPTS
#define MAX_ACTIVE MIN(256,UPC_MAX_BLOCK_SIZE)
#else
#error bad type CAT
#endif

/* THVAL is a value of type T that is unique to a given active thread.
   Additionally, for numeric T, any bits set in the integer representation of the value are unique to that thread */
#undef THVAL
#define THVAL(threadid) (_CONCAT(thval_,TM)(threadid))
#if CATINT || CATFLOAT
/* Numeric variant of thval_<TM>: builds the per-thread value for `threadid`.
 * The loop header visits bit positions threadid, threadid+THREADS, ... below
 * MAX_ACTIVE; threadid must be < MAX_ACTIVE (fatal assertion otherwise).
 *
 * NOTE(review): the loop body below is damaged in this copy of the file --
 * the text between "(1ULL<" and "%llx" (and the loop's closing brace) appears
 * to have been lost.  Presumably it added (T)(1ULL << bit) per iteration and
 * then emitted a debug print of the result; restore from the upstream source
 * before compiling.
 */
static T _CONCAT(thval_,TM)(size_t threadid) {
  ASSERT_FATAL(threadid < MAX_ACTIVE);
  T thval = (T)0;
  int volatile threads = THREADS; // bug 3220: workaround icc optimizer bug
  for (size_t bit=threadid; bit < MAX_ACTIVE; bit += threads) {
    thval = thval + (T)(1ULL< %llx\n", _STRINGIFY(TM), (int)threadid, (unsigned long long)thval);
  return thval;
}
#elif CATPTS
/* Pointer-to-shared variant: the per-thread value is the address of the
 * element of a shared char array (block size MAX_ACTIVE) at index
 * threadid*MAX_ACTIVE+threadid.  The assertions verify that this element has
 * both affinity (upc_threadof) and phase (upc_phaseof) equal to threadid. */
#undef _THARR
#define _THARR _CONCAT(thpos_,TM)
static shared [MAX_ACTIVE] char _THARR[THREADS*MAX_ACTIVE];
static T _CONCAT(thval_,TM)(size_t threadid) {
  ASSERT_FATAL(threadid < MAX_ACTIVE);
  T thval = (T)&_THARR[threadid*MAX_ACTIVE+threadid];
  ASSERT_FATAL(upc_threadof(thval) == threadid);
  ASSERT_FATAL(upc_phaseof(thval) == threadid);
  return thval;
}
#endif

/* check val is a valid THVAL for any active thread */
#undef CHECK_THVAL
#if CATINT || CATFLOAT
/* Numeric variant: asserts val is non-zero, finds the index of the lowest set
 * bit of its uint64_t conversion, and asserts val equals THVAL(that index).
 * NOTE(review): if a non-zero val converts to a uint64_t of 0 (e.g. a
 * fractional floating-point value), the bit-scan loop never terminates --
 * confirm such values cannot reach this macro. */
#define CHECK_THVAL(val) do { \
  T thval = (val); \
  ASSERTNE(thval,0); \
  if (thval) { \
    uint64_t tmp = (uint64_t)thval; \
    size_t th; \
    for (th = 0; !(tmp&0x1); th++) tmp >>= 1; \
    ASSERTEQ(thval,THVAL(th)); \
  } \
} while (0)
#elif CATPTS
/* Pointer variant: the owning thread is taken from upc_threadof(val); the
 * phase must match it and the pointer must equal THVAL(thread). */
#define CHECK_THVAL(val) do { \
  T thval = (val); \
  size_t th = upc_threadof(thval); \
  ASSERTEQT(size_t,INT,upc_phaseof(thval),th); \
  ASSERTEQ(thval,THVAL(th)); \
} while (0)
#endif

/* PROGRESS((fmt, ...)): print a progress line from thread 0 only, bracketed
 * by barriers on all threads.  `args` is a parenthesized printf argument
 * list.  The line is prefixed with thissec, plus the current subsection
 * letter sec once sec has reached 'a'.  Relies on `sec` and `thissec` being
 * in scope at the point of use. */
#undef PROGRESS
#define PROGRESS(args) do { \
  upc_barrier; \
  if (!MYTHREAD) { \
    fflush(NULL); \
    if (sec < 'a') printf("%c: ", thissec); \
    else printf("%c%c: ", thissec, sec); \
    printf args; \
    printf("\n"); \
    fflush(NULL); \
  } \
  upc_barrier; \
} while (0)

/* IFSEC(): advance `sec` to the next subsection letter and run the statement
 * that follows only if that letter occurs in `sections`.  Every subsection
 * must invoke IFSEC() exactly once (even when compiled out) so that the
 * letters stay aligned across types. */
#undef IFSEC
#define IFSEC() \
  if (strchr(sections, ++sec))

/* Main test driver */
/* test_<TM>: run all atomic tests for type T.
 *
 *   iters         iteration budget; scales the amount of work in each subsection
 *   thissec       section letter used as the prefix of progress output
 *   sections      string of subsection letters to run (consulted by IFSEC)
 *   seed_override not referenced in the visible code; presumably consumed by
 *                 SRAND() -- TODO confirm against the including file
 *
 * Returns `errors`, which starts at 0 and is not modified directly in the
 * visible code; presumably the ASSERT* macros update it -- TODO confirm.
 * The function executes barriers and collective domain alloc/free calls.
 */
int _CONCAT(test_,TM)(int iters, char thissec, const char *sections, int seed_override) {
  int errors = 0;
  char sec = 'a' - 1; // one before 'a': PROGRESS prints no subsection letter until the first IFSEC()
  upc_type_t type = _CONCAT(UPC_,TM);
  upc_op_t allops = _CONCAT(OPS_,CAT);
  upc_op_t oplist[] = {
    _CONCAT(OPL_,CAT) };
  int numops = sizeof(oplist)/sizeof(upc_op_t);
  // hints to exercise; the FAVOR_NEAR/FAVOR_FAR entries are included only when defined as macros
  upc_atomichint_t hintlist[] = { UPC_ATOMIC_HINT_DEFAULT, UPC_ATOMIC_HINT_LATENCY, UPC_ATOMIC_HINT_THROUGHPUT
  #ifdef UPC_ATOMIC_HINT_FAVOR_NEAR
    ,UPC_ATOMIC_HINT_FAVOR_NEAR
  #endif
  #ifdef UPC_ATOMIC_HINT_FAVOR_FAR
    ,UPC_ATOMIC_HINT_FAVOR_FAR
  #endif
  };
  int numhints = sizeof(hintlist)/sizeof(upc_atomichint_t);
  int Tunsigned = 0;
  T maxval, minval; // approx max and min normal values without overflow
  #if CATINT
  if (((T)-1) > 0) { // (T)-1 is positive only for unsigned types
    Tunsigned = 1;
    maxval = (T)-1;
    minval = 0;
  } else { // carefully written to avoid signed integer overflow and translator bug 3230
    // builds 2^(bits-1)-1 as 2^(bits-2) + (2^(bits-2)-1), never forming an out-of-range intermediate
    maxval = ((T)1) << (sizeof(T)*8-2);
    T volatile tmp = maxval - 1;
    maxval += tmp;
    minval = -maxval - 1;
  }
  #elif CATFLOAT
  // same exponents as MAX_ACTIVE for floating-point types (50 or 21)
  if (sizeof(T) >= 8) {
    maxval = 1ULL<<50;
  } else {
    maxval = 1ULL<<21;
  }
  minval = -maxval;
  #endif
  #if !CATPTS
  ASSERT_FATAL(maxval > minval);
  ASSERT_FATAL(maxval > 0);
  ASSERT_FATAL(minval <= 0);
  #endif

  PROGRESS(("Testing %s...", _STRINGIFY(TM)));

  IFSEC() {
    static shared T a1[THREADS];
    // one bit per entry of oplist: maxcomb is the number of non-empty op subsets
    int maxcomb = pow(2, numops) - 1;
    int testcomb = maxcomb;
    int step = 1;
    if (iters < maxcomb) { // sample the combinations with a stride so roughly `iters` are tested
      step = (maxcomb + iters - 1) / iters;
      testcomb = maxcomb / step;
    }
    PROGRESS(("Argument coverage (%i combinations, %.1f%% coverage)", testcomb, 100.0*testcomb/maxcomb));
    // NOTE(review): no NULL check on this allocation is visible before the text is cut off below
    upc_atomicdomain_t **doms = calloc(maxcomb+1, sizeof(upc_atomicdomain_t *));
    for (int c = maxcomb; c > 0; c -= step) {
      upc_op_t thisop = 0;
      for (int i=0; i < numops; i++) {
        /* NOTE(review): this copy of the file is damaged here -- the text
         * between "(1<" and "maxlocal)" is missing.  That gap appears to have
         * held the rest of the argument-coverage section and the beginning of
         * the following test (which uses d, vals, check, loc, newval,
         * minlocal and maxlocal).  The code from here to the next #endif is
         * therefore only the tail of that test; restore the missing text from
         * the upstream source before compiling. */
        if (c & (1< maxlocal) {
          newval = maxlocal; // fp rounding can cause slight excess at rng=1.0
        }
        ASSERTLE(newval, maxlocal);
        ASSERTGE(newval, minlocal);
        // apply a random op to the target and mirror it in the private check[] array
        switch (rand() % 4) {
          case 0:
            if (check[loc] < maxlocal) {
              upc_atomic_relaxed(d, NULL, UPC_INC, target, NULL, NULL);
              check[loc]++;
            }
            break;
          case 1:
            if (check[loc] > minlocal) {
              upc_atomic_relaxed(d, NULL, UPC_DEC, target, NULL, NULL);
              check[loc]--;
            }
            break;
          case 2: { // ADD the difference that moves check[loc] to newval
            T delta = newval - check[loc];
            upc_atomic_relaxed(d, NULL, UPC_ADD, target, &delta, NULL);
            check[loc] += delta;
            break;
          }
          case 3: { // same move expressed as SUB of the negated difference
            T delta = -(newval - check[loc]);
            upc_atomic_relaxed(d, NULL, UPC_SUB, target, &delta, NULL);
            check[loc] -= delta;
            break;
          }
        }
        ASSERTLE(check[loc], maxlocal);
        ASSERTGE(check[loc], minlocal);
      }
      for (int loc = 0; loc < W*THREADS; loc++) { // zero out our contribution
        shared void *target = &vals[loc];
        upc_atomic_relaxed(d, NULL, UPC_SUB, target, &check[loc], NULL);
      }
      upc_barrier;
      for (int i=0; i < W; i++) { // confirm balance
        ASSERTEQ(vals[W*MYTHREAD+i], 0);
      }
      upc_all_atomicdomain_free(d);
    }
  // NOTE(review): the #if matching this #endif lies in the missing text above
  #endif

  {
    // Identity tests: `ops` is the domain's op set; `cases` is how many of the
    // switch cases below (numbered 0..cases-1) are legal for that domain.
    struct { const char *name; upc_op_t ops; int cases; } testinfo[] = {
      { "GET/SET id test", UPC_GET | UPC_SET, 3 },
      { "GET/SET/CSWAP id test", UPC_GET | UPC_SET | UPC_CSWAP, 4 },
      #if !CATPTS
      { "GET/SET/CSWAP/MIN/MAX id test", UPC_GET | UPC_SET | UPC_CSWAP | UPC_MIN | UPC_MAX, 6 }
      #endif
    };
    for (int test = 0; test < sizeof(testinfo)/sizeof(testinfo[0]); test++) IFSEC() {
      SRAND();
      PROGRESS(("%s (seed=%u)", testinfo[test].name, seed));
      upc_atomicdomain_t *d = upc_all_atomicdomain_alloc(type, testinfo[test].ops, 0);
      ASSERT(d);
      static shared [W] T vals[W*THREADS];
      T myval;
      // threads beyond MAX_ACTIVE have no THVAL of their own: they initialize with
      // thread 0's value and then sit out the update loop below
      if (MYTHREAD >= MAX_ACTIVE) myval = THVAL(0);
      else myval = THVAL(MYTHREAD);
      for (int i=0; i < W; i++) vals[W*MYTHREAD+i] = myval; // init
      upc_barrier;
      if (MYTHREAD < MAX_ACTIVE) {
        int myiters = iters * 10;
        for (int i=0; i < myiters; i++) {
          // the candidate index range grows from 1 up to W*THREADS as i advances
          int locmax = W*THREADS*i/myiters + 1;
          int loc = rand() % locmax;
          shared void *target = &vals[loc];
          T fetch = 0;
          // every value fetched must be some active thread's THVAL
          switch (rand() % testinfo[test].cases) {
            case 0:
              upc_atomic_relaxed(d, &fetch, UPC_GET, target, NULL, NULL);
              CHECK_THVAL(fetch);
              break;
            case 1:
              upc_atomic_relaxed(d, NULL, UPC_SET, target, &myval, NULL);
              break;
            case 2:
              upc_atomic_relaxed(d, &fetch, UPC_SET, target, &myval, NULL);
              CHECK_THVAL(fetch);
              break;
            case 3: { // CSWAP using a freshly fetched value as the comparand
              T tmp;
              upc_atomic_relaxed(d, &tmp, UPC_GET, target, NULL, NULL);
              CHECK_THVAL(tmp);
              upc_atomic_relaxed(d, &fetch, UPC_CSWAP, target, &tmp, &myval);
              CHECK_THVAL(fetch);
              break;
            }
            case 4:
              upc_atomic_relaxed(d, &fetch, UPC_MIN, target, &myval, NULL);
              CHECK_THVAL(fetch);
              break;
            case 5:
              upc_atomic_relaxed(d, &fetch, UPC_MAX, target, &myval, NULL);
              CHECK_THVAL(fetch);
              break;
          }
          ASSERTEQ(myval, THVAL(MYTHREAD)); // the operand must not have been modified
        }
      }
      upc_all_atomicdomain_free(d);
    } // test
  }

  #if !CATINT
  IFSEC() ((void)0); // keep sections in sync
  #else
  IFSEC() if (Tunsigned) {
    PROGRESS(("Unsigned overflow test"));
    upc_atomicdomain_t *d = upc_all_atomicdomain_alloc(type, allops, 0);
    ASSERT(d);
    static shared T a1[THREADS];
    // unsigned integer type overflows are always well-defined, ensure it matches language-level arithmetic
    for (int p=0; p < THREADS; p++) { // in round p each thread targets the element p positions after its own
      T check = maxval;
      T result;
      shared T *target = &a1[(MYTHREAD+p)%THREADS];
      *target = check;
      upc_barrier;
      for (int i=0; i < iters/THREADS; i++) {
        // each step: update the local model `check`, apply the same op atomically, then compare
        T val = maxval - i - 1;
        check += val;
        upc_atomic_relaxed(d, NULL, UPC_ADD, target, &val, NULL);
        upc_atomic_relaxed(d, &result, UPC_GET, target, NULL, NULL);
        ASSERTEQ(result, check);
        check *= val;
        upc_atomic_relaxed(d, NULL, UPC_MULT, target, &val, NULL);
        upc_atomic_relaxed(d, &result, UPC_GET, target, NULL, NULL);
        ASSERTEQ(result, check);
        check -= val;
        upc_atomic_relaxed(d, NULL, UPC_SUB, target, &val, NULL);
        upc_atomic_relaxed(d, &result, UPC_GET, target, NULL, NULL);
        ASSERTEQ(result, check);
        check = MAX(check, val);
        upc_atomic_relaxed(d, NULL, UPC_MAX, target, &val, NULL);
        upc_atomic_relaxed(d, &result, UPC_GET, target, NULL, NULL);
        ASSERTEQ(result, check);
        check = MIN(check, val);
        upc_atomic_relaxed(d, NULL, UPC_MIN, target, &val, NULL);
        upc_atomic_relaxed(d, &result, UPC_GET, target, NULL, NULL);
        ASSERTEQ(result, check);
      }
      // check INC/DEC across the wraparound
      T val = 0;
      check = val;
      upc_atomic_relaxed(d, NULL, UPC_SET, target, &val, NULL);
      check--;
      upc_atomic_relaxed(d, NULL, UPC_DEC, target, NULL, NULL);
      upc_atomic_relaxed(d, &result, UPC_GET, target, NULL, NULL);
      ASSERTEQ(result, check);
      check++;
      upc_atomic_relaxed(d, NULL, UPC_INC, target, NULL, NULL);
      upc_atomic_relaxed(d, &result, UPC_GET, target, NULL, NULL);
      ASSERTEQ(result, check);
      ASSERTEQ(check, 0);
      upc_barrier;
    }
    upc_all_atomicdomain_free(d);
  }
  #endif

  #if !CATFLOAT
  IFSEC() ((void)0); // keep sections in sync
  #else
  IFSEC() {
    PROGRESS(("Floating-point limit test"));
    upc_atomicdomain_t *d = upc_all_atomicdomain_alloc(type, allops, 0);
    ASSERT(d);
    static shared T a1[THREADS];
    // Temporarily define DOUBLE so that "TM == 42" in the #if lines below is true
    // exactly when TM expands to DOUBLE (other identifiers evaluate to 0 in #if).
    #define DOUBLE 42
    #if defined(__PGI) && (TM == 42) // Bug 3218: INFINITY is broken on PGI
      T inf = HUGE_VAL;
    #elif defined(_CRAYC) && defined(HAVE_BUILTIN_HUGE_VAL) && (TM == 42) // avoid a warning on Cray C
      extern double __builtin_huge_val(void); // prevent translator from botching things
      T inf = __builtin_huge_val();
    #elif defined(INFINITY)
      T inf = INFINITY;
    #else
      T inf = ((T)1)/0;
    #endif
    // fallback limits, replaced by the <float.h> limits when those macros are available
    T max = 1.0E+36F, min = 1.0E-36F;
    #if defined(FLT_MAX) && defined(FLT_MIN)
      max = FLT_MAX;
      min = FLT_MIN;
    #endif
    #if defined(DBL_MAX) && defined(DBL_MIN) && (TM == 42)
      max = (T)DBL_MAX;
      min = (T)DBL_MIN;
    #endif
    #undef DOUBLE
    // operand table in strictly decreasing order, from +inf down to -inf
    T vals[13];
    int cnt = 0;
    vals[cnt++] = inf;
    vals[cnt++] = max;
    vals[cnt++] = (T)1.234E20F;
    vals[cnt++] = (T)3.141;
    vals[cnt++] = (T)1.234E-20F;
    vals[cnt++] = min;
    vals[cnt++] = (T)0;
    vals[cnt++] = -min;
    vals[cnt++] = (T)-1.234E-20F;
    vals[cnt++] = (T)-3.141;
    vals[cnt++] = (T)-1.234E20F;
    vals[cnt++] = -max;
    vals[cnt++] = -inf;
    ASSERT_FATAL(cnt == sizeof(vals)/sizeof(T));
    for (int i=0; i < cnt-1; i++) { // sanity check
      ASSERT_FATAL(vals[i] != vals[i+1]);
      ASSERT_FATAL(vals[i] > vals[i+1]);
      ASSERT_FATAL(vals[i+1] < vals[i]);
    }
    for (int p=0; p < THREADS; p++) { // in round p each thread targets the element p positions after its own
      shared T *target = &a1[(MYTHREAD+p)%THREADS];
      upc_barrier;
      // try every ordered (LHS, RHS) pair from the table
      for (int i=0; i < cnt; i++) {
        const T LHS = vals[i];
        for (int j=0; j < cnt; j++) {
          const T RHS = vals[j];
          T result;
          /* FP_OP(op, rhs, answer): SET the target to LHS, apply `op` with
           * operand pointer `rhs` (NULL for INC/DEC), GET the result and
           * compare it with `answer` computed in C.  Two NaNs are treated as
           * a match.  A mismatch is reported on ERROR_STREAM.
           * NOTE(review): the macro body never touches `errors`, so a
           * mismatch here is only printed -- confirm that is intended. */
          #undef FP_OP
          #define FP_OP(op, rhs, answer) do { \
            static T volatile correct; /* try to force ld/st roundoff */ \
            correct = (answer); \
            const T * volatile _rhs = (rhs); /* avoid a suncc warning */ \
            upc_atomic_relaxed(d, NULL, UPC_SET, target, &LHS, NULL); \
            upc_atomic_relaxed(d, NULL, op, target, _rhs, NULL); \
            upc_atomic_relaxed(d, &result, UPC_GET, target, NULL, NULL); \
            int ok = (isnan(result) && isnan(correct)) || result == correct; \
            if (!ok) { \
              T diff = result - correct; \
              char RHSs[80]; \
              if (_rhs) sprintf(RHSs,"%14g", *_rhs); \
              else RHSs[0] = 0; \
              fprintf(ERROR_STREAM,"%i: FP RESULT ERROR: %14g %-8s %14s => %14g vs %14g (%g off)\n", \
                      MYTHREAD, LHS, #op, RHSs, result, correct, diff); \
              fflush(NULL); \
            } \
          } while (0)
          FP_OP(UPC_ADD, &RHS, LHS + RHS);
          FP_OP(UPC_SUB, &RHS, LHS - RHS);
          FP_OP(UPC_MULT, &RHS, LHS * RHS);
          FP_OP(UPC_MIN, &RHS, MIN(LHS, RHS));
          FP_OP(UPC_MAX, &RHS, MAX(LHS, RHS));
          FP_OP(UPC_INC, NULL, LHS + 1);
          FP_OP(UPC_DEC, NULL, LHS - 1);
          // CSWAP: the target (holding LHS) must become `flag` only when RHS compares equal to LHS
          T flag = (T)43.21;
          T correct = (LHS == RHS) ? flag : LHS;
          upc_atomic_relaxed(d, NULL, UPC_SET, target, &LHS, NULL);
          upc_atomic_relaxed(d, NULL, UPC_CSWAP, target, &RHS, &flag);
          upc_atomic_relaxed(d, &result, UPC_GET, target, NULL, NULL);
          ASSERTEQ(result, correct);
        }
      }
    }
    upc_all_atomicdomain_free(d);
  }
  #endif

  upc_barrier;
  return errors;
}