2015-11-13 16:08:11 +01:00
|
|
|
|
|
|
|
#include <complex.h>
|
|
|
|
|
|
|
|
/* A double has a 53-bit significand; FRAC below is 2^-48, a looser tolerance than that.  */
|
|
|
|
/* Relative tolerance used when comparing results: 2^-48.  */
#define FRAC (1.0 / (1LL << 48))

/* Return nonzero if B agrees with A to within a relative error of FRAC,
   zero otherwise.

   The check is done on squared magnitudes, |A - B|^2 / |A|^2 against
   FRAC^2, so no square root is required.  If the squared magnitude of
   A is zero the ratio is undefined (0/0 gives NaN, which compares
   false against everything), so in that case B is accepted only when
   it is exactly equal to A.  */
int close_enough (double _Complex a, double _Complex b)
{
  double _Complex diff = a - b;
  double mag2_a = __real__ (a) * __real__ (a) + __imag__ (a) * __imag__ (a);
  double mag2_diff = (__real__ (diff) * __real__ (diff)
                      + __imag__ (diff) * __imag__ (diff));

  /* Avoid dividing by zero: fall back to an exact comparison.  */
  if (mag2_a == 0.0)
    return a == b;

  return mag2_diff / mag2_a < (FRAC * FRAC);
}
|
|
|
|
|
|
|
|
/* Number of array elements reduced by each test.  */
#define N 100
|
|
|
|
|
2015-11-18 14:49:17 +01:00
|
|
|
static int __attribute__ ((noinline))
|
|
|
|
vector (double _Complex ary[N], double _Complex sum, double _Complex prod)
|
|
|
|
{
|
|
|
|
double _Complex tsum = 0, tprod = 1;
|
2015-11-13 16:08:11 +01:00
|
|
|
|
2015-11-18 14:49:17 +01:00
|
|
|
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
|
2015-11-13 16:08:11 +01:00
|
|
|
{
|
|
|
|
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
2015-11-18 14:49:17 +01:00
|
|
|
for (int ix = 0; ix < N; ix++)
|
|
|
|
{
|
|
|
|
tsum += ary[ix];
|
|
|
|
tprod *= ary[ix];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!close_enough (sum, tsum))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if (!close_enough (prod, tprod))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __attribute__ ((noinline))
|
|
|
|
worker (double _Complex ary[N], double _Complex sum, double _Complex prod)
|
|
|
|
{
|
|
|
|
double _Complex tsum = 0, tprod = 1;
|
|
|
|
|
|
|
|
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
|
|
|
|
{
|
|
|
|
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
|
|
|
for (int ix = 0; ix < N; ix++)
|
2015-11-13 16:08:11 +01:00
|
|
|
{
|
|
|
|
tsum += ary[ix];
|
|
|
|
tprod *= ary[ix];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!close_enough (sum, tsum))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if (!close_enough (prod, tprod))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2015-11-18 14:49:17 +01:00
|
|
|
|
|
|
|
static int __attribute__ ((noinline))
|
|
|
|
gang (double _Complex ary[N], double _Complex sum, double _Complex prod)
|
|
|
|
{
|
|
|
|
double _Complex tsum = 0, tprod = 1;
|
|
|
|
|
|
|
|
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
|
|
|
|
{
|
|
|
|
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
|
|
|
for (int ix = 0; ix < N; ix++)
|
|
|
|
{
|
|
|
|
tsum += ary[ix];
|
|
|
|
tprod *= ary[ix];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!close_enough (sum, tsum))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if (!close_enough (prod, tprod))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int main (void)
|
|
|
|
{
|
|
|
|
double _Complex ary[N], sum = 0, prod = 1;
|
|
|
|
|
|
|
|
for (int ix = 0; ix < N; ix++)
|
|
|
|
{
|
|
|
|
double frac = ix * (1.0 / 1024) + 1.0;
|
|
|
|
|
|
|
|
ary[ix] = frac + frac * 2.0i - 1.0i;
|
|
|
|
sum += ary[ix];
|
|
|
|
prod *= ary[ix];
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vector (ary, sum, prod))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if (worker (ary, sum, prod))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if (gang (ary, sum, prod))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|