2c71d454eb
2016-08-18 Chung-Lin Tang <cltang@codesourcery.com> PR middle-end/70895 gcc/ * gimplify.c (omp_add_variable): Adjust/add variable mapping on enclosing parallel construct for reduction variables on OpenACC loop directives. gcc/testsuite/ * gfortran.dg/goacc/loop-tree-1.f90: Add gimple scan-tree-dump test. * c-c++-common/goacc/reduction-1.c: Likewise. * c-c++-common/goacc/reduction-2.c: Likewise. * c-c++-common/goacc/reduction-3.c: Likewise. * c-c++-common/goacc/reduction-4.c: Likewise. libgomp/ * testsuite/libgomp.oacc-fortran/reduction-7.f90: Add explicit firstprivate clauses. * testsuite/libgomp.oacc-fortran/reduction-6.f90: Remove explicit copy clauses. * testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-flt.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/collapse-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c: Likewise. From-SVN: r239576
113 lines
1.9 KiB
C
113 lines
1.9 KiB
C
|
|
/* Single float has 23 bits of fraction.  FRAC is the relative tolerance
   used by close_enough: 2^-20, i.e. the lowest three fraction bits are
   allowed to differ.  */
#define FRAC (1.0f / (1 << 20))

typedef float Type;

/* Return nonzero when B agrees with the reference value A to within a
   relative error of FRAC, and zero otherwise.

   The error is measured relative to the magnitude of A, so the result
   does not depend on the sign of the reference value.  Exactly equal
   operands (including A == B == 0) always compare as close; a zero
   reference with a nonzero B never does.  */
int close_enough (Type a, Type b)
{
  Type diff = a - b;
  Type scale = a;

  if (diff < 0)
    diff = -diff;

  /* Divide by |a|, not a: with a negative reference the signed quotient
     would be negative and therefore always below FRAC.  */
  if (scale < 0)
    scale = -scale;

  /* Exact match; also avoids evaluating 0/0 when both values are zero.  */
  if (diff == 0)
    return 1;

  /* Nonzero error against a zero reference has no finite relative size.  */
  if (scale == 0)
    return 0;

  return diff / scale < FRAC;
}
|
|
|
|
/* Number of elements in the test array; also the trip count of every
   loop that walks it.  */
#define N 100
|
|
|
|
static int __attribute__ ((noinline))
|
|
vector (Type ary[N], Type sum, Type prod)
|
|
{
|
|
Type tsum = 0, tprod = 1;
|
|
|
|
#pragma acc parallel vector_length(32) copyin(ary[0:N])
|
|
{
|
|
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
|
|
for (int ix = 0; ix < N; ix++)
|
|
{
|
|
tsum += ary[ix];
|
|
tprod *= ary[ix];
|
|
}
|
|
}
|
|
|
|
if (!close_enough (sum, tsum))
|
|
return 1;
|
|
|
|
if (!close_enough (prod, tprod))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __attribute__ ((noinline))
|
|
worker (Type ary[N], Type sum, Type prod)
|
|
{
|
|
Type tsum = 0, tprod = 1;
|
|
|
|
#pragma acc parallel num_workers(32) copyin(ary[0:N])
|
|
{
|
|
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
|
|
for (int ix = 0; ix < N; ix++)
|
|
{
|
|
tsum += ary[ix];
|
|
tprod *= ary[ix];
|
|
}
|
|
}
|
|
|
|
if (!close_enough (sum, tsum))
|
|
return 1;
|
|
|
|
if (!close_enough (prod, tprod))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __attribute__ ((noinline))
|
|
gang (Type ary[N], Type sum, Type prod)
|
|
{
|
|
Type tsum = 0, tprod = 1;
|
|
|
|
#pragma acc parallel num_gangs (32) copyin(ary[0:N])
|
|
{
|
|
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
|
|
for (int ix = 0; ix < N; ix++)
|
|
{
|
|
tsum += ary[ix];
|
|
tprod *= ary[ix];
|
|
}
|
|
}
|
|
|
|
if (!close_enough (sum, tsum))
|
|
return 1;
|
|
|
|
if (!close_enough (prod, tprod))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int main (void)
|
|
{
|
|
Type ary[N], sum = 0, prod = 1;
|
|
|
|
for (int ix = 0; ix < N; ix++)
|
|
{
|
|
float frac = ix * (1.0f / 1024) + 1.0f;
|
|
|
|
ary[ix] = frac;
|
|
sum += ary[ix];
|
|
prod *= ary[ix];
|
|
}
|
|
|
|
if (vector (ary, sum, prod))
|
|
return 1;
|
|
|
|
if (worker (ary, sum, prod))
|
|
return 1;
|
|
|
|
if (gang (ary, sum, prod))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|