e7ff0319f3
gcc/c-family/ PR middle-end/70626 * c-common.h (c_oacc_split_loop_clauses): Add boolean argument. * c-omp.c (c_oacc_split_loop_clauses): Use it to duplicate reduction clauses in acc parallel loops. gcc/c/ PR middle-end/70626 * c-parser.c (c_parser_oacc_loop): Don't augment mask with OACC_LOOP_CLAUSE_MASK. (c_parser_oacc_kernels_parallel): Update call to c_oacc_split_loop_clauses. gcc/cp/ PR middle-end/70626 * parser.c (cp_parser_oacc_loop): Don't augment mask with OACC_LOOP_CLAUSE_MASK. (cp_parser_oacc_kernels_parallel): Update call to c_oacc_split_loop_clauses. gcc/fortran/ PR middle-end/70626 * trans-openmp.c (gfc_trans_oacc_combined_directive): Duplicate the reduction clause in both parallel and loop directives. gcc/testsuite/ PR middle-end/70626 * c-c++-common/goacc/combined-reduction.c: New test. * gfortran.dg/goacc/reduction-2.f95: Add check for kernels reductions. libgomp/ PR middle-end/70626 * testsuite/libgomp.oacc-c++/template-reduction.C: Adjust test. * testsuite/libgomp.oacc-c-c++-common/combined-reduction.c: New test. * testsuite/libgomp.oacc-fortran/combined-reduction.f90: New test. From-SVN: r235651
99 lines
1.6 KiB
C
99 lines
1.6 KiB
C
// Number of elements processed by every loop in this file; also used as
// the length of the array sections named in the OpenACC data clauses.
const int n = 100;
|
|
|
|
// Check explicit template copy map
|
|
|
|
template<typename T> T
|
|
sum (T array[])
|
|
{
|
|
T s = 0;
|
|
|
|
#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (array[0:n])
|
|
for (int i = 0; i < n; i++)
|
|
s += array[i];
|
|
|
|
return s;
|
|
}
|
|
|
|
// Check implicit template copy map
|
|
|
|
template<typename T> T
|
|
sum ()
|
|
{
|
|
T s = 0;
|
|
T array[n];
|
|
|
|
for (int i = 0; i < n; i++)
|
|
array[i] = i+1;
|
|
|
|
#pragma acc parallel loop num_gangs (10) gang reduction (+:s)
|
|
for (int i = 0; i < n; i++)
|
|
s += array[i];
|
|
|
|
return s;
|
|
}
|
|
|
|
// Check present and async
|
|
|
|
template<typename T> T
|
|
async_sum (T array[])
|
|
{
|
|
T s = 0;
|
|
|
|
#pragma acc parallel loop num_gangs (10) gang async (1) present (array[0:n])
|
|
for (int i = 0; i < n; i++)
|
|
array[i] = i+1;
|
|
|
|
#pragma acc parallel loop num_gangs (10) gang reduction (+:s) present (array[0:n]) async wait (1)
|
|
for (int i = 0; i < n; i++)
|
|
s += array[i];
|
|
|
|
#pragma acc wait
|
|
|
|
return s;
|
|
}
|
|
|
|
// Check present and async and an explicit firstprivate
|
|
|
|
template<typename T> T
|
|
async_sum (int c)
|
|
{
|
|
T s = 0;
|
|
|
|
#pragma acc parallel loop num_gangs (10) gang reduction (+:s) firstprivate (c) async wait (1)
|
|
for (int i = 0; i < n; i++)
|
|
s += i+c;
|
|
|
|
#pragma acc wait
|
|
|
|
return s;
|
|
}
|
|
|
|
int
|
|
main()
|
|
{
|
|
int a[n];
|
|
int result = 0;
|
|
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
a[i] = i+1;
|
|
result += i+1;
|
|
}
|
|
|
|
if (sum (a) != result)
|
|
__builtin_abort ();
|
|
|
|
if (sum<int> () != result)
|
|
__builtin_abort ();
|
|
|
|
#pragma acc enter data copyin (a)
|
|
if (async_sum (a) != result)
|
|
__builtin_abort ();
|
|
|
|
if (async_sum<int> (1) != result)
|
|
__builtin_abort ();
|
|
#pragma acc exit data delete (a)
|
|
|
|
return 0;
|
|
}
|