Make OpenACC orphan gang reductions errors

This patch promotes all OpenACC gang reductions on orphan loops to
errors. According to the spec, orphan loops are those which are not
lexically nested inside an OpenACC parallel or kernels region, i.e.,
acc loops inside acc routines.

At first I thought this could be a warning because the gang reduction
finalizer uses an atomic update. However, because there is no
synchronization between gangs, there is no way to guarantee that the
reduction will have completed once a single gang entity returns from
the acc routine call.

	gcc/c/
	* c-typeck.c (c_finish_omp_clauses): Emit an error on orphan
	OpenACC gang reductions.
	gcc/cp/
	* semantics.c (finish_omp_clauses): Emit an error on orphan
	OpenACC gang reductions.
	gcc/fortran/
	* openmp.c (oacc_is_parallel, oacc_is_kernels): New 'static'
	functions.
	(resolve_oacc_loop_blocks): Emit an error on orphan OpenACC gang
	reductions.
	gcc/
	* omp-general.h (enum oacc_loop_flags): Add OLF_REDUCTION enum.
	* omp-low.c (lower_oacc_head_mark): Use it to mark OpenACC
	reductions.
	* omp-offload.c (oacc_loop_auto_partitions): Don't assign gang
	level parallelism to orphan reductions.
	gcc/testsuite/
	* c-c++-common/goacc/nested-reductions-1-routine.c: Adjust.
	* c-c++-common/goacc/nested-reductions-2-routine.c: Likewise.
	* gcc.dg/goacc/loop-processing-1.c: Likewise.
	* gfortran.dg/goacc/nested-reductions-1-routine.f90: Likewise.
	* gfortran.dg/goacc/nested-reductions-2-routine.f90: Likewise.
	* c-c++-common/goacc/orphan-reductions-1.c: New test.
	* c-c++-common/goacc/orphan-reductions-2.c: New test.
	* gfortran.dg/goacc/orphan-reductions-1.f90: New test.
	* gfortran.dg/goacc/orphan-reductions-2.f90: New test.
	libgomp/
	* testsuite/libgomp.oacc-fortran/parallel-dims.f90: Temporarily
	skip.

Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>
This commit is contained in:
Cesar Philippidis 2017-05-01 18:27:59 -07:00 committed by Thomas Schwinge
parent a83a075570
commit 2b7dac2c0d
16 changed files with 517 additions and 2 deletions

View File

@ -14135,6 +14135,14 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
goto check_dup_generic;
case OMP_CLAUSE_REDUCTION:
if (ort == C_ORT_ACC && oacc_get_fn_attrib (current_function_decl)
&& omp_find_clause (clauses, OMP_CLAUSE_GANG))
{
error_at (OMP_CLAUSE_LOCATION (c),
"gang reduction on an orphan loop");
remove = true;
break;
}
if (reduction_seen == 0)
reduction_seen = OMP_CLAUSE_REDUCTION_INSCAN (c) ? -1 : 1;
else if (reduction_seen != -2

View File

@ -6667,6 +6667,14 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
field_ok = ((ort & C_ORT_OMP_DECLARE_SIMD) == C_ORT_OMP);
goto check_dup_generic;
case OMP_CLAUSE_REDUCTION:
if (ort == C_ORT_ACC && oacc_get_fn_attrib (current_function_decl)
&& omp_find_clause (clauses, OMP_CLAUSE_GANG))
{
error_at (OMP_CLAUSE_LOCATION (c),
"gang reduction on an orphan loop");
remove = true;
break;
}
if (reduction_seen == 0)
reduction_seen = OMP_CLAUSE_REDUCTION_INSCAN (c) ? -1 : 1;
else if (reduction_seen != -2

View File

@ -8322,6 +8322,17 @@ resolve_omp_do (gfc_code *code)
}
}
/* Return true if the opcode of CODE is EXEC_OACC_PARALLEL or
   EXEC_OACC_PARALLEL_LOOP, false otherwise.  */

static bool
oacc_is_parallel (gfc_code *code)
{
  switch (code->op)
    {
    case EXEC_OACC_PARALLEL:
    case EXEC_OACC_PARALLEL_LOOP:
      return true;

    default:
      return false;
    }
}
/* Return true if the opcode of CODE is EXEC_OACC_KERNELS or
   EXEC_OACC_KERNELS_LOOP, false otherwise.  */

static bool
oacc_is_kernels (gfc_code *code)
{
  switch (code->op)
    {
    case EXEC_OACC_KERNELS:
    case EXEC_OACC_KERNELS_LOOP:
      return true;

    default:
      return false;
    }
}
static gfc_statement
omp_code_to_statement (gfc_code *code)
@ -8625,6 +8636,19 @@ resolve_oacc_loop_blocks (gfc_code *code)
if (!oacc_is_loop (code))
return;
if (code->op == EXEC_OACC_LOOP
&& code->ext.omp_clauses->lists[OMP_LIST_REDUCTION]
&& code->ext.omp_clauses->gang)
{
fortran_omp_context *c;
for (c = omp_current_ctx; c; c = c->previous)
if (!oacc_is_loop (c->code))
break;
if (c == NULL || !(oacc_is_parallel (c->code)
|| oacc_is_kernels (c->code)))
gfc_error ("gang reduction on an orphan loop at %L", &code->loc);
}
if (code->ext.omp_clauses->tile_list && code->ext.omp_clauses->gang
&& code->ext.omp_clauses->worker && code->ext.omp_clauses->vector)
gfc_error ("Tiled loop cannot be parallelized across gangs, workers and "

View File

@ -32,9 +32,10 @@ enum oacc_loop_flags {
OLF_INDEPENDENT = 1u << 2, /* Iterations are known independent. */
OLF_GANG_STATIC = 1u << 3, /* Gang partitioning is static (has op). */
OLF_TILE = 1u << 4, /* Tiled loop. */
OLF_REDUCTION = 1u << 5, /* Reduction loop. */
/* Explicitly specified loop axes. */
OLF_DIM_BASE = 5,
OLF_DIM_BASE = 6,
OLF_DIM_GANG = 1u << (OLF_DIM_BASE + GOMP_DIM_GANG),
OLF_DIM_WORKER = 1u << (OLF_DIM_BASE + GOMP_DIM_WORKER),
OLF_DIM_VECTOR = 1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR),

View File

@ -8271,6 +8271,10 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
tag |= OLF_TILE;
break;
case OMP_CLAUSE_REDUCTION:
tag |= OLF_REDUCTION;
break;
default:
continue;
}

View File

@ -1611,6 +1611,13 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
non-innermost available level. */
unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);
/* Orphan reductions cannot have gang partitioning. */
if ((loop->flags & OLF_REDUCTION)
&& oacc_get_fn_attrib (current_function_decl)
&& !lookup_attribute ("omp target entrypoint",
DECL_ATTRIBUTES (current_function_decl)))
this_mask = GOMP_DIM_MASK (GOMP_DIM_WORKER);
/* Find the first outermost available partition. */
while (this_mask <= outer_mask)
this_mask <<= 1;

View File

@ -44,6 +44,7 @@ void acc_routine (void)
#pragma acc loop reduction(+:sum)
for (i = 0; i < 10; i++)
#pragma acc loop reduction(+:sum)
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
#pragma acc loop reduction(+:sum)
for (k = 0; k < 10; k++)
@ -53,12 +54,14 @@ void acc_routine (void)
for (i = 0; i < 10; i++)
{
#pragma acc loop reduction(+:sum)
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
#pragma acc loop reduction(+:sum)
for (k = 0; k < 10; k++)
sum = 1;
#pragma acc loop reduction(-:diff)
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
#pragma acc loop reduction(-:diff)
for (k = 0; k < 10; k++)

View File

@ -11,6 +11,7 @@ void acc_routine (void)
#pragma acc loop reduction(+:sum)
for (i = 0; i < 10; i++)
#pragma acc loop // { dg-warning "nested loop in reduction needs reduction clause for .sum." }
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
#pragma acc loop reduction(+:sum)
for (k = 0; k < 10; k++)
@ -19,6 +20,7 @@ void acc_routine (void)
#pragma acc loop reduction(+:sum)
for (i = 0; i < 10; i++)
#pragma acc loop collapse(2) // { dg-warning "nested loop in reduction needs reduction clause for .sum." }
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
for (k = 0; k < 10; k++)
#pragma acc loop reduction(+:sum)
@ -28,6 +30,7 @@ void acc_routine (void)
#pragma acc loop reduction(+:sum)
for (i = 0; i < 10; i++)
#pragma acc loop // { dg-warning "nested loop in reduction needs reduction clause for .sum." }
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
#pragma acc loop // { dg-warning "nested loop in reduction needs reduction clause for .sum." }
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
@ -39,6 +42,7 @@ void acc_routine (void)
#pragma acc loop reduction(+:sum)
for (i = 0; i < 10; i++)
#pragma acc loop reduction(-:sum) // { dg-warning "conflicting reduction operations for .sum." }
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
#pragma acc loop reduction(+:sum) // { dg-warning "conflicting reduction operations for .sum." }
for (k = 0; k < 10; k++)
@ -47,6 +51,7 @@ void acc_routine (void)
#pragma acc loop reduction(+:sum)
for (i = 0; i < 10; i++)
#pragma acc loop reduction(-:sum) // { dg-warning "conflicting reduction operations for .sum." }
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
#pragma acc loop reduction(-:sum)
for (k = 0; k < 10; k++)
@ -55,6 +60,7 @@ void acc_routine (void)
#pragma acc loop reduction(+:sum)
for (i = 0; i < 10; i++)
#pragma acc loop reduction(-:sum) // { dg-warning "conflicting reduction operations for .sum." }
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
#pragma acc loop // { dg-warning "nested loop in reduction needs reduction clause for .sum." }
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
@ -66,6 +72,7 @@ void acc_routine (void)
#pragma acc loop reduction(+:sum)
for (i = 0; i < 10; i++)
#pragma acc loop reduction(-:sum) // { dg-warning "conflicting reduction operations for .sum." }
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
#pragma acc loop reduction(+:sum) // { dg-warning "conflicting reduction operations for .sum." })
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
@ -78,12 +85,14 @@ void acc_routine (void)
for (i = 0; i < 10; i++)
{
#pragma acc loop reduction(-:diff) // { dg-warning "nested loop in reduction needs reduction clause for .sum." }
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
#pragma acc loop reduction(+:sum)
for (k = 0; k < 10; k++)
sum = 1;
#pragma acc loop reduction(+:sum) // { dg-warning "nested loop in reduction needs reduction clause for .diff." }
// { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
for (j = 0; j < 10; j++)
#pragma acc loop reduction(-:diff)
for (k = 0; k < 10; k++)

View File

@ -0,0 +1,56 @@
/* Test orphan reductions. */
/* { dg-do compile } */
/* Orphan 'seq' loop with a reduction inside a 'seq' routine.  No
   diagnostic is expected for this function.  */
#pragma acc routine seq
int
seq_reduction (int n)
{
int i, sum = 0;
#pragma acc loop seq reduction(+:sum)
for (i = 0; i < n; i++)
sum = sum + 1;
return sum;
}
/* Two explicit 'gang' loops carrying reductions inside a 'gang' routine.
   Each loop is expected to be rejected with the "gang reduction on an
   orphan loop" error.  */
#pragma acc routine gang
int
gang_reduction (int n)
{
int i, s1 = 0, s2 = 0;
#pragma acc loop gang reduction(+:s1) /* { dg-error "gang reduction on an orphan loop" } */
for (i = 0; i < n; i++)
s1 = s1 + 2;
#pragma acc loop gang reduction(+:s2) /* { dg-error "gang reduction on an orphan loop" } */
for (i = 0; i < n; i++)
s2 = s2 + 2;
return s1 + s2;
}
/* Orphan 'worker' loop with a reduction inside a 'worker' routine.  No
   diagnostic is expected for this function.  */
#pragma acc routine worker
int
worker_reduction (int n)
{
int i, sum = 0;
#pragma acc loop worker reduction(+:sum)
for (i = 0; i < n; i++)
sum = sum + 3;
return sum;
}
/* Orphan 'vector' loop with a reduction inside a 'vector' routine.  No
   diagnostic is expected for this function.  */
#pragma acc routine vector
int
vector_reduction (int n)
{
int i, sum = 0;
#pragma acc loop vector reduction(+:sum)
for (i = 0; i < n; i++)
sum = sum + 4;
return sum;
}

View File

@ -0,0 +1,87 @@
/* Ensure that the middle end does not assign gang-level parallelism
to orphan loops containing reductions. */
/* { dg-do compile } */
/* { dg-additional-options "-fopt-info-optimized-omp" } */
/* { dg-additional-options "-Wopenacc-parallelism" } */
/* Gang routine containing a single reduction loop with no explicit
   partitioning clause.  The loop is expected to be assigned worker and
   vector parallelism, and the function to draw the "gang partitioned but
   does not contain gang partitioned code" warning.  */
#pragma acc routine gang
int
f1 () /* { dg-warning "region is gang partitioned but does not contain gang partitioned code" } */
{
int sum = 0, i;
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC worker vector loop parallelism" } */
for (i = 0; i < 100; i++)
sum++;
return sum;
}
/* Gang routine containing a two-deep nest of reduction loops with no
   explicit partitioning clauses.  The outer loop is expected to be
   assigned worker parallelism and the inner loop vector parallelism.  */
#pragma acc routine gang
int
f2 () /* { dg-warning "region is gang partitioned but does not contain gang partitioned code" } */
{
int sum = 0, i, j;
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC worker loop parallelism" } */
for (i = 0; i < 100; i++)
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC vector loop parallelism" } */
for (j = 0; j < 100; j++)
sum++;
return sum;
}
/* Gang routine containing a three-deep nest of reduction loops with no
   explicit partitioning clauses.  Expected assignment: worker for the
   outer loop, seq for the middle loop (with an "insufficient
   partitioning" warning), and vector for the innermost loop.  */
#pragma acc routine gang
int
f3 () /* { dg-warning "region is gang partitioned but does not contain gang partitioned code" } */
{
int sum = 0, i, j, k;
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC worker loop parallelism" } */
for (i = 0; i < 100; i++)
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC seq loop parallelism" } */
/* { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 } */
for (j = 0; j < 100; j++)
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC vector loop parallelism" } */
for (k = 0; k < 100; k++)
sum++;
return sum;
}
/* The same one-, two- and three-deep reduction loop nests as in f1, f2
   and f3, but placed inside 'parallel' regions.  Here the outermost
   loop of each nest is expected to be assigned gang parallelism.  */
int
main ()
{
int sum = 0, i, j, k;
#pragma acc parallel copy (sum)
{
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC gang vector loop parallelism" } */
for (i = 0; i < 100; i++)
sum++;
}
#pragma acc parallel copy (sum)
{
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC gang worker loop parallelism" } */
for (i = 0; i < 100; i++)
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC vector loop parallelism" } */
for (j = 0; j < 100; j++)
sum++;
}
#pragma acc parallel copy (sum)
{
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC gang loop parallelism" } */
for (i = 0; i < 100; i++)
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC worker loop parallelism" } */
for (j = 0; j < 100; j++)
#pragma acc loop reduction (+:sum) /* { dg-optimized "assigned OpenACC vector loop parallelism" } */
for (k = 0; k < 100; k++)
sum++;
}
return sum;
}

View File

@ -15,4 +15,4 @@ void vector_1 (int *ary, int size)
}
}
/* { dg-final { scan-tree-dump {OpenACC loops.*Loop 0\(0\).*Loop 24\(1\).*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_HEAD_MARK, 0, 1, 36\);.*Head-0:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_HEAD_MARK, 0, 1, 36\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_FORK, \.data_dep\.[0-9_]+, 0\);.*Tail-0:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_TAIL_MARK, \.data_dep\.[0-9_]+, 1\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_JOIN, \.data_dep\.[0-9_]+, 0\);.*Loop 6\(6\).*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_HEAD_MARK, 0, 2, 6\);.*Head-0:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_HEAD_MARK, 0, 2, 6\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_FORK, \.data_dep\.[0-9_]+, 1\);.*Head-1:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_HEAD_MARK, \.data_dep\.[0-9_]+, 1\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_FORK, \.data_dep\.[0-9_]+, 2\);.*Tail-1:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_TAIL_MARK, \.data_dep\.[0-9_]+, 2\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_JOIN, \.data_dep\.[0-9_]+, 2\);.*Tail-0:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_TAIL_MARK, \.data_dep\.[0-9_]+, 1\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_JOIN, \.data_dep\.[0-9_]+, 1\);} "oaccloops" } } */
/* { dg-final { scan-tree-dump {OpenACC loops.*Loop 0\(0\).*Loop 44\(1\).*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_HEAD_MARK, 0, 1, 68\);.*Head-0:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_HEAD_MARK, 0, 1, 68\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_FORK, \.data_dep\.[0-9_]+, 0\);.*Tail-0:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_TAIL_MARK, \.data_dep\.[0-9_]+, 1\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_JOIN, \.data_dep\.[0-9_]+, 0\);.*Loop 6\(6\).*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_HEAD_MARK, 0, 2, 6\);.*Head-0:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_HEAD_MARK, 0, 2, 6\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_FORK, \.data_dep\.[0-9_]+, 1\);.*Head-1:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_HEAD_MARK, \.data_dep\.[0-9_]+, 1\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_FORK, \.data_dep\.[0-9_]+, 2\);.*Tail-1:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_TAIL_MARK, \.data_dep\.[0-9_]+, 2\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_JOIN, \.data_dep\.[0-9_]+, 2\);.*Tail-0:.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_TAIL_MARK, \.data_dep\.[0-9_]+, 1\);.*\.data_dep\.[0-9_]+ = \.UNIQUE \(OACC_JOIN, \.data_dep\.[0-9_]+, 1\);} "oaccloops" } } */

View File

@ -59,6 +59,7 @@ subroutine acc_routine ()
!$acc loop reduction(+:sum)
do i = 1, 10
!$acc loop reduction(+:sum)
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop reduction(+:sum)
do k = 1, 10
@ -70,6 +71,7 @@ subroutine acc_routine ()
!$acc loop reduction(+:sum) reduction(-:diff)
do i = 1, 10
!$acc loop reduction(+:sum)
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop reduction(+:sum)
do k = 1, 10
@ -78,6 +80,7 @@ subroutine acc_routine ()
end do
!$acc loop reduction(-:diff)
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop reduction(-:diff)
do k = 1, 10

View File

@ -10,6 +10,7 @@ subroutine acc_routine ()
!$acc loop reduction(+:sum)
do i = 1, 10
!$acc loop ! { dg-warning "nested loop in reduction needs reduction clause for .sum." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop reduction(+:sum)
do k = 1, 10
@ -21,6 +22,7 @@ subroutine acc_routine ()
!$acc loop reduction(+:sum)
do i = 1, 10
!$acc loop collapse(2) ! { dg-warning "nested loop in reduction needs reduction clause for .sum." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
do k = 1, 10
!$acc loop reduction(+:sum)
@ -34,6 +36,7 @@ subroutine acc_routine ()
!$acc loop reduction(+:sum)
do i = 1, 10
!$acc loop ! { dg-warning "nested loop in reduction needs reduction clause for .sum." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop ! { dg-warning "nested loop in reduction needs reduction clause for .sum." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
@ -49,6 +52,7 @@ subroutine acc_routine ()
!$acc loop reduction(+:sum)
do i = 1, 10
!$acc loop reduction(-:sum) ! { dg-warning "conflicting reduction operations for .sum." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop reduction(+:sum) ! { dg-warning "conflicting reduction operations for .sum." }
do k = 1, 10
@ -60,6 +64,7 @@ subroutine acc_routine ()
!$acc loop reduction(+:sum)
do i = 1, 10
!$acc loop reduction(-:sum) ! { dg-warning "conflicting reduction operations for .sum." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop reduction(-:sum)
do k = 1, 10
@ -71,6 +76,7 @@ subroutine acc_routine ()
!$acc loop reduction(+:sum)
do i = 1, 10
!$acc loop reduction(-:sum) ! { dg-warning "conflicting reduction operations for .sum." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop ! { dg-warning "nested loop in reduction needs reduction clause for .sum." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
@ -86,6 +92,7 @@ subroutine acc_routine ()
!$acc loop reduction(+:sum)
do i = 1, 10
!$acc loop reduction(-:sum) ! { dg-warning "conflicting reduction operations for .sum." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop reduction(+:sum) ! { dg-warning "conflicting reduction operations for .sum." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
@ -101,6 +108,7 @@ subroutine acc_routine ()
!$acc loop reduction(+:sum) reduction(-:diff)
do i = 1, 10
!$acc loop reduction(-:diff) ! { dg-warning "nested loop in reduction needs reduction clause for .sum." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop reduction(+:sum)
do k = 1, 10
@ -109,6 +117,7 @@ subroutine acc_routine ()
end do
!$acc loop reduction(+:sum) ! { dg-warning "nested loop in reduction needs reduction clause for .diff." }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop reduction(-:diff)
do k = 1, 10

View File

@ -0,0 +1,206 @@
! Verify that gang reductions on orphan OpenACC loops are reported as errors.
! { dg-do compile }
! Reductions on a parallel construct, on a combined 'parallel loop gang',
! and on a 'loop gang' nested inside a parallel construct.  No diagnostic
! is expected in this subroutine.
subroutine s1
implicit none
integer, parameter :: n = 100
integer :: i, sum
sum = 0
!$acc parallel reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc end parallel
!$acc parallel loop gang reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc parallel
!$acc loop gang reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc end parallel
end subroutine s1
! Subroutine declared as an OpenACC routine.  Both 'loop gang' reductions,
! the top-level one and the one nested inside another acc loop, are
! expected to produce the "gang reduction on an orphan loop" error.
subroutine s2
implicit none
!$acc routine worker
integer, parameter :: n = 100
integer :: i, j, sum
sum = 0
!$acc loop gang reduction(+:sum) ! { dg-error "gang reduction on an orphan loop" }
do i = 1, n
sum = sum + 1
end do
!$acc loop reduction(+:sum)
do i = 1, n
!$acc loop gang reduction(+:sum) ! { dg-error "gang reduction on an orphan loop" }
do j = 1, n
sum = sum + 1
end do
end do
end subroutine s2
! Function variant of the non-orphan cases: every gang reduction is
! attached to, or nested inside, a parallel construct.  No diagnostic is
! expected in this function.
integer function f1 ()
implicit none
integer, parameter :: n = 100
integer :: i, sum
sum = 0
!$acc parallel reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc end parallel
!$acc parallel loop gang reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc parallel
!$acc loop gang reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc end parallel
f1 = sum
end function f1
! Function declared as an OpenACC routine.  Both 'loop gang' reductions,
! the top-level one and the one nested inside another acc loop, are
! expected to produce the "gang reduction on an orphan loop" error.
integer function f2 ()
implicit none
!$acc routine worker
integer, parameter :: n = 100
integer :: i, j, sum
sum = 0
!$acc loop gang reduction(+:sum) ! { dg-error "gang reduction on an orphan loop" }
do i = 1, n
sum = sum + 1
end do
!$acc loop reduction(+:sum)
do i = 1, n
!$acc loop gang reduction(+:sum) ! { dg-error "gang reduction on an orphan loop" }
do j = 1, n
sum = sum + 1
end do
end do
f2 = sum
end function f2
! Module procedures exercising the same scenarios as the external
! procedures in this file: s3 and f3 keep their gang reductions inside
! parallel constructs (no diagnostics expected), while s4 and f4 are acc
! routines whose 'loop gang' reductions are expected to be rejected.
module m
contains
! Non-orphan gang reductions in a module subroutine; no diagnostic expected.
subroutine s3
implicit none
integer, parameter :: n = 100
integer :: i, sum
sum = 0
!$acc parallel reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc end parallel
!$acc parallel loop gang reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc parallel
!$acc loop gang reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc end parallel
end subroutine s3
! Orphan gang reductions in a module subroutine declared as an acc routine;
! both 'loop gang' directives are expected to be diagnosed.
subroutine s4
implicit none
!$acc routine worker
integer, parameter :: n = 100
integer :: i, j, sum
sum = 0
!$acc loop gang reduction(+:sum) ! { dg-error "gang reduction on an orphan loop" }
do i = 1, n
sum = sum + 1
end do
!$acc loop reduction(+:sum)
do i = 1, n
!$acc loop gang reduction(+:sum) ! { dg-error "gang reduction on an orphan loop" }
do j = 1, n
sum = sum + 1
end do
end do
end subroutine s4
! Non-orphan gang reductions in a module function; no diagnostic expected.
integer function f3 ()
implicit none
integer, parameter :: n = 100
integer :: i, sum
sum = 0
!$acc parallel reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc end parallel
!$acc parallel loop gang reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc parallel
!$acc loop gang reduction(+:sum)
do i = 1, n
sum = sum + 1
end do
!$acc end parallel
f3 = sum
end function f3
! Orphan gang reductions in a module function declared as an acc routine;
! both 'loop gang' directives are expected to be diagnosed.
integer function f4 ()
implicit none
!$acc routine worker
integer, parameter :: n = 100
integer :: i, j, sum
sum = 0
!$acc loop gang reduction(+:sum) ! { dg-error "gang reduction on an orphan loop" }
do i = 1, n
sum = sum + 1
end do
!$acc loop reduction(+:sum)
do i = 1, n
!$acc loop gang reduction(+:sum) ! { dg-error "gang reduction on an orphan loop" }
do j = 1, n
sum = sum + 1
end do
end do
f4 = sum
end function f4
end module m

View File

@ -0,0 +1,89 @@
! Ensure that the middle end does not assign gang-level parallelism to
! orphan loops containing reductions.
! { dg-do compile }
! { dg-additional-options "-fopt-info-optimized-omp" }
! { dg-additional-options "-Wopenacc-parallelism" }
! Gang routine containing a single reduction loop with no explicit
! partitioning clause.  The loop is expected to be assigned worker and
! vector parallelism, and the subroutine to draw the "gang partitioned but
! does not contain gang partitioned code" warning.
subroutine s1 ! { dg-warning "region is gang partitioned but does not contain gang partitioned code" }
implicit none
!$acc routine gang
integer i, sum
sum = 0
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC worker vector loop parallelism" }
do i = 1, 10
sum = sum + 1
end do
end subroutine s1
! Gang routine containing a two-deep nest of reduction loops with no
! explicit partitioning clauses.  The outer loop is expected to be assigned
! worker parallelism and the inner loop vector parallelism.
subroutine s2 ! { dg-warning "region is gang partitioned but does not contain gang partitioned code" }
implicit none
!$acc routine gang
integer i, j, sum
sum = 0
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC worker loop parallelism" }
do i = 1, 10
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC vector loop parallelism" }
do j = 1, 10
sum = sum + 1
end do
end do
end subroutine s2
! Gang routine containing a three-deep nest of reduction loops with no
! explicit partitioning clauses.  Expected assignment: worker for the outer
! loop, seq for the middle loop (with an "insufficient partitioning"
! warning), and vector for the innermost loop.
subroutine s3 ! { dg-warning "region is gang partitioned but does not contain gang partitioned code" }
implicit none
!$acc routine gang
integer i, j, k, sum
sum = 0
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC worker loop parallelism" }
do i = 1, 10
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC seq loop parallelism" }
! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 }
do j = 1, 10
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC vector loop parallelism" }
do k = 1, 10
sum = sum + 1
end do
end do
end do
end subroutine s3
! The same one-, two- and three-deep reduction loop nests as in s1, s2 and
! s3, but placed inside 'parallel' regions.  Here the outermost loop of
! each nest is expected to be assigned gang parallelism.
subroutine s4
implicit none
integer i, j, k, sum
sum = 0
!$acc parallel copy(sum)
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC gang vector loop parallelism" }
do i = 1, 10
sum = sum + 1
end do
!$acc end parallel
!$acc parallel copy(sum)
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC gang worker loop parallelism" }
do i = 1, 10
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC vector loop parallelism" }
do j = 1, 10
sum = sum + 1
end do
end do
!$acc end parallel
!$acc parallel copy(sum)
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC gang loop parallelism" }
do i = 1, 10
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC worker loop parallelism" }
do j = 1, 10
!$acc loop reduction (+:sum) ! { dg-optimized "assigned OpenACC vector loop parallelism" }
do k = 1, 10
sum = sum + 1
end do
end do
end do
!$acc end parallel
end subroutine s4

View File

@ -3,6 +3,7 @@
! { dg-additional-sources parallel-dims-aux.c }
! { dg-do run }
! { dg-skip-if TODO { *-*-* } }
! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" }
! { dg-additional-options "-fopt-info-note-omp" }