[PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition

gcc/
	* gimplify.c (gimplify_adjust_omp_clauses): Discard
	OMP_CLAUSE_TILE.
	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
	gcc/testsuite/
	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
	for OpenACC tile clauses.
	* gfortran.dg/goacc/combined-directives.f90: Likewise.

	gcc/
	PR middle-end/71373
	* tree-nested.c (convert_nonlocal_omp_clauses)
	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
	gcc/testsuite/
	PR middle-end/71373
	* gcc.dg/goacc/nested-function-1.c: New file.
	* gcc.dg/goacc/nested-function-2.c: Likewise.
	* gcc.dg/goacc/pr71373.c: Likewise.
	* gfortran.dg/goacc/cray-2.f95: Likewise.
	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
	* gfortran.dg/goacc/cray.f95: Update.
	* gfortran.dg/goacc/loop-1.f95: Likewise.
	* gfortran.dg/goacc/loop-3.f95: Likewise.
	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
	libgomp/testsuite/
	PR middle-end/71373
	* libgomp.oacc-c/nested-function-1.c: New file.
	* libgomp.oacc-c/nested-function-2.c: Likewise.
	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.

Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>

From-SVN: r237291
This commit is contained in:
Thomas Schwinge 2016-06-10 11:22:51 +02:00 committed by Thomas Schwinge
parent 4b1ffdb16c
commit 27d6ba889b
24 changed files with 1369 additions and 100 deletions

View File

@ -1,5 +1,15 @@
2016-06-10 Thomas Schwinge <thomas@codesourcery.com>
PR middle-end/71373
* tree-nested.c (convert_nonlocal_omp_clauses)
(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
* gimplify.c (gimplify_adjust_omp_clauses): Discard
OMP_CLAUSE_TILE.
* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
* omp-low.c (scan_sharing_clauses): Don't expect
OMP_CLAUSE__CACHE_.

View File

@ -8280,7 +8280,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
break;
case OMP_CLAUSE_TILE:
/* We're not yet making use of the information provided by OpenACC
tile clauses. Discard these here, to simplify later middle end
processing. */
remove = true;
break;
default:

View File

@ -2187,7 +2187,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_TILE:
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
@ -2200,6 +2199,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
install_var_local (decl, ctx);
break;
case OMP_CLAUSE_TILE:
case OMP_CLAUSE__CACHE_:
default:
gcc_unreachable ();
@ -2357,13 +2357,13 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_TILE:
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
case OMP_CLAUSE__GRIDDIM_:
break;
case OMP_CLAUSE_TILE:
case OMP_CLAUSE__CACHE_:
default:
gcc_unreachable ();

View File

@ -1,4 +1,24 @@
2016-06-10 Thomas Schwinge <thomas@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
PR middle-end/71373
* gcc.dg/goacc/nested-function-1.c: New file.
* gcc.dg/goacc/nested-function-2.c: Likewise.
* gcc.dg/goacc/pr71373.c: Likewise.
* gfortran.dg/goacc/cray-2.f95: Likewise.
* gfortran.dg/goacc/loop-1-2.f95: Likewise.
* gfortran.dg/goacc/loop-3-2.f95: Likewise.
* gfortran.dg/goacc/cray.f95: Update.
* gfortran.dg/goacc/loop-1.f95: Likewise.
* gfortran.dg/goacc/loop-3.f95: Likewise.
* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
2016-06-10 Thomas Schwinge <thomas@codesourcery.com>
* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
for OpenACC tile clauses.
* gfortran.dg/goacc/combined-directives.f90: Likewise.
PR c/71381
* c-c++-common/goacc/cache-1.c: Update. Move invalid usage tests

View File

@ -111,6 +111,7 @@ test ()
// { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
// { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
// { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
// XFAILed: OpenACC tile clauses are discarded during gimplification.
// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
// { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
// { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }

View File

@ -0,0 +1,100 @@
/* Exercise nested function decomposition, gcc/tree-nested.c. */
/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran
version. */
int main ()
{
#define N 100
int nonlocal_arg;
int nonlocal_a[N];
int nonlocal_i;
int nonlocal_j;
for (int i = 0; i < N; ++i)
nonlocal_a[i] = 5;
nonlocal_arg = 5;
void local ()
{
int local_i;
int local_arg;
int local_a[N];
int local_j;
for (int i = 0; i < N; ++i)
local_a[i] = 5;
local_arg = 5;
#pragma acc kernels loop \
gang(num:local_arg) worker(local_arg) vector(local_arg) \
wait async(local_arg)
for (local_i = 0; local_i < N; ++local_i)
{
#pragma acc cache (local_a[local_i:5])
local_a[local_i] = 100;
#pragma acc loop seq tile(*)
for (local_j = 0; local_j < N; ++local_j)
;
#pragma acc loop auto independent tile(1)
for (local_j = 0; local_j < N; ++local_j)
;
}
#pragma acc kernels loop \
gang(static:local_arg) worker(local_arg) vector(local_arg) \
wait(local_arg, local_arg + 1, local_arg + 2) async
for (local_i = 0; local_i < N; ++local_i)
{
#pragma acc cache (local_a[local_i:4])
local_a[local_i] = 100;
#pragma acc loop seq tile(1)
for (local_j = 0; local_j < N; ++local_j)
;
#pragma acc loop auto independent tile(*)
for (local_j = 0; local_j < N; ++local_j)
;
}
}
void nonlocal ()
{
for (int i = 0; i < N; ++i)
nonlocal_a[i] = 5;
nonlocal_arg = 5;
#pragma acc kernels loop \
gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
wait async(nonlocal_arg)
for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
{
#pragma acc cache (nonlocal_a[nonlocal_i:3])
nonlocal_a[nonlocal_i] = 100;
#pragma acc loop seq tile(2)
for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
;
#pragma acc loop auto independent tile(3)
for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
;
}
#pragma acc kernels loop \
gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
{
#pragma acc cache (nonlocal_a[nonlocal_i:2])
nonlocal_a[nonlocal_i] = 100;
#pragma acc loop seq tile(*)
for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
;
#pragma acc loop auto independent tile(*)
for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
;
}
}
local ();
nonlocal ();
return 0;
}

View File

@ -0,0 +1,45 @@
/* Exercise nested function decomposition, gcc/tree-nested.c. */
int
main (void)
{
int j = 0, k = 6, l = 7, m = 8;
void simple (void)
{
int i;
#pragma acc parallel
{
#pragma acc loop
for (i = 0; i < m; i+= k)
j = (m + i - j) * l;
}
}
void collapse (void)
{
int x, y, z;
#pragma acc parallel
{
#pragma acc loop collapse (3)
for (x = 0; x < k; x++)
for (y = -5; y < l; y++)
for (z = 0; z < m; z++)
j += x + y + z;
}
}
void reduction (void)
{
int x, y, z;
#pragma acc parallel reduction (+:j)
{
#pragma acc loop reduction (+:j) collapse (3)
for (x = 0; x < k; x++)
for (y = -5; y < l; y++)
for (z = 0; z < m; z++)
j += x + y + z;
}
}
simple();
collapse();
reduction();
return 0;
}

View File

@ -0,0 +1,41 @@
/* Unintentional nested function usage. */
/* Due to missing right braces '}', the following functions are parsed as
nested functions. This ran into an ICE. */
void foo (void)
{
#pragma acc parallel
{
#pragma acc loop independent
for (int i = 0; i < 16; i++)
;
// Note right brace '}' commented out here.
//}
}
void bar (void)
{
}
// Adding right brace '}' here, to make this compile.
}
// ..., and the other way round:
void BAR (void)
{
// Note right brace '}' commented out here.
//}
void FOO (void)
{
#pragma acc parallel
{
#pragma acc loop independent
for (int i = 0; i < 16; i++)
;
}
}
// Adding right brace '}' here, to make this compile.
}

View File

@ -143,7 +143,8 @@ end subroutine test
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
! XFAILed: OpenACC tile clauses are discarded during gimplification.
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }

View File

@ -0,0 +1,56 @@
! { dg-additional-options "-fcray-pointer" }
! See also cray.f95.
program test
call oacc1
contains
subroutine oacc1
implicit none
integer :: i
real :: pointee
pointer (ptr, pointee)
!$acc declare device_resident (pointee)
!$acc declare device_resident (ptr)
!$acc data copy (pointee) ! { dg-error "Cray pointee" }
!$acc end data
!$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
!$acc end data
!$acc parallel private (pointee) ! { dg-error "Cray pointee" }
!$acc end parallel
!$acc host_data use_device (pointee) ! { dg-error "Cray pointee" }
!$acc end host_data
!$acc parallel loop reduction(+:pointee) ! { dg-error "Cray pointee" }
do i = 1,5
enddo
!$acc end parallel loop
!$acc parallel loop
do i = 1,5
!$acc cache (pointee) ! { dg-error "Cray pointee" }
enddo
!$acc end parallel loop
!$acc update device (pointee) ! { dg-error "Cray pointee" }
!$acc update host (pointee) ! { dg-error "Cray pointee" }
!$acc update self (pointee) ! { dg-error "Cray pointee" }
!$acc data copy (ptr)
!$acc end data
!$acc data deviceptr (ptr) ! { dg-error "Cray pointer" }
!$acc end data
!$acc parallel private (ptr)
!$acc end parallel
!$acc host_data use_device (ptr) ! { dg-error "Cray pointer" }
!$acc end host_data
!$acc parallel loop reduction(+:ptr) ! { dg-error "Cray pointer" }
do i = 1,5
enddo
!$acc end parallel loop
!$acc parallel loop
do i = 1,5
!TODO: This must fail, as in openacc-1_0-branch.
!$acc cache (ptr) ! { dg-error "" "TODO" { xfail *-*-* } }
enddo
!$acc end parallel loop
!$acc update device (ptr)
!$acc update host (ptr)
!$acc update self (ptr)
end subroutine oacc1
end program test

View File

@ -1,5 +1,5 @@
! { dg-do compile }
! { dg-additional-options "-fcray-pointer" }
! See also cray-2.f95.
module test
contains
@ -8,8 +8,8 @@ contains
integer :: i
real :: pointee
pointer (ptr, pointee)
!$acc declare device_resident (pointee)
!$acc declare device_resident (ptr)
!$acc declare device_resident (pointee)
!$acc declare device_resident (ptr)
!$acc data copy (pointee) ! { dg-error "Cray pointee" }
!$acc end data
!$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }

View File

@ -0,0 +1,176 @@
! See also loop-1.f95.
program test
call test1
contains
subroutine test1
integer :: i, j, k, b(10)
integer, dimension (30) :: a
double precision :: d
real :: r
i = 0
!$acc loop
do 100 ! { dg-error "cannot be a DO WHILE or DO without loop control" }
if (i .gt. 0) exit ! { dg-error "EXIT statement" }
100 i = i + 1
i = 0
!$acc loop
do ! { dg-error "cannot be a DO WHILE or DO without loop control" }
if (i .gt. 0) exit ! { dg-error "EXIT statement" }
i = i + 1
end do
i = 0
!$acc loop
do 200 while (i .lt. 4) ! { dg-error "cannot be a DO WHILE or DO without loop control" }
200 i = i + 1
!$acc loop
do while (i .lt. 8) ! { dg-error "cannot be a DO WHILE or DO without loop control" }
i = i + 1
end do
!$acc loop
do 300 d = 1, 30, 6
i = d
300 a(i) = 1
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
!$acc loop
do d = 1, 30, 5
i = d
a(i) = 2
end do
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
!$acc loop
do i = 1, 30
if (i .eq. 16) exit ! { dg-error "EXIT statement" }
end do
!$acc loop
outer: do i = 1, 30
do j = 5, 10
if (i .eq. 6 .and. j .eq. 7) exit outer ! { dg-error "EXIT statement" }
end do
end do outer
last: do i = 1, 30
end do last
! different types of loop are allowed
!$acc loop
do i = 1,10
end do
!$acc loop
do 400, i = 1,10
400 a(i) = i
! after loop directive must be loop
!$acc loop
a(1) = 1 ! { dg-error "Expected DO loop" }
do i = 1,10
enddo
! combined directives may be used with/without end
!$acc parallel loop
do i = 1,10
enddo
!$acc parallel loop
do i = 1,10
enddo
!$acc end parallel loop
!$acc kernels loop
do i = 1,10
enddo
!$acc kernels loop
do i = 1,10
enddo
!$acc end kernels loop
!$acc kernels loop reduction(max:i)
do i = 1,10
enddo
!$acc kernels
!$acc loop reduction(max:i)
do i = 1,10
enddo
!$acc end kernels
!$acc parallel loop collapse(0) ! { dg-error "constant positive integer" }
do i = 1,10
enddo
!$acc parallel loop collapse(-1) ! { dg-error "constant positive integer" }
do i = 1,10
enddo
!$acc parallel loop collapse(i) ! { dg-error "Constant expression required" }
do i = 1,10
enddo
!$acc parallel loop collapse(4) ! { dg-error "not enough DO loops for collapsed" }
do i = 1, 3
do j = 4, 6
do k = 5, 7
a(i+j-k) = i + j + k
end do
end do
end do
!$acc parallel loop collapse(2)
do i = 1, 5, 2
do j = i + 1, 7, i ! { dg-error "collapsed loops don.t form rectangular iteration space" }
end do
end do
!$acc parallel loop collapse(2)
do i = 1, 3
do j = 4, 6
end do
end do
!$acc parallel loop collapse(2)
do i = 1, 3
do j = 4, 6
end do
k = 4
end do
!$acc parallel loop collapse(3-1)
do i = 1, 3
do j = 4, 6
end do
k = 4
end do
!$acc parallel loop collapse(1+1)
do i = 1, 3
do j = 4, 6
end do
k = 4
end do
!$acc parallel loop collapse(2)
do i = 1, 3
do ! { dg-error "cannot be a DO WHILE or DO without loop control" }
end do
end do
!$acc parallel loop collapse(2)
do i = 1, 3
do r = 4, 6
end do
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
end do
! Both seq and independent are not allowed
!$acc loop independent seq ! { dg-error "SEQ conflicts with INDEPENDENT" }
do i = 1,10
enddo
!$acc cache (a(1:10)) ! { dg-error "ACC CACHE directive must be inside of loop" }
do i = 1,10
!$acc cache(a(i:i+1))
enddo
do i = 1,10
!$acc cache(a(i:i+1))
a(i) = i
!$acc cache(a(i+2:i+2+1))
enddo
end subroutine test1
end program test

View File

@ -1,8 +1,10 @@
! See also loop-1-2.f95.
module test
implicit none
contains
subroutine test1
subroutine test1
integer :: i, j, k, b(10)
integer, dimension (30) :: a
double precision :: d
@ -30,15 +32,15 @@ subroutine test1
do 300 d = 1, 30, 6
i = d
300 a(i) = 1
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
!$acc loop
do d = 1, 30, 5
i = d
a(i) = 2
end do
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
!$acc loop
do i = 1, 30
if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@ -53,7 +55,7 @@ subroutine test1
end do last
! different types of loop are allowed
!$acc loop
!$acc loop
do i = 1,10
end do
!$acc loop
@ -65,8 +67,8 @@ subroutine test1
a(1) = 1 ! { dg-error "Expected DO loop" }
do i = 1,10
enddo
! combined directives may be used with/without end
! combined directives may be used with/without end
!$acc parallel loop
do i = 1,10
enddo
@ -82,11 +84,11 @@ subroutine test1
enddo
!$acc end kernels loop
!$acc kernels loop reduction(max:i)
!$acc kernels loop reduction(max:i)
do i = 1,10
enddo
!$acc kernels
!$acc loop reduction(max:i)
!$acc kernels
!$acc loop reduction(max:i)
do i = 1,10
enddo
!$acc end kernels
@ -118,7 +120,7 @@ subroutine test1
end do
!$acc parallel loop collapse(2)
do i = 1, 3
do j = 4, 6
do j = 4, 6
end do
end do
!$acc parallel loop collapse(2)
@ -148,8 +150,8 @@ subroutine test1
do i = 1, 3
do r = 4, 6
end do
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
end do
! Both seq and independent are not allowed

View File

@ -0,0 +1,58 @@
! { dg-additional-options "-std=f2008" }
! See also loop-3.f95.
program test
call test1
contains
subroutine test1
implicit none
integer :: i, j
! !$acc end loop not required by spec
!$acc loop
do i = 1,5
enddo
!$acc end loop ! { dg-warning "Redundant" }
!$acc loop
do i = 1,5
enddo
j = 1
!$acc end loop ! { dg-error "Unexpected" }
!$acc parallel
!$acc loop
do i = 1,5
enddo
!$acc end parallel
!$acc end loop ! { dg-error "Unexpected" }
! OpenACC supports Fortran 2008 do concurrent statement
!$acc loop
do concurrent (i = 1:5)
end do
!$acc loop
outer_loop: do i = 1, 5
inner_loop: do j = 1,5
if (i .eq. j) cycle outer_loop
if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
end do inner_loop
end do outer_loop
outer_loop1: do i = 1, 5
!$acc loop
inner_loop1: do j = 1,5
if (i .eq. j) cycle outer_loop1 ! { dg-error "CYCLE statement" }
end do inner_loop1
end do outer_loop1
!$acc loop collapse(2)
outer_loop2: do i = 1, 5
inner_loop2: do j = 1,5
if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
end do inner_loop2
end do outer_loop2
end subroutine test1
end program test

View File

@ -1,10 +1,10 @@
! { dg-do compile }
! { dg-additional-options "-std=f2008" }
! See also loop-3-2.f95.
subroutine test1
implicit none
integer :: i, j
! !$acc end loop not required by spec
!$acc loop
do i = 1,5
@ -23,7 +23,7 @@ subroutine test1
enddo
!$acc end parallel
!$acc end loop ! { dg-error "Unexpected" }
! OpenACC supports Fortran 2008 do concurrent statement
!$acc loop
do concurrent (i = 1:5)
@ -35,7 +35,7 @@ subroutine test1
if (i .eq. j) cycle outer_loop
if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
end do inner_loop
end do outer_loop
end do outer_loop
outer_loop1: do i = 1, 5
!$acc loop
@ -50,6 +50,5 @@ subroutine test1
if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
end do inner_loop2
end do outer_loop2
end do outer_loop2
end subroutine test1

View File

@ -0,0 +1,93 @@
! Exercise nested function decomposition, gcc/tree-nested.c.
! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version.
program main
integer, parameter :: N = 100
integer :: nonlocal_arg
integer :: nonlocal_a(N)
integer :: nonlocal_i
integer :: nonlocal_j
nonlocal_a (:) = 5
nonlocal_arg = 5
call local ()
call nonlocal ()
contains
subroutine local ()
integer :: local_i
integer :: local_arg
integer :: local_a(N)
integer :: local_j
local_a (:) = 5
local_arg = 5
!$acc kernels loop &
!$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
!$acc wait async(local_arg)
do local_i = 1, N
!$acc cache (local_a(local_i:local_i + 5))
local_a(local_i) = 100
!$acc loop seq tile(*)
do local_j = 1, N
enddo
!$acc loop auto independent tile(1)
do local_j = 1, N
enddo
enddo
!$acc end kernels loop
!$acc kernels loop &
!$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
!$acc wait(local_arg, local_arg + 1, local_arg + 2) async
do local_i = 1, N
!$acc cache (local_a(local_i:local_i + 4))
local_a(local_i) = 100
!$acc loop seq tile(1)
do local_j = 1, N
enddo
!$acc loop auto independent tile(*)
do local_j = 1, N
enddo
enddo
!$acc end kernels loop
end subroutine local
subroutine nonlocal ()
nonlocal_a (:) = 5
nonlocal_arg = 5
!$acc kernels loop &
!$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
!$acc wait async(nonlocal_arg)
do nonlocal_i = 1, N
!$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
nonlocal_a(nonlocal_i) = 100
!$acc loop seq tile(2)
do nonlocal_j = 1, N
enddo
!$acc loop auto independent tile(3)
do nonlocal_j = 1, N
enddo
enddo
!$acc end kernels loop
!$acc kernels loop &
!$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
!$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
do nonlocal_i = 1, N
!$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
nonlocal_a(nonlocal_i) = 100
!$acc loop seq tile(*)
do nonlocal_j = 1, N
enddo
!$acc loop auto independent tile(*)
do nonlocal_j = 1, N
enddo
enddo
!$acc end kernels loop
end subroutine nonlocal
end program main

View File

@ -1,73 +0,0 @@
! Exercise how tree-nested.c handles gang, worker vector and seq.
! { dg-do compile }
program main
integer, parameter :: N = 100
integer :: nonlocal_arg
integer :: nonlocal_a(N)
integer :: nonlocal_i
integer :: nonlocal_j
nonlocal_a (:) = 5
nonlocal_arg = 5
call local ()
call nonlocal ()
contains
subroutine local ()
integer :: local_i
integer :: local_arg
integer :: local_a(N)
integer :: local_j
local_a (:) = 5
local_arg = 5
!$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
do local_i = 1, N
local_a(local_i) = 100
!$acc loop seq
do local_j = 1, N
enddo
enddo
!$acc end kernels loop
!$acc kernels loop gang(static:local_arg) worker(local_arg) &
!$acc vector(local_arg)
do local_i = 1, N
local_a(local_i) = 100
!$acc loop seq
do local_j = 1, N
enddo
enddo
!$acc end kernels loop
end subroutine local
subroutine nonlocal ()
nonlocal_a (:) = 5
nonlocal_arg = 5
!$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
!$acc vector(nonlocal_arg)
do nonlocal_i = 1, N
nonlocal_a(nonlocal_i) = 100
!$acc loop seq
do nonlocal_j = 1, N
enddo
enddo
!$acc end kernels loop
!$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
!$acc vector(nonlocal_arg)
do nonlocal_i = 1, N
nonlocal_a(nonlocal_i) = 100
!$acc loop seq
do nonlocal_j = 1, N
enddo
enddo
!$acc end kernels loop
end subroutine nonlocal
end program main

View File

@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_ASYNC:
case OMP_CLAUSE_WAIT:
/* Several OpenACC clauses have optional arguments. Check if they
are present. */
if (OMP_CLAUSE_OPERAND (clause, 0))
@ -1197,8 +1199,21 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_SIMD:
case OMP_CLAUSE_DEFAULTMAP:
case OMP_CLAUSE_SEQ:
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
break;
case OMP_CLAUSE_TILE:
/* OpenACC tile clauses are discarded during gimplification, so we
don't expect to see anything here. */
gcc_unreachable ();
case OMP_CLAUSE__CACHE_:
/* These clauses belong to the OpenACC cache directive, which is
discarded during gimplification, so we don't expect to see
anything here. */
gcc_unreachable ();
default:
gcc_unreachable ();
}
@ -1790,6 +1805,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_ASYNC:
case OMP_CLAUSE_WAIT:
/* Several OpenACC clauses have optional arguments. Check if they
are present. */
if (OMP_CLAUSE_OPERAND (clause, 0))
@ -1878,8 +1895,21 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_SIMD:
case OMP_CLAUSE_DEFAULTMAP:
case OMP_CLAUSE_SEQ:
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
break;
case OMP_CLAUSE_TILE:
/* OpenACC tile clauses are discarded during gimplification, so we
don't expect to see anything here. */
gcc_unreachable ();
case OMP_CLAUSE__CACHE_:
/* These clauses belong to the OpenACC cache directive, which is
discarded during gimplification, so we don't expect to see
anything here. */
gcc_unreachable ();
default:
gcc_unreachable ();
}

View File

@ -1,3 +1,13 @@
2016-06-10 Thomas Schwinge <thomas@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
PR middle-end/71373
* libgomp.oacc-c/nested-function-1.c: New file.
* libgomp.oacc-c/nested-function-2.c: Likewise.
* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
* libgomp.oacc-fortran/nested-function-3.f90: Likewise.
2016-06-10 Thomas Schwinge <thomas@codesourcery.com>
PR c/71381

View File

@ -0,0 +1,52 @@
/* Exercise nested function decomposition, gcc/tree-nested.c. */
int
main (void)
{
void test1 ()
{
int i, j, k;
int a[4][7][8];
__builtin_memset (a, 0, sizeof (a));
#pragma acc parallel
#pragma acc loop collapse(4 - 1)
for (i = 1; i <= 3; i++)
for (j = 4; j <= 6; j++)
for (k = 5; k <= 7; k++)
a[i][j][k] = i + j + k;
for (i = 1; i <= 3; i++)
for (j = 4; j <= 6; j++)
for (k = 5; k <= 7; k++)
if (a[i][j][k] != i + j + k)
__builtin_abort();
}
void test2 ()
{
int i, j, k;
int a[4][4][4];
__builtin_memset (a, 0, sizeof (a));
#pragma acc parallel
#pragma acc loop collapse(3)
for (i = 1; i <= 3; i++)
for (j = 1; j <= 3; j++)
for (k = 1; k <= 3; k++)
a[i][j][k] = 1;
for (i = 1; i <= 3; i++)
for (j = 1; j <= 3; j++)
for (k = 1; k <= 3; k++)
if (a[i][j][k] != 1)
__builtin_abort ();
}
test1 ();
test2 ();
return 0;
}

View File

@ -0,0 +1,155 @@
/* Exercise nested function decomposition, gcc/tree-nested.c. */
int
main (void)
{
int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1;
void test1 ()
{
int i, j, k;
int a[4][4][4];
__builtin_memset (a, '\0', sizeof (a));
#pragma acc parallel
#pragma acc loop collapse(3)
for (i = 1; i <= 3; i++)
for (j = 1; j <= 3; j++)
for (k = 2; k <= 3; k++)
a[i][j][k] = 1;
for (i = 1; i <= 3; i++)
for (j = 1; j <= 3; j++)
for (k = 2; k <= 3; k++)
if (a[i][j][k] != 1)
__builtin_abort();
}
void test2 (int v1, int v2, int v3, int v4, int v5, int v6)
{
int i, j, k, l = 0, r = 0;
int a[7][5][19];
int b[7][5][19];
__builtin_memset (a, '\0', sizeof (a));
__builtin_memset (b, '\0', sizeof (b));
#pragma acc parallel reduction (||:l)
#pragma acc loop reduction (||:l) collapse(3)
for (i = v1; i <= v2; i++)
for (j = v3; j <= v4; j++)
for (k = v5; k <= v6; k++)
{
l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
if (!l)
a[i][j][k] += 1;
}
for (i = v1; i <= v2; i++)
for (j = v3; j <= v4; j++)
for (k = v5; k <= v6; k++)
{
r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
if (!r)
b[i][j][k] += 1;
}
if (l != r)
__builtin_abort ();
for (i = v1; i <= v2; i++)
for (j = v3; j <= v4; j++)
for (k = v5; k <= v6; k++)
if (b[i][j][k] != a[i][j][k])
__builtin_abort ();
}
void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8,
int v9)
{
int i, j, k, l = 0, r = 0;
int a[7][5][19];
int b[7][5][19];
__builtin_memset (a, '\0', sizeof (a));
__builtin_memset (b, '\0', sizeof (b));
#pragma acc parallel reduction (||:l)
#pragma acc loop reduction (||:l) collapse(3)
for (i = v1; i <= v2; i += v7)
for (j = v3; j <= v4; j += v8)
for (k = v5; k <= v6; k += v9)
{
l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
if (!l)
a[i][j][k] += 1;
}
for (i = v1; i <= v2; i += v7)
for (j = v3; j <= v4; j += v8)
for (k = v5; k <= v6; k += v9)
{
r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
if (!r)
b[i][j][k] += 1;
}
if (l != r)
__builtin_abort ();
for (i = v1; i <= v2; i++)
for (j = v3; j <= v4; j++)
for (k = v5; k <= v6; k++)
if (b[i][j][k] != a[i][j][k])
__builtin_abort ();
}
void test4 ()
{
int i, j, k, l = 0, r = 0;
int a[7][5][19];
int b[7][5][19];
int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8,
v9 = p9;
__builtin_memset (a, '\0', sizeof (a));
__builtin_memset (b, '\0', sizeof (b));
#pragma acc parallel reduction (||:l)
#pragma acc loop reduction (||:l) collapse(3)
for (i = v1; i <= v2; i += v7)
for (j = v3; j <= v4; j += v8)
for (k = v5; k <= v6; k += v9)
{
l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
if (!l)
a[i][j][k] += 1;
}
for (i = v1; i <= v2; i += v7)
for (j = v3; j <= v4; j += v8)
for (k = v5; k <= v6; k += v9)
{
r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
if (!r)
b[i][j][k] += 1;
}
if (l != r)
__builtin_abort ();
for (i = v1; i <= v2; i++)
for (j = v3; j <= v4; j++)
for (k = v5; k <= v6; k++)
if (b[i][j][k] != a[i][j][k])
__builtin_abort ();
}
test1 ();
test2 (p1, p2, p3, p4, p5, p6);
test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9);
test4 ();
return 0;
}

View File

@ -0,0 +1,70 @@
! Exercise nested function decomposition, gcc/tree-nested.c.
! { dg-do run }
program collapse2
call test1
call test2
contains
subroutine test1
integer :: i, j, k, a(1:3, 4:6, 5:7)
logical :: l
l = .false.
a(:, :, :) = 0
!$acc parallel reduction (.or.:l)
!$acc loop worker vector collapse(4 - 1)
do 164 i = 1, 3
do 164 j = 4, 6
do 164 k = 5, 7
a(i, j, k) = i + j + k
164 end do
!$acc loop worker vector reduction(.or.:l) collapse(2)
firstdo: do i = 1, 3
do j = 4, 6
do k = 5, 7
if (a(i, j, k) .ne. (i + j + k)) l = .true.
end do
end do
end do firstdo
!$acc end parallel
if (l) call abort
end subroutine test1
subroutine test2
integer :: a(3,3,3), k, kk, kkk, l, ll, lll
a = 0
!$acc parallel
! Use "gang(static:1)" here and below to effectively turn gang-redundant
! execution mode into something like gang-single.
!$acc loop gang(static:1) collapse(1)
do 115 k=1,3
!$acc loop collapse(2)
dokk: do kk=1,3
do kkk=1,3
a(k,kk,kkk) = 1
enddo
enddo dokk
115 continue
!$acc loop gang(static:1) collapse(1)
do k=1,3
if (any(a(k,1:3,1:3).ne.1)) call abort
enddo
! Use "gang(static:1)" here and below to effectively turn gang-redundant
! execution mode into something like gang-single.
!$acc loop gang(static:1) collapse(1)
dol: do 120 l=1,3
!$acc loop collapse(2)
doll: do ll=1,3
do lll=1,3
a(l,ll,lll) = 2
enddo
enddo doll
120 end do dol
!$acc loop gang(static:1) collapse(1)
do l=1,3
if (any(a(l,1:3,1:3).ne.2)) call abort
enddo
!$acc end parallel
end subroutine test2
end program collapse2

View File

@ -0,0 +1,173 @@
! Exercise nested function decomposition, gcc/tree-nested.c.
! { dg-do run }
program collapse3
integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9
p1 = 2
p2 = 6
p3 = -2
p4 = 4
p5 = 13
p6 = 18
p7 = 1
p8 = 1
p9 = 1
call test1
call test2 (p1, p2, p3, p4, p5, p6)
call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9)
call test4
contains
subroutine test1
integer :: a(3,3,3), k, kk, kkk, l, ll, lll
!$acc parallel
!$acc loop collapse(3)
do 115 k=1,3
dokk: do kk=1,3
do kkk=1,3
a(k,kk,kkk) = 1
enddo
enddo dokk
115 continue
!$acc end parallel
if (any(a(1:3,1:3,1:3).ne.1)) call abort
!$acc parallel
!$acc loop collapse(3)
dol: do 120 l=1,3
doll: do ll=1,3
do lll=1,3
a(l,ll,lll) = 2
enddo
enddo doll
120 end do dol
!$acc end parallel
if (any(a(1:3,1:3,1:3).ne.2)) call abort
end subroutine test1
subroutine test2(v1, v2, v3, v4, v5, v6)
integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
integer :: v1, v2, v3, v4, v5, v6
logical :: l, r
l = .false.
r = .false.
a(:, :, :) = 0
b(:, :, :) = 0
!$acc parallel reduction (.or.:l)
!$acc loop reduction (.or.:l) collapse (3)
do i = v1, v2
do j = v3, v4
do k = v5, v6
l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
l = l.or.k.lt.13.or.k.gt.18
if (.not.l) a(i, j, k) = a(i, j, k) + 1
end do
end do
end do
!$acc end parallel
do i = v1, v2
do j = v3, v4
do k = v5, v6
r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
r = r.or.k.lt.13.or.k.gt.18
if (.not.l) b(i, j, k) = b(i, j, k) + 1
end do
end do
end do
if (l .neqv. r) call abort
do i = v1, v2
do j = v3, v4
do k = v5, v6
if (a(i, j, k) .ne. b(i, j, k)) call abort
end do
end do
end do
end subroutine test2
subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
logical :: l, r
l = .false.
r = .false.
a(:, :, :) = 0
b(:, :, :) = 0
!$acc parallel reduction (.or.:l)
!$acc loop reduction (.or.:l) collapse (3)
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
l = l.or.k.lt.13.or.k.gt.18
if (.not.l) a(i, j, k) = a(i, j, k) + 1
end do
end do
end do
!$acc end parallel
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
r = r.or.k.lt.13.or.k.gt.18
if (.not.l) b(i, j, k) = b(i, j, k) + 1
end do
end do
end do
if (l .neqv. r) call abort
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
if (a(i, j, k) .ne. b(i, j, k)) call abort
end do
end do
end do
end subroutine test3
subroutine test4
integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
logical :: l, r
l = .false.
r = .false.
a(:, :, :) = 0
b(:, :, :) = 0
v1 = p1
v2 = p2
v3 = p3
v4 = p4
v5 = p5
v6 = p6
v7 = p7
v8 = p8
v9 = p9
!$acc parallel reduction (.or.:l)
!$acc loop reduction (.or.:l) collapse (3)
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
l = l.or.k.lt.13.or.k.gt.18
if (.not.l) a(i, j, k) = a(i, j, k) + 1
end do
end do
end do
!$acc end parallel
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
r = r.or.k.lt.13.or.k.gt.18
if (.not.r) b(i, j, k) = b(i, j, k) + 1
end do
end do
end do
if (l .neqv. r) call abort
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
if (a(i, j, k) .ne. b(i, j, k)) call abort
end do
end do
end do
end subroutine test4
end program collapse3

View File

@ -0,0 +1,244 @@
! Exercise nested function decomposition, gcc/tree-nested.c.
! { dg-do run }
program sub_collapse_3
call test1
call test2 (2, 6, -2, 4, 13, 18)
call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1)
call test4
call test5 (2, 6, -2, 4, 13, 18)
call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1)
contains
subroutine test1
integer :: a(3,3,3), k, kk, kkk, l, ll, lll
!$acc parallel
!$acc loop collapse(3)
do 115 k=1,3
dokk: do kk=1,3
do kkk=1,3
a(k,kk,kkk) = 1
enddo
enddo dokk
115 continue
!$acc end parallel
if (any(a(1:3,1:3,1:3).ne.1)) call abort
!$acc parallel
!$acc loop collapse(3)
dol: do 120 l=1,3
doll: do ll=1,3
do lll=1,3
a(l,ll,lll) = 2
enddo
enddo doll
120 end do dol
!$acc end parallel
if (any(a(1:3,1:3,1:3).ne.2)) call abort
end subroutine test1
subroutine test2(v1, v2, v3, v4, v5, v6)
integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
integer :: v1, v2, v3, v4, v5, v6
logical :: l, r
l = .false.
r = .false.
a(:, :, :) = 0
b(:, :, :) = 0
!$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
!$acc loop reduction (.or.:l) collapse (3)
do i = v1, v2
do j = v3, v4
do k = v5, v6
l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
l = l.or.k.lt.13.or.k.gt.18
if (.not.l) a(i, j, k) = a(i, j, k) + 1
end do
end do
end do
!$acc end parallel
do i = v1, v2
do j = v3, v4
do k = v5, v6
r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
r = r.or.k.lt.13.or.k.gt.18
if (.not.l) b(i, j, k) = b(i, j, k) + 1
end do
end do
end do
if (l .neqv. r) call abort
do i = v1, v2
do j = v3, v4
do k = v5, v6
if (a(i, j, k) .ne. b(i, j, k)) call abort
end do
end do
end do
end subroutine test2
subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
logical :: l, r
l = .false.
r = .false.
a(:, :, :) = 0
b(:, :, :) = 0
!$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
!$acc loop reduction (.or.:l) collapse (3)
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
l = l.or.k.lt.13.or.k.gt.18
if (.not.l) a(i, j, k) = a(i, j, k) + 1
end do
end do
end do
!$acc end parallel
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
r = r.or.k.lt.13.or.k.gt.18
if (.not.l) b(i, j, k) = b(i, j, k) + 1
end do
end do
end do
if (l .neqv. r) call abort
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
if (a(i, j, k) .ne. b(i, j, k)) call abort
end do
end do
end do
end subroutine test3
subroutine test4
integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
logical :: l, r
l = .false.
r = .false.
a(:, :, :) = 0
b(:, :, :) = 0
v1 = 2
v2 = 6
v3 = -2
v4 = 4
v5 = 13
v6 = 18
v7 = 1
v8 = 1
v9 = 1
!$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
!$acc loop reduction (.or.:l) collapse (3)
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
l = l.or.k.lt.13.or.k.gt.18
if (.not.l) a(i, j, k) = a(i, j, k) + 1
end do
end do
end do
!$acc end parallel
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
r = r.or.k.lt.13.or.k.gt.18
if (.not.r) b(i, j, k) = b(i, j, k) + 1
end do
end do
end do
if (l .neqv. r) call abort
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
if (a(i, j, k) .ne. b(i, j, k)) call abort
end do
end do
end do
end subroutine test4
subroutine test5(v1, v2, v3, v4, v5, v6)
integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
integer :: v1, v2, v3, v4, v5, v6
logical :: l, r
l = .false.
r = .false.
a(:, :, :) = 0
b(:, :, :) = 0
!$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
!$acc loop reduction (.or.:l) collapse (3)
do i = v1, v2
do j = v3, v4
do k = v5, v6
l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
l = l.or.k.lt.13.or.k.gt.18
if (.not.l) a(i, j, k) = a(i, j, k) + 1
end do
end do
end do
!$acc end parallel
do i = v1, v2
do j = v3, v4
do k = v5, v6
r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
r = r.or.k.lt.13.or.k.gt.18
if (.not.r) b(i, j, k) = b(i, j, k) + 1
end do
end do
end do
if (l .neqv. r) call abort
do i = v1, v2
do j = v3, v4
do k = v5, v6
if (a(i, j, k) .ne. b(i, j, k)) call abort
end do
end do
end do
end subroutine test5
subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9)
integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
logical :: l, r
l = .false.
r = .false.
a(:, :, :) = 0
b(:, :, :) = 0
!$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
!$acc loop reduction (.or.:l) collapse (3)
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
l = l.or.k.lt.13.or.k.gt.18
if (.not.l) a(i, j, k) = a(i, j, k) + 1
m = i * 100 + j * 10 + k
end do
end do
end do
!$acc end parallel
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
r = r.or.k.lt.13.or.k.gt.18
if (.not.r) b(i, j, k) = b(i, j, k) + 1
end do
end do
end do
if (l .neqv. r) call abort
do i = v1, v2, v7
do j = v3, v4, v8
do k = v5, v6, v9
if (a(i, j, k) .ne. b(i, j, k)) call abort
end do
end do
end do
end subroutine test6
end program sub_collapse_3