From 27d6ba889b34d38dc13b5f3a8959c576fe0477b1 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Fri, 10 Jun 2016 11:22:51 +0200 Subject: [PATCH] [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition gcc/ * gimplify.c (gimplify_adjust_omp_clauses): Discard OMP_CLAUSE_TILE. * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE. gcc/testsuite/ * c-c++-common/goacc/combined-directives.c: XFAIL tree scanning for OpenACC tile clauses. * gfortran.dg/goacc/combined-directives.f90: Likewise. gcc/ PR middle-end/71373 * tree-nested.c (convert_nonlocal_omp_clauses) (convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO, OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE. gcc/testsuite/ PR middle-end/71373 * gcc.dg/goacc/nested-function-1.c: New file. * gcc.dg/goacc/nested-function-2.c: Likewise. * gcc.dg/goacc/pr71373.c: Likewise. * gfortran.dg/goacc/cray-2.f95: Likewise. * gfortran.dg/goacc/loop-1-2.f95: Likewise. * gfortran.dg/goacc/loop-3-2.f95: Likewise. * gfortran.dg/goacc/cray.f95: Update. * gfortran.dg/goacc/loop-1.f95: Likewise. * gfortran.dg/goacc/loop-3.f95: Likewise. * gfortran.dg/goacc/subroutines.f90: Update, and rename to... * gfortran.dg/goacc/nested-function-1.f90: ... this new file. libgomp/testsuite/ PR middle-end/71373 * libgomp.oacc-c/nested-function-1.c: New file. * libgomp.oacc-c/nested-function-2.c: Likewise. * libgomp.oacc-fortran/nested-function-1.f90: Likewise. * libgomp.oacc-fortran/nested-function-2.f90: Likewise. * libgomp.oacc-fortran/nested-function-3.f90: Likewise. Co-Authored-By: Cesar Philippidis From-SVN: r237291 --- gcc/ChangeLog | 10 + gcc/gimplify.c | 6 + gcc/omp-low.c | 4 +- gcc/testsuite/ChangeLog | 20 ++ .../c-c++-common/goacc/combined-directives.c | 3 +- .../gcc.dg/goacc/nested-function-1.c | 100 +++++++ .../gcc.dg/goacc/nested-function-2.c | 45 ++++ gcc/testsuite/gcc.dg/goacc/pr71373.c | 41 +++ .../gfortran.dg/goacc/combined-directives.f90 | 3 +- gcc/testsuite/gfortran.dg/goacc/cray-2.f95 | 56 ++++ gcc/testsuite/gfortran.dg/goacc/cray.f95 | 6 +- gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 | 176 +++++++++++++ gcc/testsuite/gfortran.dg/goacc/loop-1.f95 | 30 ++- gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 | 58 +++++ gcc/testsuite/gfortran.dg/goacc/loop-3.f95 | 11 +- .../gfortran.dg/goacc/nested-function-1.f90 | 93 +++++++ .../gfortran.dg/goacc/subroutines.f90 | 73 ------ gcc/tree-nested.c | 30 +++ libgomp/ChangeLog | 10 + .../libgomp.oacc-c/nested-function-1.c | 52 ++++ .../libgomp.oacc-c/nested-function-2.c | 155 +++++++++++ .../nested-function-1.f90 | 70 +++++ .../nested-function-2.f90 | 173 +++++++++++++ .../nested-function-3.f90 | 244 ++++++++++++++++++ 24 files changed, 1369 insertions(+), 100 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/goacc/nested-function-1.c create mode 100644 gcc/testsuite/gcc.dg/goacc/nested-function-2.c create mode 100644 gcc/testsuite/gcc.dg/goacc/pr71373.c create mode 100644 gcc/testsuite/gfortran.dg/goacc/cray-2.f95 create mode 100644 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 create mode 100644 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 create mode 100644 gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 delete mode 100644 gcc/testsuite/gfortran.dg/goacc/subroutines.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-c/nested-function-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/nested-function-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6afbae76146..9cab3118d65 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,15 @@ 2016-06-10 Thomas Schwinge + PR middle-end/71373 + * tree-nested.c (convert_nonlocal_omp_clauses) + (convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC, + OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO, + OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE. + + * gimplify.c (gimplify_adjust_omp_clauses): Discard + OMP_CLAUSE_TILE. + * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE. + * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE__CACHE_. diff --git a/gcc/gimplify.c b/gcc/gimplify.c index f12c6a11456..7c19cf335be 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -8280,7 +8280,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, case OMP_CLAUSE_VECTOR: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + break; + case OMP_CLAUSE_TILE: + /* We're not yet making use of the information provided by OpenACC + tile clauses. Discard these here, to simplify later middle end + processing. */ + remove = true; break; default: diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 91d5fcfcab0..22e59094e82 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -2187,7 +2187,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: - case OMP_CLAUSE_TILE: case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: @@ -2200,6 +2199,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, install_var_local (decl, ctx); break; + case OMP_CLAUSE_TILE: case OMP_CLAUSE__CACHE_: default: gcc_unreachable (); @@ -2357,13 +2357,13 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: - case OMP_CLAUSE_TILE: case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: case OMP_CLAUSE__GRIDDIM_: break; + case OMP_CLAUSE_TILE: case OMP_CLAUSE__CACHE_: default: gcc_unreachable (); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e15b009b425..325de75e271 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,4 +1,24 @@ 2016-06-10 Thomas Schwinge + Cesar Philippidis + + PR middle-end/71373 + * gcc.dg/goacc/nested-function-1.c: New file. + * gcc.dg/goacc/nested-function-2.c: Likewise. + * gcc.dg/goacc/pr71373.c: Likewise. + * gfortran.dg/goacc/cray-2.f95: Likewise. + * gfortran.dg/goacc/loop-1-2.f95: Likewise. + * gfortran.dg/goacc/loop-3-2.f95: Likewise. + * gfortran.dg/goacc/cray.f95: Update. + * gfortran.dg/goacc/loop-1.f95: Likewise. + * gfortran.dg/goacc/loop-3.f95: Likewise. + * gfortran.dg/goacc/subroutines.f90: Update, and rename to... + * gfortran.dg/goacc/nested-function-1.f90: ... this new file. + +2016-06-10 Thomas Schwinge + + * c-c++-common/goacc/combined-directives.c: XFAIL tree scanning + for OpenACC tile clauses. + * gfortran.dg/goacc/combined-directives.f90: Likewise. PR c/71381 * c-c++-common/goacc/cache-1.c: Update. Move invalid usage tests diff --git a/gcc/testsuite/c-c++-common/goacc/combined-directives.c b/gcc/testsuite/c-c++-common/goacc/combined-directives.c index c2a3c57b48b..3fa800d7bbe 100644 --- a/gcc/testsuite/c-c++-common/goacc/combined-directives.c +++ b/gcc/testsuite/c-c++-common/goacc/combined-directives.c @@ -111,6 +111,7 @@ test () // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } } // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } } // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } } -// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } } +// XFAILed: OpenACC tile clauses are discarded during gimplification. +// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } } // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } } // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } } diff --git a/gcc/testsuite/gcc.dg/goacc/nested-function-1.c b/gcc/testsuite/gcc.dg/goacc/nested-function-1.c new file mode 100644 index 00000000000..e17c0e2227f --- /dev/null +++ b/gcc/testsuite/gcc.dg/goacc/nested-function-1.c @@ -0,0 +1,100 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. */ +/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran + version. */ + +int main () +{ +#define N 100 + int nonlocal_arg; + int nonlocal_a[N]; + int nonlocal_i; + int nonlocal_j; + + for (int i = 0; i < N; ++i) + nonlocal_a[i] = 5; + nonlocal_arg = 5; + + void local () + { + int local_i; + int local_arg; + int local_a[N]; + int local_j; + + for (int i = 0; i < N; ++i) + local_a[i] = 5; + local_arg = 5; + +#pragma acc kernels loop \ + gang(num:local_arg) worker(local_arg) vector(local_arg) \ + wait async(local_arg) + for (local_i = 0; local_i < N; ++local_i) + { +#pragma acc cache (local_a[local_i:5]) + local_a[local_i] = 100; +#pragma acc loop seq tile(*) + for (local_j = 0; local_j < N; ++local_j) + ; +#pragma acc loop auto independent tile(1) + for (local_j = 0; local_j < N; ++local_j) + ; + } + +#pragma acc kernels loop \ + gang(static:local_arg) worker(local_arg) vector(local_arg) \ + wait(local_arg, local_arg + 1, local_arg + 2) async + for (local_i = 0; local_i < N; ++local_i) + { +#pragma acc cache (local_a[local_i:4]) + local_a[local_i] = 100; +#pragma acc loop seq tile(1) + for (local_j = 0; local_j < N; ++local_j) + ; +#pragma acc loop auto independent tile(*) + for (local_j = 0; local_j < N; ++local_j) + ; + } + } + + void nonlocal () + { + for (int i = 0; i < N; ++i) + nonlocal_a[i] = 5; + nonlocal_arg = 5; + +#pragma acc kernels loop \ + gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \ + wait async(nonlocal_arg) + for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i) + { +#pragma acc cache (nonlocal_a[nonlocal_i:3]) + nonlocal_a[nonlocal_i] = 100; +#pragma acc loop seq tile(2) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; +#pragma acc loop auto independent tile(3) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; + } + +#pragma acc kernels loop \ + gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \ + wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async + for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i) + { +#pragma acc cache (nonlocal_a[nonlocal_i:2]) + nonlocal_a[nonlocal_i] = 100; +#pragma acc loop seq tile(*) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; +#pragma acc loop auto independent tile(*) + for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j) + ; + } + } + + local (); + nonlocal (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/goacc/nested-function-2.c b/gcc/testsuite/gcc.dg/goacc/nested-function-2.c new file mode 100644 index 00000000000..70c9ec8ebfa --- /dev/null +++ b/gcc/testsuite/gcc.dg/goacc/nested-function-2.c @@ -0,0 +1,45 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. */ + +int +main (void) +{ + int j = 0, k = 6, l = 7, m = 8; + void simple (void) + { + int i; +#pragma acc parallel + { +#pragma acc loop + for (i = 0; i < m; i+= k) + j = (m + i - j) * l; + } + } + void collapse (void) + { + int x, y, z; +#pragma acc parallel + { +#pragma acc loop collapse (3) + for (x = 0; x < k; x++) + for (y = -5; y < l; y++) + for (z = 0; z < m; z++) + j += x + y + z; + } + } + void reduction (void) + { + int x, y, z; +#pragma acc parallel reduction (+:j) + { +#pragma acc loop reduction (+:j) collapse (3) + for (x = 0; x < k; x++) + for (y = -5; y < l; y++) + for (z = 0; z < m; z++) + j += x + y + z; + } + } + simple(); + collapse(); + reduction(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/goacc/pr71373.c b/gcc/testsuite/gcc.dg/goacc/pr71373.c new file mode 100644 index 00000000000..9381752cc9d --- /dev/null +++ b/gcc/testsuite/gcc.dg/goacc/pr71373.c @@ -0,0 +1,41 @@ +/* Unintentional nested function usage. */ +/* Due to missing right braces '}', the following functions are parsed as + nested functions. This ran into an ICE. */ + +void foo (void) +{ + #pragma acc parallel + { + #pragma acc loop independent + for (int i = 0; i < 16; i++) + ; + // Note right brace '}' commented out here. + //} +} +void bar (void) +{ +} + +// Adding right brace '}' here, to make this compile. +} + + +// ..., and the other way round: + +void BAR (void) +{ +// Note right brace '}' commented out here. +//} + +void FOO (void) +{ + #pragma acc parallel + { + #pragma acc loop independent + for (int i = 0; i < 16; i++) + ; + } +} + +// Adding right brace '}' here, to make this compile. +} diff --git a/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 b/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 index 42a447ad06b..abb5e6b6c3d 100644 --- a/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 @@ -143,7 +143,8 @@ end subroutine test ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } } -! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } } +! XFAILed: OpenACC tile clauses are discarded during gimplification. +! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } } ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/cray-2.f95 b/gcc/testsuite/gfortran.dg/goacc/cray-2.f95 new file mode 100644 index 00000000000..51b79b53636 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/cray-2.f95 @@ -0,0 +1,56 @@ +! { dg-additional-options "-fcray-pointer" } +! See also cray.f95. + +program test + call oacc1 +contains + subroutine oacc1 + implicit none + integer :: i + real :: pointee + pointer (ptr, pointee) + !$acc declare device_resident (pointee) + !$acc declare device_resident (ptr) + !$acc data copy (pointee) ! { dg-error "Cray pointee" } + !$acc end data + !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" } + !$acc end data + !$acc parallel private (pointee) ! { dg-error "Cray pointee" } + !$acc end parallel + !$acc host_data use_device (pointee) ! { dg-error "Cray pointee" } + !$acc end host_data + !$acc parallel loop reduction(+:pointee) ! { dg-error "Cray pointee" } + do i = 1,5 + enddo + !$acc end parallel loop + !$acc parallel loop + do i = 1,5 + !$acc cache (pointee) ! { dg-error "Cray pointee" } + enddo + !$acc end parallel loop + !$acc update device (pointee) ! { dg-error "Cray pointee" } + !$acc update host (pointee) ! { dg-error "Cray pointee" } + !$acc update self (pointee) ! { dg-error "Cray pointee" } + !$acc data copy (ptr) + !$acc end data + !$acc data deviceptr (ptr) ! { dg-error "Cray pointer" } + !$acc end data + !$acc parallel private (ptr) + !$acc end parallel + !$acc host_data use_device (ptr) ! { dg-error "Cray pointer" } + !$acc end host_data + !$acc parallel loop reduction(+:ptr) ! { dg-error "Cray pointer" } + do i = 1,5 + enddo + !$acc end parallel loop + !$acc parallel loop + do i = 1,5 + !TODO: This must fail, as in openacc-1_0-branch. + !$acc cache (ptr) ! { dg-error "" "TODO" { xfail *-*-* } } + enddo + !$acc end parallel loop + !$acc update device (ptr) + !$acc update host (ptr) + !$acc update self (ptr) + end subroutine oacc1 +end program test diff --git a/gcc/testsuite/gfortran.dg/goacc/cray.f95 b/gcc/testsuite/gfortran.dg/goacc/cray.f95 index 705c18c992d..d6d531705a6 100644 --- a/gcc/testsuite/gfortran.dg/goacc/cray.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/cray.f95 @@ -1,5 +1,5 @@ -! { dg-do compile } ! { dg-additional-options "-fcray-pointer" } +! See also cray-2.f95. module test contains @@ -8,8 +8,8 @@ contains integer :: i real :: pointee pointer (ptr, pointee) - !$acc declare device_resident (pointee) - !$acc declare device_resident (ptr) + !$acc declare device_resident (pointee) + !$acc declare device_resident (ptr) !$acc data copy (pointee) ! { dg-error "Cray pointee" } !$acc end data !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" } diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 new file mode 100644 index 00000000000..79665b948c3 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 @@ -0,0 +1,176 @@ +! See also loop-1.f95. + +program test + call test1 +contains + +subroutine test1 + integer :: i, j, k, b(10) + integer, dimension (30) :: a + double precision :: d + real :: r + i = 0 + !$acc loop + do 100 ! { dg-error "cannot be a DO WHILE or DO without loop control" } + if (i .gt. 0) exit ! { dg-error "EXIT statement" } + 100 i = i + 1 + i = 0 + !$acc loop + do ! { dg-error "cannot be a DO WHILE or DO without loop control" } + if (i .gt. 0) exit ! { dg-error "EXIT statement" } + i = i + 1 + end do + i = 0 + !$acc loop + do 200 while (i .lt. 4) ! { dg-error "cannot be a DO WHILE or DO without loop control" } + 200 i = i + 1 + !$acc loop + do while (i .lt. 8) ! { dg-error "cannot be a DO WHILE or DO without loop control" } + i = i + 1 + end do + !$acc loop + do 300 d = 1, 30, 6 + i = d + 300 a(i) = 1 + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 } + !$acc loop + do d = 1, 30, 5 + i = d + a(i) = 2 + end do + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 } + !$acc loop + do i = 1, 30 + if (i .eq. 16) exit ! { dg-error "EXIT statement" } + end do + !$acc loop + outer: do i = 1, 30 + do j = 5, 10 + if (i .eq. 6 .and. j .eq. 7) exit outer ! { dg-error "EXIT statement" } + end do + end do outer + last: do i = 1, 30 + end do last + + ! different types of loop are allowed + !$acc loop + do i = 1,10 + end do + !$acc loop + do 400, i = 1,10 +400 a(i) = i + + ! after loop directive must be loop + !$acc loop + a(1) = 1 ! { dg-error "Expected DO loop" } + do i = 1,10 + enddo + + ! combined directives may be used with/without end + !$acc parallel loop + do i = 1,10 + enddo + !$acc parallel loop + do i = 1,10 + enddo + !$acc end parallel loop + !$acc kernels loop + do i = 1,10 + enddo + !$acc kernels loop + do i = 1,10 + enddo + !$acc end kernels loop + + !$acc kernels loop reduction(max:i) + do i = 1,10 + enddo + !$acc kernels + !$acc loop reduction(max:i) + do i = 1,10 + enddo + !$acc end kernels + + !$acc parallel loop collapse(0) ! { dg-error "constant positive integer" } + do i = 1,10 + enddo + + !$acc parallel loop collapse(-1) ! { dg-error "constant positive integer" } + do i = 1,10 + enddo + + !$acc parallel loop collapse(i) ! { dg-error "Constant expression required" } + do i = 1,10 + enddo + + !$acc parallel loop collapse(4) ! { dg-error "not enough DO loops for collapsed" } + do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + a(i+j-k) = i + j + k + end do + end do + end do + !$acc parallel loop collapse(2) + do i = 1, 5, 2 + do j = i + 1, 7, i ! { dg-error "collapsed loops don.t form rectangular iteration space" } + end do + end do + !$acc parallel loop collapse(2) + do i = 1, 3 + do j = 4, 6 + end do + end do + !$acc parallel loop collapse(2) + do i = 1, 3 + do j = 4, 6 + end do + k = 4 + end do + !$acc parallel loop collapse(3-1) + do i = 1, 3 + do j = 4, 6 + end do + k = 4 + end do + !$acc parallel loop collapse(1+1) + do i = 1, 3 + do j = 4, 6 + end do + k = 4 + end do + !$acc parallel loop collapse(2) + do i = 1, 3 + do ! { dg-error "cannot be a DO WHILE or DO without loop control" } + end do + end do + !$acc parallel loop collapse(2) + do i = 1, 3 + do r = 4, 6 + end do + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 } + end do + + ! Both seq and independent are not allowed + !$acc loop independent seq ! { dg-error "SEQ conflicts with INDEPENDENT" } + do i = 1,10 + enddo + + + !$acc cache (a(1:10)) ! { dg-error "ACC CACHE directive must be inside of loop" } + + do i = 1,10 + !$acc cache(a(i:i+1)) + enddo + + do i = 1,10 + !$acc cache(a(i:i+1)) + a(i) = i + !$acc cache(a(i+2:i+2+1)) + enddo + +end subroutine test1 +end program test diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-1.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-1.f95 index a605f038926..5f81b7a1d19 100644 --- a/gcc/testsuite/gfortran.dg/goacc/loop-1.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/loop-1.f95 @@ -1,8 +1,10 @@ +! See also loop-1-2.f95. + module test implicit none contains -subroutine test1 +subroutine test1 integer :: i, j, k, b(10) integer, dimension (30) :: a double precision :: d @@ -30,15 +32,15 @@ subroutine test1 do 300 d = 1, 30, 6 i = d 300 a(i) = 1 - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 } !$acc loop do d = 1, 30, 5 i = d a(i) = 2 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 } !$acc loop do i = 1, 30 if (i .eq. 16) exit ! { dg-error "EXIT statement" } @@ -53,7 +55,7 @@ subroutine test1 end do last ! different types of loop are allowed - !$acc loop + !$acc loop do i = 1,10 end do !$acc loop @@ -65,8 +67,8 @@ subroutine test1 a(1) = 1 ! { dg-error "Expected DO loop" } do i = 1,10 enddo - - ! combined directives may be used with/without end + + ! combined directives may be used with/without end !$acc parallel loop do i = 1,10 enddo @@ -82,11 +84,11 @@ subroutine test1 enddo !$acc end kernels loop - !$acc kernels loop reduction(max:i) + !$acc kernels loop reduction(max:i) do i = 1,10 enddo - !$acc kernels - !$acc loop reduction(max:i) + !$acc kernels + !$acc loop reduction(max:i) do i = 1,10 enddo !$acc end kernels @@ -118,7 +120,7 @@ subroutine test1 end do !$acc parallel loop collapse(2) do i = 1, 3 - do j = 4, 6 + do j = 4, 6 end do end do !$acc parallel loop collapse(2) @@ -148,8 +150,8 @@ subroutine test1 do i = 1, 3 do r = 4, 6 end do - ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 } - ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 } + ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 } + ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 } end do ! Both seq and independent are not allowed diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 new file mode 100644 index 00000000000..9be74a85919 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 @@ -0,0 +1,58 @@ +! { dg-additional-options "-std=f2008" } +! See also loop-3.f95. + +program test + call test1 +contains +subroutine test1 + implicit none + integer :: i, j + + ! !$acc end loop not required by spec + !$acc loop + do i = 1,5 + enddo + !$acc end loop ! { dg-warning "Redundant" } + + !$acc loop + do i = 1,5 + enddo + j = 1 + !$acc end loop ! { dg-error "Unexpected" } + + !$acc parallel + !$acc loop + do i = 1,5 + enddo + !$acc end parallel + !$acc end loop ! { dg-error "Unexpected" } + + ! OpenACC supports Fortran 2008 do concurrent statement + !$acc loop + do concurrent (i = 1:5) + end do + + !$acc loop + outer_loop: do i = 1, 5 + inner_loop: do j = 1,5 + if (i .eq. j) cycle outer_loop + if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" } + end do inner_loop + end do outer_loop + + outer_loop1: do i = 1, 5 + !$acc loop + inner_loop1: do j = 1,5 + if (i .eq. j) cycle outer_loop1 ! { dg-error "CYCLE statement" } + end do inner_loop1 + end do outer_loop1 + + !$acc loop collapse(2) + outer_loop2: do i = 1, 5 + inner_loop2: do j = 1,5 + if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" } + if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" } + end do inner_loop2 + end do outer_loop2 +end subroutine test1 +end program test diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-3.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-3.f95 index 2a866c79234..30930f404f3 100644 --- a/gcc/testsuite/gfortran.dg/goacc/loop-3.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/loop-3.f95 @@ -1,10 +1,10 @@ -! { dg-do compile } ! { dg-additional-options "-std=f2008" } +! See also loop-3-2.f95. subroutine test1 implicit none integer :: i, j - + ! !$acc end loop not required by spec !$acc loop do i = 1,5 @@ -23,7 +23,7 @@ subroutine test1 enddo !$acc end parallel !$acc end loop ! { dg-error "Unexpected" } - + ! OpenACC supports Fortran 2008 do concurrent statement !$acc loop do concurrent (i = 1:5) @@ -35,7 +35,7 @@ subroutine test1 if (i .eq. j) cycle outer_loop if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" } end do inner_loop - end do outer_loop + end do outer_loop outer_loop1: do i = 1, 5 !$acc loop @@ -50,6 +50,5 @@ subroutine test1 if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" } if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" } end do inner_loop2 - end do outer_loop2 + end do outer_loop2 end subroutine test1 - diff --git a/gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 b/gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 new file mode 100644 index 00000000000..2fcaa400ee3 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 @@ -0,0 +1,93 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. +! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version. + +program main + integer, parameter :: N = 100 + integer :: nonlocal_arg + integer :: nonlocal_a(N) + integer :: nonlocal_i + integer :: nonlocal_j + + nonlocal_a (:) = 5 + nonlocal_arg = 5 + + call local () + call nonlocal () + +contains + + subroutine local () + integer :: local_i + integer :: local_arg + integer :: local_a(N) + integer :: local_j + + local_a (:) = 5 + local_arg = 5 + + !$acc kernels loop & + !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) & + !$acc wait async(local_arg) + do local_i = 1, N + !$acc cache (local_a(local_i:local_i + 5)) + local_a(local_i) = 100 + !$acc loop seq tile(*) + do local_j = 1, N + enddo + !$acc loop auto independent tile(1) + do local_j = 1, N + enddo + enddo + !$acc end kernels loop + + !$acc kernels loop & + !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) & + !$acc wait(local_arg, local_arg + 1, local_arg + 2) async + do local_i = 1, N + !$acc cache (local_a(local_i:local_i + 4)) + local_a(local_i) = 100 + !$acc loop seq tile(1) + do local_j = 1, N + enddo + !$acc loop auto independent tile(*) + do local_j = 1, N + enddo + enddo + !$acc end kernels loop + end subroutine local + + subroutine nonlocal () + nonlocal_a (:) = 5 + nonlocal_arg = 5 + + !$acc kernels loop & + !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) & + !$acc wait async(nonlocal_arg) + do nonlocal_i = 1, N + !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3)) + nonlocal_a(nonlocal_i) = 100 + !$acc loop seq tile(2) + do nonlocal_j = 1, N + enddo + !$acc loop auto independent tile(3) + do nonlocal_j = 1, N + enddo + enddo + !$acc end kernels loop + + !$acc kernels loop & + !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) & + !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async + do nonlocal_i = 1, N + !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2)) + nonlocal_a(nonlocal_i) = 100 + !$acc loop seq tile(*) + do nonlocal_j = 1, N + enddo + !$acc loop auto independent tile(*) + do nonlocal_j = 1, N + enddo + enddo + !$acc end kernels loop + end subroutine nonlocal +end program main diff --git a/gcc/testsuite/gfortran.dg/goacc/subroutines.f90 b/gcc/testsuite/gfortran.dg/goacc/subroutines.f90 deleted file mode 100644 index 6cab798d458..00000000000 --- a/gcc/testsuite/gfortran.dg/goacc/subroutines.f90 +++ /dev/null @@ -1,73 +0,0 @@ -! Exercise how tree-nested.c handles gang, worker vector and seq. - -! { dg-do compile } - -program main - integer, parameter :: N = 100 - integer :: nonlocal_arg - integer :: nonlocal_a(N) - integer :: nonlocal_i - integer :: nonlocal_j - - nonlocal_a (:) = 5 - nonlocal_arg = 5 - - call local () - call nonlocal () - -contains - - subroutine local () - integer :: local_i - integer :: local_arg - integer :: local_a(N) - integer :: local_j - - local_a (:) = 5 - local_arg = 5 - - !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg) - do local_i = 1, N - local_a(local_i) = 100 - !$acc loop seq - do local_j = 1, N - enddo - enddo - !$acc end kernels loop - - !$acc kernels loop gang(static:local_arg) worker(local_arg) & - !$acc vector(local_arg) - do local_i = 1, N - local_a(local_i) = 100 - !$acc loop seq - do local_j = 1, N - enddo - enddo - !$acc end kernels loop - end subroutine local - - subroutine nonlocal () - nonlocal_a (:) = 5 - nonlocal_arg = 5 - - !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) & - !$acc vector(nonlocal_arg) - do nonlocal_i = 1, N - nonlocal_a(nonlocal_i) = 100 - !$acc loop seq - do nonlocal_j = 1, N - enddo - enddo - !$acc end kernels loop - - !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) & - !$acc vector(nonlocal_arg) - do nonlocal_i = 1, N - nonlocal_a(nonlocal_i) = 100 - !$acc loop seq - do nonlocal_j = 1, N - enddo - enddo - !$acc end kernels loop - end subroutine nonlocal -end program main diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c index 25a92aaa04b..6fc63260000 100644 --- a/gcc/tree-nested.c +++ b/gcc/tree-nested.c @@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: /* Several OpenACC clauses have optional arguments. Check if they are present. */ if (OMP_CLAUSE_OPERAND (clause, 0)) @@ -1197,8 +1199,21 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: break; + case OMP_CLAUSE_TILE: + /* OpenACC tile clauses are discarded during gimplification, so we + don't expect to see anything here. */ + gcc_unreachable (); + + case OMP_CLAUSE__CACHE_: + /* These clauses belong to the OpenACC cache directive, which is + discarded during gimplification, so we don't expect to see + anything here. */ + gcc_unreachable (); + default: gcc_unreachable (); } @@ -1790,6 +1805,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: /* Several OpenACC clauses have optional arguments. Check if they are present. */ if (OMP_CLAUSE_OPERAND (clause, 0)) @@ -1878,8 +1895,21 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: break; + case OMP_CLAUSE_TILE: + /* OpenACC tile clauses are discarded during gimplification, so we + don't expect to see anything here. */ + gcc_unreachable (); + + case OMP_CLAUSE__CACHE_: + /* These clauses belong to the OpenACC cache directive, which is + discarded during gimplification, so we don't expect to see + anything here. */ + gcc_unreachable (); + default: gcc_unreachable (); } diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 5c7f41abf89..cf551f49384 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,13 @@ +2016-06-10 Thomas Schwinge + Cesar Philippidis + + PR middle-end/71373 + * libgomp.oacc-c/nested-function-1.c: New file. + * libgomp.oacc-c/nested-function-2.c: Likewise. + * libgomp.oacc-fortran/nested-function-1.f90: Likewise. + * libgomp.oacc-fortran/nested-function-2.f90: Likewise. + * libgomp.oacc-fortran/nested-function-3.f90: Likewise. + 2016-06-10 Thomas Schwinge PR c/71381 diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-function-1.c b/libgomp/testsuite/libgomp.oacc-c/nested-function-1.c new file mode 100644 index 00000000000..fb2a3acdfa9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/nested-function-1.c @@ -0,0 +1,52 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. */ + +int +main (void) +{ + void test1 () + { + int i, j, k; + int a[4][7][8]; + + __builtin_memset (a, 0, sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(4 - 1) + for (i = 1; i <= 3; i++) + for (j = 4; j <= 6; j++) + for (k = 5; k <= 7; k++) + a[i][j][k] = i + j + k; + + for (i = 1; i <= 3; i++) + for (j = 4; j <= 6; j++) + for (k = 5; k <= 7; k++) + if (a[i][j][k] != i + j + k) + __builtin_abort(); + } + + void test2 () + { + int i, j, k; + int a[4][4][4]; + + __builtin_memset (a, 0, sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(3) + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 1; k <= 3; k++) + a[i][j][k] = 1; + + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 1; k <= 3; k++) + if (a[i][j][k] != 1) + __builtin_abort (); + } + + test1 (); + test2 (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-function-2.c b/libgomp/testsuite/libgomp.oacc-c/nested-function-2.c new file mode 100644 index 00000000000..2c3f3feb7f8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/nested-function-2.c @@ -0,0 +1,155 @@ +/* Exercise nested function decomposition, gcc/tree-nested.c. */ + +int +main (void) +{ + int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1; + + void test1 () + { + int i, j, k; + int a[4][4][4]; + + __builtin_memset (a, '\0', sizeof (a)); + +#pragma acc parallel +#pragma acc loop collapse(3) + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 2; k <= 3; k++) + a[i][j][k] = 1; + + for (i = 1; i <= 3; i++) + for (j = 1; j <= 3; j++) + for (k = 2; k <= 3; k++) + if (a[i][j][k] != 1) + __builtin_abort(); + } + + void test2 (int v1, int v2, int v3, int v4, int v5, int v6) + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8, + int v9) + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + void test4 () + { + int i, j, k, l = 0, r = 0; + int a[7][5][19]; + int b[7][5][19]; + int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8, + v9 = p9; + + __builtin_memset (a, '\0', sizeof (a)); + __builtin_memset (b, '\0', sizeof (b)); + +#pragma acc parallel reduction (||:l) +#pragma acc loop reduction (||:l) collapse(3) + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!l) + a[i][j][k] += 1; + } + + for (i = v1; i <= v2; i += v7) + for (j = v3; j <= v4; j += v8) + for (k = v5; k <= v6; k += v9) + { + r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18; + if (!r) + b[i][j][k] += 1; + } + + if (l != r) + __builtin_abort (); + + for (i = v1; i <= v2; i++) + for (j = v3; j <= v4; j++) + for (k = v5; k <= v6; k++) + if (b[i][j][k] != a[i][j][k]) + __builtin_abort (); + } + + test1 (); + test2 (p1, p2, p3, p4, p5, p6); + test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9); + test4 (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 new file mode 100644 index 00000000000..fdbca4481f8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 @@ -0,0 +1,70 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! { dg-do run } + +program collapse2 + call test1 + call test2 +contains + subroutine test1 + integer :: i, j, k, a(1:3, 4:6, 5:7) + logical :: l + l = .false. + a(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop worker vector collapse(4 - 1) + do 164 i = 1, 3 + do 164 j = 4, 6 + do 164 k = 5, 7 + a(i, j, k) = i + j + k +164 end do + !$acc loop worker vector reduction(.or.:l) collapse(2) +firstdo: do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + if (a(i, j, k) .ne. (i + j + k)) l = .true. + end do + end do + end do firstdo + !$acc end parallel + if (l) call abort + end subroutine test1 + + subroutine test2 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + a = 0 + !$acc parallel + ! Use "gang(static:1)" here and below to effectively turn gang-redundant + ! execution mode into something like gang-single. + !$acc loop gang(static:1) collapse(1) + do 115 k=1,3 + !$acc loop collapse(2) + dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc loop gang(static:1) collapse(1) + do k=1,3 + if (any(a(k,1:3,1:3).ne.1)) call abort + enddo + ! Use "gang(static:1)" here and below to effectively turn gang-redundant + ! execution mode into something like gang-single. + !$acc loop gang(static:1) collapse(1) + dol: do 120 l=1,3 + !$acc loop collapse(2) + doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc loop gang(static:1) collapse(1) + do l=1,3 + if (any(a(l,1:3,1:3).ne.2)) call abort + enddo + !$acc end parallel + end subroutine test2 + +end program collapse2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 new file mode 100644 index 00000000000..4e2819641ea --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 @@ -0,0 +1,173 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! { dg-do run } + +program collapse3 + integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9 + p1 = 2 + p2 = 6 + p3 = -2 + p4 = 4 + p5 = 13 + p6 = 18 + p7 = 1 + p8 = 1 + p9 = 1 + call test1 + call test2 (p1, p2, p3, p4, p5, p6) + call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9) + call test4 +contains + subroutine test1 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + !$acc parallel + !$acc loop collapse(3) + do 115 k=1,3 +dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.1)) call abort + !$acc parallel + !$acc loop collapse(3) +dol: do 120 l=1,3 +doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.2)) call abort + end subroutine test1 + + subroutine test2(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test2 + + subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test3 + + subroutine test4 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = p1 + v2 = p2 + v3 = p3 + v4 = p4 + v5 = p5 + v6 = p6 + v7 = p7 + v8 = p8 + v9 = p9 + !$acc parallel reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test4 + +end program collapse3 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 new file mode 100644 index 00000000000..2f6485ef8cf --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 @@ -0,0 +1,244 @@ +! Exercise nested function decomposition, gcc/tree-nested.c. + +! { dg-do run } + +program sub_collapse_3 + call test1 + call test2 (2, 6, -2, 4, 13, 18) + call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1) + call test4 + call test5 (2, 6, -2, 4, 13, 18) + call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1) +contains + subroutine test1 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + !$acc parallel + !$acc loop collapse(3) + do 115 k=1,3 +dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.1)) call abort + !$acc parallel + !$acc loop collapse(3) +dol: do 120 l=1,3 +doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.2)) call abort + end subroutine test1 + + subroutine test2(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test2 + + subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test3 + + subroutine test4 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = 2 + v2 = 6 + v3 = -2 + v4 = 4 + v5 = 13 + v6 = 18 + v7 = 1 + v8 = 1 + v9 = 1 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test4 + + subroutine test5(v1, v2, v3, v4, v5, v6) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test5 + + subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9) + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l) + !$acc loop reduction (.or.:l) collapse (3) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + m = i * 100 + j * 10 + k + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do + end subroutine test6 + +end program sub_collapse_3