From 27d6ba889b34d38dc13b5f3a8959c576fe0477b1 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Fri, 10 Jun 2016 11:22:51 +0200
Subject: [PATCH] [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested
 function decomposition

	gcc/
	* gimplify.c (gimplify_adjust_omp_clauses): Discard
	OMP_CLAUSE_TILE.
	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
	gcc/testsuite/
	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
	for OpenACC tile clauses.
	* gfortran.dg/goacc/combined-directives.f90: Likewise.

	gcc/
	PR middle-end/71373
	* tree-nested.c (convert_nonlocal_omp_clauses)
	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
	gcc/testsuite/
	PR middle-end/71373
	* gcc.dg/goacc/nested-function-1.c: New file.
	* gcc.dg/goacc/nested-function-2.c: Likewise.
	* gcc.dg/goacc/pr71373.c: Likewise.
	* gfortran.dg/goacc/cray-2.f95: Likewise.
	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
	* gfortran.dg/goacc/cray.f95: Update.
	* gfortran.dg/goacc/loop-1.f95: Likewise.
	* gfortran.dg/goacc/loop-3.f95: Likewise.
	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
	libgomp/testsuite/
	PR middle-end/71373
	* libgomp.oacc-c/nested-function-1.c: New file.
	* libgomp.oacc-c/nested-function-2.c: Likewise.
	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.

Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>

From-SVN: r237291
---
 gcc/ChangeLog                                 |  10 +
 gcc/gimplify.c                                |   6 +
 gcc/omp-low.c                                 |   4 +-
 gcc/testsuite/ChangeLog                       |  20 ++
 .../c-c++-common/goacc/combined-directives.c  |   3 +-
 .../gcc.dg/goacc/nested-function-1.c          | 100 +++++++
 .../gcc.dg/goacc/nested-function-2.c          |  45 ++++
 gcc/testsuite/gcc.dg/goacc/pr71373.c          |  41 +++
 .../gfortran.dg/goacc/combined-directives.f90 |   3 +-
 gcc/testsuite/gfortran.dg/goacc/cray-2.f95    |  56 ++++
 gcc/testsuite/gfortran.dg/goacc/cray.f95      |   6 +-
 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95  | 176 +++++++++++++
 gcc/testsuite/gfortran.dg/goacc/loop-1.f95    |  30 ++-
 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95  |  58 +++++
 gcc/testsuite/gfortran.dg/goacc/loop-3.f95    |  11 +-
 .../gfortran.dg/goacc/nested-function-1.f90   |  93 +++++++
 .../gfortran.dg/goacc/subroutines.f90         |  73 ------
 gcc/tree-nested.c                             |  30 +++
 libgomp/ChangeLog                             |  10 +
 .../libgomp.oacc-c/nested-function-1.c        |  52 ++++
 .../libgomp.oacc-c/nested-function-2.c        | 155 +++++++++++
 .../nested-function-1.f90                     |  70 +++++
 .../nested-function-2.f90                     | 173 +++++++++++++
 .../nested-function-3.f90                     | 244 ++++++++++++++++++
 24 files changed, 1369 insertions(+), 100 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/goacc/nested-function-1.c
 create mode 100644 gcc/testsuite/gcc.dg/goacc/nested-function-2.c
 create mode 100644 gcc/testsuite/gcc.dg/goacc/pr71373.c
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/cray-2.f95
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
 delete mode 100644 gcc/testsuite/gfortran.dg/goacc/subroutines.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6afbae76146..9cab3118d65 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,15 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	* tree-nested.c (convert_nonlocal_omp_clauses)
+	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
+	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
+	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
+
+	* gimplify.c (gimplify_adjust_omp_clauses): Discard
+	OMP_CLAUSE_TILE.
+	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
+
 	* omp-low.c (scan_sharing_clauses): Don't expect
 	OMP_CLAUSE__CACHE_.
 
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index f12c6a11456..7c19cf335be 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -8280,7 +8280,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	case OMP_CLAUSE_VECTOR:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
+	  break;
+
 	case OMP_CLAUSE_TILE:
+	  /* We're not yet making use of the information provided by OpenACC
+	     tile clauses.  Discard these here, to simplify later middle end
+	     processing.  */
+	  remove = true;
 	  break;
 
 	default:
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 91d5fcfcab0..22e59094e82 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -2187,7 +2187,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2200,6 +2199,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	    install_var_local (decl, ctx);
 	  break;
 
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
@@ -2357,13 +2357,13 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
 	case OMP_CLAUSE__GRIDDIM_:
 	  break;
 
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e15b009b425..325de75e271 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,4 +1,24 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+	    Cesar Philippidis  <cesar@codesourcery.com>
+
+	PR middle-end/71373
+	* gcc.dg/goacc/nested-function-1.c: New file.
+	* gcc.dg/goacc/nested-function-2.c: Likewise.
+	* gcc.dg/goacc/pr71373.c: Likewise.
+	* gfortran.dg/goacc/cray-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
+	* gfortran.dg/goacc/cray.f95: Update.
+	* gfortran.dg/goacc/loop-1.f95: Likewise.
+	* gfortran.dg/goacc/loop-3.f95: Likewise.
+	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
+	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
+
+2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
+	for OpenACC tile clauses.
+	* gfortran.dg/goacc/combined-directives.f90: Likewise.
 
 	PR c/71381
 	* c-c++-common/goacc/cache-1.c: Update.  Move invalid usage tests
diff --git a/gcc/testsuite/c-c++-common/goacc/combined-directives.c b/gcc/testsuite/c-c++-common/goacc/combined-directives.c
index c2a3c57b48b..3fa800d7bbe 100644
--- a/gcc/testsuite/c-c++-common/goacc/combined-directives.c
+++ b/gcc/testsuite/c-c++-common/goacc/combined-directives.c
@@ -111,6 +111,7 @@ test ()
 // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
-// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
+// XFAILed: OpenACC tile clauses are discarded during gimplification.
+// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
diff --git a/gcc/testsuite/gcc.dg/goacc/nested-function-1.c b/gcc/testsuite/gcc.dg/goacc/nested-function-1.c
new file mode 100644
index 00000000000..e17c0e2227f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/goacc/nested-function-1.c
@@ -0,0 +1,100 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran
+   version.  */
+
+int main ()
+{
+#define N 100
+  int nonlocal_arg;
+  int nonlocal_a[N];
+  int nonlocal_i;
+  int nonlocal_j;
+
+  for (int i = 0; i < N; ++i)
+    nonlocal_a[i] = 5;
+  nonlocal_arg = 5;
+
+  void local ()
+  {
+    int local_i;
+    int local_arg;
+    int local_a[N];
+    int local_j;
+
+    for (int i = 0; i < N; ++i)
+      local_a[i] = 5;
+    local_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:local_arg) worker(local_arg) vector(local_arg) \
+  wait async(local_arg)
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:5])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:local_arg) worker(local_arg) vector(local_arg) \
+  wait(local_arg, local_arg + 1, local_arg + 2) async
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:4])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+  }
+
+  void nonlocal ()
+  {
+    for (int i = 0; i < N; ++i)
+      nonlocal_a[i] = 5;
+    nonlocal_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait async(nonlocal_arg)
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:3])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(2)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(3)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:2])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+  }
+
+  local ();
+  nonlocal ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/goacc/nested-function-2.c b/gcc/testsuite/gcc.dg/goacc/nested-function-2.c
new file mode 100644
index 00000000000..70c9ec8ebfa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/goacc/nested-function-2.c
@@ -0,0 +1,45 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int j = 0, k = 6, l = 7, m = 8;
+  void simple (void)
+  {
+    int i;
+#pragma acc parallel
+    {
+#pragma acc loop
+      for (i = 0; i < m; i+= k)
+	j = (m + i - j) * l;
+    }
+  }
+  void collapse (void)
+  {
+    int x, y, z;
+#pragma acc parallel
+    {
+#pragma acc loop collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  void reduction (void)
+  {
+    int x, y, z;
+#pragma acc parallel reduction (+:j)
+    {
+#pragma acc loop reduction (+:j) collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  simple();
+  collapse();
+  reduction();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/goacc/pr71373.c b/gcc/testsuite/gcc.dg/goacc/pr71373.c
new file mode 100644
index 00000000000..9381752cc9d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/goacc/pr71373.c
@@ -0,0 +1,41 @@
+/* Unintentional nested function usage.  */
+/* Due to missing right braces '}', the following functions are parsed as
+   nested functions.  This ran into an ICE.  */
+
+void foo (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  // Note right brace '}' commented out here.
+  //}
+}
+void bar (void)
+{
+}
+
+// Adding right brace '}' here, to make this compile.
+}
+
+
+// ..., and the other way round:
+
+void BAR (void)
+{
+// Note right brace '}' commented out here.
+//}
+
+void FOO (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  }
+}
+
+// Adding right brace '}' here, to make this compile.
+}
diff --git a/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 b/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
index 42a447ad06b..abb5e6b6c3d 100644
--- a/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
+++ b/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
@@ -143,7 +143,8 @@ end subroutine test
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
+! XFAILed: OpenACC tile clauses are discarded during gimplification.
+! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/cray-2.f95 b/gcc/testsuite/gfortran.dg/goacc/cray-2.f95
new file mode 100644
index 00000000000..51b79b53636
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/cray-2.f95
@@ -0,0 +1,56 @@
+! { dg-additional-options "-fcray-pointer" }
+! See also cray.f95.
+
+program test
+  call oacc1
+contains
+  subroutine oacc1
+    implicit none
+    integer :: i
+    real :: pointee
+    pointer (ptr, pointee)
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
+    !$acc data copy (pointee) ! { dg-error "Cray pointee" }
+    !$acc end data
+    !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
+    !$acc end data
+    !$acc parallel private (pointee) ! { dg-error "Cray pointee" }
+    !$acc end parallel
+    !$acc host_data use_device (pointee) ! { dg-error "Cray pointee" }
+    !$acc end host_data
+    !$acc parallel loop reduction(+:pointee) ! { dg-error "Cray pointee" }
+    do i = 1,5
+    enddo
+    !$acc end parallel loop
+    !$acc parallel loop
+    do i = 1,5
+      !$acc cache (pointee) ! { dg-error "Cray pointee" }
+    enddo
+    !$acc end parallel loop
+    !$acc update device (pointee) ! { dg-error "Cray pointee" }
+    !$acc update host (pointee) ! { dg-error "Cray pointee" }
+    !$acc update self (pointee) ! { dg-error "Cray pointee" }
+    !$acc data copy (ptr)
+    !$acc end data
+    !$acc data deviceptr (ptr) ! { dg-error "Cray pointer" }
+    !$acc end data
+    !$acc parallel private (ptr)
+    !$acc end parallel
+    !$acc host_data use_device (ptr) ! { dg-error "Cray pointer" }
+    !$acc end host_data
+    !$acc parallel loop reduction(+:ptr) ! { dg-error "Cray pointer" }
+    do i = 1,5
+    enddo
+    !$acc end parallel loop
+    !$acc parallel loop
+    do i = 1,5
+      !TODO: This must fail, as in openacc-1_0-branch.
+      !$acc cache (ptr) ! { dg-error "" "TODO" { xfail *-*-* } }
+    enddo
+    !$acc end parallel loop
+    !$acc update device (ptr)
+    !$acc update host (ptr)
+    !$acc update self (ptr)
+  end subroutine oacc1
+end program test
diff --git a/gcc/testsuite/gfortran.dg/goacc/cray.f95 b/gcc/testsuite/gfortran.dg/goacc/cray.f95
index 705c18c992d..d6d531705a6 100644
--- a/gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/cray.f95
@@ -1,5 +1,5 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray-2.f95.
 
 module test
 contains
@@ -8,8 +8,8 @@ contains
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
new file mode 100644
index 00000000000..79665b948c3
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
@@ -0,0 +1,176 @@
+! See also loop-1.f95.
+
+program test
+  call test1
+contains
+
+subroutine test1
+  integer :: i, j, k, b(10)
+  integer, dimension (30) :: a
+  double precision :: d
+  real :: r
+  i = 0
+  !$acc loop
+  do 100 ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+    if (i .gt. 0) exit ! { dg-error "EXIT statement" }
+  100 i = i + 1
+  i = 0
+  !$acc loop
+  do ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+      if (i .gt. 0) exit ! { dg-error "EXIT statement" }
+       i = i + 1
+  end do
+  i = 0
+  !$acc loop
+  do 200 while (i .lt. 4) ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+  200 i = i + 1
+  !$acc loop
+  do while (i .lt. 8) ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+       i = i + 1
+  end do
+  !$acc loop
+  do 300 d = 1, 30, 6
+      i = d
+  300 a(i) = 1
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
+  !$acc loop
+  do d = 1, 30, 5
+       i = d
+      a(i) = 2
+  end do
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
+  !$acc loop
+  do i = 1, 30
+      if (i .eq. 16) exit ! { dg-error "EXIT statement" }
+  end do
+  !$acc loop
+  outer: do i = 1, 30
+      do j = 5, 10
+          if (i .eq. 6 .and. j .eq. 7) exit outer ! { dg-error "EXIT statement" }
+      end do
+  end do outer
+  last: do i = 1, 30
+   end do last
+
+  ! different types of loop are allowed
+  !$acc loop
+  do i = 1,10
+  end do
+  !$acc loop
+  do 400, i = 1,10
+400   a(i) = i
+
+  ! after loop directive must be loop
+  !$acc loop
+  a(1) = 1 ! { dg-error "Expected DO loop" }
+  do i = 1,10
+  enddo
+
+  ! combined directives may be used with/without end
+  !$acc parallel loop
+  do i = 1,10
+  enddo
+  !$acc parallel loop
+  do i = 1,10
+  enddo
+  !$acc end parallel loop
+  !$acc kernels loop
+  do i = 1,10
+  enddo
+  !$acc kernels loop
+  do i = 1,10
+  enddo
+  !$acc end kernels loop
+
+  !$acc kernels loop reduction(max:i)
+  do i = 1,10
+  enddo
+  !$acc kernels
+  !$acc loop reduction(max:i)
+  do i = 1,10
+  enddo
+  !$acc end kernels
+
+  !$acc parallel loop collapse(0) ! { dg-error "constant positive integer" }
+  do i = 1,10
+  enddo
+
+  !$acc parallel loop collapse(-1) ! { dg-error "constant positive integer" }
+  do i = 1,10
+  enddo
+
+  !$acc parallel loop collapse(i) ! { dg-error "Constant expression required" }
+  do i = 1,10
+  enddo
+
+  !$acc parallel loop collapse(4) ! { dg-error "not enough DO loops for collapsed" }
+    do i = 1, 3
+        do j = 4, 6
+          do k = 5, 7
+              a(i+j-k) = i + j + k
+          end do
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 5, 2
+        do j = i + 1, 7, i  ! { dg-error "collapsed loops don.t form rectangular iteration space" }
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+        k = 4
+    end do
+    !$acc parallel loop collapse(3-1)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+        k = 4
+    end do
+    !$acc parallel loop collapse(1+1)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+        k = 4
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do      ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do r = 4, 6
+        end do
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
+    end do
+
+    ! Both seq and independent are not allowed
+  !$acc loop independent seq ! { dg-error "SEQ conflicts with INDEPENDENT" }
+  do i = 1,10
+  enddo
+
+
+  !$acc cache (a(1:10)) ! { dg-error "ACC CACHE directive must be inside of loop" }
+
+  do i = 1,10
+    !$acc cache(a(i:i+1))
+  enddo
+
+  do i = 1,10
+    !$acc cache(a(i:i+1))
+    a(i) = i
+    !$acc cache(a(i+2:i+2+1))
+  enddo
+
+end subroutine test1
+end program test
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-1.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-1.f95
index a605f038926..5f81b7a1d19 100644
--- a/gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-1.f95
@@ -1,8 +1,10 @@
+! See also loop-1-2.f95.
+
 module test
   implicit none
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
new file mode 100644
index 00000000000..9be74a85919
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
@@ -0,0 +1,58 @@
+! { dg-additional-options "-std=f2008" }
+! See also loop-3.f95.
+
+program test
+  call test1
+contains
+subroutine test1
+  implicit none
+  integer :: i, j
+
+  ! !$acc end loop not required by spec
+  !$acc loop
+  do i = 1,5
+  enddo
+  !$acc end loop ! { dg-warning "Redundant" }
+
+  !$acc loop
+  do i = 1,5
+  enddo
+  j = 1
+  !$acc end loop ! { dg-error "Unexpected" }
+
+  !$acc parallel
+  !$acc loop
+  do i = 1,5
+  enddo
+  !$acc end parallel
+  !$acc end loop ! { dg-error "Unexpected" }
+
+  ! OpenACC supports Fortran 2008 do concurrent statement
+  !$acc loop
+  do concurrent (i = 1:5)
+  end do
+
+  !$acc loop
+  outer_loop: do i = 1, 5
+    inner_loop: do j = 1,5
+      if (i .eq. j) cycle outer_loop
+      if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
+    end do inner_loop
+  end do outer_loop
+
+  outer_loop1: do i = 1, 5
+    !$acc loop
+    inner_loop1: do j = 1,5
+      if (i .eq. j) cycle outer_loop1 ! { dg-error "CYCLE statement" }
+    end do inner_loop1
+  end do outer_loop1
+
+  !$acc loop collapse(2)
+  outer_loop2: do i = 1, 5
+    inner_loop2: do j = 1,5
+      if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
+      if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
+    end do inner_loop2
+  end do outer_loop2
+end subroutine test1
+end program test
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-3.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-3.f95
index 2a866c79234..30930f404f3 100644
--- a/gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-3.f95
@@ -1,10 +1,10 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3-2.f95.
 
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +23,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +35,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +50,5 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
diff --git a/gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 b/gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
new file mode 100644
index 00000000000..2fcaa400ee3
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
@@ -0,0 +1,93 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version.
+
+program main
+  integer, parameter :: N = 100
+  integer :: nonlocal_arg
+  integer :: nonlocal_a(N)
+  integer :: nonlocal_i
+  integer :: nonlocal_j
+
+  nonlocal_a (:) = 5
+  nonlocal_arg = 5
+
+  call local ()
+  call nonlocal ()
+
+contains
+
+  subroutine local ()
+    integer :: local_i
+    integer :: local_arg
+    integer :: local_a(N)
+    integer :: local_j
+
+    local_a (:) = 5
+    local_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait async(local_arg)
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 5))
+       local_a(local_i) = 100
+       !$acc loop seq tile(*)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(1)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait(local_arg, local_arg + 1, local_arg + 2) async
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 4))
+       local_a(local_i) = 100
+       !$acc loop seq tile(1)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine local
+
+  subroutine nonlocal ()
+    nonlocal_a (:) = 5
+    nonlocal_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait async(nonlocal_arg)
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(2)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(3)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(*)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine nonlocal
+end program main
diff --git a/gcc/testsuite/gfortran.dg/goacc/subroutines.f90 b/gcc/testsuite/gfortran.dg/goacc/subroutines.f90
deleted file mode 100644
index 6cab798d458..00000000000
--- a/gcc/testsuite/gfortran.dg/goacc/subroutines.f90
+++ /dev/null
@@ -1,73 +0,0 @@
-! Exercise how tree-nested.c handles gang, worker vector and seq.
-
-! { dg-do compile } 
-
-program main
-  integer, parameter :: N = 100
-  integer :: nonlocal_arg
-  integer :: nonlocal_a(N)
-  integer :: nonlocal_i
-  integer :: nonlocal_j
-  
-  nonlocal_a (:) = 5
-  nonlocal_arg = 5
-  
-  call local ()
-  call nonlocal ()
-
-contains
-
-  subroutine local ()
-    integer :: local_i
-    integer :: local_arg
-    integer :: local_a(N)
-    integer :: local_j
-    
-    local_a (:) = 5
-    local_arg = 5
-
-    !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:local_arg) worker(local_arg) &
-    !$acc vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine local
-
-  subroutine nonlocal ()
-    nonlocal_a (:) = 5
-    nonlocal_arg = 5
-  
-    !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine nonlocal
-end program main
diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c
index 25a92aaa04b..6fc63260000 100644
--- a/gcc/tree-nested.c
+++ b/gcc/tree-nested.c
@@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1197,8 +1199,21 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
 	default:
 	  gcc_unreachable ();
 	}
@@ -1790,6 +1805,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1878,8 +1895,21 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
 	default:
 	  gcc_unreachable ();
 	}
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 5c7f41abf89..cf551f49384 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,13 @@
+2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+	    Cesar Philippidis  <cesar@codesourcery.com>
+
+	PR middle-end/71373
+	* libgomp.oacc-c/nested-function-1.c: New file.
+	* libgomp.oacc-c/nested-function-2.c: Likewise.
+	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
+	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
+	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.
+
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
 	PR c/71381
diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-function-1.c b/libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
new file mode 100644
index 00000000000..fb2a3acdfa9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
@@ -0,0 +1,52 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][7][8];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(4 - 1)
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  a[i][j][k] = i + j + k;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  if (a[i][j][k] != i + j + k)
+	    __builtin_abort();
+  }
+
+  void test2 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-function-2.c b/libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
new file mode 100644
index 00000000000..2c3f3feb7f8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
@@ -0,0 +1,155 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1;
+
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, '\0', sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort();
+  }
+
+  void test2 (int v1, int v2, int v3, int v4, int v5, int v6)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8,
+      int v9)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test4 ()
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+    int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8,
+      v9 = p9;
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 (p1, p2, p3, p4, p5, p6);
+  test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9);
+  test4 ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
new file mode 100644
index 00000000000..fdbca4481f8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
@@ -0,0 +1,70 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse2
+  call test1
+  call test2
+contains
+  subroutine test1
+    integer :: i, j, k, a(1:3, 4:6, 5:7)
+    logical :: l
+    l = .false.
+    a(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop worker vector collapse(4 - 1)
+      do 164 i = 1, 3
+        do 164 j = 4, 6
+          do 164 k = 5, 7
+            a(i, j, k) = i + j + k
+164      end do
+    !$acc loop worker vector reduction(.or.:l) collapse(2)
+firstdo: do i = 1, 3
+        do j = 4, 6
+          do k = 5, 7
+            if (a(i, j, k) .ne. (i + j + k)) l = .true.
+          end do
+        end do
+      end do firstdo
+    !$acc end parallel
+    if (l) call abort
+  end subroutine test1
+
+  subroutine test2
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    a = 0
+    !$acc parallel
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+      do 115 k=1,3
+         !$acc loop collapse(2)
+  dokk: do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc loop gang(static:1) collapse(1)
+      do k=1,3
+         if (any(a(k,1:3,1:3).ne.1)) call abort
+      enddo
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+ dol: do 120 l=1,3
+    !$acc loop collapse(2)
+  doll: do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc loop gang(static:1) collapse(1)
+     do l=1,3
+        if (any(a(l,1:3,1:3).ne.2)) call abort
+     enddo
+    !$acc end parallel
+  end subroutine test2
+
+end program collapse2
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
new file mode 100644
index 00000000000..4e2819641ea
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
@@ -0,0 +1,173 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse3
+  integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9
+  p1 = 2
+  p2 = 6
+  p3 = -2
+  p4 = 4
+  p5 = 13
+  p6 = 18
+  p7 = 1
+  p8 = 1
+  p9 = 1
+  call test1
+  call test2 (p1, p2, p3, p4, p5, p6)
+  call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9)
+  call test4
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+    end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = p1
+    v2 = p2
+    v3 = p3
+    v4 = p4
+    v5 = p5
+    v6 = p6
+    v7 = p7
+    v8 = p8
+    v9 = p9
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+end program collapse3
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
new file mode 100644
index 00000000000..2f6485ef8cf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
@@ -0,0 +1,244 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program sub_collapse_3
+  call test1
+  call test2 (2, 6, -2, 4, 13, 18)
+  call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+  call test4
+  call test5 (2, 6, -2, 4, 13, 18)
+  call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+  end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = 2
+    v2 = 6
+    v3 = -2
+    v4 = 4
+    v5 = 13
+    v6 = 18
+    v7 = 1
+    v8 = 1
+    v9 = 1
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+  subroutine test5(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test5
+
+  subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+            m = i * 100 + j * 10 + k
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test6
+
+end program sub_collapse_3