OpenACC 'kernels' decomposition: Mark variables used in synthesized data clauses as addressable [PR100280]

... as otherwise 'gcc/omp-low.c:lower_omp_target' has to create a temporary:

    13073			else if (is_gimple_reg (var))
    13074			  {
    13075			    gcc_assert (offloaded);
    13076			    tree avar = create_tmp_var (TREE_TYPE (var));
    13077			    mark_addressable (avar);

..., which (a) is only implemented for actualy *offloaded* regions (but not
data regions), and (b) the subsequently synthesized code for writing to and
later reading back from the temporary fundamentally conflicts with OpenACC
'async' (as used by OpenACC 'kernels' decomposition).  That's all not trivial
to make work, so let's just avoid this case.

	gcc/
	PR middle-end/100280
	* omp-oacc-kernels-decompose.cc (maybe_build_inner_data_region):
	Mark variables used in synthesized data clauses as addressable.
	gcc/testsuite/
	PR middle-end/100280
	* c-c++-common/goacc/kernels-decompose-pr100280-1.c: New.
	* c-c++-common/goacc/classify-kernels-parloops.c: Likewise.
	* c-c++-common/goacc/classify-kernels-unparallelized-parloops.c:
	Likewise.
	* c-c++-common/goacc/classify-kernels-unparallelized.c: Test
	'--param openacc-kernels=decompose'.
	* c-c++-common/goacc/classify-kernels.c: Likewise.
	* c-c++-common/goacc/kernels-decompose-2.c: Update.
	* c-c++-common/goacc/kernels-decompose-ice-1.c: Remove.
	* c-c++-common/goacc/kernels-decompose-ice-2.c: Likewise.
	* gfortran.dg/goacc/classify-kernels-parloops.f95: New.
	* gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95:
	Likewise.
	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Test
	'--param openacc-kernels=decompose'.
	* gfortran.dg/goacc/classify-kernels.f95: Likewise.
	libgomp/
	PR middle-end/100280
	* testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c:
	Update.
	* testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c:
	Likewise.

Suggested-by: Julian Brown <julian@codesourcery.com>
This commit is contained in:
Thomas Schwinge 2021-12-16 22:02:37 +01:00
parent 862e5f398b
commit 9b32c1669a
16 changed files with 264 additions and 170 deletions

View File

@ -793,7 +793,8 @@ make_data_region_try_statement (location_t loc, gimple *body)
/* If INNER_BIND_VARS holds variables, build an OpenACC data region with
location LOC containing BODY and having 'create (var)' clauses for each
variable. If INNER_CLEANUP is present, add a try-finally statement with
variable (as a side effect, such variables also get TREE_ADDRESSABLE set).
If INNER_CLEANUP is present, add a try-finally statement with
this cleanup code in the finally block. Return the new data region, or
the original BODY if no data region was needed. */
@ -842,6 +843,9 @@ maybe_build_inner_data_region (location_t loc, gimple *body,
inner_data_clauses = new_clause;
prev_mapped_var = v;
/* See <https://gcc.gnu.org/PR100280>. */
TREE_ADDRESSABLE (v) = 1;
}
}

View File

@ -0,0 +1,41 @@
/* Check offloaded function's attributes and classification for OpenACC
kernels. */
/* { dg-additional-options "--param openacc-kernels=parloops" } */
/* { dg-additional-options "-O2" }
{ dg-additional-options "-fopt-info-optimized-omp" }
{ dg-additional-options "-fdump-tree-ompexp" }
{ dg-additional-options "-fdump-tree-parloops1-all" }
{ dg-additional-options "-fdump-tree-oaccloops" } */
/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
aspects of that functionality. */
#define N 1024
extern unsigned int *__restrict a;
extern unsigned int *__restrict b;
extern unsigned int *__restrict c;
void KERNELS ()
{
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang loop parallelism" } */
for (unsigned int i = 0; i < N; i++)
c[i] = a[i] + b[i];
}
/* Check the offloaded function's attributes.
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
/* Check that exactly one OpenACC kernels construct is analyzed, and that it
can be parallelized.
{ dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
{ dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
/* Check the offloaded function's classification and compute dimensions (will
always be 1 x 1 x 1 for non-offloading compilation).
{ dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } }
{ dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } */

View File

@ -0,0 +1,45 @@
/* Check offloaded function's attributes and classification for unparallelized
OpenACC kernels. */
/* { dg-additional-options "--param openacc-kernels=parloops" } */
/* { dg-additional-options "-O2" }
{ dg-additional-options "-fopt-info-optimized-omp" }
{ dg-additional-options "-fdump-tree-ompexp" }
{ dg-additional-options "-fdump-tree-parloops1-all" }
{ dg-additional-options "-fdump-tree-oaccloops" } */
/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
aspects of that functionality. */
#define N 1024
extern unsigned int *__restrict a;
extern unsigned int *__restrict b;
extern unsigned int *__restrict c;
/* An "extern"al mapping of loop iterations/array indices makes the loop
unparallelizable. */
extern unsigned int f (unsigned int);
void KERNELS ()
{
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */
for (unsigned int i = 0; i < N; i++)
c[i] = a[f (i)] + b[f (i)];
}
/* Check the offloaded function's attributes.
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
/* Check that exactly one OpenACC kernels construct is analyzed, and that it
can't be parallelized.
{ dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
{ dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
/* Check the offloaded function's classification and compute dimensions (will
always be 1 x 1 x 1 for non-offloading compilation).
{ dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops" } }
{ dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
{ dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } */

View File

@ -1,8 +1,10 @@
/* Check offloaded function's attributes and classification for unparallelized
OpenACC kernels. */
/* { dg-additional-options "--param openacc-kernels=decompose" } */
/* { dg-additional-options "-O2" }
{ dg-additional-options "-fopt-info-optimized-omp" }
{ dg-additional-options "-fopt-info-all-omp" }
{ dg-additional-options "-fdump-tree-ompexp" }
{ dg-additional-options "-fdump-tree-parloops1-all" }
{ dg-additional-options "-fdump-tree-oaccloops" } */
@ -23,6 +25,7 @@ extern unsigned int f (unsigned int);
void KERNELS ()
{
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */
/* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (unsigned int i = 0; i < N; i++)
c[i] = a[f (i)] + b[f (i)];
}

View File

@ -1,8 +1,10 @@
/* Check offloaded function's attributes and classification for OpenACC
kernels. */
/* { dg-additional-options "--param openacc-kernels=decompose" } */
/* { dg-additional-options "-O2" }
{ dg-additional-options "-fopt-info-optimized-omp" }
{ dg-additional-options "-fopt-info-all-omp" }
{ dg-additional-options "-fdump-tree-ompexp" }
{ dg-additional-options "-fdump-tree-parloops1-all" }
{ dg-additional-options "-fdump-tree-oaccloops" } */
@ -19,6 +21,7 @@ extern unsigned int *__restrict c;
void KERNELS ()
{
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang loop parallelism" } */
/* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (unsigned int i = 0; i < N; i++)
c[i] = a[i] + b[i];
}

View File

@ -55,7 +55,7 @@ main ()
;
}
{ /*TODO Instead of using 'for (int i = 0; [...])', move 'int i' outside, to work around for ICE detailed in 'kernels-decompose-ice-1.c'. */
{
int i;
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_compute$c_compute } */
@ -64,6 +64,20 @@ main ()
a[i] = 0;
}
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } l_compute$c_compute } */
/* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */
{
int i;
}
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_compute$c_compute } */
/* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (int i = 0; i < N; i++)
a[i] = 0;
#pragma acc kernels loop /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */

View File

@ -1,114 +0,0 @@
/* Test OpenACC 'kernels' construct decomposition. */
/* { dg-additional-options "-fopt-info-omp-all" } */
/* { dg-additional-options "-fchecking --param=openacc-kernels=decompose" } */
/* { dg-ice "TODO" }
{ dg-prune-output "during GIMPLE pass: omplower" } */
/* { dg-additional-options "--param=openacc-privatization=noisy" } */
/* Reduced from 'kernels-decompose-2.c'.
(Hopefully) similar instances:
- 'kernels-decompose-ice-2.c'
- 'libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c'
- 'libgomp.oacc-c-c++-common/kernels-decompose-1.c'
*/
int
main ()
{
#define N 10
#pragma acc kernels
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } .-1 } */
/* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (int i = 0; i < N; i++)
;
return 0;
}
/*
In 'gimple' we've got:
main ()
{
int D.2087;
{
int a[10];
try
{
#pragma omp target oacc_kernels map(tofrom:a [len: 40])
{
{
int i;
i = 0;
goto <D.2085>;
[...]
..., which in 'omp_oacc_kernels_decompose' we turn into:
main ()
{
int D.2087;
{
int a[10];
try
{
#pragma omp target oacc_data_kernels map(tofrom:a [len: 40])
{
try
{
{
int i;
#pragma omp target oacc_data_kernels map(alloc:i [len: 4])
{
try
{
{
#pragma omp target oacc_kernels async(-1) map(force_present:i [len: 4]) map(force_present:a [len: 40])
{
i = 0;
goto <D.2085>;
[...]
..., which results in ICE in:
#1 0x0000000000d2247b in lower_omp_target (gsi_p=gsi_p@entry=0x7fffffffbc90, ctx=ctx@entry=0x2c994c0) at [...]/gcc/omp-low.c:11981
11981 gcc_assert (offloaded);
(gdb) list
11976 talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
11977 gimplify_assign (x, var, &ilist);
11978 }
11979 else if (is_gimple_reg (var))
11980 {
11981 gcc_assert (offloaded);
11982 tree avar = create_tmp_var (TREE_TYPE (var));
11983 mark_addressable (avar);
11984 enum gomp_map_kind map_kind = OMP_CLAUSE_MAP_KIND (c);
11985 if (GOMP_MAP_COPY_TO_P (map_kind)
(gdb) call debug_tree(var)
<var_decl 0x7ffff7feebd0 i
type <integer_type 0x7ffff67be5e8 int sizes-gimplified public SI
size <integer_cst 0x7ffff67a5f18 constant 32>
unit-size <integer_cst 0x7ffff67a5f30 constant 4>
align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff67be5e8 precision:32 min <integer_cst 0x7ffff67a5ed0 -2147483648> max <integer_cst 0x7ffff67a5ee8 2147483647>
pointer_to_this <pointer_type 0x7ffff67c69d8>>
used read SI [...]:15:12 size <integer_cst 0x7ffff67a5f18 32> unit-size <integer_cst 0x7ffff67a5f30 4>
align:32 warn_if_not_align:0 context <function_decl 0x7ffff68eea00 main>>
Just defusing the 'assert' is not sufficient:
libgomp: present clause: !acc_is_present (0x7ffe29cba3ec, 4 (0x4))
TODO Can't the 'omp_oacc_kernels_decompose' transformation be much simpler, such that we avoid the intermediate 'data' if we've got just one compute construct inside it?
TODO But it's not clear if that'd just resolve one simple instance of the general problem?
*/

View File

@ -1,22 +0,0 @@
/* Test OpenACC 'kernels' construct decomposition. */
/* { dg-additional-options "-fopt-info-omp-all" } */
/* { dg-additional-options "-fchecking --param=openacc-kernels=decompose" } */
/* { dg-ice "TODO" }
{ dg-prune-output "during GIMPLE pass: omplower" } */
/* { dg-additional-options "--param=openacc-privatization=noisy" } */
/* Reduced from 'kernels-decompose-ice-1.c'. */
int
main ()
{
#pragma acc kernels
/* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } .-1 } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } .-2 } */
{
int i;
}
}

View File

@ -0,0 +1,19 @@
/* Reduced from 'libgomp.oacc-c-c++-common/kernels-loop-2.c'. */
/* { dg-additional-options "--param openacc-kernels=decompose" } */
/* { dg-additional-options "-fopt-info-all-omp" } */
/* { dg-additional-options "--param=openacc-privatization=noisy" } */
void
foo (void) /* { dg-line l_f_1 } */
{
#pragma acc kernels /* { dg-line l_k_1 } */
/* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_k_1 } */
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_k_1 } */
/* { dg-bogus {note: beginning 'parloops' part in OpenACC 'kernels' region} {TODO location} { xfail *-*-* } l_f_1 }
{ dg-note {beginning 'parloops' part in OpenACC 'kernels' region} TODO { xfail *-*-* } .+1 } */
for (int i;;)
;
}

View File

@ -0,0 +1,43 @@
! Check offloaded function's attributes and classification for OpenACC
! kernels.
! { dg-additional-options "--param openacc-kernels=parloops" }
! { dg-additional-options "-O2" }
! { dg-additional-options "-fopt-info-optimized-omp" }
! { dg-additional-options "-fdump-tree-ompexp" }
! { dg-additional-options "-fdump-tree-parloops1-all" }
! { dg-additional-options "-fdump-tree-oaccloops" }
! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
! aspects of that functionality.
program main
implicit none
integer, parameter :: n = 1024
integer, dimension (0:n-1) :: a, b, c
integer :: i
call setup(a, b)
!$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang loop parallelism" }
do i = 0, n - 1
c(i) = a(i) + b(i)
end do
!$acc end kernels
end program main
! Check the offloaded function's attributes.
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
! Check that exactly one OpenACC kernels construct is analyzed, and that it
! can be parallelized.
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
! Check the offloaded function's classification and compute dimensions (will
! always be 1 x 1 x 1 for non-offloading compilation).
! { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } }
! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } }

View File

@ -0,0 +1,47 @@
! Check offloaded function's attributes and classification for unparallelized
! OpenACC kernels.
! { dg-additional-options "--param openacc-kernels=parloops" }
! { dg-additional-options "-O2" }
! { dg-additional-options "-fopt-info-optimized-omp" }
! { dg-additional-options "-fdump-tree-ompexp" }
! { dg-additional-options "-fdump-tree-parloops1-all" }
! { dg-additional-options "-fdump-tree-oaccloops" }
! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
! aspects of that functionality.
program main
implicit none
integer, parameter :: n = 1024
integer, dimension (0:n-1) :: a, b, c
integer :: i
! An "external" mapping of loop iterations/array indices makes the loop
! unparallelizable.
integer, external :: f
call setup(a, b)
!$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC seq loop parallelism" }
do i = 0, n - 1
c(i) = a(f (i)) + b(f (i))
end do
!$acc end kernels
end program main
! Check the offloaded function's attributes.
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
! Check that exactly one OpenACC kernels construct is analyzed, and that it
! can't be parallelized.
! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
! Check the offloaded function's classification and compute dimensions (will
! always be 1 x 1 x 1 for non-offloading compilation).
! { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops" } }
! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } }

View File

@ -1,8 +1,10 @@
! Check offloaded function's attributes and classification for unparallelized
! OpenACC kernels.
! { dg-additional-options "--param openacc-kernels=decompose" }
! { dg-additional-options "-O2" }
! { dg-additional-options "-fopt-info-optimized-omp" }
! { dg-additional-options "-fopt-info-all-omp" }
! { dg-additional-options "-fdump-tree-ompexp" }
! { dg-additional-options "-fdump-tree-parloops1-all" }
! { dg-additional-options "-fdump-tree-oaccloops" }
@ -23,6 +25,7 @@ program main
call setup(a, b)
!$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC seq loop parallelism" }
! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 }
do i = 0, n - 1
c(i) = a(f (i)) + b(f (i))
end do

View File

@ -1,8 +1,10 @@
! Check offloaded function's attributes and classification for OpenACC
! kernels.
! { dg-additional-options "--param openacc-kernels=decompose" }
! { dg-additional-options "-O2" }
! { dg-additional-options "-fopt-info-optimized-omp" }
! { dg-additional-options "-fopt-info-all-omp" }
! { dg-additional-options "-fdump-tree-ompexp" }
! { dg-additional-options "-fdump-tree-parloops1-all" }
! { dg-additional-options "-fdump-tree-oaccloops" }
@ -19,6 +21,7 @@ program main
call setup(a, b)
!$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang loop parallelism" }
! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 }
do i = 0, n - 1
c(i) = a(i) + b(i)
end do

View File

@ -1,5 +1,5 @@
/* { dg-additional-options "--param=openacc-kernels=decompose" } */
/* Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'.
/* ICE similar to PR100280, but not the same.
{ dg-ice "TODO" }
TODO { dg-prune-output "during GIMPLE pass: omplower" }
TODO { dg-do link } */

View File

@ -3,7 +3,7 @@
/* Based on '../libgomp.oacc-fortran/asyncwait-1.f90'. */
/* { dg-additional-options "--param=openacc-kernels=decompose" } */
/* TODO To avoid PR100280 ICE { dg-additional-options "--param=openacc-kernels=parloops" } */
/* { dg-xfail-run-if TODO { openacc_radeon_accel_selected } } */
/* { dg-additional-options "-fopt-info-all-omp" }
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
@ -202,11 +202,12 @@ main (void)
#pragma acc data copy (a[0:N]) copy (b[0:N])
{
#pragma acc kernels async /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
#pragma acc kernels async
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
b[i] = a[i];
@ -229,11 +230,12 @@ main (void)
#pragma acc data copy (a[0:N]) copy (b[0:N])
{
#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
#pragma acc kernels async (1)
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
b[i] = a[i];
@ -259,24 +261,27 @@ main (void)
#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N])
{
#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
/* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target { ! __OPTIMIZE__ } } l_compute$c_compute }
{ dg-optimized "assigned OpenACC gang loop parallelism" "" { target { __OPTIMIZE__ } } l_compute$c_compute } */
/* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (int i = 0; i < N; ++i)
b[i] = (a[i] * a[i] * a[i]) / a[i];
#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
/* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target { ! __OPTIMIZE__ } } l_compute$c_compute }
{ dg-optimized "assigned OpenACC gang loop parallelism" "" { target { __OPTIMIZE__ } } l_compute$c_compute } */
/* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (int i = 0; i < N; ++i)
c[i] = (a[i] * 4) / a[i];
#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
#pragma acc kernels async (1)
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
@ -307,33 +312,37 @@ main (void)
#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N])
{
#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
/* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target { ! __OPTIMIZE__ } } l_compute$c_compute }
{ dg-optimized "assigned OpenACC gang loop parallelism" "" { target { __OPTIMIZE__ } } l_compute$c_compute } */
/* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (int i = 0; i < N; ++i)
b[i] = (a[i] * a[i] * a[i]) / a[i];
#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
#pragma acc kernels async (1)
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
c[i] = (a[i] * 4) / a[i];
#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
#pragma acc kernels async (1)
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
#pragma acc kernels wait (1) async (1) /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
#pragma acc kernels wait (1) async (1)
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
e[i] = a[i] + b[i] + c[i] + d[i];

View File

@ -32,11 +32,7 @@ int main()
{
/* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
int c = 234;
/* { dg-note {variable 'c' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute }
{ dg-note {variable 'c\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
/*TODO Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'. */
(volatile int *) &c;
/* { dg-note {variable 'c' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */
#pragma acc loop independent gang /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {parallelized loop nest in OpenACC 'kernels' region} {} { target *-*-* } l_loop_i$c_loop_i } */