Thomas Schwinge fd71a9a24d OpenACC 2.5 kernels construct: num_gangs, num_workers, vector_length clauses
gcc/c/
	* c-parser.c (OACC_KERNELS_CLAUSE_MASK): Add
	"PRAGMA_OACC_CLAUSE_NUM_GANGS", "PRAGMA_OACC_CLAUSE_NUM_WORKERS",
	"PRAGMA_OACC_CLAUSE_VECTOR_LENGTH".
gcc/cp/
	* parser.c (OACC_KERNELS_CLAUSE_MASK): Add
	"PRAGMA_OACC_CLAUSE_NUM_GANGS", "PRAGMA_OACC_CLAUSE_NUM_WORKERS",
	"PRAGMA_OACC_CLAUSE_VECTOR_LENGTH".
	gcc/fortran/
	* openmp.c (OACC_KERNELS_CLAUSES): Add "OMP_CLAUSE_NUM_GANGS",
	"OMP_CLAUSE_NUM_WORKERS", "OMP_CLAUSE_VECTOR_LENGTH".
	gcc/
	* omp-offload.c (execute_oacc_device_lower): Remove the
	parallelism dimensions function attributes for unparallelized
	OpenACC kernels constructs.
	gcc/testsuite/
	* c-c++-common/goacc/parallel-dims-1.c: Update.
	* c-c++-common/goacc/parallel-dims-2.c: Likewise.
	* c-c++-common/goacc/routine-1.c: Likewise.
	* c-c++-common/goacc/uninit-dim-clause.c: Likewise.
	* g++.dg/goacc/template.C: Likewise.
	* gfortran.dg/goacc/kernels-tree.f95: Likewise.
	* gfortran.dg/goacc/routine-3.f90: Likewise.
	* gfortran.dg/goacc/sie.f95: Likewise.
	* gfortran.dg/goacc/uninit-dim-clause.f95: Likewise.
	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: Update.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
	* testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: Likewise.

From-SVN: r248370
2017-05-23 17:47:32 +02:00

58 lines
1.4 KiB
C

#include <stdlib.h>
/* Number of elements in each of the arrays allocated and processed by
   main.  */
#define N (1024 * 512)
/* Type used for the induction variable of every loop in main.  */
#define COUNTERTYPE unsigned int
/* Exercise three OpenACC kernels regions over N-element arrays: one with
   no parallelism-dimension clauses, one whose num_gangs, num_workers and
   vector_length arguments are read from memory at run time, and one whose
   arguments are integer literals.  Afterwards every element of all three
   arrays is verified outside of the kernels regions.

   Returns 0 on success.  Calls abort () if an allocation fails or if any
   element holds an unexpected value.  */
int
main (void)
{
  unsigned int *__restrict a;
  unsigned int *__restrict b;
  unsigned int *__restrict c;

  /* The explicit casts on the malloc results are kept so that the file
     also remains valid when compiled as C++.  */
  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));

  /* Fail explicitly on allocation failure rather than dereferencing a
     null pointer inside the regions below.  */
  if (a == NULL || b == NULL || c == NULL)
    abort ();

  /* Parallelism dimensions: compiler/runtime decides. */
#pragma acc kernels copyout (a[0:N])
  {
    for (COUNTERTYPE i = 0; i < N; i++)
      a[i] = i * 2;
  }

  /* Parallelism dimensions: variable. */
  /* The clause arguments read elements of a[] that the previous region
     wrote, so their values are not compile-time constants.  The comment
     placed between the pragma and its block is a testsuite directive and
     must be kept verbatim.  */
#pragma acc kernels copyout (b[0:N]) \
  num_gangs (3 + a[3]) num_workers (5 + a[5]) vector_length (7 + a[7])
  /* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
  {
    for (COUNTERTYPE i = 0; i < N; i++)
      b[i] = i * 4;
  }

  /* Parallelism dimensions: literal. */
  /* As above, the comment placed between the pragma and its block is a
     testsuite directive and must be kept verbatim.  */
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) \
  num_gangs (3) num_workers (5) vector_length (7)
  /* { dg-prune-output "using vector_length \\(32\\), ignoring 7" } */
  {
    for (COUNTERTYPE ii = 0; ii < N; ii++)
      c[ii] = a[ii] + b[ii];
  }

  /* Check the contents of all three arrays element by element.  */
  for (COUNTERTYPE i = 0; i < N; i++)
    {
      if (a[i] != i * 2)
	abort ();
      if (b[i] != i * 4)
	abort ();
      if (c[i] != a[i] + b[i])
	abort ();
    }

  free (a);
  free (b);
  free (c);

  return 0;
}