gcc/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
Julian Brown c408512e1f amdgcn: Enable OpenACC worker partitioning for AMD GCN
gcc/
	* config/gcn/gcn.c (gcn_init_builtins): Override decls for
	BUILT_IN_GOACC_SINGLE_START, BUILT_IN_GOACC_SINGLE_COPY_START,
	BUILT_IN_GOACC_SINGLE_COPY_END and BUILT_IN_GOACC_BARRIER.
	(gcn_goacc_validate_dims): Turn on worker partitioning unconditionally.
	(gcn_fork_join): Update comment.
	* config/gcn/gcn.opt (flag_worker_partitioning): Remove.
	(macc_experimental_workers): Remove unused option.
	libgomp/
	* plugin/plugin-gcn.c (gcn_exec): Change default number of workers to
	16.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
	[acc_device_radeon]: Update.
	* testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
	[ACC_DEVICE_TYPE_radeon]: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
	[acc_device_radeon]: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
	[ACC_DEVICE_TYPE_radeon]: Likewise.
	* testsuite/libgomp.oacc-fortran/optional-reduction.f90: XFAIL for
	'openacc_radeon_accel_selected' and '-O0'.
	* testsuite/libgomp.oacc-fortran/reduction-7.f90: Likewise.

Co-Authored-By: Kwok Cheung Yeung <kcy@codesourcery.com>
Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>
2021-08-09 15:08:44 +02:00

83 lines
2.0 KiB
C

/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
aspects of that functionality. */
#include <stdio.h>
#include <openacc.h>
#include <gomp-constants.h>
#define NUM_WORKERS 16
#ifdef ACC_DEVICE_TYPE_radeon
/* AMD GCN uses the autovectorizer for the vector dimension: the use
of a function call in vector-partitioned code in this test is not
currently supported. */
#define NUM_VECTORS 1
#else
#define NUM_VECTORS 32
#endif
#define WIDTH 64
#define HEIGHT 32
#define WORK_ID(I,N) \
(acc_on_device (acc_device_not_host) \
? __builtin_goacc_parlevel_id (GOMP_DIM_WORKER) \
: (I % N))
#define VEC_ID(I,N) \
(acc_on_device (acc_device_not_host) \
? __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR) \
: (I % N))
#pragma acc routine worker
void __attribute__ ((noinline))
WorkVec (int *ptr, int w, int h, int nw, int nv)
{
#pragma acc loop worker
for (int i = 0; i < h; i++)
#pragma acc loop vector
for (int j = 0; j < w; j++)
ptr[i*w + j] = (WORK_ID (i, nw) << 8) | VEC_ID(j, nv);
}
int DoWorkVec (int nw)
{
int ary[HEIGHT][WIDTH];
int err = 0;
for (int ix = 0; ix != HEIGHT; ix++)
for (int jx = 0; jx != WIDTH; jx++)
ary[ix][jx] = 0xdeadbeef;
printf ("spawning %d ...", nw); fflush (stdout);
#pragma acc parallel num_workers(nw) vector_length (NUM_VECTORS) copy (ary)
/* { dg-warning "region contains vector partitioned code but is not vector partitioned" "" { target openacc_radeon_accel_selected } .-1 } */
{
WorkVec ((int *)ary, WIDTH, HEIGHT, nw, NUM_VECTORS);
}
for (int ix = 0; ix != HEIGHT; ix++)
for (int jx = 0; jx != WIDTH; jx++)
{
int exp = ((ix % nw) << 8) | (jx % NUM_VECTORS);
if (ary[ix][jx] != exp)
{
printf ("\nary[%d][%d] = %#x expected %#x", ix, jx,
ary[ix][jx], exp);
err = 1;
}
}
printf (err ? " failed\n" : " ok\n");
return err;
}
int main ()
{
int err = 0;
for (int W = 1; W <= NUM_WORKERS; W <<= 1)
err |= DoWorkVec (W);
return err;
}