6687d13a87
2020-01-17 Andrew Stubbs <ams@codesourcery.com> libgomp/ * config/accel/openacc.f90 (openacc_kinds): Rename acc_device_gcn to acc_device_radeon. (openacc): Likewise. * openacc.f90 (openacc_kinds): Likewise. (openacc): Likewise. * openacc.h (acc_device_t): Likewise. * openacc_lib.h: Likewise. * testsuite/lib/libgomp.exp (check_effective_target_openacc_amdgcn_accel_present): Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c (cb_compute_construct_end): Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c (cb_enqueue_launch_start): Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c (cb_enter_data_end): Likewise. (cb_exit_data_start): Likewise. (cb_exit_data_end): Likewise. (cb_compute_construct_end): Likewise. (cb_enqueue_launch_start): Likewise. (cb_enqueue_launch_end): Likewise. * testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c (main): Likewise.
234 lines
6.6 KiB
C
234 lines
6.6 KiB
C
/* Test dispatch of events to callbacks. */
|
|
|
|
#undef NDEBUG
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include <acc_prof.h>
|
|
|
|
|
|
/* Use explicit 'copyin' clauses, to work around "'firstprivate'
|
|
optimizations", which will cause the value at the point of call to be used
|
|
(*before* any potential modifications done in callbacks), as opposed to its
|
|
address being taken, which then later gets dereferenced (*after* any
|
|
modifications done in callbacks). */
|
|
#define COPYIN(...) copyin(__VA_ARGS__)
|
|
|
|
|
|
/* See the 'DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT' reference in
|
|
'libgomp.texi'. */
|
|
#define DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT 0
|
|
|
|
|
|
#define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__)
|
|
|
|
|
|
volatile // TODO PR90488
|
|
static int state = -1;
|
|
|
|
#define STATE_OP(state, op) \
|
|
do \
|
|
{ \
|
|
typeof (state) state_o = (state); \
|
|
(void) state_o; \
|
|
(state)op; \
|
|
DEBUG_printf("state: %d -> %d\n", state_o, (state)); \
|
|
} \
|
|
while (0)
|
|
|
|
|
|
static acc_device_t acc_device_type;
|
|
static int acc_device_num;
|
|
static int num_gangs, num_workers, vector_length;
|
|
|
|
|
|
static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
|
|
{
|
|
DEBUG_printf ("%s\n", __FUNCTION__);
|
|
|
|
assert (acc_device_type != acc_device_host);
|
|
|
|
assert (state == 0);
|
|
STATE_OP (state, = 1);
|
|
|
|
assert (prof_info->event_type == acc_ev_enqueue_launch_start);
|
|
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
|
|
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
|
|
assert (prof_info->device_type == acc_device_type);
|
|
assert (prof_info->device_number == acc_device_num);
|
|
assert (prof_info->thread_id == -1);
|
|
assert (prof_info->async == acc_async_sync);
|
|
assert (prof_info->async_queue == prof_info->async);
|
|
assert (prof_info->src_file == NULL);
|
|
assert (prof_info->func_name == NULL);
|
|
assert (prof_info->line_no == -1);
|
|
assert (prof_info->end_line_no == -1);
|
|
assert (prof_info->func_line_no == -1);
|
|
assert (prof_info->func_end_line_no == -1);
|
|
|
|
assert (event_info->launch_event.event_type == prof_info->event_type);
|
|
assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES);
|
|
assert (event_info->launch_event.parent_construct == acc_construct_parallel);
|
|
assert (event_info->launch_event.implicit == 1);
|
|
assert (event_info->launch_event.tool_info == NULL);
|
|
assert (event_info->launch_event.kernel_name != NULL);
|
|
{
|
|
const char *s = strstr (event_info->launch_event.kernel_name, "main");
|
|
assert (s != NULL);
|
|
s = strstr (s, "omp_fn");
|
|
assert (s != NULL);
|
|
}
|
|
if (num_gangs < 1)
|
|
assert (event_info->launch_event.num_gangs >= 1);
|
|
else
|
|
{
|
|
#ifdef __OPTIMIZE__
|
|
assert (event_info->launch_event.num_gangs == num_gangs);
|
|
#else
|
|
/* No parallelized OpenACC 'kernels' constructs. Unparallelized OpenACC
|
|
'kernels' constructs must get launched as 1 x 1 x 1 GPU kernels. */
|
|
assert (event_info->launch_event.num_gangs == 1);
|
|
#endif
|
|
}
|
|
if (num_workers < 1)
|
|
assert (event_info->launch_event.num_workers >= 1);
|
|
else
|
|
{
|
|
#ifdef __OPTIMIZE__
|
|
assert (event_info->launch_event.num_workers == num_workers);
|
|
#else
|
|
/* See 'num_gangs' above. */
|
|
assert (event_info->launch_event.num_workers == 1);
|
|
#endif
|
|
}
|
|
if (vector_length < 1)
|
|
assert (event_info->launch_event.vector_length >= 1);
|
|
else if (acc_device_type == acc_device_nvidia) /* ... is special. */
|
|
assert (event_info->launch_event.vector_length == 32);
|
|
else if (acc_device_type == acc_device_radeon) /* ...and so is this. */
|
|
assert (event_info->launch_event.vector_length == 64);
|
|
else
|
|
{
|
|
#ifdef __OPTIMIZE__
|
|
assert (event_info->launch_event.vector_length == vector_length);
|
|
#else
|
|
/* See 'num_gangs' above. */
|
|
assert (event_info->launch_event.vector_length == 1);
|
|
#endif
|
|
}
|
|
|
|
if (acc_device_type == acc_device_host)
|
|
assert (api_info->device_api == acc_device_api_none);
|
|
else if (acc_device_type == acc_device_radeon)
|
|
assert (api_info->device_api == acc_device_api_other);
|
|
else
|
|
assert (api_info->device_api == acc_device_api_cuda);
|
|
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
|
|
assert (api_info->device_type == prof_info->device_type);
|
|
assert (api_info->vendor == -1);
|
|
assert (api_info->device_handle == NULL);
|
|
assert (api_info->context_handle == NULL);
|
|
assert (api_info->async_handle == NULL);
|
|
}
|
|
|
|
|
|
static acc_prof_reg reg;
|
|
static acc_prof_reg unreg;
|
|
static acc_prof_lookup_func lookup;
|
|
void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
|
|
{
|
|
DEBUG_printf ("%s\n", __FUNCTION__);
|
|
|
|
reg = reg_;
|
|
unreg = unreg_;
|
|
lookup = lookup_;
|
|
}
|
|
|
|
|
|
int main()
|
|
{
|
|
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
|
|
|
|
STATE_OP (state, = 0);
|
|
reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg);
|
|
assert (state == 0);
|
|
|
|
acc_device_type = acc_get_device_type ();
|
|
acc_device_num = acc_get_device_num (acc_device_type);
|
|
assert (state == 0);
|
|
|
|
/* Parallelism dimensions: compiler/runtime decides. */
|
|
STATE_OP (state, = 0);
|
|
num_gangs = num_workers = vector_length = 0;
|
|
{
|
|
#define N 100
|
|
int x[N];
|
|
#pragma acc kernels
|
|
{
|
|
for (int i = 0; i < N; ++i)
|
|
x[i] = i * i;
|
|
}
|
|
if (acc_device_type == acc_device_host)
|
|
assert (state == 0); /* No 'acc_ev_enqueue_launch_start'. */
|
|
else
|
|
assert (state == 1);
|
|
for (int i = 0; i < N; ++i)
|
|
if (x[i] != i * i)
|
|
__builtin_abort ();
|
|
#undef N
|
|
}
|
|
|
|
/* Parallelism dimensions: literal. */
|
|
STATE_OP (state, = 0);
|
|
num_gangs = 30;
|
|
num_workers = 3;
|
|
vector_length = 5;
|
|
{
|
|
#define N 100
|
|
int x[N];
|
|
#pragma acc kernels \
|
|
num_gangs (30) num_workers (3) vector_length (5)
|
|
/* { dg-prune-output "using vector_length \\(32\\), ignoring 5" } */
|
|
{
|
|
for (int i = 0; i < N; ++i)
|
|
x[i] = i * i;
|
|
}
|
|
if (acc_device_type == acc_device_host)
|
|
assert (state == 0); /* No 'acc_ev_enqueue_launch_start'. */
|
|
else
|
|
assert (state == 1);
|
|
for (int i = 0; i < N; ++i)
|
|
if (x[i] != i * i)
|
|
__builtin_abort ();
|
|
#undef N
|
|
}
|
|
|
|
/* Parallelism dimensions: variable. */
|
|
STATE_OP (state, = 0);
|
|
num_gangs = 22;
|
|
num_workers = 5;
|
|
vector_length = 7;
|
|
{
|
|
#define N 100
|
|
int x[N];
|
|
#pragma acc kernels \
|
|
num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length)
|
|
/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
|
|
{
|
|
for (int i = 0; i < N; ++i)
|
|
x[i] = i * i;
|
|
}
|
|
if (acc_device_type == acc_device_host)
|
|
assert (state == 0); /* No 'acc_ev_enqueue_launch_start'. */
|
|
else
|
|
assert (state == 1);
|
|
for (int i = 0; i < N; ++i)
|
|
if (x[i] != i * i)
|
|
__builtin_abort ();
|
|
#undef N
|
|
}
|
|
|
|
return 0;
|
|
}
|