gcc/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
Thomas Schwinge 29ddaf43f7 [OpenACC] Clarify sequencing of 'async' data copying vs. profiling events in 'libgomp.oacc-c-c++-common/acc_prof-{init,parallel}-1.c'
... as noticed with GCN offloading.

Fix-up for r271346 (commit 5fae049dc2)
"OpenACC Profiling Interface (incomplete)".

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Clarify
	sequencing of 'async' data copying vs. profiling events.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c:
	Likewise.
2021-07-27 11:16:25 +02:00

751 lines
28 KiB
C

/* Test dispatch of events to callbacks. */
#undef NDEBUG
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <acc_prof.h>
/* Use explicit 'copyin' clauses, to work around "'firstprivate'
optimizations", which will cause the value at the point of call to be used
(*before* any potential modifications done in callbacks), as opposed to its
address being taken, which then later gets dereferenced (*after* any
modifications done in callbacks). */
#define COPYIN(...) copyin(__VA_ARGS__)
/* See the 'DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT' reference in
libgomp.texi. */
#define DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT 0
/* Do we expect to see 'acc_ev_exit_data_start' and 'acc_ev_exit_data_end'
after a compute construct with an 'async' clause? */
#define ASYNC_EXIT_DATA 1
#define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__)
static acc_prof_reg reg;
static acc_prof_reg unreg;
static acc_prof_lookup_func lookup;
void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
{
DEBUG_printf ("%s\n", __FUNCTION__);
reg = reg_;
unreg = unreg_;
lookup = lookup_;
}
static int state = -1;
#define STATE_OP(state, op) \
do \
{ \
typeof (state) state_o = (state); \
(void) state_o; \
(state)op; \
DEBUG_printf("state: %d -> %d\n", state_o, (state)); \
} \
while (0)
static acc_device_t acc_device_type;
static int acc_device_num;
static int acc_async = acc_async_sync;
struct tool_info
{
acc_event_info event_info;
struct tool_info *nested;
};
struct tool_info *tool_info;
static void cb_device_init_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
assert (state == 1
|| state == 101);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested == NULL);
tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info->nested != NULL);
tool_info->nested->nested = NULL;
#else
assert (state == 0
|| state == 100);
STATE_OP (state, ++);
assert (tool_info == NULL);
tool_info = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info != NULL);
tool_info->nested = NULL;
#endif
assert (prof_info->event_type == acc_ev_device_init_start);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_default);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async_sync);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
assert (event_info->other_event.tool_info == NULL);
assert (api_info->device_api == acc_device_api_none);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
tool_info->nested->event_info.other_event.event_type = event_info->other_event.event_type;
event_info->other_event.tool_info = tool_info->nested;
#else
tool_info->event_info.other_event.event_type = event_info->other_event.event_type;
event_info->other_event.tool_info = tool_info;
#endif
}
static void cb_device_init_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
assert (state == 2
|| state == 102);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested != NULL);
assert (tool_info->nested->event_info.other_event.event_type == acc_ev_device_init_start);
#else
assert (state == 1
|| state == 101);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_device_init_start);
#endif
assert (prof_info->event_type == acc_ev_device_init_end);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_default);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async_sync);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
assert (event_info->other_event.tool_info == tool_info->nested);
#else
assert (event_info->other_event.tool_info == tool_info);
#endif
assert (api_info->device_api == acc_device_api_none);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
free (tool_info->nested);
tool_info->nested = NULL;
#else
free (tool_info);
tool_info = NULL;
#endif
}
static void cb_enter_data_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (state == 3
|| state == 103);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested == NULL);
tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info->nested != NULL);
tool_info->nested->nested = NULL;
assert (prof_info->event_type == acc_ev_enter_data_start);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
assert (event_info->other_event.tool_info == NULL);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
tool_info->nested->event_info.other_event.event_type = event_info->other_event.event_type;
event_info->other_event.tool_info = tool_info->nested;
}
static void cb_enter_data_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (state == 4
|| state == 104);
#if defined COPYIN
/* Conceptually, 'acc_ev_enter_data_end' marks the end of data copying,
before 'acc_ev_enqueue_launch_start' marks invoking the compute region.
That's the 'state_init = state;' intended to be captured in the compute
regions. */
/* In an 'async' setting, this event may be triggered before actual 'async'
data copying has completed. Given that 'state' appears in 'COPYIN', we
first have to synchronize (that is, let the 'async' 'COPYIN' read the
current 'state' value)... */
if (acc_async != acc_async_sync)
{
/* "We're not yet accounting for the fact that _OpenACC events may occur
during event processing_"; temporarily disable to avoid deadlock. */
unreg (acc_ev_none, NULL, acc_toggle_per_thread);
acc_wait (acc_async);
reg (acc_ev_none, NULL, acc_toggle_per_thread);
}
/* ... before modifying it in the following. */
#endif
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested != NULL);
assert (tool_info->nested->event_info.other_event.event_type == acc_ev_enter_data_start);
assert (prof_info->event_type == acc_ev_enter_data_end);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
assert (event_info->other_event.tool_info == tool_info->nested);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
free (tool_info->nested);
tool_info->nested = NULL;
}
static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (state == 7
#if ASYNC_EXIT_DATA
|| state == 107
#endif
);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested == NULL);
tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info->nested != NULL);
tool_info->nested->nested = NULL;
assert (prof_info->event_type == acc_ev_exit_data_start);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
assert (event_info->other_event.tool_info == NULL);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
tool_info->nested->event_info.other_event.event_type = event_info->other_event.event_type;
event_info->other_event.tool_info = tool_info->nested;
}
static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (state == 8
#if ASYNC_EXIT_DATA
|| state == 108
#endif
);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested != NULL);
assert (tool_info->nested->event_info.other_event.event_type == acc_ev_exit_data_start);
assert (prof_info->event_type == acc_ev_exit_data_end);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
assert (event_info->other_event.tool_info == tool_info->nested);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
free (tool_info->nested);
tool_info->nested = NULL;
}
static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
assert (state == 0
|| state == 100);
if (state == 100)
{
/* Compensate for the missing 'acc_ev_device_init_start' and
'acc_ev_device_init_end'. */
state += 2;
}
#else
if (state == 100)
{
/* Compensate for the missing 'acc_ev_device_init_start' and
'acc_ev_device_init_end'. */
state += 2;
}
assert (state == 2
|| state == 102);
#endif
STATE_OP (state, ++);
assert (tool_info == NULL);
tool_info = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info != NULL);
tool_info->nested = NULL;
assert (prof_info->event_type == acc_ev_compute_construct_start);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == /* TODO acc_async */ acc_async_sync);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 0);
assert (event_info->other_event.tool_info == NULL);
assert (api_info->device_api == acc_device_api_none);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
tool_info->event_info.other_event.event_type = event_info->other_event.event_type;
event_info->other_event.tool_info = tool_info;
if (acc_device_type == acc_device_host)
{
/* Compensate for the missing 'acc_ev_enter_data_start'. */
state += 1;
}
}
static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
if (acc_device_type == acc_device_host)
{
/* Compensate for the missing 'acc_ev_enter_data_end'. */
state += 1;
/* Compensate for the missing 'acc_ev_enqueue_launch_start' and
'acc_ev_enqueue_launch_end'. */
state += 2;
/* Compensate for the missing 'acc_ev_exit_data_start' and
'acc_ev_exit_data_end'. */
state += 2;
}
#if !ASYNC_EXIT_DATA
else if (acc_async != acc_async_sync)
{
/* Compensate for the missing 'acc_ev_exit_data_start' and
'acc_ev_exit_data_end'. */
state += 2;
}
#endif
assert (state == 9
|| state == 109);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested == NULL);
assert (prof_info->event_type == acc_ev_compute_construct_end);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
if (acc_device_type == acc_device_host)
assert (prof_info->async == acc_async_sync);
else
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 0);
assert (event_info->other_event.tool_info == tool_info);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
free (tool_info);
tool_info = NULL;
}
static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (acc_device_type != acc_device_host);
assert (state == 5
|| state == 105);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested == NULL);
tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info->nested != NULL);
tool_info->nested->nested = NULL;
assert (prof_info->event_type == acc_ev_enqueue_launch_start);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->launch_event.event_type == prof_info->event_type);
assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES);
assert (event_info->launch_event.parent_construct == acc_construct_parallel);
assert (event_info->launch_event.implicit == 1);
assert (event_info->launch_event.tool_info == NULL);
assert (event_info->launch_event.kernel_name != NULL);
{
const char *s = strstr (event_info->launch_event.kernel_name, "main");
assert (s != NULL);
s = strstr (s, "omp_fn");
assert (s != NULL);
}
assert (event_info->launch_event.num_gangs >= 1);
assert (event_info->launch_event.num_workers >= 1);
assert (event_info->launch_event.vector_length >= 1);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
tool_info->nested->event_info.launch_event.event_type = event_info->launch_event.event_type;
tool_info->nested->event_info.launch_event.kernel_name = strdup (event_info->launch_event.kernel_name);
tool_info->nested->event_info.launch_event.num_gangs = event_info->launch_event.num_gangs;
tool_info->nested->event_info.launch_event.num_workers = event_info->launch_event.num_workers;
tool_info->nested->event_info.launch_event.vector_length = event_info->launch_event.vector_length;
event_info->other_event.tool_info = tool_info->nested;
}
static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (acc_device_type != acc_device_host);
assert (state == 6
|| state == 106);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested != NULL);
assert (tool_info->nested->event_info.launch_event.event_type == acc_ev_enqueue_launch_start);
assert (tool_info->nested->event_info.launch_event.kernel_name != NULL);
assert (tool_info->nested->event_info.launch_event.num_gangs >= 1);
assert (tool_info->nested->event_info.launch_event.num_workers >= 1);
assert (tool_info->nested->event_info.launch_event.vector_length >= 1);
assert (prof_info->event_type == acc_ev_enqueue_launch_end);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->launch_event.event_type == prof_info->event_type);
assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES);
assert (event_info->launch_event.parent_construct == acc_construct_parallel);
assert (event_info->launch_event.implicit == 1);
assert (event_info->launch_event.tool_info == tool_info->nested);
assert (event_info->launch_event.kernel_name != NULL);
assert (strcmp (event_info->launch_event.kernel_name, tool_info->nested->event_info.launch_event.kernel_name) == 0);
assert (event_info->launch_event.num_gangs == tool_info->nested->event_info.launch_event.num_gangs);
assert (event_info->launch_event.num_workers == tool_info->nested->event_info.launch_event.num_workers);
assert (event_info->launch_event.vector_length == tool_info->nested->event_info.launch_event.vector_length);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
free ((void *) tool_info->nested->event_info.launch_event.kernel_name);
free (tool_info->nested);
tool_info->nested = NULL;
}
int main()
{
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
STATE_OP (state, = 0);
reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
reg (acc_ev_enter_data_start, cb_enter_data_start, acc_reg);
reg (acc_ev_enter_data_end, cb_enter_data_end, acc_reg);
reg (acc_ev_exit_data_start, cb_exit_data_start, acc_reg);
reg (acc_ev_exit_data_end, cb_exit_data_end, acc_reg);
reg (acc_ev_compute_construct_start, cb_compute_construct_start, acc_reg);
reg (acc_ev_compute_construct_end, cb_compute_construct_end, acc_reg);
reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg);
reg (acc_ev_enqueue_launch_end, cb_enqueue_launch_end, acc_reg);
assert (state == 0);
acc_device_type = acc_get_device_type ();
acc_device_num = acc_get_device_num (acc_device_type);
assert (state == 0);
{
int state_init;
#pragma acc parallel COPYIN(state) copyout(state_init)
{
asm volatile ("" : : : "memory"); // TODO PR90488
state_init = state;
}
assert (state_init == 4);
}
assert (state == 10);
STATE_OP (state, = 100);
{
int state_init;
acc_async = 12;
#pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init)
{
asm volatile ("" : : : "memory"); // TODO PR90488
state_init = state;
}
acc_async = acc_async_sync;
#pragma acc wait
assert (state_init == 104);
}
assert (state == 110);
return 0;
}