5fae049dc2
libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. 
* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
354 lines
9.9 KiB
C
354 lines
9.9 KiB
C
/* Test dispatch of events to callbacks. */
|
|
|
|
#undef NDEBUG
|
|
#include <assert.h>
|
|
|
|
#include <acc_prof.h>
|
|
|
|
|
|
/* Use explicit 'copyin' clauses, to work around "'firstprivate'
   optimizations", which will cause the value at the point of call to be used
   (*before* any potential modifications done in callbacks), as opposed to its
   address being taken, which then later gets dereferenced (*after* any
   modifications done in callbacks).

   Every '#pragma acc parallel' in this file names 'state' through this
   macro, so the workaround can be changed in one place.  */
|
|
#define COPYIN(...) copyin(__VA_ARGS__)
|
|
|
|
|
|
/* Debug tracing hook.  The '//' comment is removed before the macro is
   defined, so by default this expands to nothing; delete the '//' to get
   '__builtin_printf' tracing.  */
#define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__)


/* Test state machine value.  'main' sets it to a new base value at the start
   of each phase, and every callback that runs increments it by one, so its
   value encodes which callbacks have run and in which order.  */
static int state = -1;

/* Apply OP (for example '++' or '= 10') to STATE, and trace the transition
   via DEBUG_printf.  The previous value is captured in 'state_o' only for the
   trace; the '(void)' cast silences the unused-variable warning when tracing
   is compiled out.

   '__typeof__' is used instead of 'typeof' so that the macro also compiles in
   strict ISO modes (such as '-std=c11'), where 'typeof' is not a keyword.  */
#define STATE_OP(state, op) \
  do \
    { \
      __typeof__ (state) state_o = (state); \
      (void) state_o; \
      (state)op; \
      DEBUG_printf("state: %d -> %d\n", state_o, (state)); \
    } \
  while (0)
|
|
|
|
|
|
/* First callback that 'main' registers for 'acc_ev_compute_construct_start'.

   Checks that 'state' holds one of the values at which this callback is
   expected to run, then advances 'state' by one.  The three parameters are
   unused.  */
static void
cb_compute_construct_start_1 (acc_prof_info *prof_info,
                              acc_event_info *event_info,
                              acc_api_info *api_info)
{
  int at_expected_state;

  DEBUG_printf ("%s\n", __FUNCTION__);

  switch (state)
    {
    case 0:
    case 10:
    case 30:
    case 41:
    case 51:
    case 91:
    case 101:
    case 151:
      at_expected_state = 1;
      break;
    default:
      at_expected_state = 0;
      break;
    }
  assert (at_expected_state);

  STATE_OP (state, ++);
}
|
|
|
|
/* Second callback that 'main' registers for 'acc_ev_compute_construct_start'.

   Checks that 'state' holds one of the values at which this callback is
   expected to run, then advances 'state' by one.  The three parameters are
   unused.  */
static void
cb_compute_construct_start_2 (acc_prof_info *prof_info,
                              acc_event_info *event_info,
                              acc_api_info *api_info)
{
  int at_expected_state;

  DEBUG_printf ("%s\n", __FUNCTION__);

  switch (state)
    {
    case 1:
    case 11:
    case 40:
    case 50:
    case 90:
    case 100:
    case 150:
      at_expected_state = 1;
      break;
    default:
      at_expected_state = 0;
      break;
    }
  assert (at_expected_state);

  STATE_OP (state, ++);
}
|
|
|
|
/* First callback that 'main' registers for 'acc_ev_compute_construct_end'.

   Checks that 'state' holds one of the values at which this callback is
   expected to run, then advances 'state' by one.  The three parameters are
   unused.  */
static void
cb_compute_construct_end_1 (acc_prof_info *prof_info,
                            acc_event_info *event_info,
                            acc_api_info *api_info)
{
  int at_expected_state;

  DEBUG_printf ("%s\n", __FUNCTION__);

  switch (state)
    {
    case 14:
    case 21:
    case 32:
    case 42:
    case 80:
    case 103:
    case 152:
      at_expected_state = 1;
      break;
    default:
      at_expected_state = 0;
      break;
    }
  assert (at_expected_state);

  STATE_OP (state, ++);
}
|
|
|
|
/* Second callback that 'main' registers for 'acc_ev_compute_construct_end'.

   Checks that 'state' holds one of the values at which this callback is
   expected to run, then advances 'state' by one.  The three parameters are
   unused.  */
static void
cb_compute_construct_end_2 (acc_prof_info *prof_info,
                            acc_event_info *event_info,
                            acc_api_info *api_info)
{
  int at_expected_state;

  DEBUG_printf ("%s\n", __FUNCTION__);

  switch (state)
    {
    case 13:
    case 43:
    case 102:
    case 154:
      at_expected_state = 1;
      break;
    default:
      at_expected_state = 0;
      break;
    }
  assert (at_expected_state);

  STATE_OP (state, ++);
}
|
|
|
|
/* Third callback that 'main' registers for 'acc_ev_compute_construct_end'.

   Checks that 'state' holds one of the values at which this callback is
   expected to run, then advances 'state' by one.  The three parameters are
   unused.  */
static void
cb_compute_construct_end_3 (acc_prof_info *prof_info,
                            acc_event_info *event_info,
                            acc_api_info *api_info)
{
  int at_expected_state;

  DEBUG_printf ("%s\n", __FUNCTION__);

  switch (state)
    {
    case 12:
    case 20:
    case 31:
    case 44:
    case 81:
    case 104:
    case 153:
      at_expected_state = 1;
      break;
    default:
      at_expected_state = 0;
      break;
    }
  assert (at_expected_state);

  STATE_OP (state, ++);
}
|
|
|
|
|
|
static acc_prof_reg reg;
|
|
static acc_prof_reg unreg;
|
|
static acc_prof_lookup_func lookup;
|
|
void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
|
|
{
|
|
DEBUG_printf ("%s\n", __FUNCTION__);
|
|
|
|
reg = reg_;
|
|
unreg = unreg_;
|
|
lookup = lookup_;
|
|
}
|
|
|
|
|
|
int main()
|
|
{
|
|
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
|
|
|
|
STATE_OP (state, = 0);
|
|
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
|
|
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
|
|
reg (acc_ev_compute_construct_start, cb_compute_construct_start_2, acc_reg);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 2);
|
|
}
|
|
assert (state == 2);
|
|
|
|
STATE_OP (state, = 10);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_1, acc_reg);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_2, acc_reg);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_3, acc_reg);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_2, acc_reg);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_3, acc_reg);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_3, acc_reg);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 12);
|
|
}
|
|
assert (state == 15);
|
|
|
|
STATE_OP (state, = 20);
|
|
unreg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_toggle);
|
|
unreg (acc_ev_compute_construct_start, cb_compute_construct_start_2, acc_toggle);
|
|
unreg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
|
|
unreg (acc_ev_compute_construct_start, cb_compute_construct_start_2, acc_reg);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_1, acc_toggle);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_2, acc_toggle);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_3, acc_toggle);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_2, acc_reg);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_2, acc_reg);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_2, acc_toggle);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_2, acc_toggle);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 20);
|
|
}
|
|
assert (state == 20);
|
|
|
|
STATE_OP (state, = 30);
|
|
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_toggle);
|
|
reg (acc_ev_compute_construct_start, cb_compute_construct_start_2, acc_toggle);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_1, acc_toggle);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_2, acc_toggle);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_3, acc_toggle);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 31);
|
|
}
|
|
assert (state == 33);
|
|
|
|
STATE_OP (state, = 40);
|
|
reg (acc_ev_compute_construct_start, cb_compute_construct_start_2, acc_reg);
|
|
unreg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
|
|
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_3, acc_reg);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_3, acc_reg);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_2, acc_reg);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_1, acc_reg);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_1, acc_reg);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 42);
|
|
}
|
|
assert (state == 45);
|
|
|
|
STATE_OP (state, = 50);
|
|
unreg (acc_ev_compute_construct_end, NULL, acc_toggle);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 52);
|
|
}
|
|
assert (state == 52);
|
|
|
|
STATE_OP (state, = 60);
|
|
unreg (acc_ev_compute_construct_end, NULL, acc_toggle);
|
|
unreg (/* TODO */ (acc_event_t) 0, NULL, acc_toggle_per_thread);
|
|
unreg (/* TODO */ (acc_event_t) 0, NULL, acc_toggle_per_thread);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 60);
|
|
}
|
|
assert (state == 60);
|
|
|
|
STATE_OP (state, = 70);
|
|
unreg (acc_ev_compute_construct_start, NULL, acc_toggle);
|
|
reg (/* TODO */ (acc_event_t) 0, NULL, acc_toggle_per_thread);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 70);
|
|
}
|
|
assert (state == 70);
|
|
|
|
STATE_OP (state, = 80);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_2, acc_reg);
|
|
reg (acc_ev_compute_construct_end, NULL, acc_toggle);
|
|
reg (/* TODO */ (acc_event_t) 0, NULL, acc_toggle_per_thread);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 80);
|
|
}
|
|
assert (state == 82);
|
|
|
|
STATE_OP (state, = 90);
|
|
reg (acc_ev_compute_construct_start, NULL, acc_toggle);
|
|
unreg (acc_ev_compute_construct_end, NULL, acc_toggle);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_2, acc_reg);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 92);
|
|
}
|
|
assert (state == 92);
|
|
|
|
STATE_OP (state, = 100);
|
|
reg (acc_ev_compute_construct_end, NULL, acc_toggle);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 102);
|
|
}
|
|
assert (state == 105);
|
|
|
|
STATE_OP (state, = 110);
|
|
unreg (/* TODO */ (acc_event_t) 0, NULL, acc_toggle);
|
|
unreg (/* TODO */ (acc_event_t) 0, NULL, acc_toggle);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 110);
|
|
}
|
|
assert (state == 110);
|
|
|
|
STATE_OP (state, = 120);
|
|
unreg (/* TODO */ (acc_event_t) 0, NULL, acc_toggle_per_thread);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 120);
|
|
}
|
|
assert (state == 120);
|
|
|
|
STATE_OP (state, = 130);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_3, acc_reg);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_3, acc_reg);
|
|
reg (/* TODO */ (acc_event_t) 0, NULL, acc_toggle);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 130);
|
|
}
|
|
assert (state == 130);
|
|
|
|
STATE_OP (state, = 140);
|
|
unreg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
|
|
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
|
|
unreg (acc_ev_compute_construct_end, cb_compute_construct_end_1, acc_reg);
|
|
reg (acc_ev_compute_construct_end, cb_compute_construct_end_1, acc_reg);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 140);
|
|
}
|
|
assert (state == 140);
|
|
|
|
STATE_OP (state, = 150);
|
|
reg (/* TODO */ (acc_event_t) 0, NULL, acc_toggle_per_thread);
|
|
{
|
|
int state_init;
|
|
#pragma acc parallel COPYIN(state) copyout(state_init)
|
|
{
|
|
state_init = state;
|
|
}
|
|
assert (state_init == 152);
|
|
}
|
|
assert (state == 155);
|
|
|
|
return 0;
|
|
}
|