751 lines
28 KiB
C
Raw Normal View History

OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
/* Test dispatch of events to callbacks. */
#undef NDEBUG
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <acc_prof.h>
/* Use explicit 'copyin' clauses, to work around "'firstprivate'
optimizations", which will cause the value at the point of call to be used
(*before* any potential modifications done in callbacks), as opposed to its
address being taken, which then later gets dereferenced (*after* any
modifications done in callbacks). */
#define COPYIN(...) copyin(__VA_ARGS__)
/* See the 'DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT' reference in
libgomp.texi. */
#define DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT 0
/* Do we expect to see 'acc_ev_exit_data_start' and 'acc_ev_exit_data_end'
after a compute construct with an 'async' clause? */
#define ASYNC_EXIT_DATA 1
#define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__)
static acc_prof_reg reg;
static acc_prof_reg unreg;
static acc_prof_lookup_func lookup;
void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
{
DEBUG_printf ("%s\n", __FUNCTION__);
reg = reg_;
unreg = unreg_;
lookup = lookup_;
}
OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
static int state = -1;
#define STATE_OP(state, op) \
do \
{ \
typeof (state) state_o = (state); \
(void) state_o; \
(state)op; \
DEBUG_printf("state: %d -> %d\n", state_o, (state)); \
} \
while (0)
static acc_device_t acc_device_type;
static int acc_device_num;
static int acc_async = acc_async_sync;
OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
struct tool_info
{
acc_event_info event_info;
struct tool_info *nested;
};
struct tool_info *tool_info;
static void cb_device_init_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
assert (state == 1
|| state == 101);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested == NULL);
tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info->nested != NULL);
tool_info->nested->nested = NULL;
#else
assert (state == 0
|| state == 100);
STATE_OP (state, ++);
assert (tool_info == NULL);
tool_info = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info != NULL);
tool_info->nested = NULL;
#endif
assert (prof_info->event_type == acc_ev_device_init_start);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_default);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async_sync);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
assert (event_info->other_event.tool_info == NULL);
assert (api_info->device_api == acc_device_api_none);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
tool_info->nested->event_info.other_event.event_type = event_info->other_event.event_type;
event_info->other_event.tool_info = tool_info->nested;
#else
tool_info->event_info.other_event.event_type = event_info->other_event.event_type;
event_info->other_event.tool_info = tool_info;
#endif
}
static void cb_device_init_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
assert (state == 2
|| state == 102);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested != NULL);
assert (tool_info->nested->event_info.other_event.event_type == acc_ev_device_init_start);
#else
assert (state == 1
|| state == 101);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_device_init_start);
#endif
assert (prof_info->event_type == acc_ev_device_init_end);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_default);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async_sync);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
assert (event_info->other_event.tool_info == tool_info->nested);
#else
assert (event_info->other_event.tool_info == tool_info);
#endif
assert (api_info->device_api == acc_device_api_none);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
free (tool_info->nested);
tool_info->nested = NULL;
#else
free (tool_info);
tool_info = NULL;
#endif
}
static void cb_enter_data_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (state == 3
|| state == 103);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested == NULL);
tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info->nested != NULL);
tool_info->nested->nested = NULL;
assert (prof_info->event_type == acc_ev_enter_data_start);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
assert (event_info->other_event.tool_info == NULL);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
tool_info->nested->event_info.other_event.event_type = event_info->other_event.event_type;
event_info->other_event.tool_info = tool_info->nested;
}
static void cb_enter_data_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (state == 4
|| state == 104);
#if defined COPYIN
/* Conceptually, 'acc_ev_enter_data_end' marks the end of data copying,
before 'acc_ev_enqueue_launch_start' marks invoking the compute region.
That's the 'state_init = state;' intended to be captured in the compute
regions. */
/* In an 'async' setting, this event may be triggered before actual 'async'
data copying has completed. Given that 'state' appears in 'COPYIN', we
first have to synchronize (that is, let the 'async' 'COPYIN' read the
current 'state' value)... */
if (acc_async != acc_async_sync)
{
/* "We're not yet accounting for the fact that _OpenACC events may occur
during event processing_"; temporarily disable to avoid deadlock. */
unreg (acc_ev_none, NULL, acc_toggle_per_thread);
acc_wait (acc_async);
reg (acc_ev_none, NULL, acc_toggle_per_thread);
}
/* ... before modifying it in the following. */
#endif
OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested != NULL);
assert (tool_info->nested->event_info.other_event.event_type == acc_ev_enter_data_start);
assert (prof_info->event_type == acc_ev_enter_data_end);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
assert (event_info->other_event.tool_info == tool_info->nested);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
free (tool_info->nested);
tool_info->nested = NULL;
}
static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (state == 7
#if ASYNC_EXIT_DATA
|| state == 107
#endif
);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested == NULL);
tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info->nested != NULL);
tool_info->nested->nested = NULL;
assert (prof_info->event_type == acc_ev_exit_data_start);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
assert (event_info->other_event.tool_info == NULL);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
tool_info->nested->event_info.other_event.event_type = event_info->other_event.event_type;
event_info->other_event.tool_info = tool_info->nested;
}
static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (state == 8
#if ASYNC_EXIT_DATA
|| state == 108
#endif
);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested != NULL);
assert (tool_info->nested->event_info.other_event.event_type == acc_ev_exit_data_start);
assert (prof_info->event_type == acc_ev_exit_data_end);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 1);
assert (event_info->other_event.tool_info == tool_info->nested);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
free (tool_info->nested);
tool_info->nested = NULL;
}
static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
assert (state == 0
|| state == 100);
if (state == 100)
{
/* Compensate for the missing 'acc_ev_device_init_start' and
'acc_ev_device_init_end'. */
state += 2;
}
#else
if (state == 100)
{
/* Compensate for the missing 'acc_ev_device_init_start' and
'acc_ev_device_init_end'. */
state += 2;
}
assert (state == 2
|| state == 102);
#endif
STATE_OP (state, ++);
assert (tool_info == NULL);
tool_info = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info != NULL);
tool_info->nested = NULL;
assert (prof_info->event_type == acc_ev_compute_construct_start);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == /* TODO acc_async */ acc_async_sync);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 0);
assert (event_info->other_event.tool_info == NULL);
assert (api_info->device_api == acc_device_api_none);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
tool_info->event_info.other_event.event_type = event_info->other_event.event_type;
event_info->other_event.tool_info = tool_info;
if (acc_device_type == acc_device_host)
{
/* Compensate for the missing 'acc_ev_enter_data_start'. */
state += 1;
}
}
static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
if (acc_device_type == acc_device_host)
{
/* Compensate for the missing 'acc_ev_enter_data_end'. */
state += 1;
/* Compensate for the missing 'acc_ev_enqueue_launch_start' and
'acc_ev_enqueue_launch_end'. */
state += 2;
/* Compensate for the missing 'acc_ev_exit_data_start' and
'acc_ev_exit_data_end'. */
state += 2;
}
#if !ASYNC_EXIT_DATA
else if (acc_async != acc_async_sync)
{
/* Compensate for the missing 'acc_ev_exit_data_start' and
'acc_ev_exit_data_end'. */
state += 2;
}
#endif
assert (state == 9
|| state == 109);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested == NULL);
assert (prof_info->event_type == acc_ev_compute_construct_end);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
if (acc_device_type == acc_device_host)
assert (prof_info->async == acc_async_sync);
else
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->other_event.event_type == prof_info->event_type);
assert (event_info->other_event.valid_bytes == _ACC_OTHER_EVENT_INFO_VALID_BYTES);
assert (event_info->other_event.parent_construct == acc_construct_parallel);
assert (event_info->other_event.implicit == 0);
assert (event_info->other_event.tool_info == tool_info);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
free (tool_info);
tool_info = NULL;
}
static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (acc_device_type != acc_device_host);
assert (state == 5
|| state == 105);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested == NULL);
tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
assert (tool_info->nested != NULL);
tool_info->nested->nested = NULL;
assert (prof_info->event_type == acc_ev_enqueue_launch_start);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->launch_event.event_type == prof_info->event_type);
assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES);
assert (event_info->launch_event.parent_construct == acc_construct_parallel);
assert (event_info->launch_event.implicit == 1);
assert (event_info->launch_event.tool_info == NULL);
assert (event_info->launch_event.kernel_name != NULL);
{
const char *s = strstr (event_info->launch_event.kernel_name, "main");
assert (s != NULL);
s = strstr (s, "omp_fn");
assert (s != NULL);
}
assert (event_info->launch_event.num_gangs >= 1);
assert (event_info->launch_event.num_workers >= 1);
assert (event_info->launch_event.vector_length >= 1);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
tool_info->nested->event_info.launch_event.event_type = event_info->launch_event.event_type;
tool_info->nested->event_info.launch_event.kernel_name = strdup (event_info->launch_event.kernel_name);
tool_info->nested->event_info.launch_event.num_gangs = event_info->launch_event.num_gangs;
tool_info->nested->event_info.launch_event.num_workers = event_info->launch_event.num_workers;
tool_info->nested->event_info.launch_event.vector_length = event_info->launch_event.vector_length;
event_info->other_event.tool_info = tool_info->nested;
}
static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
{
DEBUG_printf ("%s\n", __FUNCTION__);
assert (acc_device_type != acc_device_host);
assert (state == 6
|| state == 106);
STATE_OP (state, ++);
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested != NULL);
assert (tool_info->nested->event_info.launch_event.event_type == acc_ev_enqueue_launch_start);
assert (tool_info->nested->event_info.launch_event.kernel_name != NULL);
assert (tool_info->nested->event_info.launch_event.num_gangs >= 1);
assert (tool_info->nested->event_info.launch_event.num_workers >= 1);
assert (tool_info->nested->event_info.launch_event.vector_length >= 1);
assert (prof_info->event_type == acc_ev_enqueue_launch_end);
assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
assert (prof_info->version == _ACC_PROF_INFO_VERSION);
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
assert (prof_info->async == acc_async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
assert (prof_info->line_no == -1);
assert (prof_info->end_line_no == -1);
assert (prof_info->func_line_no == -1);
assert (prof_info->func_end_line_no == -1);
assert (event_info->launch_event.event_type == prof_info->event_type);
assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES);
assert (event_info->launch_event.parent_construct == acc_construct_parallel);
assert (event_info->launch_event.implicit == 1);
assert (event_info->launch_event.tool_info == tool_info->nested);
assert (event_info->launch_event.kernel_name != NULL);
assert (strcmp (event_info->launch_event.kernel_name, tool_info->nested->event_info.launch_event.kernel_name) == 0);
assert (event_info->launch_event.num_gangs == tool_info->nested->event_info.launch_event.num_gangs);
assert (event_info->launch_event.num_workers == tool_info->nested->event_info.launch_event.num_workers);
assert (event_info->launch_event.vector_length == tool_info->nested->event_info.launch_event.vector_length);
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
else
assert (api_info->device_api == acc_device_api_cuda);
assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
assert (api_info->device_type == prof_info->device_type);
assert (api_info->vendor == -1);
assert (api_info->device_handle == NULL);
assert (api_info->context_handle == NULL);
assert (api_info->async_handle == NULL);
free ((void *) tool_info->nested->event_info.launch_event.kernel_name);
free (tool_info->nested);
tool_info->nested = NULL;
}
int main()
{
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
STATE_OP (state, = 0);
reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
reg (acc_ev_enter_data_start, cb_enter_data_start, acc_reg);
reg (acc_ev_enter_data_end, cb_enter_data_end, acc_reg);
reg (acc_ev_exit_data_start, cb_exit_data_start, acc_reg);
reg (acc_ev_exit_data_end, cb_exit_data_end, acc_reg);
reg (acc_ev_compute_construct_start, cb_compute_construct_start, acc_reg);
reg (acc_ev_compute_construct_end, cb_compute_construct_end, acc_reg);
reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg);
reg (acc_ev_enqueue_launch_end, cb_enqueue_launch_end, acc_reg);
assert (state == 0);
acc_device_type = acc_get_device_type ();
acc_device_num = acc_get_device_num (acc_device_type);
assert (state == 0);
{
int state_init;
#pragma acc parallel COPYIN(state) copyout(state_init)
{
asm volatile ("" : : : "memory"); // TODO PR90488
state_init = state;
}
assert (state_init == 4);
}
assert (state == 10);
STATE_OP (state, = 100);
{
int state_init;
acc_async = 12;
OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
#pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init)
{
asm volatile ("" : : : "memory"); // TODO PR90488
state_init = state;
}
acc_async = acc_async_sync;
OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
2019-05-17 21:13:36 +02:00
#pragma acc wait
assert (state_init == 104);
}
assert (state == 110);
return 0;
}