5fae049dc2
libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. 
* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346
405 lines
10 KiB
C
405 lines
10 KiB
C
/* OpenACC Runtime Library Definitions.

   Copyright (C) 2013-2019 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
|
|
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
#include "openacc.h"
|
|
#include "libgomp.h"
|
|
#include "oacc-int.h"
|
|
|
|
static struct goacc_thread *
|
|
get_goacc_thread (void)
|
|
{
|
|
struct goacc_thread *thr = goacc_thread ();
|
|
|
|
if (!thr || !thr->dev)
|
|
gomp_fatal ("no device active");
|
|
|
|
return thr;
|
|
}
|
|
|
|
static int
|
|
validate_async_val (int async)
|
|
{
|
|
if (!async_valid_p (async))
|
|
gomp_fatal ("invalid async-argument: %d", async);
|
|
|
|
if (async == acc_async_sync)
|
|
return -1;
|
|
|
|
if (async == acc_async_noval)
|
|
return 0;
|
|
|
|
if (async >= 0)
|
|
/* TODO: we reserve 0 for acc_async_noval before we can clarify the
|
|
semantics of "default_async". */
|
|
return 1 + async;
|
|
else
|
|
__builtin_unreachable ();
|
|
}
|
|
|
|
/* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
|
|
might return NULL if no asyncqueue is to be used. Otherwise, if CREATE,
|
|
create the asyncqueue if it doesn't exist yet.
|
|
|
|
Unless CREATE, this will not generate any OpenACC Profiling Interface
|
|
events. */
|
|
|
|
attribute_hidden struct goacc_asyncqueue *
|
|
lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
|
|
{
|
|
async = validate_async_val (async);
|
|
if (async < 0)
|
|
return NULL;
|
|
|
|
struct goacc_asyncqueue *ret_aq = NULL;
|
|
struct gomp_device_descr *dev = thr->dev;
|
|
|
|
gomp_mutex_lock (&dev->openacc.async.lock);
|
|
|
|
if (!create
|
|
&& (async >= dev->openacc.async.nasyncqueue
|
|
|| !dev->openacc.async.asyncqueue[async]))
|
|
goto end;
|
|
|
|
if (async >= dev->openacc.async.nasyncqueue)
|
|
{
|
|
int diff = async + 1 - dev->openacc.async.nasyncqueue;
|
|
dev->openacc.async.asyncqueue
|
|
= gomp_realloc (dev->openacc.async.asyncqueue,
|
|
sizeof (goacc_aq) * (async + 1));
|
|
memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
|
|
0, sizeof (goacc_aq) * diff);
|
|
dev->openacc.async.nasyncqueue = async + 1;
|
|
}
|
|
|
|
if (!dev->openacc.async.asyncqueue[async])
|
|
{
|
|
dev->openacc.async.asyncqueue[async] = dev->openacc.async.construct_func ();
|
|
|
|
if (!dev->openacc.async.asyncqueue[async])
|
|
{
|
|
gomp_mutex_unlock (&dev->openacc.async.lock);
|
|
gomp_fatal ("async %d creation failed", async);
|
|
}
|
|
|
|
/* Link new async queue into active list. */
|
|
goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
|
|
n->aq = dev->openacc.async.asyncqueue[async];
|
|
n->next = dev->openacc.async.active;
|
|
dev->openacc.async.active = n;
|
|
}
|
|
|
|
ret_aq = dev->openacc.async.asyncqueue[async];
|
|
|
|
end:
|
|
gomp_mutex_unlock (&dev->openacc.async.lock);
|
|
return ret_aq;
|
|
}
|
|
|
|
/* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
|
|
might return NULL if no asyncqueue is to be used. Otherwise, create the
|
|
asyncqueue if it doesn't exist yet. */
|
|
|
|
attribute_hidden struct goacc_asyncqueue *
|
|
get_goacc_asyncqueue (int async)
|
|
{
|
|
struct goacc_thread *thr = get_goacc_thread ();
|
|
return lookup_goacc_asyncqueue (thr, true, async);
|
|
}
|
|
|
|
int
|
|
acc_async_test (int async)
|
|
{
|
|
struct goacc_thread *thr = goacc_thread ();
|
|
|
|
if (!thr || !thr->dev)
|
|
gomp_fatal ("no device active");
|
|
|
|
goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
|
|
if (!aq)
|
|
return 1;
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
if (profiling_p)
|
|
{
|
|
prof_info.async = async;
|
|
prof_info.async_queue = prof_info.async;
|
|
}
|
|
|
|
int res = thr->dev->openacc.async.test_func (aq);
|
|
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
int
|
|
acc_async_test_all (void)
|
|
{
|
|
struct goacc_thread *thr = get_goacc_thread ();
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
|
|
int ret = 1;
|
|
gomp_mutex_lock (&thr->dev->openacc.async.lock);
|
|
for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
|
|
if (!thr->dev->openacc.async.test_func (l->aq))
|
|
{
|
|
ret = 0;
|
|
break;
|
|
}
|
|
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
|
|
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void
|
|
acc_wait (int async)
|
|
{
|
|
struct goacc_thread *thr = get_goacc_thread ();
|
|
|
|
goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
|
|
if (!aq)
|
|
return;
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
if (profiling_p)
|
|
{
|
|
prof_info.async = async;
|
|
prof_info.async_queue = prof_info.async;
|
|
}
|
|
|
|
if (!thr->dev->openacc.async.synchronize_func (aq))
|
|
gomp_fatal ("wait on %d failed", async);
|
|
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
}
|
|
|
|
/* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait.  When
   HAVE_ATTRIBUTE_ALIAS is defined it is provided via strong_alias;
   otherwise it is a small forwarding function.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait (int async)
{
  acc_wait (async);
}
#else
strong_alias (acc_wait, acc_async_wait)
#endif
|
|
|
|
void
|
|
acc_wait_async (int async1, int async2)
|
|
{
|
|
struct goacc_thread *thr = get_goacc_thread ();
|
|
|
|
goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
|
|
/* TODO: Is this also correct for acc_async_sync, assuming that in this case,
|
|
we'll always be synchronous anyways? */
|
|
if (!aq1)
|
|
return;
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
if (profiling_p)
|
|
{
|
|
prof_info.async = async2;
|
|
prof_info.async_queue = prof_info.async;
|
|
}
|
|
|
|
goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
|
|
/* An async queue is always synchronized with itself. */
|
|
if (aq1 == aq2)
|
|
goto out_prof;
|
|
|
|
if (aq2)
|
|
{
|
|
if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
|
|
gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
|
|
}
|
|
else
|
|
{
|
|
/* TODO: Local thread synchronization.
|
|
Necessary for the "async2 == acc_async_sync" case, or can just skip? */
|
|
if (!thr->dev->openacc.async.synchronize_func (aq1))
|
|
gomp_fatal ("wait on %d failed", async1);
|
|
}
|
|
|
|
out_prof:
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
}
|
|
|
|
void
|
|
acc_wait_all (void)
|
|
{
|
|
struct goacc_thread *thr = goacc_thread ();
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
|
|
bool ret = true;
|
|
gomp_mutex_lock (&thr->dev->openacc.async.lock);
|
|
for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
|
|
ret &= thr->dev->openacc.async.synchronize_func (l->aq);
|
|
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
|
|
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
|
|
if (!ret)
|
|
gomp_fatal ("wait all failed");
|
|
}
|
|
|
|
/* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all.
   When HAVE_ATTRIBUTE_ALIAS is defined it is provided via strong_alias;
   otherwise it is a small forwarding function.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait_all (void)
{
  acc_wait_all ();
}
#else
strong_alias (acc_wait_all, acc_async_wait_all)
#endif
|
|
|
|
void
|
|
acc_wait_all_async (int async)
|
|
{
|
|
struct goacc_thread *thr = get_goacc_thread ();
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
if (profiling_p)
|
|
{
|
|
prof_info.async = async;
|
|
prof_info.async_queue = prof_info.async;
|
|
}
|
|
|
|
goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
|
|
|
|
bool ret = true;
|
|
gomp_mutex_lock (&thr->dev->openacc.async.lock);
|
|
for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
|
|
{
|
|
if (waiting_queue)
|
|
ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
|
|
else
|
|
/* TODO: Local thread synchronization.
|
|
Necessary for the "async2 == acc_async_sync" case, or can just skip? */
|
|
ret &= thr->dev->openacc.async.synchronize_func (l->aq);
|
|
}
|
|
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
|
|
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
|
|
if (!ret)
|
|
gomp_fatal ("wait all async(%d) failed", async);
|
|
}
|
|
|
|
attribute_hidden void
|
|
goacc_async_free (struct gomp_device_descr *devicep,
|
|
struct goacc_asyncqueue *aq, void *ptr)
|
|
{
|
|
if (!aq)
|
|
free (ptr);
|
|
else
|
|
devicep->openacc.async.queue_callback_func (aq, free, ptr);
|
|
}
|
|
|
|
/* This function initializes the asyncqueues for the device specified by
|
|
DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on
|
|
return. */
|
|
|
|
attribute_hidden void
|
|
goacc_init_asyncqueues (struct gomp_device_descr *devicep)
|
|
{
|
|
devicep->openacc.async.nasyncqueue = 0;
|
|
devicep->openacc.async.asyncqueue = NULL;
|
|
devicep->openacc.async.active = NULL;
|
|
gomp_mutex_init (&devicep->openacc.async.lock);
|
|
}
|
|
|
|
/* This function finalizes the asyncqueues for the device specified by DEVICEP.
|
|
TODO DEVICEP must be locked on entry, and remains locked on return. */
|
|
|
|
attribute_hidden bool
|
|
goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
|
|
{
|
|
bool ret = true;
|
|
gomp_mutex_lock (&devicep->openacc.async.lock);
|
|
if (devicep->openacc.async.nasyncqueue > 0)
|
|
{
|
|
goacc_aq_list next;
|
|
for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
|
|
{
|
|
ret &= devicep->openacc.async.destruct_func (l->aq);
|
|
next = l->next;
|
|
free (l);
|
|
}
|
|
free (devicep->openacc.async.asyncqueue);
|
|
devicep->openacc.async.nasyncqueue = 0;
|
|
devicep->openacc.async.asyncqueue = NULL;
|
|
devicep->openacc.async.active = NULL;
|
|
}
|
|
gomp_mutex_unlock (&devicep->openacc.async.lock);
|
|
gomp_mutex_destroy (&devicep->openacc.async.lock);
|
|
return ret;
|
|
}
|