477 lines
12 KiB
C
477 lines
12 KiB
C
/* OpenACC Runtime Library Definitions.
|
|
|
|
Copyright (C) 2013-2022 Free Software Foundation, Inc.
|
|
|
|
Contributed by Mentor Embedded.
|
|
|
|
This file is part of the GNU Offloading and Multi Processing Library
|
|
(libgomp).
|
|
|
|
Libgomp is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3, or (at your option)
|
|
any later version.
|
|
|
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
more details.
|
|
|
|
Under Section 7 of GPL version 3, you are granted additional
|
|
permissions described in the GCC Runtime Library Exception, version
|
|
3.1, as published by the Free Software Foundation.
|
|
|
|
You should have received a copy of the GNU General Public License and
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
#include "openacc.h"
|
|
#include "libgomp.h"
|
|
#include "oacc-int.h"
|
|
|
|
static struct goacc_thread *
|
|
get_goacc_thread (void)
|
|
{
|
|
struct goacc_thread *thr = goacc_thread ();
|
|
|
|
if (!thr || !thr->dev)
|
|
gomp_fatal ("no device active");
|
|
|
|
return thr;
|
|
}
|
|
|
|
static int
|
|
validate_async_val (int async)
|
|
{
|
|
if (!async_valid_p (async))
|
|
gomp_fatal ("invalid async-argument: %d", async);
|
|
|
|
if (async == acc_async_sync)
|
|
return -1;
|
|
|
|
if (async == acc_async_noval)
|
|
return 0;
|
|
|
|
if (async >= 0)
|
|
/* TODO: we reserve 0 for acc_async_noval before we can clarify the
|
|
semantics of "default_async". */
|
|
return 1 + async;
|
|
else
|
|
__builtin_unreachable ();
|
|
}
|
|
|
|
/* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
|
|
might return NULL if no asyncqueue is to be used. Otherwise, if CREATE,
|
|
create the asyncqueue if it doesn't exist yet.
|
|
|
|
Unless CREATE, this will not generate any OpenACC Profiling Interface
|
|
events. */
|
|
|
|
attribute_hidden struct goacc_asyncqueue *
|
|
lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
|
|
{
|
|
async = validate_async_val (async);
|
|
if (async < 0)
|
|
return NULL;
|
|
|
|
struct goacc_asyncqueue *ret_aq = NULL;
|
|
struct gomp_device_descr *dev = thr->dev;
|
|
|
|
gomp_mutex_lock (&dev->openacc.async.lock);
|
|
|
|
if (!create
|
|
&& (async >= dev->openacc.async.nasyncqueue
|
|
|| !dev->openacc.async.asyncqueue[async]))
|
|
goto end;
|
|
|
|
if (async >= dev->openacc.async.nasyncqueue)
|
|
{
|
|
int diff = async + 1 - dev->openacc.async.nasyncqueue;
|
|
dev->openacc.async.asyncqueue
|
|
= gomp_realloc (dev->openacc.async.asyncqueue,
|
|
sizeof (goacc_aq) * (async + 1));
|
|
memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
|
|
0, sizeof (goacc_aq) * diff);
|
|
dev->openacc.async.nasyncqueue = async + 1;
|
|
}
|
|
|
|
if (!dev->openacc.async.asyncqueue[async])
|
|
{
|
|
dev->openacc.async.asyncqueue[async]
|
|
= dev->openacc.async.construct_func (dev->target_id);
|
|
|
|
if (!dev->openacc.async.asyncqueue[async])
|
|
{
|
|
gomp_mutex_unlock (&dev->openacc.async.lock);
|
|
gomp_fatal ("async %d creation failed", async);
|
|
}
|
|
|
|
/* Link new async queue into active list. */
|
|
goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
|
|
n->aq = dev->openacc.async.asyncqueue[async];
|
|
n->next = dev->openacc.async.active;
|
|
dev->openacc.async.active = n;
|
|
}
|
|
|
|
ret_aq = dev->openacc.async.asyncqueue[async];
|
|
|
|
end:
|
|
gomp_mutex_unlock (&dev->openacc.async.lock);
|
|
return ret_aq;
|
|
}
|
|
|
|
/* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
|
|
might return NULL if no asyncqueue is to be used. Otherwise, create the
|
|
asyncqueue if it doesn't exist yet. */
|
|
|
|
attribute_hidden struct goacc_asyncqueue *
|
|
get_goacc_asyncqueue (int async)
|
|
{
|
|
struct goacc_thread *thr = get_goacc_thread ();
|
|
return lookup_goacc_asyncqueue (thr, true, async);
|
|
}
|
|
|
|
int
|
|
acc_async_test (int async)
|
|
{
|
|
struct goacc_thread *thr = goacc_thread ();
|
|
|
|
if (!thr || !thr->dev)
|
|
gomp_fatal ("no device active");
|
|
|
|
goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
|
|
if (!aq)
|
|
return 1;
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
if (profiling_p)
|
|
{
|
|
prof_info.async = async;
|
|
prof_info.async_queue = prof_info.async;
|
|
}
|
|
|
|
int res = thr->dev->openacc.async.test_func (aq);
|
|
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
int
|
|
acc_async_test_all (void)
|
|
{
|
|
struct goacc_thread *thr = get_goacc_thread ();
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
|
|
int ret = 1;
|
|
gomp_mutex_lock (&thr->dev->openacc.async.lock);
|
|
for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
|
|
if (!thr->dev->openacc.async.test_func (l->aq))
|
|
{
|
|
ret = 0;
|
|
break;
|
|
}
|
|
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
|
|
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void
|
|
acc_wait (int async)
|
|
{
|
|
struct goacc_thread *thr = get_goacc_thread ();
|
|
|
|
goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
|
|
if (!aq)
|
|
return;
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
if (profiling_p)
|
|
{
|
|
prof_info.async = async;
|
|
prof_info.async_queue = prof_info.async;
|
|
}
|
|
|
|
if (!thr->dev->openacc.async.synchronize_func (aq))
|
|
gomp_fatal ("wait on %d failed", async);
|
|
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
}
|
|
|
|
/* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait. */
|
|
#ifdef HAVE_ATTRIBUTE_ALIAS
|
|
strong_alias (acc_wait, acc_async_wait)
|
|
#else
|
|
void
|
|
acc_async_wait (int async)
|
|
{
|
|
acc_wait (async);
|
|
}
|
|
#endif
|
|
|
|
void
|
|
acc_wait_async (int async1, int async2)
|
|
{
|
|
struct goacc_thread *thr = get_goacc_thread ();
|
|
|
|
goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
|
|
/* TODO: Is this also correct for acc_async_sync, assuming that in this case,
|
|
we'll always be synchronous anyways? */
|
|
if (!aq1)
|
|
return;
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
if (profiling_p)
|
|
{
|
|
prof_info.async = async2;
|
|
prof_info.async_queue = prof_info.async;
|
|
}
|
|
|
|
goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
|
|
/* An async queue is always synchronized with itself. */
|
|
if (aq1 == aq2)
|
|
goto out_prof;
|
|
|
|
if (aq2)
|
|
{
|
|
if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
|
|
gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
|
|
}
|
|
else
|
|
{
|
|
/* TODO: Local thread synchronization.
|
|
Necessary for the "async2 == acc_async_sync" case, or can just skip? */
|
|
if (!thr->dev->openacc.async.synchronize_func (aq1))
|
|
gomp_fatal ("wait on %d failed", async1);
|
|
}
|
|
|
|
out_prof:
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
}
|
|
|
|
void
|
|
acc_wait_all (void)
|
|
{
|
|
struct goacc_thread *thr = goacc_thread ();
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
|
|
bool ret = true;
|
|
gomp_mutex_lock (&thr->dev->openacc.async.lock);
|
|
for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
|
|
ret &= thr->dev->openacc.async.synchronize_func (l->aq);
|
|
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
|
|
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
|
|
if (!ret)
|
|
gomp_fatal ("wait all failed");
|
|
}
|
|
|
|
/* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all. */
|
|
#ifdef HAVE_ATTRIBUTE_ALIAS
|
|
strong_alias (acc_wait_all, acc_async_wait_all)
|
|
#else
|
|
void
|
|
acc_async_wait_all (void)
|
|
{
|
|
acc_wait_all ();
|
|
}
|
|
#endif
|
|
|
|
void
|
|
acc_wait_all_async (int async)
|
|
{
|
|
struct goacc_thread *thr = get_goacc_thread ();
|
|
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
if (profiling_p)
|
|
{
|
|
prof_info.async = async;
|
|
prof_info.async_queue = prof_info.async;
|
|
}
|
|
|
|
goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
|
|
|
|
bool ret = true;
|
|
gomp_mutex_lock (&thr->dev->openacc.async.lock);
|
|
for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
|
|
{
|
|
if (waiting_queue)
|
|
ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
|
|
else
|
|
/* TODO: Local thread synchronization.
|
|
Necessary for the "async2 == acc_async_sync" case, or can just skip? */
|
|
ret &= thr->dev->openacc.async.synchronize_func (l->aq);
|
|
}
|
|
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
|
|
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
|
|
if (!ret)
|
|
gomp_fatal ("wait all async(%d) failed", async);
|
|
}
|
|
|
|
void
|
|
GOACC_wait (int async, int num_waits, ...)
|
|
{
|
|
goacc_lazy_initialize ();
|
|
|
|
struct goacc_thread *thr = goacc_thread ();
|
|
|
|
/* No nesting. */
|
|
assert (thr->prof_info == NULL);
|
|
assert (thr->api_info == NULL);
|
|
acc_prof_info prof_info;
|
|
acc_api_info api_info;
|
|
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
|
|
if (profiling_p)
|
|
{
|
|
prof_info.async = async;
|
|
prof_info.async_queue = prof_info.async;
|
|
}
|
|
|
|
if (num_waits)
|
|
{
|
|
va_list ap;
|
|
|
|
va_start (ap, num_waits);
|
|
goacc_wait (async, num_waits, &ap);
|
|
va_end (ap);
|
|
}
|
|
else if (async == acc_async_sync)
|
|
acc_wait_all ();
|
|
else
|
|
acc_wait_all_async (async);
|
|
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
}
|
|
|
|
attribute_hidden void
|
|
goacc_wait (int async, int num_waits, va_list *ap)
|
|
{
|
|
while (num_waits--)
|
|
{
|
|
int qid = va_arg (*ap, int);
|
|
|
|
/* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */
|
|
if (qid == acc_async_noval)
|
|
{
|
|
if (async == acc_async_sync)
|
|
acc_wait_all ();
|
|
else
|
|
acc_wait_all_async (async);
|
|
break;
|
|
}
|
|
|
|
if (acc_async_test (qid))
|
|
continue;
|
|
|
|
if (async == acc_async_sync)
|
|
acc_wait (qid);
|
|
else if (qid == async)
|
|
/* If we're waiting on the same asynchronous queue as we're
|
|
launching on, the queue itself will order work as
|
|
required, so there's no need to wait explicitly. */
|
|
;
|
|
else
|
|
acc_wait_async (qid, async);
|
|
}
|
|
}
|
|
|
|
attribute_hidden void
|
|
goacc_async_free (struct gomp_device_descr *devicep,
|
|
struct goacc_asyncqueue *aq, void *ptr)
|
|
{
|
|
if (!aq)
|
|
free (ptr);
|
|
else
|
|
devicep->openacc.async.queue_callback_func (aq, free, ptr);
|
|
}
|
|
|
|
/* This function initializes the asyncqueues for the device specified by
|
|
DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on
|
|
return. */
|
|
|
|
attribute_hidden void
|
|
goacc_init_asyncqueues (struct gomp_device_descr *devicep)
|
|
{
|
|
devicep->openacc.async.nasyncqueue = 0;
|
|
devicep->openacc.async.asyncqueue = NULL;
|
|
devicep->openacc.async.active = NULL;
|
|
gomp_mutex_init (&devicep->openacc.async.lock);
|
|
}
|
|
|
|
/* This function finalizes the asyncqueues for the device specified by DEVICEP.
|
|
TODO DEVICEP must be locked on entry, and remains locked on return. */
|
|
|
|
attribute_hidden bool
|
|
goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
|
|
{
|
|
bool ret = true;
|
|
gomp_mutex_lock (&devicep->openacc.async.lock);
|
|
if (devicep->openacc.async.nasyncqueue > 0)
|
|
{
|
|
goacc_aq_list next;
|
|
for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
|
|
{
|
|
ret &= devicep->openacc.async.destruct_func (l->aq);
|
|
next = l->next;
|
|
free (l);
|
|
}
|
|
free (devicep->openacc.async.asyncqueue);
|
|
devicep->openacc.async.nasyncqueue = 0;
|
|
devicep->openacc.async.asyncqueue = NULL;
|
|
devicep->openacc.async.active = NULL;
|
|
}
|
|
gomp_mutex_unlock (&devicep->openacc.async.lock);
|
|
gomp_mutex_destroy (&devicep->openacc.async.lock);
|
|
return ret;
|
|
}
|