275c736e73
This patch implements the OpenMP 5.0 requirements of incrementing/decrementing the reference count of a mapped structure at most once (across all elements) on a construct. This is implemented by pulling in libgomp/hashtab.h and using htab_t as a pointer set. Structure element list siblings also have pointers-to-refcounts linked together, to naturally achieve uniform increment/decrement without repeating. It is still an open question whether using such a htab_t based set is faster or slower than using a sorted pointer array based implementation. This is to be researched later. libgomp/ChangeLog: * hashtab.h (htab_clear): New function with initialization code factored out from... (htab_create): ...here, adjust to use htab_clear function. * libgomp.h (REFCOUNT_SPECIAL): New symbol to denote range of special refcount values, add comments. (REFCOUNT_INFINITY): Adjust definition to use REFCOUNT_SPECIAL. (REFCOUNT_LINK): Likewise. (REFCOUNT_STRUCTELEM): New special refcount range for structure element siblings. (REFCOUNT_STRUCTELEM_P): Macro for testing for structure element sibling maps. (REFCOUNT_STRUCTELEM_FLAG_FIRST): Flag to indicate first sibling. (REFCOUNT_STRUCTELEM_FLAG_LAST): Flag to indicate last sibling. (REFCOUNT_STRUCTELEM_FIRST_P): Macro to test _FIRST flag. (REFCOUNT_STRUCTELEM_LAST_P): Macro to test _LAST flag. (struct splay_tree_key_s): Add structelem_refcount and structelem_refcount_ptr fields into a union with dynamic_refcount. Add comments. (gomp_map_vars): Delete declaration. (gomp_map_vars_async): Likewise. (gomp_unmap_vars): Likewise. (gomp_unmap_vars_async): Likewise. (goacc_map_vars): New declaration. (goacc_unmap_vars): Likewise. * oacc-mem.c (acc_map_data): Adjust to use goacc_map_vars. (goacc_enter_datum): Likewise. (goacc_enter_data_internal): Likewise. * oacc-parallel.c (GOACC_parallel_keyed): Adjust to use goacc_map_vars and goacc_unmap_vars. (GOACC_data_start): Adjust to use goacc_map_vars. (GOACC_data_end): Adjust to use goacc_unmap_vars. 
* target.c (hash_entry_type): New typedef. (htab_alloc): New function hook for hashtab.h. (htab_free): Likewise. (htab_hash): Likewise. (htab_eq): Likewise. (hashtab.h): Add file include. (gomp_increment_refcount): New function. (gomp_decrement_refcount): Likewise. (gomp_map_vars_existing): Add refcount_set parameter, adjust to use gomp_increment_refcount. (gomp_map_fields_existing): Add refcount_set parameter, adjust calls to gomp_map_vars_existing. (gomp_map_vars_internal): Add refcount_set parameter, add local openmp_p variable to guard OpenMP specific paths, adjust calls to gomp_map_vars_existing, add structure element sibling splay_tree_key sequence creation code, adjust Fortran map case to avoid increment under OpenMP. (gomp_map_vars): Adjust to static, add refcount_set parameter, manage local refcount_set if caller passed in NULL, adjust call to gomp_map_vars_internal. (gomp_map_vars_async): Adjust and rename into... (goacc_map_vars): ...this new function, adjust call to gomp_map_vars_internal. (gomp_remove_splay_tree_key): New function with code factored out from gomp_remove_var_internal. (gomp_remove_var_internal): Add code to handle removing multiple splay_tree_key sequence for structure elements, adjust code to use gomp_remove_splay_tree_key for splay-tree key removal. (gomp_unmap_vars_internal): Add refcount_set parameter, adjust to use gomp_decrement_refcount. (gomp_unmap_vars): Adjust to static, add refcount_set parameter, manage local refcount_set if caller passed in NULL, adjust call to gomp_unmap_vars_internal. (gomp_unmap_vars_async): Adjust and rename into... (goacc_unmap_vars): ...this new function, adjust call to gomp_unmap_vars_internal. (GOMP_target): Manage refcount_set and adjust calls to gomp_map_vars and gomp_unmap_vars. (GOMP_target_ext): Likewise. (gomp_target_data_fallback): Adjust call to gomp_map_vars. (GOMP_target_data): Likewise. (GOMP_target_data_ext): Likewise. (GOMP_target_end_data): Adjust call to gomp_unmap_vars. 
(gomp_exit_data): Add refcount_set parameter, adjust to use gomp_decrement_refcount, adjust to queue splay-tree keys for removal after main loop. (GOMP_target_enter_exit_data): Manage refcount_set and adjust calls to gomp_map_vars and gomp_exit_data. (gomp_target_task_fn): Likewise. * testsuite/libgomp.c-c++-common/refcount-1.c: New testcase. * testsuite/libgomp.c-c++-common/struct-elem-1.c: New testcase. * testsuite/libgomp.c-c++-common/struct-elem-2.c: New testcase. * testsuite/libgomp.c-c++-common/struct-elem-3.c: New testcase. * testsuite/libgomp.c-c++-common/struct-elem-4.c: New testcase. * testsuite/libgomp.c-c++-common/struct-elem-5.c: New testcase.
727 lines
21 KiB
C
727 lines
21 KiB
C
/* Copyright (C) 2013-2021 Free Software Foundation, Inc.
|
|
|
|
Contributed by Mentor Embedded.
|
|
|
|
This file is part of the GNU Offloading and Multi Processing Library
|
|
(libgomp).
|
|
|
|
Libgomp is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3, or (at your option)
|
|
any later version.
|
|
|
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
more details.
|
|
|
|
Under Section 7 of GPL version 3, you are granted additional
|
|
permissions described in the GCC Runtime Library Exception, version
|
|
3.1, as published by the Free Software Foundation.
|
|
|
|
You should have received a copy of the GNU General Public License and
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
/* This file handles OpenACC constructs. */
|
|
|
|
#include "openacc.h"
|
|
#include "libgomp.h"
|
|
#include "gomp-constants.h"
|
|
#include "oacc-int.h"
|
|
#ifdef HAVE_INTTYPES_H
|
|
# include <inttypes.h> /* For PRIu64. */
|
|
#endif
|
|
#include <string.h>
|
|
#include <stdarg.h>
|
|
#include <assert.h>
|
|
|
|
|
|
/* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
|
|
continue to support the following two legacy values. */
|
|
_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
|
|
"legacy GOMP_DEVICE_ICV broken");
|
|
_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
|
|
== GOACC_FLAG_HOST_FALLBACK,
|
|
"legacy GOMP_DEVICE_HOST_FALLBACK broken");
|
|
|
|
|
|
/* Handle the mapping pairs that are presented when a deviceptr clause is
   used with Fortran.

   MAPNUM is the number of entries in the parallel arrays HOSTADDRS, SIZES
   and KINDS; all three arrays may be modified in place.  Only the low byte
   of each KINDS entry is inspected.

   For every GOMP_MAP_FORCE_DEVICEPTR entry whose size is not
   sizeof (void *):
     - if the next entry is a zero-sized GOMP_MAP_POINTER whose pointee
       equals this entry's host address, that next entry takes over this
       entry's kind and gets size sizeof (void *);
     - this entry's host address is then set to NULL.
   Entries of exactly pointer size are left untouched.  */

static void
handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
                     unsigned short *kinds)
{
  /* The index has the same type as MAPNUM so that the loop condition and
     the look-ahead bound below never mix signed and unsigned operands.  */
  for (size_t i = 0; i < mapnum; i++)
    {
      /* Handle Fortran deviceptr clause.  */
      if ((kinds[i] & 0xff) != GOMP_MAP_FORCE_DEVICEPTR)
        continue;

      if (sizes[i] == sizeof (void *))
        continue;

      /* At this point, we're dealing with a Fortran deviceptr.
         If the next element is not what we're expecting, then
         this is an instance of where the deviceptr variable was
         not used within the region and the pointer was removed
         by the gimplifier.  The bounds check comes first so that
         entry I + 1 is only read when it exists.  */
      if (i + 1 < mapnum
          && (kinds[i + 1] & 0xff) == GOMP_MAP_POINTER
          && sizes[i + 1] == 0
          && hostaddrs[i] == *(void **) hostaddrs[i + 1])
        {
          kinds[i + 1] = kinds[i];
          sizes[i + 1] = sizeof (void *);
        }

      /* Invalidate the entry.  */
      hostaddrs[i] = NULL;
    }
}
|
|
|
|
|
|
/* Launch a possibly offloaded function with FLAGS. FN is the host fn
|
|
address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
|
|
blocks to be copied to/from the device. Varadic arguments are
|
|
keyed optional parameters terminated with a zero. */
|
|
|
|
void
|
|
GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
|
|
size_t mapnum, void **hostaddrs, size_t *sizes,
|
|
unsigned short *kinds, ...)
|
|
{
|
|
int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
|
|
|
|
va_list ap;
|
|
struct goacc_thread *thr;
|
|
struct gomp_device_descr *acc_dev;
|
|
struct target_mem_desc *tgt;
|
|
void **devaddrs;
|
|
unsigned int i;
|
|
struct splay_tree_key_s k;
|
|
splay_tree_key tgt_fn_key;
|
|
void (*tgt_fn);
|
|
int async = GOMP_ASYNC_SYNC;
|
|
unsigned dims[GOMP_DIM_MAX];
|
|
unsigned tag;
|
|
|
|
#ifdef HAVE_INTTYPES_H
|
|
gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
|
|
__FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
|
|
#else
|
|
gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
|
|
__FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
|
|
#endif
|
|
goacc_lazy_initialize ();
|
|
|
|
thr = goacc_thread ();
|
|
acc_dev = thr->dev;
|
|
|
|
bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
|
|
|
|
acc_prof_info prof_info;
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = &prof_info;
|
|
|
|
prof_info.event_type = acc_ev_compute_construct_start;
|
|
prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
|
|
prof_info.version = _ACC_PROF_INFO_VERSION;
|
|
prof_info.device_type = acc_device_type (acc_dev->type);
|
|
prof_info.device_number = acc_dev->target_id;
|
|
prof_info.thread_id = -1;
|
|
prof_info.async = async;
|
|
prof_info.async_queue = prof_info.async;
|
|
prof_info.src_file = NULL;
|
|
prof_info.func_name = NULL;
|
|
prof_info.line_no = -1;
|
|
prof_info.end_line_no = -1;
|
|
prof_info.func_line_no = -1;
|
|
prof_info.func_end_line_no = -1;
|
|
}
|
|
acc_event_info compute_construct_event_info;
|
|
if (profiling_p)
|
|
{
|
|
compute_construct_event_info.other_event.event_type
|
|
= prof_info.event_type;
|
|
compute_construct_event_info.other_event.valid_bytes
|
|
= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
|
|
compute_construct_event_info.other_event.parent_construct
|
|
= acc_construct_parallel;
|
|
compute_construct_event_info.other_event.implicit = 0;
|
|
compute_construct_event_info.other_event.tool_info = NULL;
|
|
}
|
|
acc_api_info api_info;
|
|
if (profiling_p)
|
|
{
|
|
thr->api_info = &api_info;
|
|
|
|
api_info.device_api = acc_device_api_none;
|
|
api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
|
|
api_info.device_type = prof_info.device_type;
|
|
api_info.vendor = -1;
|
|
api_info.device_handle = NULL;
|
|
api_info.context_handle = NULL;
|
|
api_info.async_handle = NULL;
|
|
}
|
|
|
|
if (profiling_p)
|
|
goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
|
|
&api_info);
|
|
|
|
handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
|
|
|
|
/* Host fallback if "if" clause is false or if the current device is set to
|
|
the host. */
|
|
if (flags & GOACC_FLAG_HOST_FALLBACK)
|
|
{
|
|
prof_info.device_type = acc_device_host;
|
|
api_info.device_type = prof_info.device_type;
|
|
goacc_save_and_set_bind (acc_device_host);
|
|
fn (hostaddrs);
|
|
goacc_restore_bind ();
|
|
goto out_prof;
|
|
}
|
|
else if (acc_device_type (acc_dev->type) == acc_device_host)
|
|
{
|
|
fn (hostaddrs);
|
|
goto out_prof;
|
|
}
|
|
|
|
/* Default: let the runtime choose. */
|
|
for (i = 0; i != GOMP_DIM_MAX; i++)
|
|
dims[i] = 0;
|
|
|
|
va_start (ap, kinds);
|
|
/* TODO: This will need amending when device_type is implemented. */
|
|
while ((tag = va_arg (ap, unsigned)) != 0)
|
|
{
|
|
if (GOMP_LAUNCH_DEVICE (tag))
|
|
gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
|
|
GOMP_LAUNCH_DEVICE (tag));
|
|
|
|
switch (GOMP_LAUNCH_CODE (tag))
|
|
{
|
|
case GOMP_LAUNCH_DIM:
|
|
{
|
|
unsigned mask = GOMP_LAUNCH_OP (tag);
|
|
|
|
for (i = 0; i != GOMP_DIM_MAX; i++)
|
|
if (mask & GOMP_DIM_MASK (i))
|
|
dims[i] = va_arg (ap, unsigned);
|
|
}
|
|
break;
|
|
|
|
case GOMP_LAUNCH_ASYNC:
|
|
{
|
|
/* Small constant values are encoded in the operand. */
|
|
async = GOMP_LAUNCH_OP (tag);
|
|
|
|
if (async == GOMP_LAUNCH_OP_MAX)
|
|
async = va_arg (ap, unsigned);
|
|
|
|
if (profiling_p)
|
|
{
|
|
prof_info.async = async;
|
|
prof_info.async_queue = prof_info.async;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case GOMP_LAUNCH_WAIT:
|
|
{
|
|
unsigned num_waits = GOMP_LAUNCH_OP (tag);
|
|
goacc_wait (async, num_waits, &ap);
|
|
break;
|
|
}
|
|
|
|
default:
|
|
gomp_fatal ("unrecognized offload code '%d',"
|
|
" libgomp is too old", GOMP_LAUNCH_CODE (tag));
|
|
}
|
|
}
|
|
va_end (ap);
|
|
|
|
if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
|
|
{
|
|
k.host_start = (uintptr_t) fn;
|
|
k.host_end = k.host_start + 1;
|
|
gomp_mutex_lock (&acc_dev->lock);
|
|
tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
|
|
gomp_mutex_unlock (&acc_dev->lock);
|
|
|
|
if (tgt_fn_key == NULL)
|
|
gomp_fatal ("target function wasn't mapped");
|
|
|
|
tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
|
|
}
|
|
else
|
|
tgt_fn = (void (*)) fn;
|
|
|
|
acc_event_info enter_exit_data_event_info;
|
|
if (profiling_p)
|
|
{
|
|
prof_info.event_type = acc_ev_enter_data_start;
|
|
enter_exit_data_event_info.other_event.event_type
|
|
= prof_info.event_type;
|
|
enter_exit_data_event_info.other_event.valid_bytes
|
|
= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
|
|
enter_exit_data_event_info.other_event.parent_construct
|
|
= compute_construct_event_info.other_event.parent_construct;
|
|
enter_exit_data_event_info.other_event.implicit = 1;
|
|
enter_exit_data_event_info.other_event.tool_info = NULL;
|
|
goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
|
|
&api_info);
|
|
}
|
|
|
|
goacc_aq aq = get_goacc_asyncqueue (async);
|
|
|
|
tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
|
|
true, 0);
|
|
if (profiling_p)
|
|
{
|
|
prof_info.event_type = acc_ev_enter_data_end;
|
|
enter_exit_data_event_info.other_event.event_type
|
|
= prof_info.event_type;
|
|
goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
|
|
&api_info);
|
|
}
|
|
|
|
devaddrs = gomp_alloca (sizeof (void *) * mapnum);
|
|
for (i = 0; i < mapnum; i++)
|
|
devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i);
|
|
|
|
if (aq == NULL)
|
|
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
|
|
tgt);
|
|
else
|
|
acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
|
|
dims, tgt, aq);
|
|
|
|
if (profiling_p)
|
|
{
|
|
prof_info.event_type = acc_ev_exit_data_start;
|
|
enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
|
|
enter_exit_data_event_info.other_event.tool_info = NULL;
|
|
goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
|
|
&api_info);
|
|
}
|
|
|
|
/* If running synchronously (aq == NULL), this will unmap immediately. */
|
|
goacc_unmap_vars (tgt, true, aq);
|
|
|
|
if (profiling_p)
|
|
{
|
|
prof_info.event_type = acc_ev_exit_data_end;
|
|
enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
|
|
goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
|
|
&api_info);
|
|
}
|
|
|
|
out_prof:
|
|
if (profiling_p)
|
|
{
|
|
prof_info.event_type = acc_ev_compute_construct_end;
|
|
compute_construct_event_info.other_event.event_type
|
|
= prof_info.event_type;
|
|
goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
|
|
&api_info);
|
|
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
}
|
|
|
|
/* Legacy entry point (GCC 5). Only provide host fallback execution. */
|
|
|
|
void
|
|
GOACC_parallel (int flags_m, void (*fn) (void *),
|
|
size_t mapnum, void **hostaddrs, size_t *sizes,
|
|
unsigned short *kinds,
|
|
int num_gangs, int num_workers, int vector_length,
|
|
int async, int num_waits, ...)
|
|
{
|
|
goacc_save_and_set_bind (acc_device_host);
|
|
fn (hostaddrs);
|
|
goacc_restore_bind ();
|
|
}
|
|
|
|
void
|
|
GOACC_data_start (int flags_m, size_t mapnum,
|
|
void **hostaddrs, size_t *sizes, unsigned short *kinds)
|
|
{
|
|
int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
|
|
|
|
struct target_mem_desc *tgt;
|
|
|
|
#ifdef HAVE_INTTYPES_H
|
|
gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
|
|
__FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
|
|
#else
|
|
gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
|
|
__FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
|
|
#endif
|
|
|
|
goacc_lazy_initialize ();
|
|
|
|
struct goacc_thread *thr = goacc_thread ();
|
|
struct gomp_device_descr *acc_dev = thr->dev;
|
|
|
|
bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
|
|
|
|
acc_prof_info prof_info;
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = &prof_info;
|
|
|
|
prof_info.event_type = acc_ev_enter_data_start;
|
|
prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
|
|
prof_info.version = _ACC_PROF_INFO_VERSION;
|
|
prof_info.device_type = acc_device_type (acc_dev->type);
|
|
prof_info.device_number = acc_dev->target_id;
|
|
prof_info.thread_id = -1;
|
|
prof_info.async = acc_async_sync; /* Always synchronous. */
|
|
prof_info.async_queue = prof_info.async;
|
|
prof_info.src_file = NULL;
|
|
prof_info.func_name = NULL;
|
|
prof_info.line_no = -1;
|
|
prof_info.end_line_no = -1;
|
|
prof_info.func_line_no = -1;
|
|
prof_info.func_end_line_no = -1;
|
|
}
|
|
acc_event_info enter_data_event_info;
|
|
if (profiling_p)
|
|
{
|
|
enter_data_event_info.other_event.event_type
|
|
= prof_info.event_type;
|
|
enter_data_event_info.other_event.valid_bytes
|
|
= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
|
|
enter_data_event_info.other_event.parent_construct = acc_construct_data;
|
|
for (int i = 0; i < mapnum; ++i)
|
|
if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR
|
|
|| (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
|
|
{
|
|
/* If there is one such data mapping kind, then this is actually an
|
|
OpenACC 'host_data' construct. (GCC maps the OpenACC
|
|
'host_data' construct to the OpenACC 'data' construct.) Apart
|
|
from artificial test cases (such as an OpenACC 'host_data'
|
|
construct's (implicit) device initialization when there hasn't
|
|
been any device data be set up before...), there can't really
|
|
any meaningful events be generated from OpenACC 'host_data'
|
|
constructs, though. */
|
|
enter_data_event_info.other_event.parent_construct
|
|
= acc_construct_host_data;
|
|
break;
|
|
}
|
|
enter_data_event_info.other_event.implicit = 0;
|
|
enter_data_event_info.other_event.tool_info = NULL;
|
|
}
|
|
acc_api_info api_info;
|
|
if (profiling_p)
|
|
{
|
|
thr->api_info = &api_info;
|
|
|
|
api_info.device_api = acc_device_api_none;
|
|
api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
|
|
api_info.device_type = prof_info.device_type;
|
|
api_info.vendor = -1;
|
|
api_info.device_handle = NULL;
|
|
api_info.context_handle = NULL;
|
|
api_info.async_handle = NULL;
|
|
}
|
|
|
|
if (profiling_p)
|
|
goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
|
|
|
|
/* Host fallback or 'do nothing'. */
|
|
if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
|| (flags & GOACC_FLAG_HOST_FALLBACK))
|
|
{
|
|
prof_info.device_type = acc_device_host;
|
|
api_info.device_type = prof_info.device_type;
|
|
tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0);
|
|
tgt->prev = thr->mapped_data;
|
|
thr->mapped_data = tgt;
|
|
|
|
goto out_prof;
|
|
}
|
|
|
|
gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
|
|
tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds,
|
|
true, 0);
|
|
gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
|
|
tgt->prev = thr->mapped_data;
|
|
thr->mapped_data = tgt;
|
|
|
|
out_prof:
|
|
if (profiling_p)
|
|
{
|
|
prof_info.event_type = acc_ev_enter_data_end;
|
|
enter_data_event_info.other_event.event_type = prof_info.event_type;
|
|
goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
|
|
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
}
|
|
|
|
void
|
|
GOACC_data_end (void)
|
|
{
|
|
struct goacc_thread *thr = goacc_thread ();
|
|
struct gomp_device_descr *acc_dev = thr->dev;
|
|
struct target_mem_desc *tgt = thr->mapped_data;
|
|
|
|
bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
|
|
|
|
acc_prof_info prof_info;
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = &prof_info;
|
|
|
|
prof_info.event_type = acc_ev_exit_data_start;
|
|
prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
|
|
prof_info.version = _ACC_PROF_INFO_VERSION;
|
|
prof_info.device_type = acc_device_type (acc_dev->type);
|
|
prof_info.device_number = acc_dev->target_id;
|
|
prof_info.thread_id = -1;
|
|
prof_info.async = acc_async_sync; /* Always synchronous. */
|
|
prof_info.async_queue = prof_info.async;
|
|
prof_info.src_file = NULL;
|
|
prof_info.func_name = NULL;
|
|
prof_info.line_no = -1;
|
|
prof_info.end_line_no = -1;
|
|
prof_info.func_line_no = -1;
|
|
prof_info.func_end_line_no = -1;
|
|
}
|
|
acc_event_info exit_data_event_info;
|
|
if (profiling_p)
|
|
{
|
|
exit_data_event_info.other_event.event_type
|
|
= prof_info.event_type;
|
|
exit_data_event_info.other_event.valid_bytes
|
|
= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
|
|
exit_data_event_info.other_event.parent_construct = acc_construct_data;
|
|
exit_data_event_info.other_event.implicit = 0;
|
|
exit_data_event_info.other_event.tool_info = NULL;
|
|
}
|
|
acc_api_info api_info;
|
|
if (profiling_p)
|
|
{
|
|
thr->api_info = &api_info;
|
|
|
|
api_info.device_api = acc_device_api_none;
|
|
api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
|
|
api_info.device_type = prof_info.device_type;
|
|
api_info.vendor = -1;
|
|
api_info.device_handle = NULL;
|
|
api_info.context_handle = NULL;
|
|
api_info.async_handle = NULL;
|
|
}
|
|
|
|
if (profiling_p)
|
|
goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
|
|
|
|
gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
|
|
thr->mapped_data = tgt->prev;
|
|
goacc_unmap_vars (tgt, true, NULL);
|
|
gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
|
|
|
|
if (profiling_p)
|
|
{
|
|
prof_info.event_type = acc_ev_exit_data_end;
|
|
exit_data_event_info.other_event.event_type = prof_info.event_type;
|
|
goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
|
|
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
}
|
|
|
|
void
|
|
GOACC_update (int flags_m, size_t mapnum,
|
|
void **hostaddrs, size_t *sizes, unsigned short *kinds,
|
|
int async, int num_waits, ...)
|
|
{
|
|
int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
|
|
|
|
size_t i;
|
|
|
|
goacc_lazy_initialize ();
|
|
|
|
struct goacc_thread *thr = goacc_thread ();
|
|
struct gomp_device_descr *acc_dev = thr->dev;
|
|
|
|
bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
|
|
|
|
acc_prof_info prof_info;
|
|
if (profiling_p)
|
|
{
|
|
thr->prof_info = &prof_info;
|
|
|
|
prof_info.event_type = acc_ev_update_start;
|
|
prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
|
|
prof_info.version = _ACC_PROF_INFO_VERSION;
|
|
prof_info.device_type = acc_device_type (acc_dev->type);
|
|
prof_info.device_number = acc_dev->target_id;
|
|
prof_info.thread_id = -1;
|
|
prof_info.async = async;
|
|
prof_info.async_queue = prof_info.async;
|
|
prof_info.src_file = NULL;
|
|
prof_info.func_name = NULL;
|
|
prof_info.line_no = -1;
|
|
prof_info.end_line_no = -1;
|
|
prof_info.func_line_no = -1;
|
|
prof_info.func_end_line_no = -1;
|
|
}
|
|
acc_event_info update_event_info;
|
|
if (profiling_p)
|
|
{
|
|
update_event_info.other_event.event_type
|
|
= prof_info.event_type;
|
|
update_event_info.other_event.valid_bytes
|
|
= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
|
|
update_event_info.other_event.parent_construct = acc_construct_update;
|
|
update_event_info.other_event.implicit = 0;
|
|
update_event_info.other_event.tool_info = NULL;
|
|
}
|
|
acc_api_info api_info;
|
|
if (profiling_p)
|
|
{
|
|
thr->api_info = &api_info;
|
|
|
|
api_info.device_api = acc_device_api_none;
|
|
api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
|
|
api_info.device_type = prof_info.device_type;
|
|
api_info.vendor = -1;
|
|
api_info.device_handle = NULL;
|
|
api_info.context_handle = NULL;
|
|
api_info.async_handle = NULL;
|
|
}
|
|
|
|
if (profiling_p)
|
|
goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
|
|
|
|
if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
|| (flags & GOACC_FLAG_HOST_FALLBACK))
|
|
{
|
|
prof_info.device_type = acc_device_host;
|
|
api_info.device_type = prof_info.device_type;
|
|
|
|
goto out_prof;
|
|
}
|
|
|
|
if (num_waits)
|
|
{
|
|
va_list ap;
|
|
|
|
va_start (ap, num_waits);
|
|
goacc_wait (async, num_waits, &ap);
|
|
va_end (ap);
|
|
}
|
|
|
|
bool update_device = false;
|
|
for (i = 0; i < mapnum; ++i)
|
|
{
|
|
unsigned char kind = kinds[i] & 0xff;
|
|
|
|
switch (kind)
|
|
{
|
|
case GOMP_MAP_POINTER:
|
|
case GOMP_MAP_TO_PSET:
|
|
break;
|
|
|
|
case GOMP_MAP_ALWAYS_POINTER:
|
|
if (update_device)
|
|
{
|
|
/* Save the contents of the host pointer. */
|
|
void *dptr = acc_deviceptr (hostaddrs[i-1]);
|
|
uintptr_t t = *(uintptr_t *) hostaddrs[i];
|
|
|
|
/* Update the contents of the host pointer to reflect
|
|
the value of the allocated device memory in the
|
|
previous pointer. */
|
|
*(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
|
|
/* TODO: verify that we really cannot use acc_update_device_async
|
|
here. */
|
|
acc_update_device (hostaddrs[i], sizeof (uintptr_t));
|
|
|
|
/* Restore the host pointer. */
|
|
*(uintptr_t *) hostaddrs[i] = t;
|
|
update_device = false;
|
|
}
|
|
break;
|
|
|
|
case GOMP_MAP_TO:
|
|
if (!acc_is_present (hostaddrs[i], sizes[i]))
|
|
{
|
|
update_device = false;
|
|
break;
|
|
}
|
|
/* Fallthru */
|
|
case GOMP_MAP_FORCE_TO:
|
|
update_device = true;
|
|
acc_update_device_async (hostaddrs[i], sizes[i], async);
|
|
break;
|
|
|
|
case GOMP_MAP_FROM:
|
|
if (!acc_is_present (hostaddrs[i], sizes[i]))
|
|
{
|
|
update_device = false;
|
|
break;
|
|
}
|
|
/* Fallthru */
|
|
case GOMP_MAP_FORCE_FROM:
|
|
update_device = false;
|
|
acc_update_self_async (hostaddrs[i], sizes[i], async);
|
|
break;
|
|
|
|
default:
|
|
gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
|
|
break;
|
|
}
|
|
}
|
|
|
|
out_prof:
|
|
if (profiling_p)
|
|
{
|
|
prof_info.event_type = acc_ev_update_end;
|
|
update_event_info.other_event.event_type = prof_info.event_type;
|
|
goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
|
|
|
|
thr->prof_info = NULL;
|
|
thr->api_info = NULL;
|
|
}
|
|
}
|
|
|
|
|
|
/* Legacy entry point (GCC 5).  Takes no arguments and always returns 1.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
|
|
|
|
/* Legacy entry point (GCC 5).  Takes no arguments and always returns 0.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}
|