OpenACC 2.6 deep copy: attach/detach API routines

libgomp/
	* libgomp.h (struct splay_tree_aux): Add attach_count field.
	(gomp_attach_pointer, gomp_detach_pointer): Add prototypes.
	* libgomp.map (OACC_2.6): New section. Add acc_attach,
	acc_attach_async, acc_detach, acc_detach_async, acc_detach_finalize,
	acc_detach_finalize_async.
	* oacc-mem.c (acc_attach_async, acc_attach, goacc_detach_internal,
	acc_detach, acc_detach_async, acc_detach_finalize,
	acc_detach_finalize_async): New functions.
	* openacc.h (acc_attach, acc_attach_async, acc_detach,
	(acc_detach_async, acc_detach_finalize, acc_detach_finalize_async): Add
	prototypes.
	* target.c (gomp_attach_pointer, gomp_detach_pointer): New functions.
	(gomp_remove_var_internal): Free attachment counts if present.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c: New test.

Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>

From-SVN: r279624
This commit is contained in:
Julian Brown 2019-12-20 01:20:27 +00:00 committed by Julian Brown
parent 4d83edf7ef
commit 5d5be7bfb5
8 changed files with 374 additions and 0 deletions

View File

@ -1,3 +1,22 @@
2019-12-19 Julian Brown <julian@codesourcery.com>
Thomas Schwinge <thomas@codesourcery.com>
* libgomp.h (struct splay_tree_aux): Add attach_count field.
(gomp_attach_pointer, gomp_detach_pointer): Add prototypes.
* libgomp.map (OACC_2.6): New section. Add acc_attach,
acc_attach_async, acc_detach, acc_detach_async, acc_detach_finalize,
acc_detach_finalize_async.
* oacc-mem.c (acc_attach_async, acc_attach, goacc_detach_internal,
acc_detach, acc_detach_async, acc_detach_finalize,
acc_detach_finalize_async): New functions.
* openacc.h (acc_attach, acc_attach_async, acc_detach,
(acc_detach_async, acc_detach_finalize, acc_detach_finalize_async): Add
prototypes.
* target.c (gomp_attach_pointer, gomp_detach_pointer): New functions.
(gomp_remove_var_internal): Free attachment counts if present.
* testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c: New test.
* testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c: New test.
2019-12-19 Julian Brown <julian@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>

View File

@ -994,6 +994,9 @@ struct target_mem_desc {
struct splay_tree_aux {
/* Pointer to the original mapping of "omp declare target link" object. */
splay_tree_key link_key;
/* For a block with attached pointers, the attachment counters for each.
Only used for OpenACC. */
uintptr_t *attach_count;
};
struct splay_tree_key_s {
@ -1158,6 +1161,13 @@ extern void gomp_copy_dev2host (struct gomp_device_descr *,
struct goacc_asyncqueue *, void *, const void *,
size_t);
extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t);
extern void gomp_attach_pointer (struct gomp_device_descr *,
struct goacc_asyncqueue *, splay_tree,
splay_tree_key, uintptr_t, size_t,
struct gomp_coalesce_buf *);
extern void gomp_detach_pointer (struct gomp_device_descr *,
struct goacc_asyncqueue *, splay_tree_key,
uintptr_t, bool, struct gomp_coalesce_buf *);
extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
size_t, void **, void **,

View File

@ -484,6 +484,16 @@ OACC_2.5.1 {
acc_register_library;
} OACC_2.5;
OACC_2.6 {
global:
acc_attach;
acc_attach_async;
acc_detach;
acc_detach_async;
acc_detach_finalize;
acc_detach_finalize_async;
} OACC_2.5.1;
GOACC_2.0 {
global:
GOACC_data_end;

View File

@ -867,6 +867,90 @@ acc_update_self_async (void *h, size_t s, int async)
update_dev_host (0, h, s, async);
}
void
acc_attach_async (void **hostaddr, int async)
{
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
goacc_aq aq = get_goacc_asyncqueue (async);
struct splay_tree_key_s cur_node;
splay_tree_key n;
if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return;
gomp_mutex_lock (&acc_dev->lock);
cur_node.host_start = (uintptr_t) hostaddr;
cur_node.host_end = cur_node.host_start + sizeof (void *);
n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
if (n == NULL)
gomp_fatal ("struct not mapped for acc_attach");
gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
0, NULL);
gomp_mutex_unlock (&acc_dev->lock);
}
void
acc_attach (void **hostaddr)
{
acc_attach_async (hostaddr, acc_async_sync);
}
static void
goacc_detach_internal (void **hostaddr, int async, bool finalize)
{
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
struct splay_tree_key_s cur_node;
splay_tree_key n;
struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return;
gomp_mutex_lock (&acc_dev->lock);
cur_node.host_start = (uintptr_t) hostaddr;
cur_node.host_end = cur_node.host_start + sizeof (void *);
n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
if (n == NULL)
gomp_fatal ("struct not mapped for acc_detach");
gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
gomp_mutex_unlock (&acc_dev->lock);
}
void
acc_detach (void **hostaddr)
{
goacc_detach_internal (hostaddr, acc_async_sync, false);
}
void
acc_detach_async (void **hostaddr, int async)
{
goacc_detach_internal (hostaddr, async, false);
}
void
acc_detach_finalize (void **hostaddr)
{
goacc_detach_internal (hostaddr, acc_async_sync, true);
}
void
acc_detach_finalize_async (void **hostaddr, int async)
{
goacc_detach_internal (hostaddr, async, true);
}
/* Some types of (pointer) variables use several consecutive mappings, which
must be treated as a group for enter/exit data directives. This function
returns the last mapping in such a group (inclusive), or POS for singleton

View File

@ -109,12 +109,18 @@ void *acc_hostptr (void *) __GOACC_NOTHROW;
int acc_is_present (void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
void acc_attach (void **) __GOACC_NOTHROW;
void acc_attach_async (void **, int) __GOACC_NOTHROW;
void acc_detach (void **) __GOACC_NOTHROW;
void acc_detach_async (void **, int) __GOACC_NOTHROW;
/* Finalize versions of copyout/delete functions, specified in OpenACC 2.5. */
void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_detach_finalize (void **) __GOACC_NOTHROW;
void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW;
/* Async functions, specified in OpenACC 2.5. */
void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;

View File

@ -493,6 +493,134 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
(void *) cur_node.host_end);
}
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, splay_tree mem_map,
splay_tree_key n, uintptr_t attach_to, size_t bias,
struct gomp_coalesce_buf *cbufp)
{
struct splay_tree_key_s s;
size_t size, idx;
if (n == NULL)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("enclosing struct not mapped for attach");
}
size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
/* We might have a pointer in a packed struct: however we cannot have more
than one such pointer in each pointer-sized portion of the struct, so
this is safe. */
idx = (attach_to - n->host_start) / sizeof (void *);
if (!n->aux)
n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
if (!n->aux->attach_count)
n->aux->attach_count
= gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
if (n->aux->attach_count[idx] < UINTPTR_MAX)
n->aux->attach_count[idx]++;
else
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("attach count overflow");
}
if (n->aux->attach_count[idx] == 1)
{
uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
- n->host_start;
uintptr_t target = (uintptr_t) *(void **) attach_to;
splay_tree_key tn;
uintptr_t data;
if ((void *) target == NULL)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("attempt to attach null pointer");
}
s.host_start = target + bias;
s.host_end = s.host_start + 1;
tn = splay_tree_lookup (mem_map, &s);
if (!tn)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("pointer target not mapped for attach");
}
data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
gomp_debug (1,
"%s: attaching host %p, target %p (struct base %p) to %p\n",
__FUNCTION__, (void *) attach_to, (void *) devptr,
(void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
sizeof (void *), cbufp);
}
else
gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
(void *) attach_to, (int) n->aux->attach_count[idx]);
}
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, splay_tree_key n,
uintptr_t detach_from, bool finalize,
struct gomp_coalesce_buf *cbufp)
{
size_t idx;
if (n == NULL)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("enclosing struct not mapped for detach");
}
idx = (detach_from - n->host_start) / sizeof (void *);
if (!n->aux || !n->aux->attach_count)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("no attachment counters for struct");
}
if (finalize)
n->aux->attach_count[idx] = 1;
if (n->aux->attach_count[idx] == 0)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("attach count underflow");
}
else
n->aux->attach_count[idx]--;
if (n->aux->attach_count[idx] == 0)
{
uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
- n->host_start;
uintptr_t target = (uintptr_t) *(void **) detach_from;
gomp_debug (1,
"%s: detaching host %p, target %p (struct base %p) to %p\n",
__FUNCTION__, (void *) detach_from, (void *) devptr,
(void *) (n->tgt->tgt_start + n->tgt_offset),
(void *) target);
gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
sizeof (void *), cbufp);
}
else
gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
(void *) detach_from, (int) n->aux->attach_count[idx]);
}
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
@ -1191,6 +1319,8 @@ gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
if (k->aux->link_key)
splay_tree_insert (&devicep->mem_map,
(splay_tree_node) k->aux->link_key);
if (k->aux->attach_count)
free (k->aux->attach_count);
free (k->aux);
k->aux = NULL;
}

View File

@ -0,0 +1,34 @@
#include <assert.h>
#include <stdlib.h>
#include <openacc.h>
int
main ()
{
int n = 100, i;
int *a = (int *) malloc (sizeof (int) * n);
int *b;
for (i = 0; i < n; i++)
a[i] = i+1;
#pragma acc enter data copyin(a[:n]) create(b)
b = a;
acc_attach ((void **)&b);
#pragma acc parallel loop present (b[:n])
for (i = 0; i < n; i++)
b[i] = i+1;
acc_detach ((void **)&b);
#pragma acc exit data copyout(a[:n], b)
for (i = 0; i < 10; i++)
assert (a[i] == b[i]);
free (a);
return 0;
}

View File

@ -0,0 +1,81 @@
#include <assert.h>
#include <stdlib.h>
#include <openacc.h>
struct node
{
struct node *next;
int val;
};
int
sum_nodes (struct node *head)
{
int i = 0, sum = 0;
#pragma acc parallel reduction(+:sum) present(head[:1])
{
for (; head != NULL; head = head->next)
sum += head->val;
}
return sum;
}
void
insert (struct node *head, int val)
{
struct node *n = (struct node *) malloc (sizeof (struct node));
if (head->next)
acc_detach ((void **) &head->next);
n->val = val;
n->next = head->next;
head->next = n;
acc_copyin (n, sizeof (struct node));
acc_attach((void **) &head->next);
if (n->next)
acc_attach ((void **) &n->next);
}
void
destroy (struct node *head)
{
while (head->next != NULL)
{
acc_detach ((void **) &head->next);
struct node * n = head->next;
head->next = n->next;
if (n->next)
acc_detach ((void **) &n->next);
acc_delete (n, sizeof (struct node));
if (head->next)
acc_attach((void **) &head->next);
free (n);
}
}
int
main ()
{
struct node list = { .next = NULL, .val = 0 };
int i;
acc_copyin (&list, sizeof (struct node));
for (i = 0; i < 10; i++)
insert (&list, 2);
assert (sum_nodes (&list) == 10 * 2);
destroy (&list);
acc_delete (&list, sizeof (struct node));
return 0;
}