diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 6c146217fe2..90e4fd1e2ac 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,22 @@ +2019-12-19 Julian Brown + Thomas Schwinge + + * libgomp.h (struct splay_tree_aux): Add attach_count field. + (gomp_attach_pointer, gomp_detach_pointer): Add prototypes. + * libgomp.map (OACC_2.6): New section. Add acc_attach, + acc_attach_async, acc_detach, acc_detach_async, acc_detach_finalize, + acc_detach_finalize_async. + * oacc-mem.c (acc_attach_async, acc_attach, goacc_detach_internal, + acc_detach, acc_detach_async, acc_detach_finalize, + acc_detach_finalize_async): New functions. + * openacc.h (acc_attach, acc_attach_async, acc_detach, + (acc_detach_async, acc_detach_finalize, acc_detach_finalize_async): Add + prototypes. + * target.c (gomp_attach_pointer, gomp_detach_pointer): New functions. + (gomp_remove_var_internal): Free attachment counts if present. + * testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c: New test. + * testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c: New test. + 2019-12-19 Julian Brown Cesar Philippidis diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 8139cd7de08..4da1a9e2590 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -994,6 +994,9 @@ struct target_mem_desc { struct splay_tree_aux { /* Pointer to the original mapping of "omp declare target link" object. */ splay_tree_key link_key; + /* For a block with attached pointers, the attachment counters for each. + Only used for OpenACC. */ + uintptr_t *attach_count; }; struct splay_tree_key_s { @@ -1158,6 +1161,13 @@ extern void gomp_copy_dev2host (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, size_t); extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t); +extern void gomp_attach_pointer (struct gomp_device_descr *, + struct goacc_asyncqueue *, splay_tree, + splay_tree_key, uintptr_t, size_t, + struct gomp_coalesce_buf *); +extern void gomp_detach_pointer (struct gomp_device_descr *, + struct goacc_asyncqueue *, splay_tree_key, + uintptr_t, bool, struct gomp_coalesce_buf *); extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, size_t, void **, void **, diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index c79430f8d8d..63276f7d29b 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -484,6 +484,16 @@ OACC_2.5.1 { acc_register_library; } OACC_2.5; +OACC_2.6 { + global: + acc_attach; + acc_attach_async; + acc_detach; + acc_detach_async; + acc_detach_finalize; + acc_detach_finalize_async; +} OACC_2.5.1; + GOACC_2.0 { global: GOACC_data_end; diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 64168d64017..4fcb62ec386 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -867,6 +867,90 @@ acc_update_self_async (void *h, size_t s, int async) update_dev_host (0, h, s, async); } +void +acc_attach_async (void **hostaddr, int async) +{ + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + goacc_aq aq = get_goacc_asyncqueue (async); + + struct splay_tree_key_s cur_node; + splay_tree_key n; + + if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return; + + gomp_mutex_lock (&acc_dev->lock); + + cur_node.host_start = (uintptr_t) hostaddr; + cur_node.host_end = cur_node.host_start + sizeof (void *); + n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); + + if (n == NULL) + gomp_fatal ("struct not mapped for acc_attach"); + + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr, + 0, NULL); + + gomp_mutex_unlock (&acc_dev->lock); +} + +void +acc_attach (void **hostaddr) +{ + acc_attach_async (hostaddr, acc_async_sync); +} + +static void +goacc_detach_internal (void **hostaddr, int async, bool finalize) +{ + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + struct splay_tree_key_s cur_node; + splay_tree_key n; + struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async); + + if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return; + + gomp_mutex_lock (&acc_dev->lock); + + cur_node.host_start = (uintptr_t) hostaddr; + cur_node.host_end = cur_node.host_start + sizeof (void *); + n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); + + if (n == NULL) + gomp_fatal ("struct not mapped for acc_detach"); + + gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL); + + gomp_mutex_unlock (&acc_dev->lock); +} + +void +acc_detach (void **hostaddr) +{ + goacc_detach_internal (hostaddr, acc_async_sync, false); +} + +void +acc_detach_async (void **hostaddr, int async) +{ + goacc_detach_internal (hostaddr, async, false); +} + +void +acc_detach_finalize (void **hostaddr) +{ + goacc_detach_internal (hostaddr, acc_async_sync, true); +} + +void +acc_detach_finalize_async (void **hostaddr, int async) +{ + goacc_detach_internal (hostaddr, async, true); +} + /* Some types of (pointer) variables use several consecutive mappings, which must be treated as a group for enter/exit data directives. This function returns the last mapping in such a group (inclusive), or POS for singleton diff --git a/libgomp/openacc.h b/libgomp/openacc.h index 42c861caabf..d2e5c101f7f 100644 --- a/libgomp/openacc.h +++ b/libgomp/openacc.h @@ -109,12 +109,18 @@ void *acc_hostptr (void *) __GOACC_NOTHROW; int acc_is_present (void *, size_t) __GOACC_NOTHROW; void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; +void acc_attach (void **) __GOACC_NOTHROW; +void acc_attach_async (void **, int) __GOACC_NOTHROW; +void acc_detach (void **) __GOACC_NOTHROW; +void acc_detach_async (void **, int) __GOACC_NOTHROW; /* Finalize versions of copyout/delete functions, specified in OpenACC 2.5. */ void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW; void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW; void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW; void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW; +void acc_detach_finalize (void **) __GOACC_NOTHROW; +void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW; /* Async functions, specified in OpenACC 2.5. */ void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW; diff --git a/libgomp/target.c b/libgomp/target.c index ef8e9ab14fc..13f304cf97e 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -493,6 +493,134 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, (void *) cur_node.host_end); } +attribute_hidden void +gomp_attach_pointer (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, splay_tree mem_map, + splay_tree_key n, uintptr_t attach_to, size_t bias, + struct gomp_coalesce_buf *cbufp) +{ + struct splay_tree_key_s s; + size_t size, idx; + + if (n == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("enclosing struct not mapped for attach"); + } + + size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *); + /* We might have a pointer in a packed struct: however we cannot have more + than one such pointer in each pointer-sized portion of the struct, so + this is safe. */ + idx = (attach_to - n->host_start) / sizeof (void *); + + if (!n->aux) + n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux)); + + if (!n->aux->attach_count) + n->aux->attach_count + = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size); + + if (n->aux->attach_count[idx] < UINTPTR_MAX) + n->aux->attach_count[idx]++; + else + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("attach count overflow"); + } + + if (n->aux->attach_count[idx] == 1) + { + uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to + - n->host_start; + uintptr_t target = (uintptr_t) *(void **) attach_to; + splay_tree_key tn; + uintptr_t data; + + if ((void *) target == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("attempt to attach null pointer"); + } + + s.host_start = target + bias; + s.host_end = s.host_start + 1; + tn = splay_tree_lookup (mem_map, &s); + + if (!tn) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("pointer target not mapped for attach"); + } + + data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start; + + gomp_debug (1, + "%s: attaching host %p, target %p (struct base %p) to %p\n", + __FUNCTION__, (void *) attach_to, (void *) devptr, + (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data); + + gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, + sizeof (void *), cbufp); + } + else + gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, + (void *) attach_to, (int) n->aux->attach_count[idx]); +} + +attribute_hidden void +gomp_detach_pointer (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, splay_tree_key n, + uintptr_t detach_from, bool finalize, + struct gomp_coalesce_buf *cbufp) +{ + size_t idx; + + if (n == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("enclosing struct not mapped for detach"); + } + + idx = (detach_from - n->host_start) / sizeof (void *); + + if (!n->aux || !n->aux->attach_count) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("no attachment counters for struct"); + } + + if (finalize) + n->aux->attach_count[idx] = 1; + + if (n->aux->attach_count[idx] == 0) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("attach count underflow"); + } + else + n->aux->attach_count[idx]--; + + if (n->aux->attach_count[idx] == 0) + { + uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from + - n->host_start; + uintptr_t target = (uintptr_t) *(void **) detach_from; + + gomp_debug (1, + "%s: detaching host %p, target %p (struct base %p) to %p\n", + __FUNCTION__, (void *) detach_from, (void *) devptr, + (void *) (n->tgt->tgt_start + n->tgt_offset), + (void *) target); + + gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target, + sizeof (void *), cbufp); + } + else + gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, + (void *) detach_from, (int) n->aux->attach_count[idx]); +} + attribute_hidden uintptr_t gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) { @@ -1191,6 +1319,8 @@ gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k, if (k->aux->link_key) splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->aux->link_key); + if (k->aux->attach_count) + free (k->aux->attach_count); free (k->aux); k->aux = NULL; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c new file mode 100644 index 00000000000..cec764bd3e7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c @@ -0,0 +1,34 @@ +#include +#include +#include + +int +main () +{ + int n = 100, i; + int *a = (int *) malloc (sizeof (int) * n); + int *b; + + for (i = 0; i < n; i++) + a[i] = i+1; + +#pragma acc enter data copyin(a[:n]) create(b) + + b = a; + acc_attach ((void **)&b); + +#pragma acc parallel loop present (b[:n]) + for (i = 0; i < n; i++) + b[i] = i+1; + + acc_detach ((void **)&b); + +#pragma acc exit data copyout(a[:n], b) + + for (i = 0; i < 10; i++) + assert (a[i] == b[i]); + + free (a); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c new file mode 100644 index 00000000000..89cafbb62ab --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c @@ -0,0 +1,81 @@ +#include +#include +#include + +struct node +{ + struct node *next; + int val; +}; + +int +sum_nodes (struct node *head) +{ + int i = 0, sum = 0; + +#pragma acc parallel reduction(+:sum) present(head[:1]) + { + for (; head != NULL; head = head->next) + sum += head->val; + } + + return sum; +} + +void +insert (struct node *head, int val) +{ + struct node *n = (struct node *) malloc (sizeof (struct node)); + + if (head->next) + acc_detach ((void **) &head->next); + + n->val = val; + n->next = head->next; + head->next = n; + + acc_copyin (n, sizeof (struct node)); + acc_attach((void **) &head->next); + + if (n->next) + acc_attach ((void **) &n->next); +} + +void +destroy (struct node *head) +{ + while (head->next != NULL) + { + acc_detach ((void **) &head->next); + struct node * n = head->next; + head->next = n->next; + if (n->next) + acc_detach ((void **) &n->next); + + acc_delete (n, sizeof (struct node)); + if (head->next) + acc_attach((void **) &head->next); + + free (n); + } +} + +int +main () +{ + struct node list = { .next = NULL, .val = 0 }; + int i; + + acc_copyin (&list, sizeof (struct node)); + + for (i = 0; i < 10; i++) + insert (&list, 2); + + assert (sum_nodes (&list) == 10 * 2); + + destroy (&list); + + acc_delete (&list, sizeof (struct node)); + + return 0; +}