openacc: Deep copy attach/detach should not affect reference counts

Attach and detach operations are not supposed to affect structural or
dynamic reference counts for OpenACC. Previously they did so, which led to
subtle problems in some circumstances. We can avoid reference-counting
attach/detach operations by extending and slightly repurposing the
do_detach field in target_var_desc. It is now called is_attach to better
reflect its new role.

2020-07-27  Julian Brown  <julian@codesourcery.com>
	    Thomas Schwinge  <thomas@codesourcery.com>

libgomp/
	* libgomp.h (struct target_var_desc): Rename do_detach field to
	is_attach.
	* oacc-mem.c (goacc_exit_datum_1): Add assert.  Don't set finalize for
	GOMP_MAP_FORCE_DETACH. Update checking to use is_attach field.
	(goacc_enter_data_internal): Don't affect reference counts
	for attach mappings.
	(goacc_exit_data_internal): Don't affect reference counts for detach
	mappings.
	* target.c (gomp_map_vars_existing): Don't affect reference counts for
	attach mappings.
	(gomp_map_vars_internal): Set renamed is_attach flag unconditionally to
	mark attach mappings.
	(gomp_unmap_vars_internal): Use is_attach flag to prevent affecting
	reference count for attach mappings.
	* testsuite/libgomp.oacc-c-c++-common/mdc-refcount-1.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/mdc-refcount-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/mdc-refcount-2.c: New test.
	* testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90: Mark
	test as shouldfail.
	* testsuite/libgomp.oacc-fortran/deep-copy-6.f90: Adjust to fail
	gracefully in no-finalize mode.

Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>
This commit is contained in:
Julian Brown 2020-06-18 05:11:08 -07:00 committed by Giuliano Belinassi
parent b583d3f8d6
commit d8b6ad4c1a
8 changed files with 324 additions and 26 deletions

View File

@ -952,8 +952,8 @@ struct target_var_desc {
bool copy_from;
/* True if data always should be copied from device to host at the end. */
bool always_copy_from;
/* True if variable should be detached at end of region. */
bool do_detach;
/* True if this is for OpenACC 'attach'. */
bool is_attach;
/* Relative offset against key host_start. */
uintptr_t offset;
/* Actual length. */

View File

@ -667,6 +667,9 @@ static void
goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
unsigned short kind, splay_tree_key n, goacc_aq aq)
{
assert (kind != GOMP_MAP_DETACH
&& kind != GOMP_MAP_FORCE_DETACH);
if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
{
size_t host_size = n->host_end - n->host_start;
@ -676,8 +679,7 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
}
bool finalize = (kind == GOMP_MAP_FORCE_FROM
|| kind == GOMP_MAP_DELETE
|| kind == GOMP_MAP_FORCE_DETACH);
|| kind == GOMP_MAP_DELETE);
assert (n->refcount != REFCOUNT_LINK);
if (n->refcount != REFCOUNT_INFINITY
@ -725,7 +727,8 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
zero. Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with
multiple members), fall back to skipping the test. */
for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i)
if (n->tgt->list[l_i].key)
if (n->tgt->list[l_i].key
&& !n->tgt->list[l_i].is_attach)
++num_mappings;
bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
assert (is_tgt_unmapped || num_mappings > 1);
@ -1135,12 +1138,15 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
void *h = hostaddrs[i];
size_t s = sizes[i];
/* A standalone attach clause. */
if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
(uintptr_t) h, s, NULL);
goacc_map_var_existing (acc_dev, h, s, n);
{
gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
(uintptr_t) h, s, NULL);
/* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
reference counts ('n->refcount', 'n->dynamic_refcount'). */
}
else
goacc_map_var_existing (acc_dev, h, s, n);
}
else if (n && groupnum > 1)
{
@ -1168,7 +1174,9 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
list, and increment the refcounts for each item in that
group. */
for (size_t k = 0; k < groupnum; k++)
if (j + k < tgt->list_count && tgt->list[j + k].key)
if (j + k < tgt->list_count
&& tgt->list[j + k].key
&& !tgt->list[j + k].is_attach)
{
tgt->list[j + k].key->refcount++;
tgt->list[j + k].key->dynamic_refcount++;
@ -1202,7 +1210,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
for (size_t j = 0; j < tgt->list_count; j++)
{
n = tgt->list[j].key;
if (n)
if (n && !tgt->list[j].is_attach)
n->dynamic_refcount++;
}
}
@ -1268,14 +1276,10 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
case GOMP_MAP_POINTER:
case GOMP_MAP_DELETE:
case GOMP_MAP_RELEASE:
case GOMP_MAP_DETACH:
case GOMP_MAP_FORCE_DETACH:
{
struct splay_tree_key_s cur_node;
size_t size;
if (kind == GOMP_MAP_POINTER
|| kind == GOMP_MAP_DETACH
|| kind == GOMP_MAP_FORCE_DETACH)
if (kind == GOMP_MAP_POINTER)
size = sizeof (void *);
else
size = sizes[i];
@ -1298,6 +1302,12 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
'GOMP_MAP_STRUCT's anymore. */
break;
case GOMP_MAP_DETACH:
case GOMP_MAP_FORCE_DETACH:
/* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
reference counts ('n->refcount', 'n->dynamic_refcount'). */
break;
default:
gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
kind);

View File

@ -362,7 +362,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
tgt_var->key = oldn;
tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
tgt_var->do_detach = false;
tgt_var->is_attach = false;
tgt_var->offset = newn->host_start - oldn->host_start;
tgt_var->length = newn->host_end - newn->host_start;
@ -1093,9 +1093,10 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
tgt->list[i].length = n->host_end - n->host_start;
tgt->list[i].copy_from = false;
tgt->list[i].always_copy_from = false;
tgt->list[i].do_detach
= (pragma_kind != GOMP_MAP_VARS_ENTER_DATA);
n->refcount++;
tgt->list[i].is_attach = true;
/* OpenACC 'attach'/'detach' doesn't affect
structured/dynamic reference counts ('n->refcount',
'n->dynamic_refcount'). */
}
else
{
@ -1151,7 +1152,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
tgt->list[i].always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
tgt->list[i].do_detach = false;
tgt->list[i].is_attach = false;
tgt->list[i].offset = 0;
tgt->list[i].length = k->host_end - k->host_start;
k->refcount = 1;
@ -1206,7 +1207,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
tgt->list[j].key = k;
tgt->list[j].copy_from = false;
tgt->list[j].always_copy_from = false;
tgt->list[j].do_detach = false;
tgt->list[j].is_attach = false;
if (k->refcount != REFCOUNT_INFINITY)
k->refcount++;
gomp_map_pointer (tgt, aq,
@ -1434,7 +1435,7 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
{
splay_tree_key k = tgt->list[i].key;
if (k != NULL && tgt->list[i].do_detach)
if (k != NULL && tgt->list[i].is_attach)
gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
+ tgt->list[i].offset,
false, NULL);
@ -1446,6 +1447,11 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
if (k == NULL)
continue;
/* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
counts ('n->refcount', 'n->dynamic_refcount'). */
if (tgt->list[i].is_attach)
continue;
bool do_unmap = false;
if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
k->refcount--;

View File

@ -0,0 +1,60 @@
/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
#include <openacc.h>
#include <assert.h>
#define N 1024
struct mystr {
int *data;
};
static void
test (unsigned variant)
{
int arr[N];
struct mystr s;
s.data = arr;
acc_copyin (&s, sizeof (s));
acc_create (s.data, N * sizeof (int));
for (int i = 0; i < 20; i++)
{
if ((variant + i) % 1)
{
#pragma acc enter data attach(s.data)
}
else
acc_attach ((void **) &s.data);
if ((variant + i) % 2)
{
#pragma acc exit data detach(s.data)
}
else
acc_detach ((void **) &s.data);
}
assert (acc_is_present (arr, N * sizeof (int)));
assert (acc_is_present (&s, sizeof (s)));
acc_delete (arr, N * sizeof (int));
assert (!acc_is_present (arr, N * sizeof (int)));
acc_copyout (&s, sizeof (s));
assert (!acc_is_present (&s, sizeof (s)));
assert (s.data == arr);
}
int
main (int argc, char *argv[])
{
for (unsigned variant = 0; variant < 4; ++variant)
test (variant);
return 0;
}

View File

@ -0,0 +1,123 @@
/* Verify that OpenACC 'attach'/'detach' doesn't interfere with reference
counting. */
#include <assert.h>
#include <stdlib.h>
#include <openacc.h>
/* Need to shared this (and, in particular, implicit '&data_work' in
'attach'/'detach' clauses) between 'test' and 'test_'. */
static unsigned char *data_work;
static void test_(unsigned variant,
unsigned char *data,
void *data_d)
{
assert(acc_is_present(&data_work, sizeof data_work));
assert(data_work == data);
acc_update_self(&data_work, sizeof data_work);
assert(data_work == data);
if (variant & 1)
{
#pragma acc enter data attach(data_work)
}
else
acc_attach((void **) &data_work);
acc_update_self(&data_work, sizeof data_work);
assert(data_work == data_d);
if (variant & 4)
{
if (variant & 2)
{ // attach some more
data_work = data;
acc_attach((void **) &data_work);
#pragma acc enter data attach(data_work)
acc_attach((void **) &data_work);
#pragma acc enter data attach(data_work)
#pragma acc enter data attach(data_work)
#pragma acc enter data attach(data_work)
acc_attach((void **) &data_work);
acc_attach((void **) &data_work);
#pragma acc enter data attach(data_work)
}
else
{}
}
else
{ // detach
data_work = data;
if (variant & 2)
{
#pragma acc exit data detach(data_work)
}
else
acc_detach((void **) &data_work);
acc_update_self(&data_work, sizeof data_work);
assert(data_work == data);
// now not attached anymore
#if 0
if (TODO)
{
acc_detach(&data_work); //TODO PR95203 "libgomp: attach count underflow"
acc_update_self(&data_work, sizeof data_work);
assert(data_work == data);
}
#endif
}
assert(acc_is_present(&data_work, sizeof data_work));
}
static void test(unsigned variant)
{
const int size = sizeof (void *);
unsigned char *data = (unsigned char *) malloc(size);
assert(data);
void *data_d = acc_create(data, size);
assert(data_d);
assert(acc_is_present(data, size));
data_work = data;
if (variant & 8)
{
#pragma acc data copyin(data_work)
test_(variant, data, data_d);
}
else
{
acc_copyin(&data_work, sizeof data_work);
test_(variant, data, data_d);
acc_delete(&data_work, sizeof data_work);
}
#if ACC_MEM_SHARED
assert(acc_is_present(&data_work, sizeof data_work));
#else
assert(!acc_is_present(&data_work, sizeof data_work));
#endif
data_work = NULL;
assert(acc_is_present(data, size));
acc_delete(data, size);
data_d = NULL;
#if ACC_MEM_SHARED
assert(acc_is_present(data, size));
#else
assert(!acc_is_present(data, size));
#endif
free(data);
data = NULL;
}
int main()
{
for (size_t i = 0; i < 16; ++i)
test(i);
return 0;
}

View File

@ -0,0 +1,86 @@
/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
/* Variant of 'deep-copy-7.c'. */
#include <stdlib.h>
#include <assert.h>
#include <openacc.h>
struct dc
{
int a;
int *b;
};
int
main ()
{
int n = 100, i, j, k;
struct dc v = { .a = 3 };
v.b = (int *) malloc (sizeof (int) * n);
for (k = 0; k < 16; k++)
{
/* Here, we do not explicitly copy the enclosing structure, but work
with fields directly. Make sure attachment counters and reference
counters work properly in that case. */
#pragma acc enter data copyin(v.a, v.b[0:n]) // 1
assert (acc_is_present (&v.b, sizeof v.b));
assert (acc_is_present (v.b, sizeof (int) * n));
#pragma acc enter data pcopyin(v.b[0:n]) // 2
#pragma acc enter data pcopyin(v.b[0:n]) // 3
#pragma acc parallel loop present(v.a, v.b)
for (i = 0; i < n; i++)
v.b[i] = k + v.a + i;
switch (k % 5)
{ // All optional.
case 0:
break;
case 1:
; //TODO PR95901
#pragma acc exit data detach(v.b) finalize
break;
case 2:
; //TODO PR95901
#pragma acc exit data detach(v.b)
break;
case 3:
acc_detach_finalize ((void **) &v.b);
break;
case 4:
acc_detach ((void **) &v.b);
break;
}
assert (acc_is_present (&v.b, sizeof v.b));
assert (acc_is_present (v.b, sizeof (int) * n));
{ // 3
acc_delete (&v.b, sizeof v.b);
assert (acc_is_present (&v.b, sizeof v.b));
acc_copyout (v.b, sizeof (int) * n);
assert (acc_is_present (v.b, sizeof (int) * n));
}
{ // 2
acc_delete (&v.b, sizeof v.b);
assert (acc_is_present (&v.b, sizeof v.b));
acc_copyout (v.b, sizeof (int) * n);
assert (acc_is_present (v.b, sizeof (int) * n));
}
{ // 1
acc_delete (&v.b, sizeof v.b);
assert (!acc_is_present (&v.b, sizeof v.b));
acc_copyout (v.b, sizeof (int) * n);
assert (!acc_is_present (v.b, sizeof (int) * n));
}
#pragma acc exit data delete(v.a)
for (i = 0; i < n; i++)
assert (v.b[i] == k + v.a + i);
assert (!acc_is_present (&v, sizeof (v)));
}
return 0;
}

View File

@ -1,5 +1,12 @@
! { dg-do run }
/* Nullify the 'finalize' clause. */
/* Nullify the 'finalize' clause.
That means, we do not detach properly, the host sees a device pointer, and
we fail as follows.
{ dg-output "STOP 30(\n|\r\n|\r)+" { target { ! openacc_host_selected } } }
{ dg-shouldfail "" { ! openacc_host_selected } }
*/
#define finalize
#include "deep-copy-6.f90"

View File

@ -12,11 +12,14 @@ program dtype
end type mytype
integer i
type(mytype) :: var
type(mytype), target :: var
integer, pointer :: hostptr(:)
allocate(var%a(1:n))
allocate(var%b(1:n))
hostptr => var%a
!$acc data copy(var)
do i = 1, n
@ -49,6 +52,9 @@ program dtype
!$acc end data
! See 'deep-copy-6-no_finalize.F90'.
if (.not. associated(hostptr, var%a)) stop 30
do i = 1,4
if (var%a(i) .ne. 0) stop 1
if (var%b(i) .ne. 0) stop 2