openmp: Relax handling of implicit map vs. existing device mappings

This patch implements relaxing the requirements when a map with the implicit
attribute encounters an overlapping existing map. As the OpenMP 5.0 spec
describes on page 320, lines 18-27 (and 5.1 spec, page 352, lines 13-22):

"If a single contiguous part of the original storage of a list item with an
 implicit data-mapping attribute has corresponding storage in the device data
 environment prior to a task encountering the construct that is associated with
 the map clause, only that part of the original storage will have corresponding
 storage in the device data environment as a result of the map clause."

2021-11-12  Chung-Lin Tang  <cltang@codesourcery.com>

include/ChangeLog:

	* gomp-constants.h (GOMP_MAP_FLAG_SPECIAL_3): Define special bit macro.
	(GOMP_MAP_IMPLICIT): New special map kind bits value.
	(GOMP_MAP_FLAG_SPECIAL_BITS): Define helper mask for whole set of
	special map kind bits.
	(GOMP_MAP_IMPLICIT_P): New predicate macro for implicit map kinds.

gcc/ChangeLog:

	* tree.h (OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P): New access macro for
	'implicit' bit, using 'base.deprecated_flag' field of tree_node.
	* tree-pretty-print.c (dump_omp_clause): Add support for printing
	implicit attribute in tree dumping.
	* gimplify.c (gimplify_adjust_omp_clauses_1):
	Set OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P to 1 if map clause is implicitly
	created.
	(gimplify_adjust_omp_clauses): Adjust place of adding implicitly created
	clauses, from simple append, to starting of list, after non-map clauses.
	* omp-low.c (lower_omp_target): Add GOMP_MAP_IMPLICIT bits into kind
	values passed to libgomp for implicit maps.

gcc/testsuite/ChangeLog:

	* c-c++-common/gomp/target-implicit-map-1.c: New test.
	* c-c++-common/goacc/combined-reduction.c: Adjust scan test pattern.
	* c-c++-common/goacc/firstprivate-mappings-1.c: Likewise.
	* c-c++-common/goacc/mdc-1.c: Likewise.
	* g++.dg/goacc/firstprivate-mappings-1.C: Likewise.

libgomp/ChangeLog:

	* target.c (gomp_map_vars_existing): Add 'bool implicit' parameter, add
	implicit map handling to allow a "superset" existing map as valid case.
	(get_kind): Adjust to filter out GOMP_MAP_IMPLICIT bits in return value.
	(get_implicit): New function to extract implicit status.
	(gomp_map_fields_existing): Adjust arguments in calls to
	gomp_map_vars_existing, and add uses of get_implicit.
	(gomp_map_vars_internal): Likewise.
	* testsuite/libgomp.c-c++-common/target-implicit-map-1.c: New test.
This commit is contained in:
Chung-Lin Tang 2021-11-12 20:29:00 +08:00
parent a54ce8865a
commit b7e2048063
12 changed files with 178 additions and 28 deletions

View File

@ -10889,6 +10889,10 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data)
gcc_unreachable ();
}
OMP_CLAUSE_SET_MAP_KIND (clause, kind);
/* Setting of the implicit flag for the runtime is currently disabled for
OpenACC. */
if ((gimplify_omp_ctxp->region_type & ORT_ACC) == 0)
OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (clause) = 1;
if (DECL_SIZE (decl)
&& TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
{
@ -11504,9 +11508,15 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
list_p = &OMP_CLAUSE_CHAIN (c);
}
/* Add in any implicit data sharing. */
/* Add in any implicit data sharing. Implicit clauses are added at the start
of the clause list, but after any non-map clauses. */
struct gimplify_adjust_omp_clauses_data data;
data.list_p = list_p;
tree *implicit_add_list_p = orig_list_p;
while (*implicit_add_list_p
&& OMP_CLAUSE_CODE (*implicit_add_list_p) != OMP_CLAUSE_MAP)
implicit_add_list_p = &OMP_CLAUSE_CHAIN (*implicit_add_list_p);
data.list_p = implicit_add_list_p;
data.pre_p = pre_p;
splay_tree_foreach (ctx->variables, gimplify_adjust_omp_clauses_1, &data);

View File

@ -13168,6 +13168,19 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
else if (integer_nonzerop (s))
tkind_zero = tkind;
}
if (tkind_zero == tkind
&& OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (c)
&& (((tkind & GOMP_MAP_FLAG_SPECIAL_BITS)
& ~GOMP_MAP_IMPLICIT)
== 0))
{
/* If this is an implicit map, and the GOMP_MAP_IMPLICIT
bits are not interfered by other special bit encodings,
then turn the GOMP_IMPLICIT_BIT flag on for the runtime
to see. */
tkind |= GOMP_MAP_IMPLICIT;
tkind_zero = tkind;
}
break;
case OMP_CLAUSE_FIRSTPRIVATE:
gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt));

View File

@ -23,7 +23,7 @@ main ()
return 0;
}
/* { dg-final { scan-tree-dump-times "omp target oacc_parallel reduction.+:v1. map.tofrom:v1" 1 "gimple" } } */
/* { dg-final { scan-tree-dump-times "omp target oacc_parallel reduction.+:v1. firstprivate.n. map.tofrom:v1" 1 "gimple" } } */
/* { dg-final { scan-tree-dump-times "acc loop reduction.+:v1. private.i." 1 "gimple" } } */
/* { dg-final { scan-tree-dump-times "omp target oacc_kernels map.force_tofrom:n .len: 4.. map.force_tofrom:v1 .len: 4.." 1 "gimple" } } */
/* { dg-final { scan-tree-dump-times "acc loop reduction.+:v1. private.i." 1 "gimple" } } */

View File

@ -419,12 +419,7 @@ vla (int array_li)
copyout (array_so)
/* The gimplifier has created an implicit 'firstprivate' clause for the array
length.
{ dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel map\(from:array_so \[len: 4\]\) firstprivate\(array_li.[0-9]+\)} omplower { target { ! c++ } } } }
{ dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel map\(from:array_so \[len: 4\]\) firstprivate\(} omplower { target { c++ } } } }
(C++ computes an intermediate value, so can't scan for 'firstprivate(array_li)'.) */
/* For C, non-LP64, the gimplifier has also created a mapping for the array
itself; PR90859.
{ dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel map\(from:array_so \[len: 4\]\) firstprivate\(array_li.[0-9]+\) map\(tofrom:\(\*array.[0-9]+\) \[len: D\.[0-9]+\]\) map\(firstprivate:array \[pointer assign, bias: 0\]\) \[} omplower { target { c && { ! lp64 } } } } } */
{ dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel firstprivate\(array_li.[0-9]+\) map\(from:array_so \[len: 4\]\) \[} omplower } } */
{
array_so = sizeof array;
}

View File

@ -45,7 +45,7 @@ t1 ()
/* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_data map.to:s .len: 32.." 1 "omplower" } } */
/* { dg-final { scan-tree-dump-times "pragma omp target oacc_data map.tofrom:.z .len: 40.. map.struct:s .len: 1.. map.alloc:s.a .len: 8.. map.tofrom:._1 .len: 40.. map.attach:s.a .bias: 0.." 1 "omplower" } } */
/* { dg-final { scan-tree-dump-times "pragma omp target oacc_parallel map.attach:s.e .bias: 0.. map.tofrom:s .len: 32" 1 "omplower" } } */
/* { dg-final { scan-tree-dump-times "pragma omp target oacc_parallel map.tofrom:s .len: 32.. map.attach:s.e .bias: 0.." 1 "omplower" } } */
/* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_data map.attach:a .bias: 0.." 1 "omplower" } } */
/* { dg-final { scan-tree-dump-times "pragma omp target oacc_exit_data map.detach:a .bias: 0.." 1 "omplower" } } */
/* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_data map.to:a .len: 8.." 1 "omplower" } } */

View File

@ -0,0 +1,39 @@
/* { dg-do compile } */
/* { dg-additional-options "-fdump-tree-gimple" } */
#ifdef __cplusplus
extern "C"
#else
extern
#endif
void abort (void);
int
main (void)
{
#define N 5
int array[N][N];
for (int i = 0; i < N; i++)
{
#pragma omp target enter data map(alloc: array[i:1][0:N])
#pragma omp target
for (int j = 0; j < N; j++)
array[i][j] = i * 10 + j;
#pragma omp target exit data map(from: array[i:1][0:N])
}
for (int i = 0; i < N; i++)
for (int j = 0; j < N; j++)
if (array[i][j] != i + j)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump {#pragma omp target enter data map\(alloc:array\[[^]]+\]\[0\] \[len: [0-9]+\]\)} "gimple" } } */
/* { dg-final { scan-tree-dump {#pragma omp target num_teams.* firstprivate\(i\) map\(tofrom:array \[len: [0-9]+\]\[implicit\]\)} "gimple" } } */
/* { dg-final { scan-tree-dump {#pragma omp target exit data map\(from:array\[[^]]+\]\[0\] \[len: [0-9]+\]\)} "gimple" } } */

View File

@ -416,7 +416,7 @@ vla (int &array_li)
copyout (array_so)
/* The gimplifier has created an implicit 'firstprivate' clause for the array
length.
{ dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel map\(from:array_so \[len: 4\]\) firstprivate\(} omplower } }
{ dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel firstprivate\([^)]+\) map\(from:array_so \[len: 4\]\)} omplower } }
(C++ computes an intermediate value, so can't scan for 'firstprivate(array_li)'.) */
{
array_so = sizeof array;

View File

@ -971,6 +971,9 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
spc, flags, false);
pp_right_bracket (pp);
}
if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_MAP
&& OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (clause))
pp_string (pp, "[implicit]");
pp_right_paren (pp);
break;

View File

@ -1689,6 +1689,11 @@ class auto_suppress_location_wrappers
map clause. */
#define OMP_CLAUSE_MAP_IMPLICIT(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)->base.default_def_flag)
/* Nonzero if this map clause is to be indicated to the runtime as 'implicit',
due to being created through implicit data-mapping rules in the middle-end.
NOTE: this is different than OMP_CLAUSE_MAP_IMPLICIT. */
#define OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)->base.deprecated_flag)
/* True on an OMP_CLAUSE_USE_DEVICE_PTR with an OpenACC 'if_present'
clause. */

View File

@ -40,11 +40,22 @@
#define GOMP_MAP_FLAG_SPECIAL_0 (1 << 2)
#define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3)
#define GOMP_MAP_FLAG_SPECIAL_2 (1 << 4)
#define GOMP_MAP_FLAG_SPECIAL_3 (1 << 5)
#define GOMP_MAP_FLAG_SPECIAL_4 (1 << 6)
#define GOMP_MAP_FLAG_SPECIAL (GOMP_MAP_FLAG_SPECIAL_1 \
| GOMP_MAP_FLAG_SPECIAL_0)
#define GOMP_MAP_DEEP_COPY (GOMP_MAP_FLAG_SPECIAL_4 \
| GOMP_MAP_FLAG_SPECIAL_2)
/* This value indicates the map was created implicitly according to
OpenMP rules. */
#define GOMP_MAP_IMPLICIT (GOMP_MAP_FLAG_SPECIAL_3 \
| GOMP_MAP_FLAG_SPECIAL_4)
/* Mask for entire set of special map kind bits. */
#define GOMP_MAP_FLAG_SPECIAL_BITS (GOMP_MAP_FLAG_SPECIAL_0 \
| GOMP_MAP_FLAG_SPECIAL_1 \
| GOMP_MAP_FLAG_SPECIAL_2 \
| GOMP_MAP_FLAG_SPECIAL_3 \
| GOMP_MAP_FLAG_SPECIAL_4)
/* Flag to force a specific behavior (or else, trigger a run-time error). */
#define GOMP_MAP_FLAG_FORCE (1 << 7)
@ -186,6 +197,9 @@ enum gomp_map_kind
#define GOMP_MAP_ALWAYS_P(X) \
(GOMP_MAP_ALWAYS_TO_P (X) || ((X) == GOMP_MAP_ALWAYS_FROM))
#define GOMP_MAP_IMPLICIT_P(X) \
(((X) & GOMP_MAP_FLAG_SPECIAL_BITS) == GOMP_MAP_IMPLICIT)
/* Asynchronous behavior. Keep in sync with
libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t. */

View File

@ -539,7 +539,7 @@ static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, splay_tree_key oldn,
splay_tree_key newn, struct target_var_desc *tgt_var,
unsigned char kind, bool always_to_flag,
unsigned char kind, bool always_to_flag, bool implicit,
struct gomp_coalesce_buf *cbuf,
htab_t *refcount_set)
{
@ -550,11 +550,22 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
tgt_var->is_attach = false;
tgt_var->offset = newn->host_start - oldn->host_start;
tgt_var->length = newn->host_end - newn->host_start;
/* For implicit maps, old contained in new is valid. */
bool implicit_subset = (implicit
&& newn->host_start <= oldn->host_start
&& oldn->host_end <= newn->host_end);
if (implicit_subset)
tgt_var->length = oldn->host_end - oldn->host_start;
else
tgt_var->length = newn->host_end - newn->host_start;
if ((kind & GOMP_MAP_FLAG_FORCE)
|| oldn->host_start > newn->host_start
|| oldn->host_end < newn->host_end)
/* For implicit maps, old contained in new is valid. */
|| !(implicit_subset
/* Otherwise, new contained inside old is considered valid. */
|| (oldn->host_start <= newn->host_start
&& newn->host_end <= oldn->host_end)))
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("Trying to map into device [%p..%p) object when "
@ -564,11 +575,17 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
}
if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
gomp_copy_host2dev (devicep, aq,
(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+ newn->host_start - oldn->host_start),
(void *) newn->host_start,
newn->host_end - newn->host_start, false, cbuf);
{
/* Implicit + always should not happen. If this does occur, below
address/length adjustment is a TODO. */
assert (!implicit_subset);
gomp_copy_host2dev (devicep, aq,
(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+ newn->host_start - oldn->host_start),
(void *) newn->host_start,
newn->host_end - newn->host_start, false, cbuf);
}
gomp_increment_refcount (oldn, refcount_set);
}
@ -576,8 +593,24 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
return short_mapkind ? ((unsigned short *) kinds)[idx]
: ((unsigned char *) kinds)[idx];
if (!short_mapkind)
return ((unsigned char *) kinds)[idx];
int val = ((unsigned short *) kinds)[idx];
if (GOMP_MAP_IMPLICIT_P (val))
val &= ~GOMP_MAP_IMPLICIT;
return val;
}
static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
if (!short_mapkind)
return false;
int val = ((unsigned short *) kinds)[idx];
return GOMP_MAP_IMPLICIT_P (val);
}
static void
@ -631,6 +664,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
struct splay_tree_s *mem_map = &devicep->mem_map;
struct splay_tree_key_s cur_node;
int kind;
bool implicit;
const bool short_mapkind = true;
const int typemask = short_mapkind ? 0xff : 0x7;
@ -638,12 +672,14 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
cur_node.host_end = cur_node.host_start + sizes[i];
splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
kind = get_kind (short_mapkind, kinds, i);
implicit = get_implicit (short_mapkind, kinds, i);
if (n2
&& n2->tgt == n->tgt
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
{
gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
kind & typemask, false, cbuf, refcount_set);
kind & typemask, false, implicit, cbuf,
refcount_set);
return;
}
if (sizes[i] == 0)
@ -659,7 +695,8 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
== n2->tgt_offset - n->tgt_offset)
{
gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
kind & typemask, false, cbuf, refcount_set);
kind & typemask, false, implicit, cbuf,
refcount_set);
return;
}
}
@ -671,7 +708,8 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
{
gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
kind & typemask, false, cbuf, refcount_set);
kind & typemask, false, implicit, cbuf,
refcount_set);
return;
}
}
@ -903,6 +941,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
for (i = 0; i < mapnum; i++)
{
int kind = get_kind (short_mapkind, kinds, i);
bool implicit = get_implicit (short_mapkind, kinds, i);
if (hostaddrs[i] == NULL
|| (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
{
@ -1085,8 +1124,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
}
}
gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
kind & typemask, always_to_cnt > 0, NULL,
refcount_set);
kind & typemask, always_to_cnt > 0, implicit,
NULL, refcount_set);
i += always_to_cnt;
}
else
@ -1256,6 +1295,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
else if (tgt->list[i].key == NULL)
{
int kind = get_kind (short_mapkind, kinds, i);
bool implicit = get_implicit (short_mapkind, kinds, i);
if (hostaddrs[i] == NULL)
continue;
switch (kind & typemask)
@ -1415,7 +1455,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
splay_tree_key n = splay_tree_lookup (mem_map, k);
if (n && n->refcount != REFCOUNT_LINK)
gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
kind & typemask, false, cbufp,
kind & typemask, false, implicit, cbufp,
refcount_set);
else
{

View File

@ -0,0 +1,31 @@
#ifdef __cplusplus
extern "C"
#else
extern
#endif
void abort (void);
int
main (void)
{
#define N 5
int array[N][N];
for (int i = 0; i < N; i++)
{
#pragma omp target enter data map(alloc: array[i:1][0:N])
#pragma omp target
for (int j = 0; j < N; j++)
array[i][j] = i + j;
#pragma omp target exit data map(from: array[i:1][0:N])
}
for (int i = 0; i < N; i++)
for (int j = 0; j < N; j++)
if (array[i][j] != i + j)
abort ();
return 0;
}