internal-fn.def (IFN_GOACC_DIM_SIZE, [...]): New.

* internal-fn.def (IFN_GOACC_DIM_SIZE, IFN_GOACC_DIM_POS,
	IFN_GOACC_LOOP): New.
	* internal-fn.h (enum ifn_unique_kind): Add IFN_UNIQUE_OACC_FORK,
	IFN_UNIQUE_OACC_JOIN, IFN_UNIQUE_OACC_HEAD_MARK,
	IFN_UNIQUE_OACC_TAIL_MARK.
	(enum ifn_goacc_loop_kind): New.
	* internal-fn.c (expand_UNIQUE): Add IFN_UNIQUE_OACC_FORK,
	IFN_UNIQUE_OACC_JOIN cases.
	(expand_GOACC_DIM_SIZE, expand_GOACC_DIM_POS): New.
	(expand_GOACC_LOOP): New.
	* target-insns.def (oacc_dim_pos, oacc_dim_size): New.
	* omp-low.c: Include gimple-pretty-print.h.
	(struct oacc_loop): New.
	(enum oacc_loop_flags): New.
	(oacc_thread_numbers): New.
	(oacc_xform_loop): New.
	(new_oacc_loop_raw, new_oacc_loop_outer, new_oacc_loop,
	new_oacc_loop_routine, finish_oacc_loop, free_oacc_loop): New,
	(dump_oacc_loop_part, dump_oacc_loop, debug_oacc_loop): New,
	(oacc_loop_discover_walk, oacc_loop_sibling_nrevers,
	oacc_loop_discovery): New.
	(oacc_loop_xform_head_tail, oacc_loop_xform_loop,
	oacc_loop_process): New.
	(oacc_loop_fixed_partitions, oacc_loop_partition): New.
	(execute_oacc_device_lower): Discover & process loops.  Process
	internal fns.
	* target.def (goacc.fork_join): Change sense of hook, clarify
	documentation.
	* doc/tm.texi: Regenerated.

From-SVN: r229466
This commit is contained in:
Nathan Sidwell 2015-10-27 23:46:39 +00:00 committed by Nathan Sidwell
parent 6e91acf8bb
commit 9bd46bc97e
8 changed files with 1103 additions and 13 deletions

View File

@ -1,3 +1,35 @@
2015-10-27 Nathan Sidwell <nathan@codesourcery.com>
* internal-fn.def (IFN_GOACC_DIM_SIZE, IFN_GOACC_DIM_POS,
IFN_GOACC_LOOP): New.
* internal-fn.h (enum ifn_unique_kind): Add IFN_UNIQUE_OACC_FORK,
IFN_UNIQUE_OACC_JOIN, IFN_UNIQUE_OACC_HEAD_MARK,
IFN_UNIQUE_OACC_TAIL_MARK.
(enum ifn_goacc_loop_kind): New.
* internal-fn.c (expand_UNIQUE): Add IFN_UNIQUE_OACC_FORK,
IFN_UNIQUE_OACC_JOIN cases.
(expand_GOACC_DIM_SIZE, expand_GOACC_DIM_POS): New.
(expand_GOACC_LOOP): New.
* target-insns.def (oacc_dim_pos, oacc_dim_size): New.
* omp-low.c: Include gimple-pretty-print.h.
(struct oacc_loop): New.
(enum oacc_loop_flags): New.
(oacc_thread_numbers): New.
(oacc_xform_loop): New.
(new_oacc_loop_raw, new_oacc_loop_outer, new_oacc_loop,
new_oacc_loop_routine, finish_oacc_loop, free_oacc_loop): New,
(dump_oacc_loop_part, dump_oacc_loop, debug_oacc_loop): New,
(oacc_loop_discover_walk, oacc_loop_sibling_nrevers,
oacc_loop_discovery): New.
(oacc_loop_xform_head_tail, oacc_loop_xform_loop,
oacc_loop_process): New.
(oacc_loop_fixed_partitions, oacc_loop_partition): New.
(execute_oacc_device_lower): Discover & process loops. Process
internal fns.
* target.def (goacc.fork_join): Change sense of hook, clarify
documentation.
* doc/tm.texi: Regenerated.
2015-10-27 Nathan Sidwell <nathan@codesourcery.com>
* target-insns.def (oacc_fork, oacc_join): Define.

View File

@ -5778,11 +5778,13 @@ provide dimensions larger than 1.
@end deftypefn
@deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (gcall *@var{call}, const int *@var{dims}, bool @var{is_fork})
This hook should convert IFN_GOACC_FORK and IFN_GOACC_JOIN function
calls to target-specific gimple. It is executed during the
oacc_device_lower pass. It should return true, if the functions
should be deleted. The default hook returns true, if there are no
RTL expanders for them.
This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN
function calls to target-specific gimple, or indicate whether they
should be retained. It is executed during the oacc_device_lower pass.
It should return true, if the call should be retained. It should
return false, if it is to be deleted (either because target-specific
gimple has been inserted before it, or there is no need for it).
The default hook returns false, if there are no RTL expanders for them.
@end deftypefn
@node Anchored Addresses

View File

@ -1976,12 +1976,84 @@ expand_UNIQUE (gcall *stmt)
if (targetm.have_unique ())
pattern = targetm.gen_unique ();
break;
case IFN_UNIQUE_OACC_FORK:
case IFN_UNIQUE_OACC_JOIN:
if (targetm.have_oacc_fork () && targetm.have_oacc_join ())
{
tree lhs = gimple_call_lhs (stmt);
rtx target = const0_rtx;
if (lhs)
target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
rtx data_dep = expand_normal (gimple_call_arg (stmt, 1));
rtx axis = expand_normal (gimple_call_arg (stmt, 2));
if (kind == IFN_UNIQUE_OACC_FORK)
pattern = targetm.gen_oacc_fork (target, data_dep, axis);
else
pattern = targetm.gen_oacc_join (target, data_dep, axis);
}
else
gcc_unreachable ();
break;
}
if (pattern)
emit_insn (pattern);
}
/* The size of an OpenACC compute dimension. */
static void
expand_GOACC_DIM_SIZE (gcall *stmt)
{
tree lhs = gimple_call_lhs (stmt);
if (!lhs)
return;
rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
if (targetm.have_oacc_dim_size ())
{
rtx dim = expand_expr (gimple_call_arg (stmt, 0), NULL_RTX,
VOIDmode, EXPAND_NORMAL);
emit_insn (targetm.gen_oacc_dim_size (target, dim));
}
else
emit_move_insn (target, GEN_INT (1));
}
/* The position of an OpenACC execution engine along one compute axis. */
static void
expand_GOACC_DIM_POS (gcall *stmt)
{
tree lhs = gimple_call_lhs (stmt);
if (!lhs)
return;
rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
if (targetm.have_oacc_dim_pos ())
{
rtx dim = expand_expr (gimple_call_arg (stmt, 0), NULL_RTX,
VOIDmode, EXPAND_NORMAL);
emit_insn (targetm.gen_oacc_dim_pos (target, dim));
}
else
emit_move_insn (target, const0_rtx);
}
/* This is expanded by oacc_device_lower pass. */
static void
expand_GOACC_LOOP (gcall *stmt ATTRIBUTE_UNUSED)
{
gcc_unreachable ();
}
/* Routines to expand each internal function, indexed by function number.
Each routine has the prototype:

View File

@ -72,3 +72,14 @@ DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | ECF_LEAF, NULL)
between uses. See internal-fn.h for usage. */
DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW, NULL)
/* DIM_SIZE and DIM_POS return the size of a particular compute
dimension and the executing thread's position within that
dimension. DIM_POS is pure (and not const) so that it isn't
thought to clobber memory and can be gcse'd within a single
parallel region, but not across FORK/JOIN boundaries. They take a
single INTEGER_CST argument. */
DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".")
DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_PURE | ECF_NOTHROW | ECF_LEAF, ".")
/* OpenACC looping abstraction. See internal-fn.h for usage. */
DEF_INTERNAL_FN (GOACC_LOOP, ECF_PURE | ECF_NOTHROW, NULL)

View File

@ -22,7 +22,48 @@ along with GCC; see the file COPYING3. If not see
/* INTEGER_CST values for IFN_UNIQUE function arg-0. */
enum ifn_unique_kind {
IFN_UNIQUE_UNSPEC /* Undifferentiated UNIQUE. */
IFN_UNIQUE_UNSPEC, /* Undifferentiated UNIQUE. */
/* FORK and JOIN mark the points at which OpenACC partitioned
execution is entered or exited.
return: data dependency value
arg-1: data dependency var
arg-2: INTEGER_CST argument, indicating the axis. */
IFN_UNIQUE_OACC_FORK,
IFN_UNIQUE_OACC_JOIN,
/* HEAD_MARK and TAIL_MARK are used to demark the sequence entering
or leaving partitioned execution.
return: data dependency value
arg-1: data dependency var
arg-2: INTEGER_CST argument, remaining markers in this sequence
arg-3...: varargs on primary header */
IFN_UNIQUE_OACC_HEAD_MARK,
IFN_UNIQUE_OACC_TAIL_MARK
};
/* INTEGER_CST values for IFN_GOACC_LOOP arg-0. Allows the precise
stepping of the compute geometry over the loop iterations to be
deferred until it is known which compiler is generating the code.
The action is encoded in a constant first argument.
CHUNK_MAX = LOOP (CODE_CHUNKS, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
STEP = LOOP (CODE_STEP, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
OFFSET = LOOP (CODE_OFFSET, DIR, RANGE, STEP, CHUNK_SIZE, MASK, CHUNK_NO)
BOUND = LOOP (CODE_BOUND, DIR, RANGE, STEP, CHUNK_SIZE, MASK, OFFSET)
DIR - +1 for up loop, -1 for down loop
RANGE - Range of loop (END - BASE)
STEP - iteration step size
CHUNKING - size of chunking, (constant zero for no chunking)
CHUNK_NO - chunk number
MASK - partitioning mask. */
enum ifn_goacc_loop_kind {
IFN_GOACC_LOOP_CHUNKS, /* Number of chunks. */
IFN_GOACC_LOOP_STEP, /* Size of each thread's step. */
IFN_GOACC_LOOP_OFFSET, /* Initial iteration value. */
IFN_GOACC_LOOP_BOUND /* Limit of iteration value. */
};
/* Initialize internal function tables. */

View File

@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
#include "context.h"
#include "lto-section-names.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
/* Lowering of OMP parallel and workshare constructs proceeds in two
phases. The first phase scans the function looking for OMP statements
@ -233,6 +234,49 @@ struct omp_for_data
struct omp_for_data_loop *loops;
};
/* Describe the OpenACC looping structure of a function. The entire
function is held in a 'NULL' loop. */
struct oacc_loop
{
oacc_loop *parent; /* Containing loop. */
oacc_loop *child; /* First inner loop. */
oacc_loop *sibling; /* Next loop within same parent. */
location_t loc; /* Location of the loop start. */
gcall *marker; /* Initial head marker. */
gcall *heads[GOMP_DIM_MAX]; /* Head marker functions. */
gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions. */
tree routine; /* Pseudo-loop enclosing a routine. */
unsigned mask; /* Partitioning mask. */
unsigned flags; /* Partitioning flags. */
tree chunk_size; /* Chunk size. */
gcall *head_end; /* Final marker of head sequence. */
};
/* Flags for an OpenACC loop. */
enum oacc_loop_flags {
OLF_SEQ = 1u << 0, /* Explicitly sequential */
OLF_AUTO = 1u << 1, /* Compiler chooses axes. */
OLF_INDEPENDENT = 1u << 2, /* Iterations are known independent. */
OLF_GANG_STATIC = 1u << 3, /* Gang partitioning is static (has op). */
/* Explicitly specified loop axes. */
OLF_DIM_BASE = 4,
OLF_DIM_GANG = 1u << (OLF_DIM_BASE + GOMP_DIM_GANG),
OLF_DIM_WORKER = 1u << (OLF_DIM_BASE + GOMP_DIM_WORKER),
OLF_DIM_VECTOR = 1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR),
OLF_MAX = OLF_DIM_BASE + GOMP_DIM_MAX
};
static splay_tree all_contexts;
static int taskreg_nesting_level;
@ -17584,6 +17628,241 @@ omp_finish_file (void)
}
}
/* Find the number of threads (POS = false), or thread number (POS =
true) for an OpenACC region partitioned as MASK. Setup code
required for the calculation is added to SEQ. */
static tree
oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
{
tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
unsigned ix;
/* Start at gang level, and examine relevant dimension indices. */
for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
if (GOMP_DIM_MASK (ix) & mask)
{
tree arg = build_int_cst (unsigned_type_node, ix);
if (res)
{
/* We had an outer index, so scale that by the size of
this dimension. */
tree n = create_tmp_var (integer_type_node);
gimple *call
= gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
gimple_call_set_lhs (call, n);
gimple_seq_add_stmt (seq, call);
res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
}
if (pos)
{
/* Determine index in this dimension. */
tree id = create_tmp_var (integer_type_node);
gimple *call = gimple_build_call_internal
(IFN_GOACC_DIM_POS, 1, arg);
gimple_call_set_lhs (call, id);
gimple_seq_add_stmt (seq, call);
if (res)
res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
else
res = id;
}
}
if (res == NULL_TREE)
res = integer_zero_node;
return res;
}
/* Transform IFN_GOACC_LOOP calls to actual code. See
expand_oacc_for for where these are generated. At the vector
level, we stride loops, such that each member of a warp will
operate on adjacent iterations. At the worker and gang level,
each gang/warp executes a set of contiguous iterations. Chunking
can override this such that each iteration engine executes a
contiguous chunk, and then moves on to stride to the next chunk. */
static void
oacc_xform_loop (gcall *call)
{
gimple_stmt_iterator gsi = gsi_for_stmt (call);
enum ifn_goacc_loop_kind code
= (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
tree dir = gimple_call_arg (call, 1);
tree range = gimple_call_arg (call, 2);
tree step = gimple_call_arg (call, 3);
tree chunk_size = NULL_TREE;
unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
tree lhs = gimple_call_lhs (call);
tree type = TREE_TYPE (lhs);
tree diff_type = TREE_TYPE (range);
tree r = NULL_TREE;
gimple_seq seq = NULL;
bool chunking = false, striding = true;
unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
#ifdef ACCEL_COMPILER
chunk_size = gimple_call_arg (call, 4);
if (integer_minus_onep (chunk_size) /* Force static allocation. */
|| integer_zerop (chunk_size)) /* Default (also static). */
{
/* If we're at the gang level, we want each to execute a
contiguous run of iterations. Otherwise we want each element
to stride. */
striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
chunking = false;
}
else
{
/* Chunk of size 1 is striding. */
striding = integer_onep (chunk_size);
chunking = !striding;
}
#endif
/* striding=true, chunking=true
-> invalid.
striding=true, chunking=false
-> chunks=1
striding=false,chunking=true
-> chunks=ceil (range/(chunksize*threads*step))
striding=false,chunking=false
-> chunk_size=ceil(range/(threads*step)),chunks=1 */
push_gimplify_context (true);
switch (code)
{
default: gcc_unreachable ();
case IFN_GOACC_LOOP_CHUNKS:
if (!chunking)
r = build_int_cst (type, 1);
else
{
/* chunk_max
= (range - dir) / (chunks * step * num_threads) + dir */
tree per = oacc_thread_numbers (false, mask, &seq);
per = fold_convert (type, per);
chunk_size = fold_convert (type, chunk_size);
per = fold_build2 (MULT_EXPR, type, per, chunk_size);
per = fold_build2 (MULT_EXPR, type, per, step);
r = build2 (MINUS_EXPR, type, range, dir);
r = build2 (PLUS_EXPR, type, r, per);
r = build2 (TRUNC_DIV_EXPR, type, r, per);
}
break;
case IFN_GOACC_LOOP_STEP:
{
/* If striding, step by the entire compute volume, otherwise
step by the inner volume. */
unsigned volume = striding ? mask : inner_mask;
r = oacc_thread_numbers (false, volume, &seq);
r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
}
break;
case IFN_GOACC_LOOP_OFFSET:
if (striding)
{
r = oacc_thread_numbers (true, mask, &seq);
r = fold_convert (diff_type, r);
}
else
{
tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
inner_size, outer_size);
volume = fold_convert (diff_type, volume);
if (chunking)
chunk_size = fold_convert (diff_type, chunk_size);
else
{
tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
}
tree span = build2 (MULT_EXPR, diff_type, chunk_size,
fold_convert (diff_type, inner_size));
r = oacc_thread_numbers (true, outer_mask, &seq);
r = fold_convert (diff_type, r);
r = build2 (MULT_EXPR, diff_type, r, span);
tree inner = oacc_thread_numbers (true, inner_mask, &seq);
inner = fold_convert (diff_type, inner);
r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
if (chunking)
{
tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
tree per
= fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
per = build2 (MULT_EXPR, diff_type, per, chunk);
r = build2 (PLUS_EXPR, diff_type, r, per);
}
}
r = fold_build2 (MULT_EXPR, diff_type, r, step);
if (type != diff_type)
r = fold_convert (type, r);
break;
case IFN_GOACC_LOOP_BOUND:
if (striding)
r = range;
else
{
tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
inner_size, outer_size);
volume = fold_convert (diff_type, volume);
if (chunking)
chunk_size = fold_convert (diff_type, chunk_size);
else
{
tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
}
tree span = build2 (MULT_EXPR, diff_type, chunk_size,
fold_convert (diff_type, inner_size));
r = fold_build2 (MULT_EXPR, diff_type, span, step);
tree offset = gimple_call_arg (call, 6);
r = build2 (PLUS_EXPR, diff_type, r,
fold_convert (diff_type, offset));
r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
diff_type, r, range);
}
if (diff_type != type)
r = fold_convert (type, r);
break;
}
gimplify_assign (lhs, r, &seq);
pop_gimplify_context (NULL);
gsi_replace_with_seq (&gsi, seq, true);
}
/* Validate and update the dimensions for offloaded FN. ATTRS is the
raw attribute. DIMS is an array of dimensions, which is returned.
Returns the function level dimensionality -- the level at which an
@ -17642,6 +17921,553 @@ oacc_validate_dims (tree fn, tree attrs, int *dims)
return fn_level;
}
/* Create an empty OpenACC loop structure at LOC. */
static oacc_loop *
new_oacc_loop_raw (oacc_loop *parent, location_t loc)
{
oacc_loop *loop = XCNEW (oacc_loop);
loop->parent = parent;
loop->child = loop->sibling = NULL;
if (parent)
{
loop->sibling = parent->child;
parent->child = loop;
}
loop->loc = loc;
loop->marker = NULL;
memset (loop->heads, 0, sizeof (loop->heads));
memset (loop->tails, 0, sizeof (loop->tails));
loop->routine = NULL_TREE;
loop->mask = loop->flags = 0;
loop->chunk_size = 0;
loop->head_end = NULL;
return loop;
}
/* Create an outermost, dummy OpenACC loop for offloaded function
DECL. */
static oacc_loop *
new_oacc_loop_outer (tree decl)
{
return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
}
/* Start a new OpenACC loop structure beginning at head marker HEAD.
Link into PARENT loop. Return the new loop. */
static oacc_loop *
new_oacc_loop (oacc_loop *parent, gcall *marker)
{
oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
loop->marker = marker;
/* TODO: This is where device_type flattening would occur for the loop
flags. */
loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
tree chunk_size = integer_zero_node;
if (loop->flags & OLF_GANG_STATIC)
chunk_size = gimple_call_arg (marker, 4);
loop->chunk_size = chunk_size;
return loop;
}
/* Create a dummy loop encompassing a call to a openACC routine.
Extract the routine's partitioning requirements. */
static void
new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
{
oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
int dims[GOMP_DIM_MAX];
int level = oacc_validate_dims (decl, attrs, dims);
gcc_assert (level >= 0);
loop->marker = call;
loop->routine = decl;
loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
^ (GOMP_DIM_MASK (level) - 1));
}
/* Finish off the current OpenACC loop ending at tail marker TAIL.
Return the parent loop. */
static oacc_loop *
finish_oacc_loop (oacc_loop *loop)
{
return loop->parent;
}
/* Free all OpenACC loop structures within LOOP (inclusive). */
static void
free_oacc_loop (oacc_loop *loop)
{
if (loop->sibling)
free_oacc_loop (loop->sibling);
if (loop->child)
free_oacc_loop (loop->child);
free (loop);
}
/* Dump out the OpenACC loop head or tail beginning at FROM. */
static void
dump_oacc_loop_part (FILE *file, gcall *from, int depth,
const char *title, int level)
{
enum ifn_unique_kind kind
= (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
{
gimple *stmt = gsi_stmt (gsi);
if (is_gimple_call (stmt)
&& gimple_call_internal_p (stmt)
&& gimple_call_internal_fn (stmt) == IFN_UNIQUE)
{
enum ifn_unique_kind k
= ((enum ifn_unique_kind) TREE_INT_CST_LOW
(gimple_call_arg (stmt, 0)));
if (k == kind && stmt != from)
break;
}
print_gimple_stmt (file, stmt, depth * 2 + 2, 0);
gsi_next (&gsi);
while (gsi_end_p (gsi))
gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
}
}
/* Dump OpenACC loops LOOP, its siblings and its children. */
static void
dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
{
int ix;
fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
loop->flags, loop->mask,
LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
if (loop->marker)
print_gimple_stmt (file, loop->marker, depth * 2, 0);
if (loop->routine)
fprintf (file, "%*sRoutine %s:%u:%s\n",
depth * 2, "", DECL_SOURCE_FILE (loop->routine),
DECL_SOURCE_LINE (loop->routine),
IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
if (loop->heads[ix])
dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
for (ix = GOMP_DIM_MAX; ix--;)
if (loop->tails[ix])
dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
if (loop->child)
dump_oacc_loop (file, loop->child, depth + 1);
if (loop->sibling)
dump_oacc_loop (file, loop->sibling, depth);
}
void debug_oacc_loop (oacc_loop *);
/* Dump loops to stderr. */
DEBUG_FUNCTION void
debug_oacc_loop (oacc_loop *loop)
{
dump_oacc_loop (stderr, loop, 0);
}
/* DFS walk of basic blocks BB onwards, creating OpenACC loop
structures as we go. By construction these loops are properly
nested. */
static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
int marker = 0;
int remaining = 0;
if (bb->flags & BB_VISITED)
return;
follow:
bb->flags |= BB_VISITED;
/* Scan for loop markers. */
for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
if (!is_gimple_call (stmt))
continue;
gcall *call = as_a <gcall *> (stmt);
/* If this is a routine, make a dummy loop for it. */
if (tree decl = gimple_call_fndecl (call))
if (tree attrs = get_oacc_fn_attrib (decl))
{
gcc_assert (!marker);
new_oacc_loop_routine (loop, call, decl, attrs);
}
if (!gimple_call_internal_p (call))
continue;
if (gimple_call_internal_fn (call) != IFN_UNIQUE)
continue;
enum ifn_unique_kind kind
= (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
if (kind == IFN_UNIQUE_OACC_HEAD_MARK
|| kind == IFN_UNIQUE_OACC_TAIL_MARK)
{
if (gimple_call_num_args (call) == 2)
{
gcc_assert (marker && !remaining);
marker = 0;
if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
loop = finish_oacc_loop (loop);
else
loop->head_end = call;
}
else
{
int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
if (!marker)
{
if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
loop = new_oacc_loop (loop, call);
remaining = count;
}
gcc_assert (count == remaining);
if (remaining)
{
remaining--;
if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
loop->heads[marker] = call;
else
loop->tails[remaining] = call;
}
marker++;
}
}
}
if (remaining || marker)
{
bb = single_succ (bb);
gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
goto follow;
}
/* Walk successor blocks. */
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, bb->succs)
oacc_loop_discover_walk (loop, e->dest);
}
/* LOOP is the first sibling. Reverse the order in place and return
the new first sibling. Recurse to child loops. */
static oacc_loop *
oacc_loop_sibling_nreverse (oacc_loop *loop)
{
oacc_loop *last = NULL;
do
{
if (loop->child)
loop->child = oacc_loop_sibling_nreverse (loop->child);
oacc_loop *next = loop->sibling;
loop->sibling = last;
last = loop;
loop = next;
}
while (loop);
return last;
}
/* Discover the OpenACC loops marked up by HEAD and TAIL markers for
the current function. */
static oacc_loop *
oacc_loop_discovery ()
{
basic_block bb;
oacc_loop *top = new_oacc_loop_outer (current_function_decl);
oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
/* The siblings were constructed in reverse order, reverse them so
that diagnostics come out in an unsurprising order. */
top = oacc_loop_sibling_nreverse (top);
/* Reset the visited flags. */
FOR_ALL_BB_FN (bb, cfun)
bb->flags &= ~BB_VISITED;
return top;
}
/* Transform the abstract internal function markers starting at FROM
to be for partitioning level LEVEL. Stop when we meet another HEAD
or TAIL marker. */
static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
enum ifn_unique_kind kind
= (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
tree replacement = build_int_cst (unsigned_type_node, level);
for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
{
gimple *stmt = gsi_stmt (gsi);
if (is_gimple_call (stmt)
&& gimple_call_internal_p (stmt)
&& gimple_call_internal_fn (stmt) == IFN_UNIQUE)
{
enum ifn_unique_kind k
= ((enum ifn_unique_kind)
TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
*gimple_call_arg_ptr (stmt, 2) = replacement;
else if (k == kind && stmt != from)
break;
}
gsi_next (&gsi);
while (gsi_end_p (gsi))
gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
}
}
/* Transform the IFN_GOACC_LOOP internal functions by providing the
determined partitioning mask and chunking argument. */
static void
oacc_loop_xform_loop (gcall *end_marker, tree mask_arg, tree chunk_arg)
{
gimple_stmt_iterator gsi = gsi_for_stmt (end_marker);
for (;;)
{
for (; !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
if (!is_gimple_call (stmt))
continue;
gcall *call = as_a <gcall *> (stmt);
if (!gimple_call_internal_p (call))
continue;
if (gimple_call_internal_fn (call) != IFN_GOACC_LOOP)
continue;
*gimple_call_arg_ptr (call, 5) = mask_arg;
*gimple_call_arg_ptr (call, 4) = chunk_arg;
if (TREE_INT_CST_LOW (gimple_call_arg (call, 0))
== IFN_GOACC_LOOP_BOUND)
return;
}
/* If we didn't see LOOP_BOUND, it should be in the single
successor block. */
basic_block bb = single_succ (gsi_bb (gsi));
gsi = gsi_start_bb (bb);
}
}
/* Process the discovered OpenACC loops, setting the correct
partitioning level etc. */
static void
oacc_loop_process (oacc_loop *loop)
{
if (loop->child)
oacc_loop_process (loop->child);
if (loop->mask && !loop->routine)
{
int ix;
unsigned mask = loop->mask;
unsigned dim = GOMP_DIM_GANG;
tree mask_arg = build_int_cst (unsigned_type_node, mask);
tree chunk_arg = loop->chunk_size;
oacc_loop_xform_loop (loop->head_end, mask_arg, chunk_arg);
for (ix = 0; ix != GOMP_DIM_MAX && loop->heads[ix]; ix++)
{
gcc_assert (mask);
while (!(GOMP_DIM_MASK (dim) & mask))
dim++;
oacc_loop_xform_head_tail (loop->heads[ix], dim);
oacc_loop_xform_head_tail (loop->tails[ix], dim);
mask ^= GOMP_DIM_MASK (dim);
}
}
if (loop->sibling)
oacc_loop_process (loop->sibling);
}
/* Walk the OpenACC loop heirarchy checking and assigning the
programmer-specified partitionings. OUTER_MASK is the partitioning
this loop is contained within. Return partitiong mask used within
this loop nest. */
static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
unsigned this_mask = loop->mask;
bool has_auto = false;
bool noisy = true;
#ifdef ACCEL_COMPILER
/* When device_type is supported, we want the device compiler to be
noisy, if the loop parameters are device_type-specific. */
noisy = false;
#endif
if (!loop->routine)
{
bool auto_par = (loop->flags & OLF_AUTO) != 0;
bool seq_par = (loop->flags & OLF_SEQ) != 0;
this_mask = ((loop->flags >> OLF_DIM_BASE)
& (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
if ((this_mask != 0) + auto_par + seq_par > 1)
{
if (noisy)
error_at (loop->loc,
seq_par
? "%<seq%> overrides other OpenACC loop specifiers"
: "%<auto%> conflicts with other OpenACC loop specifiers");
auto_par = false;
loop->flags &= ~OLF_AUTO;
if (seq_par)
{
loop->flags &=
~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
this_mask = 0;
}
}
if (auto_par && (loop->flags & OLF_INDEPENDENT))
has_auto = true;
}
if (this_mask & outer_mask)
{
const oacc_loop *outer;
for (outer = loop->parent; outer; outer = outer->parent)
if (outer->mask & this_mask)
break;
if (noisy)
{
if (outer)
{
error_at (loop->loc,
"%s uses same OpenACC parallelism as containing loop",
loop->routine ? "routine call" : "inner loop");
inform (outer->loc, "containing loop here");
}
else
error_at (loop->loc,
"%s uses OpenACC parallelism disallowed by containing routine",
loop->routine ? "routine call" : "loop");
if (loop->routine)
inform (DECL_SOURCE_LOCATION (loop->routine),
"routine %qD declared here", loop->routine);
}
this_mask &= ~outer_mask;
}
else
{
unsigned outermost = this_mask & -this_mask;
if (outermost && outermost <= outer_mask)
{
if (noisy)
{
error_at (loop->loc,
"incorrectly nested OpenACC loop parallelism");
const oacc_loop *outer;
for (outer = loop->parent;
outer->flags && outer->flags < outermost;
outer = outer->parent)
continue;
inform (outer->loc, "containing loop here");
}
this_mask &= ~outermost;
}
}
loop->mask = this_mask;
if (loop->child
&& oacc_loop_fixed_partitions (loop->child, outer_mask | this_mask))
has_auto = true;
if (loop->sibling
&& oacc_loop_fixed_partitions (loop->sibling, outer_mask))
has_auto = true;
return has_auto;
}
/* Walk the OpenACC loop heirarchy to check and assign partitioning
axes. */
static void
oacc_loop_partition (oacc_loop *loop, int fn_level)
{
unsigned outer_mask = 0;
if (fn_level >= 0)
outer_mask = GOMP_DIM_MASK (fn_level) - 1;
oacc_loop_fixed_partitions (loop, outer_mask);
}
/* Default fork/join early expander. Delete the function calls if
there is no RTL expander. */
@ -17669,8 +18495,110 @@ execute_oacc_device_lower ()
/* Not an offloaded function. */
return 0;
oacc_validate_dims (current_function_decl, attrs, dims);
int fn_level = oacc_validate_dims (current_function_decl, attrs, dims);
/* Discover, partition and process the loops. */
oacc_loop *loops = oacc_loop_discovery ();
oacc_loop_partition (loops, fn_level);
oacc_loop_process (loops);
if (dump_file)
{
fprintf (dump_file, "OpenACC loops\n");
dump_oacc_loop (dump_file, loops, 0);
fprintf (dump_file, "\n");
}
/* Now lower internal loop functions to target-specific code
sequences. */
basic_block bb;
FOR_ALL_BB_FN (bb, cfun)
for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
{
gimple *stmt = gsi_stmt (gsi);
if (!is_gimple_call (stmt))
{
gsi_next (&gsi);
continue;
}
gcall *call = as_a <gcall *> (stmt);
if (!gimple_call_internal_p (call))
{
gsi_next (&gsi);
continue;
}
/* Rewind to allow rescan. */
gsi_prev (&gsi);
bool rescan = false, remove = false;
enum internal_fn ifn_code = gimple_call_internal_fn (call);
switch (ifn_code)
{
default: break;
case IFN_GOACC_LOOP:
oacc_xform_loop (call);
rescan = true;
break;
case IFN_UNIQUE:
{
enum ifn_unique_kind kind
= ((enum ifn_unique_kind)
TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
switch (kind)
{
default:
gcc_unreachable ();
case IFN_UNIQUE_OACC_FORK:
case IFN_UNIQUE_OACC_JOIN:
if (integer_minus_onep (gimple_call_arg (call, 2)))
remove = true;
else if (!targetm.goacc.fork_join
(call, dims, kind == IFN_UNIQUE_OACC_FORK))
remove = true;
break;
case IFN_UNIQUE_OACC_HEAD_MARK:
case IFN_UNIQUE_OACC_TAIL_MARK:
remove = true;
break;
}
break;
}
}
if (gsi_end_p (gsi))
/* We rewound past the beginning of the BB. */
gsi = gsi_start_bb (bb);
else
/* Undo the rewind. */
gsi_next (&gsi);
if (remove)
{
if (gimple_vdef (call))
replace_uses_by (gimple_vdef (call), gimple_vuse (call));
if (gimple_call_lhs (call))
{
/* Propagate the data dependency var. */
gimple *ass = gimple_build_assign (gimple_call_lhs (call),
gimple_call_arg (call, 1));
gsi_replace (&gsi, ass, false);
}
else
gsi_remove (&gsi, true);
}
else if (!rescan)
/* If not rescanning, advance over the call. */
gsi_next (&gsi);
}
free_oacc_loop (loops);
return 0;
}

View File

@ -64,6 +64,8 @@ DEF_TARGET_INSN (memory_barrier, (void))
DEF_TARGET_INSN (movstr, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (nonlocal_goto, (rtx x0, rtx x1, rtx x2, rtx x3))
DEF_TARGET_INSN (nonlocal_goto_receiver, (void))
DEF_TARGET_INSN (oacc_dim_pos, (rtx x0, rtx x1))
DEF_TARGET_INSN (oacc_dim_size, (rtx x0, rtx x1))
DEF_TARGET_INSN (oacc_fork, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (oacc_join, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (prefetch, (rtx x0, rtx x1, rtx x2))

View File

@ -1660,11 +1660,13 @@ default_goacc_validate_dims)
DEFHOOK
(fork_join,
"This hook should convert IFN_GOACC_FORK and IFN_GOACC_JOIN function\n\
calls to target-specific gimple. It is executed during the\n\
oacc_device_lower pass. It should return true, if the functions\n\
should be deleted. The default hook returns true, if there are no\n\
RTL expanders for them.",
"This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN\n\
function calls to target-specific gimple, or indicate whether they\n\
should be retained. It is executed during the oacc_device_lower pass.\n\
It should return true, if the call should be retained. It should\n\
return false, if it is to be deleted (either because target-specific\n\
gimple has been inserted before it, or there is no need for it).\n\
The default hook returns false, if there are no RTL expanders for them.",
bool, (gcall *call, const int *dims, bool is_fork),
default_goacc_fork_join)