internal-fn.def (GOACC_REDUCTION): New.

* internal-fn.def (GOACC_REDUCTION): New.
	* internal-fn.h (enum ifn_goacc_reduction_kind): New.
	* internal-fn.c (expand_GOACC_REDUCTION): New.
	* target.def (goacc.reduction): New OpenACC hook.
	* targhooks.h (default_goacc_reduction): Declare.
	* doc/tm.texi.in: Add TARGET_GOACC_REDUCTION.
	* doc/tm.texi: Rebuilt.
	* omp-low.c (oacc_get_reduction_array_id, oacc_max_threads,
	scan_sharing_clauses): Remove oacc reduction handling here.
	(lower_rec_input_clauses): Don't handle OpenACC reductions here.
	(oacc_lower_reduction_var_helper): Delete.
	(lower_oacc_reductions): New.
	(lower_reduction_clauses): Don't handle OpenACC reductions here.
	(lower_oacc_head_tail): Call lower_oacc_reductions.
	(oacc_gimple_assign, oacc_init_reduction_array,
	oacc_initialize_reduction_data, oacc_finalize_reduction_data,
	oacc_process_reduction_data): Delete.
	(lower_omp_target): Remove old OpenACC reduction handling.  Insert
	dummy OpenACC gang reduction for reductions at outer level.
	(oacc_loop_xform_head_tail): Transform IFN_GOACC_REDUCTION.
	(default_goacc_reduction): New.
	(execute_oacc_device_lower): Handle IFN_GOACC_REDUCTION.

From-SVN: r229767
This commit is contained in:
Nathan Sidwell 2015-11-04 16:57:36 +00:00
parent be373510f8
commit e50146711b
9 changed files with 329 additions and 571 deletions

View File

@ -1,3 +1,29 @@
2015-11-04 Nathan Sidwell <nathan@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
* internal-fn.def (GOACC_REDUCTION): New.
* internal-fn.h (enum ifn_goacc_reduction_kind): New.
* internal-fn.c (expand_GOACC_REDUCTION): New.
* target.def (goacc.reduction): New OpenACC hook.
* targhooks.h (default_goacc_reduction): Declare.
* doc/tm.texi.in: Add TARGET_GOACC_REDUCTION.
* doc/tm.texi: Rebuilt.
* omp-low.c (oacc_get_reduction_array_id, oacc_max_threads,
scan_sharing_clauses): Remove oacc reduction handling here.
(lower_rec_input_clauses): Don't handle OpenACC reductions here.
(oacc_lower_reduction_var_helper): Delete.
(lower_oacc_reductions): New.
(lower_reduction_clauses): Don't handle OpenACC reductions here.
(lower_oacc_head_tail): Call lower_oacc_reductions.
(oacc_gimple_assign, oacc_init_reduction_array,
oacc_initialize_reduction_data, oacc_finalize_reduction_data,
oacc_process_reduction_data): Delete.
(lower_omp_target): Remove old OpenACC reduction handling. Insert
dummy OpenACC gang reduction for reductions at outer level.
(oacc_loop_xform_head_tail): Transform IFN_GOACC_REDUCTION.
(default_goacc_reduction): New.
(execute_oacc_device_lower): Handle IFN_GOACC_REDUCTION.
2015-11-04 Martin Liska <mliska@suse.cz>
* cgraphunit.c (cgraph_node::expand_thunk): Call
@ -41,7 +67,7 @@
vi->only_restrict_pointers.
(intra_create_variable_infos): Call create_variable_info_for_1 with
extra arg. Remove restrict handling. Call make_param_constraints with
one less arg.
one fewer arg.
2015-11-04 Tom de Vries <tom@codesourcery.com>

View File

@ -5787,6 +5787,15 @@ gimple has been inserted before it, or there is no need for it).
The default hook returns false, if there are no RTL expanders for them.
@end deftypefn
@deftypefn {Target Hook} void TARGET_GOACC_REDUCTION (gcall *@var{call})
This hook is used by the oacc_transform pass to expand calls to the
@var{GOACC_REDUCTION} internal function, into a sequence of gimple
instructions. @var{call} is the gimple statement containing the call to
the function. This hook removes statement @var{call} after the
expanded sequence has been inserted. This hook is also responsible
for allocating any storage for reductions when necessary.
@end deftypefn
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses

View File

@ -4264,6 +4264,8 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_GOACC_FORK_JOIN
@hook TARGET_GOACC_REDUCTION
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses

View File

@ -2045,6 +2045,14 @@ expand_GOACC_LOOP (gcall *stmt ATTRIBUTE_UNUSED)
gcc_unreachable ();
}
/* RTL expander entry for the GOACC_REDUCTION internal function.
GOACC_REDUCTION calls are expanded by the oacc_device_lower pass, so
this expander is not expected to be reached; STMT is unused and
reaching this point is treated as an internal error. */
static void
expand_GOACC_REDUCTION (gcall *stmt ATTRIBUTE_UNUSED)
{
gcc_unreachable ();
}
/* Routines to expand each internal function, indexed by function number.
Each routine has the prototype:

View File

@ -83,3 +83,6 @@ DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_PURE | ECF_NOTHROW | ECF_LEAF, ".")
/* OpenACC looping abstraction. See internal-fn.h for usage. */
DEF_INTERNAL_FN (GOACC_LOOP, ECF_PURE | ECF_NOTHROW, NULL)
/* OpenACC reduction abstraction. See internal-fn.h for usage. */
DEF_INTERNAL_FN (GOACC_REDUCTION, ECF_NOTHROW | ECF_LEAF, NULL)

View File

@ -66,6 +66,28 @@ enum ifn_goacc_loop_kind {
IFN_GOACC_LOOP_BOUND /* Limit of iteration value. */
};
/* The GOACC_REDUCTION function defines a generic interface to support
gang, worker and vector reductions. All calls are of the following
form:
V = REDUCTION (CODE, REF_TO_RES, LOCAL_VAR, LEVEL, OP, OFFSET)
CODE is one of the ifn_goacc_reduction_kind values below
REF_TO_RES is a reference to the original reduction variable, may be
NULL (passed as an integer zero)
LOCAL_VAR is the intermediate reduction variable
LEVEL corresponds to the GOMP_DIM of the reduction
OP is the tree code of the reduction operation
OFFSET may be used as an offset into a reduction array for the
reductions occurring at this level.
In general the return value is LOCAL_VAR, which creates a data
dependency between calls operating on the same reduction. */
enum ifn_goacc_reduction_kind {
IFN_GOACC_REDUCTION_SETUP, /* Emitted before the fork marker. */
IFN_GOACC_REDUCTION_INIT, /* Emitted after the fork marker. */
IFN_GOACC_REDUCTION_FINI, /* Emitted before the join marker. */
IFN_GOACC_REDUCTION_TEARDOWN /* Emitted after the join marker. */
};
/* Initialize internal function tables. */
extern void init_internal_fns ();

View File

@ -305,66 +305,6 @@ is_oacc_kernels (omp_context *ctx)
== GF_OMP_TARGET_KIND_OACC_KERNELS));
}
/* Return the name used for the array that holds the partial results of
   the OpenACC reduction on NODE.  The name is NODE's own declared name
   with an "OACC" prefix.  */

static const char *
oacc_get_reduction_array_id (tree node)
{
  const char *var_name = IDENTIFIER_POINTER (DECL_NAME (node));

  /* Room for the prefix, the variable name and the terminating NUL.  */
  int size = strlen ("OACC") + strlen (var_name) + 1;
  char *buf = XALLOCAVEC (char, size);

  snprintf (buf, size, "OACC%s", var_name);

  /* Look the name up as an identifier and hand back that identifier's
     string.  */
  tree ident = get_identifier (buf);
  return IDENTIFIER_POINTER (ident);
}
/* Determine the number of OpenACC threads used to size the array of
   partial reductions.  Currently, this is num_gangs * vector_length.
   This value may be different than GOACC_GET_NUM_THREADS, because it is
   independent of the device used.

   CTX is the context holding the reduction clause.  The result is a
   sizetype expression; each factor defaults to one when the matching
   clause is absent or no enclosing OpenACC parallel region is found.  */

static tree
oacc_max_threads (omp_context *ctx)
{
  tree one = fold_convert (sizetype, integer_one_node);
  tree num_gangs = one;
  tree vec_len = one;

  /* The reduction clause may be nested inside a loop directive.  Walk
     outwards to the nearest enclosing OpenACC parallel region.  */
  omp_context *par = ctx;
  while (par
         && !(gimple_code (par->stmt) == GIMPLE_OMP_TARGET
              && (gimple_omp_target_kind (par->stmt)
                  == GF_OMP_TARGET_KIND_OACC_PARALLEL)))
    par = par->outer;

  if (par)
    {
      tree par_clauses = gimple_omp_target_clauses (par->stmt);

      tree vl = find_omp_clause (par_clauses, OMP_CLAUSE_VECTOR_LENGTH);
      if (vl)
        vec_len = fold_convert_loc (OMP_CLAUSE_LOCATION (vl), sizetype,
                                    OMP_CLAUSE_VECTOR_LENGTH_EXPR (vl));

      tree ng = find_omp_clause (par_clauses, OMP_CLAUSE_NUM_GANGS);
      if (ng)
        num_gangs = fold_convert_loc (OMP_CLAUSE_LOCATION (ng), sizetype,
                                      OMP_CLAUSE_NUM_GANGS_EXPR (ng));
    }

  return fold_build2 (MULT_EXPR, sizetype, num_gangs, vec_len);
}
/* If DECL is the artificial dummy VAR_DECL created for non-static
data member privatization, return the underlying "this" parameter,
otherwise return NULL. */
@ -2016,27 +1956,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
install_var_field (decl, by_ref, 3, ctx);
}
install_var_local (decl, ctx);
if (is_gimple_omp_oacc (ctx->stmt)
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
{
/* Create a decl for the reduction array. */
tree var = OMP_CLAUSE_DECL (c);
tree type = get_base_type (var);
tree ptype = build_pointer_type (type);
tree array = create_tmp_var (ptype,
oacc_get_reduction_array_id (var));
omp_context *octx = (ctx->field_map ? ctx : ctx->outer);
install_var_field (array, true, 3, octx);
install_var_local (array, octx);
/* Insert it into the current context. */
splay_tree_insert (ctx->reduction_map, (splay_tree_key)
oacc_get_reduction_array_id (var),
(splay_tree_value) array);
splay_tree_insert (ctx->reduction_map,
(splay_tree_key) array,
(splay_tree_value) array);
}
break;
case OMP_CLAUSE_USE_DEVICE_PTR:
@ -4938,6 +4857,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
break;
case OMP_CLAUSE_REDUCTION:
/* OpenACC reductions are initialized using the
GOACC_REDUCTION internal function. */
if (is_gimple_omp_oacc (ctx->stmt))
break;
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
@ -5351,56 +5274,170 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
gimple_seq_add_stmt (stmt_list, gimple_build_label (label));
}
/* Lower the OpenACC reductions of CLAUSES for compute axis LEVEL
(which might be a placeholder). INNER is true if this is an inner
axis of a multi-axis loop. FORK and JOIN are (optional) fork and
join markers. Generate the before-loop forking sequence in
FORK_SEQ and the after-loop joining sequence to JOIN_SEQ. The
general form of these sequences is
GOACC_REDUCTION_SETUP
GOACC_FORK
GOACC_REDUCTION_INIT
...
GOACC_REDUCTION_FINI
GOACC_JOIN
GOACC_REDUCTION_TEARDOWN. */
static void
oacc_lower_reduction_var_helper (gimple_seq *stmt_seqp, omp_context *ctx,
tree tid, tree var, tree new_var)
lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
gcall *fork, gcall *join, gimple_seq *fork_seq,
gimple_seq *join_seq, omp_context *ctx)
{
/* The atomic add at the end of the sum creates unnecessary
write contention on accelerators. To work around this,
create an array to store the partial reductions. Later, in
lower_omp_for (for openacc), the values of array will be
combined. */
gimple_seq before_fork = NULL;
gimple_seq after_fork = NULL;
gimple_seq before_join = NULL;
gimple_seq after_join = NULL;
tree init_code = NULL_TREE, fini_code = NULL_TREE,
setup_code = NULL_TREE, teardown_code = NULL_TREE;
unsigned offset = 0;
tree t = NULL_TREE, array, x;
tree type = get_base_type (var);
gimple *stmt;
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
{
tree orig = OMP_CLAUSE_DECL (c);
tree var = maybe_lookup_decl (orig, ctx);
tree ref_to_res = NULL_TREE;
tree incoming, outgoing;
/* Now insert the partial reductions into the array. */
enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c);
if (rcode == MINUS_EXPR)
rcode = PLUS_EXPR;
else if (rcode == TRUTH_ANDIF_EXPR)
rcode = BIT_AND_EXPR;
else if (rcode == TRUTH_ORIF_EXPR)
rcode = BIT_IOR_EXPR;
tree op = build_int_cst (unsigned_type_node, rcode);
/* Find the reduction array. */
if (!var)
var = orig;
gcc_assert (!is_reference (var));
tree ptype = build_pointer_type (type);
incoming = outgoing = var;
if (!inner)
{
/* See if an outer construct also reduces this variable. */
omp_context *outer = ctx;
t = lookup_oacc_reduction (oacc_get_reduction_array_id (var), ctx);
t = build_receiver_ref (t, false, ctx->outer);
while (omp_context *probe = outer->outer)
{
enum gimple_code type = gimple_code (probe->stmt);
tree cls;
array = create_tmp_var (ptype);
gimplify_assign (array, t, stmt_seqp);
switch (type)
{
case GIMPLE_OMP_FOR:
cls = gimple_omp_for_clauses (probe->stmt);
break;
tree ptr = create_tmp_var (TREE_TYPE (array));
case GIMPLE_OMP_TARGET:
if (gimple_omp_target_kind (probe->stmt)
!= GF_OMP_TARGET_KIND_OACC_PARALLEL)
goto do_lookup;
/* Find the reduction array. */
cls = gimple_omp_target_clauses (probe->stmt);
break;
/* testing a unary conversion. */
tree offset = create_tmp_var (sizetype);
gimplify_assign (offset, TYPE_SIZE_UNIT (type),
stmt_seqp);
t = create_tmp_var (sizetype);
gimplify_assign (t, unshare_expr (fold_build1 (NOP_EXPR, sizetype, tid)),
stmt_seqp);
stmt = gimple_build_assign (offset, MULT_EXPR, offset, t);
gimple_seq_add_stmt (stmt_seqp, stmt);
default:
goto do_lookup;
}
outer = probe;
for (; cls; cls = OMP_CLAUSE_CHAIN (cls))
if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION
&& orig == OMP_CLAUSE_DECL (cls))
goto has_outer_reduction;
}
/* Offset expression. Does the POINTER_PLUS_EXPR take care
of adding sizeof(var) to the array? */
ptr = create_tmp_var (ptype);
stmt = gimple_build_assign (unshare_expr (ptr), POINTER_PLUS_EXPR, array,
offset);
gimple_seq_add_stmt (stmt_seqp, stmt);
do_lookup:
/* This is the outermost construct with this reduction,
see if there's a mapping for it. */
if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET
&& maybe_lookup_field (orig, outer))
{
ref_to_res = build_receiver_ref (orig, false, outer);
if (is_reference (orig))
ref_to_res = build_simple_mem_ref (ref_to_res);
/* Move the local sum to gfc$sum[i]. */
x = unshare_expr (build_simple_mem_ref (ptr));
stmt = gimplify_assign (x, new_var, stmt_seqp);
outgoing = var;
incoming = omp_reduction_init_op (loc, rcode, TREE_TYPE (var));
}
else
incoming = outgoing = orig;
has_outer_reduction:;
}
if (!ref_to_res)
ref_to_res = integer_zero_node;
/* Determine position in reduction buffer, which may be used
by target. */
enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
offset = (offset + align - 1) & ~(align - 1);
tree off = build_int_cst (sizetype, offset);
offset += GET_MODE_SIZE (mode);
if (!init_code)
{
init_code = build_int_cst (integer_type_node,
IFN_GOACC_REDUCTION_INIT);
fini_code = build_int_cst (integer_type_node,
IFN_GOACC_REDUCTION_FINI);
setup_code = build_int_cst (integer_type_node,
IFN_GOACC_REDUCTION_SETUP);
teardown_code = build_int_cst (integer_type_node,
IFN_GOACC_REDUCTION_TEARDOWN);
}
tree setup_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, setup_code,
unshare_expr (ref_to_res),
incoming, level, op, off);
tree init_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, init_code,
unshare_expr (ref_to_res),
var, level, op, off);
tree fini_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, fini_code,
unshare_expr (ref_to_res),
var, level, op, off);
tree teardown_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, teardown_code,
ref_to_res, var, level, op, off);
gimplify_assign (var, setup_call, &before_fork);
gimplify_assign (var, init_call, &after_fork);
gimplify_assign (var, fini_call, &before_join);
gimplify_assign (outgoing, teardown_call, &after_join);
}
/* Now stitch things together. */
gimple_seq_add_seq (fork_seq, before_fork);
if (fork)
gimple_seq_add_stmt (fork_seq, fork);
gimple_seq_add_seq (fork_seq, after_fork);
gimple_seq_add_seq (join_seq, before_join);
if (join)
gimple_seq_add_stmt (join_seq, join);
gimple_seq_add_seq (join_seq, after_join);
}
/* Generate code to implement the REDUCTION clauses. */
@ -5413,6 +5450,10 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
tree x, c, tid = NULL_TREE;
int count = 0;
/* OpenACC loop reductions are handled elsewhere. */
if (is_gimple_omp_oacc (ctx->stmt))
return;
/* SIMD reductions are handled in lower_rec_input_clauses. */
if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
&& gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
@ -5484,13 +5525,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
if (code == MINUS_EXPR)
code = PLUS_EXPR;
if (is_gimple_omp_oacc (ctx->stmt))
{
gcc_checking_assert (!OMP_CLAUSE_REDUCTION_PLACEHOLDER (c));
oacc_lower_reduction_var_helper (stmt_seqp, ctx, tid, var, new_var);
}
else if (count == 1)
if (count == 1)
{
tree addr = build_fold_addr_expr_loc (clause_loc, ref);
@ -6055,8 +6090,8 @@ lower_oacc_head_tail (location_t loc, tree clauses,
build_int_cst (integer_type_node, done),
&join_seq);
gimple_seq_add_stmt (&fork_seq, fork);
gimple_seq_add_stmt (&join_seq, join);
lower_oacc_reductions (loc, clauses, place, inner,
fork, join, &fork_seq, &join_seq, ctx);
/* Append this level to head. */
gimple_seq_add_seq (head, fork_seq);
@ -12948,446 +12983,6 @@ make_pass_expand_omp_ssa (gcc::context *ctxt)
/* Routines to lower OMP directives into OMP-GIMPLE. */
/* Helper function to perform a, potentially COMPLEX_TYPE, operation and
   convert it to gimple.

   Appends statements computing DEST = DEST OP SRC to SEQ.  When DEST is
   not of COMPLEX_TYPE a single assignment is emitted.  For a complex
   DEST the operands are split into real and imaginary parts and only
   PLUS_EXPR and MULT_EXPR are supported; any other OP is an internal
   error.  */

static void
oacc_gimple_assign (tree dest, tree_code op, tree src, gimple_seq *seq)
{
  gimple *stmt;

  if (TREE_CODE (TREE_TYPE (dest)) != COMPLEX_TYPE)
    {
      stmt = gimple_build_assign (dest, op, dest, src);
      gimple_seq_add_stmt (seq, stmt);
      return;
    }

  /* Load the real and imaginary parts of both operands into
     temporaries.  */
  tree t = create_tmp_var (TREE_TYPE (TREE_TYPE (dest)));
  tree rdest = fold_build1 (REALPART_EXPR, TREE_TYPE (TREE_TYPE (dest)), dest);
  gimplify_assign (t, rdest, seq);
  rdest = t;

  t = create_tmp_var (TREE_TYPE (TREE_TYPE (dest)));
  tree idest = fold_build1 (IMAGPART_EXPR, TREE_TYPE (TREE_TYPE (dest)), dest);
  gimplify_assign (t, idest, seq);
  idest = t;

  t = create_tmp_var (TREE_TYPE (TREE_TYPE (src)));
  tree rsrc = fold_build1 (REALPART_EXPR, TREE_TYPE (TREE_TYPE (src)), src);
  gimplify_assign (t, rsrc, seq);
  rsrc = t;

  t = create_tmp_var (TREE_TYPE (TREE_TYPE (src)));
  tree isrc = fold_build1 (IMAGPART_EXPR, TREE_TYPE (TREE_TYPE (src)), src);
  gimplify_assign (t, isrc, seq);
  isrc = t;

  /* R and I receive the real and imaginary parts of the result.  */
  tree r = create_tmp_var (TREE_TYPE (TREE_TYPE (dest)));
  tree i = create_tmp_var (TREE_TYPE (TREE_TYPE (dest)));
  tree result;

  if (op == PLUS_EXPR)
    {
      stmt = gimple_build_assign (r, op, rdest, rsrc);
      gimple_seq_add_stmt (seq, stmt);

      stmt = gimple_build_assign (i, op, idest, isrc);
      gimple_seq_add_stmt (seq, stmt);
    }
  else if (op == MULT_EXPR)
    {
      /* Let x = a + ib = dest, y = c + id = src.
         x * y = (ac - bd) + i(ad + bc)  */
      tree ac = create_tmp_var (TREE_TYPE (TREE_TYPE (dest)));
      tree bd = create_tmp_var (TREE_TYPE (TREE_TYPE (dest)));
      tree ad = create_tmp_var (TREE_TYPE (TREE_TYPE (dest)));
      tree bc = create_tmp_var (TREE_TYPE (TREE_TYPE (dest)));

      stmt = gimple_build_assign (ac, MULT_EXPR, rdest, rsrc);
      gimple_seq_add_stmt (seq, stmt);

      stmt = gimple_build_assign (bd, MULT_EXPR, idest, isrc);
      gimple_seq_add_stmt (seq, stmt);

      stmt = gimple_build_assign (r, MINUS_EXPR, ac, bd);
      gimple_seq_add_stmt (seq, stmt);

      stmt = gimple_build_assign (ad, MULT_EXPR, rdest, isrc);
      gimple_seq_add_stmt (seq, stmt);

      /* The b*c product must go into BC; storing it into BD would leave
         BC unassigned when it is read for the imaginary part below.  */
      stmt = gimple_build_assign (bc, MULT_EXPR, idest, rsrc);
      gimple_seq_add_stmt (seq, stmt);

      stmt = gimple_build_assign (i, PLUS_EXPR, ad, bc);
      gimple_seq_add_stmt (seq, stmt);
    }
  else
    gcc_unreachable ();

  result = build2 (COMPLEX_EXPR, TREE_TYPE (dest), r, i);
  gimplify_assign (dest, result, seq);
}
/* Initialize the reduction array with default values.
Appends to STMT_SEQP a loop that stores INIT into each of the first
NTHREADS elements of the buffer pointed to by ARRAY. The element type
is taken from ARRAY's pointed-to type. */
static void
oacc_init_reduction_array (tree array, tree init, tree nthreads,
gimple_seq *stmt_seqp)
{
tree type = TREE_TYPE (TREE_TYPE (array));
tree x, loop_header, loop_body, loop_exit;
gimple *stmt;
/* Create for loop.
let init = the initial value for the reduction
let array = reduction variable array
for (i = 0; i < nthreads; i++)
array[i] = init
*/
loop_header = create_artificial_label (UNKNOWN_LOCATION);
loop_body = create_artificial_label (UNKNOWN_LOCATION);
loop_exit = create_artificial_label (UNKNOWN_LOCATION);
/* Create and initialize an index variable. */
tree ix = create_tmp_var (sizetype);
gimplify_assign (ix, fold_build1 (NOP_EXPR, sizetype, integer_zero_node),
stmt_seqp);
/* Insert the loop header label here. */
gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_header));
/* Exit loop if ix >= nthreads. */
x = create_tmp_var (sizetype);
gimplify_assign (x, fold_build1 (NOP_EXPR, sizetype, nthreads), stmt_seqp);
stmt = gimple_build_cond (GE_EXPR, ix, x, loop_exit, loop_body);
gimple_seq_add_stmt (stmt_seqp, stmt);
/* Insert the loop body label here. */
gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_body));
/* Calculate the array offset: element size in bytes times ix. */
tree offset = create_tmp_var (sizetype);
gimplify_assign (offset, TYPE_SIZE_UNIT (type), stmt_seqp);
stmt = gimple_build_assign (offset, MULT_EXPR, offset, ix);
gimple_seq_add_stmt (stmt_seqp, stmt);
/* ptr = array + offset, i.e. the address of element ix. */
tree ptr = create_tmp_var (TREE_TYPE (array));
stmt = gimple_build_assign (ptr, POINTER_PLUS_EXPR, array, offset);
gimple_seq_add_stmt (stmt_seqp, stmt);
/* Assign init. */
gimplify_assign (build_simple_mem_ref (ptr), init, stmt_seqp);
/* Increment the induction variable. */
tree one = fold_build1 (NOP_EXPR, sizetype, integer_one_node);
stmt = gimple_build_assign (ix, PLUS_EXPR, ix, one);
gimple_seq_add_stmt (stmt_seqp, stmt);
/* Go back to the top of the loop. */
gimple_seq_add_stmt (stmt_seqp, gimple_build_goto (loop_header));
/* Place the loop exit label here. */
gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_exit));
}
/* Helper function to initialize local data for the reduction arrays.
The reduction arrays need to be placed inside the calling function
for accelerators, or else the host won't be able to perform the final
reduction.
For every reduction clause in CLAUSES, statements are appended to
STMT_SEQP that allocate an array of NTHREADS elements, fill it with
the reduction's initial value, and a map clause for the array is
appended to the clauses of the enclosing OpenACC parallel construct
found from CTX. */
static void
oacc_initialize_reduction_data (tree clauses, tree nthreads,
gimple_seq *stmt_seqp, omp_context *ctx)
{
tree c, t, oc;
gimple *stmt;
omp_context *octx;
/* Find the innermost OpenACC parallel context. Only CTX itself and
its immediate outer context are considered. */
if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET
&& (gimple_omp_target_kind (ctx->stmt)
== GF_OMP_TARGET_KIND_OACC_PARALLEL))
octx = ctx;
else
octx = ctx->outer;
gcc_checking_assert (gimple_code (octx->stmt) == GIMPLE_OMP_TARGET
&& (gimple_omp_target_kind (octx->stmt)
== GF_OMP_TARGET_KIND_OACC_PARALLEL));
/* Extract the clauses. */
oc = gimple_omp_target_clauses (octx->stmt);
/* Find the last outer clause, so new map clauses can be chained
after it. OC stays NULL if there are no clauses at all. */
for (; oc && OMP_CLAUSE_CHAIN (oc); oc = OMP_CLAUSE_CHAIN (oc))
;
/* Allocate arrays for each reduction variable. */
for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
{
if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
continue;
tree var = OMP_CLAUSE_DECL (c);
tree type = get_base_type (var);
tree array = lookup_oacc_reduction (oacc_get_reduction_array_id (var),
ctx);
tree size, call;
/* Calculate size of the reduction array: nthreads times the
element size in bytes. */
t = create_tmp_var (TREE_TYPE (nthreads));
stmt = gimple_build_assign (t, MULT_EXPR, nthreads,
fold_convert (TREE_TYPE (nthreads),
TYPE_SIZE_UNIT (type)));
gimple_seq_add_stmt (stmt_seqp, stmt);
size = create_tmp_var (sizetype);
gimplify_assign (size, fold_build1 (NOP_EXPR, sizetype, t), stmt_seqp);
/* Now allocate memory for it. */
call = unshare_expr (builtin_decl_explicit (BUILT_IN_ALLOCA));
stmt = gimple_build_call (call, 1, size);
gimple_call_set_lhs (stmt, array);
gimple_seq_add_stmt (stmt_seqp, stmt);
/* Initialize array. */
tree init = omp_reduction_init_op (OMP_CLAUSE_LOCATION (c),
OMP_CLAUSE_REDUCTION_CODE (c),
type);
oacc_init_reduction_array (array, init, nthreads, stmt_seqp);
/* Map this array into the accelerator. */
/* Add the reduction array to the list of clauses. */
tree x = array;
t = build_omp_clause (gimple_location (ctx->stmt), OMP_CLAUSE_MAP);
OMP_CLAUSE_SET_MAP_KIND (t, GOMP_MAP_FORCE_TOFROM);
OMP_CLAUSE_DECL (t) = x;
OMP_CLAUSE_CHAIN (t) = NULL;
/* Chain the new clause after the current tail, or install it as the
first clause when the construct had none. */
if (oc)
OMP_CLAUSE_CHAIN (oc) = t;
else
gimple_omp_target_set_clauses (as_a <gomp_target *> (octx->stmt), t);
OMP_CLAUSE_SIZE (t) = size;
/* The new clause is now the tail of the chain. */
oc = t;
}
}
/* Helper function to process the array of partial reductions.  NTHREADS
   indicates the number of threads.  Unfortunately, GOACC_GET_NUM_THREADS
   cannot be used here, because nthreads on the host may be different than
   on the accelerator.

   Appends to STMT_SEQP a loop over the first NTHREADS elements which,
   for every reduction clause in CLAUSES, folds that element of the
   clause's partial-result array (looked up through CTX) into the
   original reduction variable.  */

static void
oacc_finalize_reduction_data (tree clauses, tree nthreads,
                              gimple_seq *stmt_seqp, omp_context *ctx)
{
  tree c, x, var, array, loop_header, loop_body, loop_exit, type;
  gimple *stmt;

  /* Create for loop.

     let var = the original reduction variable
     let array = reduction variable array

     for (i = 0; i < nthreads; i++)
       var op= array[i]
   */

  loop_header = create_artificial_label (UNKNOWN_LOCATION);
  loop_body = create_artificial_label (UNKNOWN_LOCATION);
  loop_exit = create_artificial_label (UNKNOWN_LOCATION);

  /* Create and initialize an index variable.  */
  tree ix = create_tmp_var (sizetype);
  gimplify_assign (ix, fold_build1 (NOP_EXPR, sizetype, integer_zero_node),
                   stmt_seqp);

  /* Insert the loop header label here.  */
  gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_header));

  /* Exit loop if ix >= nthreads.  */
  x = create_tmp_var (sizetype);
  gimplify_assign (x, fold_build1 (NOP_EXPR, sizetype, nthreads), stmt_seqp);
  stmt = gimple_build_cond (GE_EXPR, ix, x, loop_exit, loop_body);
  gimple_seq_add_stmt (stmt_seqp, stmt);

  /* Insert the loop body label here.  */
  gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_body));

  /* Collapse each reduction array, one element at a time.  */
  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
    {
      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
        continue;

      tree_code reduction_code = OMP_CLAUSE_REDUCTION_CODE (c);

      /* reduction(-:var) sums up the partial results, so it acts
         identically to reduction(+:var).  */
      if (reduction_code == MINUS_EXPR)
        reduction_code = PLUS_EXPR;

      /* Set up reduction variable var.  */
      var = OMP_CLAUSE_DECL (c);
      type = get_base_type (var);
      array = lookup_oacc_reduction (oacc_get_reduction_array_id
                                     (OMP_CLAUSE_DECL (c)), ctx);

      /* Calculate the array offset.  */
      tree offset = create_tmp_var (sizetype);
      gimplify_assign (offset, TYPE_SIZE_UNIT (type), stmt_seqp);
      stmt = gimple_build_assign (offset, MULT_EXPR, offset, ix);
      gimple_seq_add_stmt (stmt_seqp, stmt);

      tree ptr = create_tmp_var (TREE_TYPE (array));
      stmt = gimple_build_assign (ptr, POINTER_PLUS_EXPR, array, offset);
      gimple_seq_add_stmt (stmt_seqp, stmt);

      /* Extract array[ix] into mem.  */
      tree mem = create_tmp_var (type);
      gimplify_assign (mem, build_simple_mem_ref (ptr), stmt_seqp);

      /* Find the original reduction variable.  */
      if (is_reference (var))
        var = build_simple_mem_ref (var);

      tree t = create_tmp_var (type);

      x = lang_hooks.decls.omp_clause_assign_op (c, t, var);
      gimplify_and_add (unshare_expr (x), stmt_seqp);

      /* var = var op mem.  Use the adjusted REDUCTION_CODE rather than
         the raw clause code, so that partial results of a '-' reduction
         are added together instead of subtracted.  */
      switch (reduction_code)
        {
        case TRUTH_ANDIF_EXPR:
        case TRUTH_ORIF_EXPR:
          t = fold_build2 (reduction_code, integer_type_node, t, mem);
          gimplify_and_add (t, stmt_seqp);
          break;
        default:
          /* The lhs isn't a gimple_reg when var is COMPLEX_TYPE.  */
          oacc_gimple_assign (t, reduction_code, mem, stmt_seqp);
          break;
        }

      t = fold_build1 (NOP_EXPR, TREE_TYPE (var), t);
      x = lang_hooks.decls.omp_clause_assign_op (c, var, t);
      gimplify_and_add (unshare_expr (x), stmt_seqp);
    }

  /* Increment the induction variable.  */
  tree one = fold_build1 (NOP_EXPR, sizetype, integer_one_node);
  stmt = gimple_build_assign (ix, PLUS_EXPR, ix, one);
  gimple_seq_add_stmt (stmt_seqp, stmt);

  /* Go back to the top of the loop.  */
  gimple_seq_add_stmt (stmt_seqp, gimple_build_goto (loop_header));

  /* Place the loop exit label here.  */
  gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_exit));
}
/* Scan through all of the gimple stmts searching for an OMP_FOR_EXPR, and
scan that for reductions.
BODY is the statement sequence to search. For each GIMPLE_OMP_FOR
found that carries a reduction clause, the code that sets up the
partial-reduction arrays is appended to IN_STMT_SEQP and the code that
combines them is appended to OUT_STMT_SEQP. CTX is replaced by the
context of each such loop while it is processed. */
static void
oacc_process_reduction_data (gimple_seq *body, gimple_seq *in_stmt_seqp,
gimple_seq *out_stmt_seqp, omp_context *ctx)
{
gimple_stmt_iterator gsi;
gimple_seq inner = NULL;
/* A collapse clause may have inserted a new bind block. Descend into
bind bodies until a loop statement is found at the current level or
the sequence is exhausted. */
gsi = gsi_start (*body);
while (!gsi_end_p (gsi))
{
gimple *stmt = gsi_stmt (gsi);
if (gbind *bind_stmt = dyn_cast <gbind *> (stmt))
{
inner = gimple_bind_body (bind_stmt);
body = &inner;
gsi = gsi_start (*body);
}
else if (dyn_cast <gomp_for *> (stmt))
break;
else
gsi_next (&gsi);
}
for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
{
tree clauses, nthreads, t, c, acc_device, acc_device_host, call,
enter, exit;
bool reduction_found = false;
gimple *stmt = gsi_stmt (gsi);
switch (gimple_code (stmt))
{
case GIMPLE_OMP_FOR:
clauses = gimple_omp_for_clauses (stmt);
/* Search for a reduction clause. */
for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
{
reduction_found = true;
break;
}
if (!reduction_found)
break;
ctx = maybe_lookup_ctx (stmt);
t = NULL_TREE;
/* Extract the number of threads. */
nthreads = create_tmp_var (sizetype);
t = oacc_max_threads (ctx);
gimplify_assign (nthreads, t, in_stmt_seqp);
/* Determine if this kernel will be executed on the host. */
call = builtin_decl_explicit (BUILT_IN_ACC_GET_DEVICE_TYPE);
acc_device = create_tmp_var (integer_type_node, ".acc_device_type");
stmt = gimple_build_call (call, 0);
gimple_call_set_lhs (stmt, acc_device);
gimple_seq_add_stmt (in_stmt_seqp, stmt);
/* Set nthreads = 1 for ACC_DEVICE_TYPE=host. */
acc_device_host = create_tmp_var (integer_type_node,
".acc_device_host");
gimplify_assign (acc_device_host,
build_int_cst (integer_type_node,
GOMP_DEVICE_HOST),
in_stmt_seqp);
/* Emit: if (acc_device == acc_device_host) nthreads = 1; */
enter = create_artificial_label (UNKNOWN_LOCATION);
exit = create_artificial_label (UNKNOWN_LOCATION);
stmt = gimple_build_cond (EQ_EXPR, acc_device, acc_device_host,
enter, exit);
gimple_seq_add_stmt (in_stmt_seqp, stmt);
gimple_seq_add_stmt (in_stmt_seqp, gimple_build_label (enter));
gimplify_assign (nthreads, fold_build1 (NOP_EXPR, sizetype,
integer_one_node),
in_stmt_seqp);
gimple_seq_add_stmt (in_stmt_seqp, gimple_build_label (exit));
/* Allocate and initialize the partial-result arrays before the
region, and combine them after it. */
oacc_initialize_reduction_data (clauses, nthreads, in_stmt_seqp,
ctx);
oacc_finalize_reduction_data (clauses, nthreads, out_stmt_seqp, ctx);
break;
default:
// Scan for other directives which support reduction here.
break;
}
}
}
/* If ctx is a worksharing context inside of a cancellable parallel
region and it isn't nowait, add lhs to its GIMPLE_OMP_RETURN
and conditional branch to parallel's cancel_label to handle
@ -15009,9 +14604,6 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
irlist = NULL;
orlist = NULL;
if (offloaded
&& is_gimple_omp_oacc (stmt))
oacc_process_reduction_data (&tgt_body, &irlist, &orlist, ctx);
for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
switch (OMP_CLAUSE_CODE (c))
@ -15933,7 +15525,22 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
break;
}
gimple_seq fork_seq = NULL;
gimple_seq join_seq = NULL;
if (is_oacc_parallel (ctx))
{
/* If there are reductions on the offloaded region itself, treat
them as a dummy GANG loop. */
tree level = build_int_cst (integer_type_node, GOMP_DIM_GANG);
lower_oacc_reductions (gimple_location (ctx->stmt), clauses, level,
false, NULL, NULL, &fork_seq, &join_seq, ctx);
}
gimple_seq_add_seq (&new_body, fork_seq);
gimple_seq_add_seq (&new_body, tgt_body);
gimple_seq_add_seq (&new_body, join_seq);
if (offloaded)
new_body = maybe_catch_exception (new_body);
@ -19022,6 +18629,11 @@ oacc_loop_xform_head_tail (gcall *from, int level)
else if (k == kind && stmt != from)
break;
}
else if (is_gimple_call (stmt)
&& gimple_call_internal_p (stmt)
&& gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION)
*gimple_call_arg_ptr (stmt, 3) = replacement;
gsi_next (&gsi);
while (gsi_end_p (gsi))
gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
@ -19240,6 +18852,53 @@ default_goacc_fork_join (gcall *ARG_UNUSED (call),
return targetm.have_oacc_join ();
}
/* Default goacc.reduction early expander.  CALL has the form

     LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)

   and is replaced in place by the following statements:

   - SETUP with a non-zero RES_PTR:     'LHS = *RES_PTR'
   - TEARDOWN with a non-zero RES_PTR:  '*RES_PTR = VAR', then
                                        'LHS = VAR' if there is an LHS
   - every other case:                  'LHS = VAR' if there is an LHS  */

void
default_goacc_reduction (gcall *call)
{
  unsigned kind = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  gimple_stmt_iterator iter = gsi_for_stmt (call);
  tree result = gimple_call_lhs (call);
  tree local = gimple_call_arg (call, 2);
  gimple_seq stmts = NULL;
  bool is_setup = (kind == IFN_GOACC_REDUCTION_SETUP);

  if (is_setup || kind == IFN_GOACC_REDUCTION_TEARDOWN)
    {
      /* Setup and Teardown need to copy from/to the receiver object,
         if there is one.  */
      tree receiver = gimple_call_arg (call, 1);

      if (!integer_zerop (receiver))
        {
          tree mem = build_simple_mem_ref (receiver);
          gimple *copy;

          if (is_setup)
            {
              /* The load from the receiver stands in for the plain
                 copy of VAR, so suppress the latter.  */
              copy = gimple_build_assign (result, mem);
              result = NULL;
            }
          else
            copy = gimple_build_assign (mem, local);

          gimple_seq_add_stmt (&stmts, copy);
        }
    }

  /* Copy VAR to LHS, if there is (still) an LHS.  */
  if (result)
    gimple_seq_add_stmt (&stmts, gimple_build_assign (result, local));

  gsi_replace_with_seq (&iter, stmts, true);
}
/* Main entry point for oacc transformations which run on the device
compiler after LTO, so we know what the target device is at this
point (including the host fallback). */
@ -19267,6 +18926,10 @@ execute_oacc_device_lower ()
fprintf (dump_file, "\n");
}
/* Offloaded targets may introduce new basic blocks, which require
dominance information to update SSA. */
calculate_dominance_info (CDI_DOMINATORS);
/* Now lower internal loop functions to target-specific code
sequences. */
basic_block bb;
@ -19301,6 +18964,19 @@ execute_oacc_device_lower ()
rescan = true;
break;
case IFN_GOACC_REDUCTION:
/* Mark the function for SSA renaming. */
mark_virtual_operands_for_renaming (cfun);
/* If the level is -1, this ended up being an unused
axis. Handle as a default. */
if (integer_minus_onep (gimple_call_arg (call, 3)))
default_goacc_reduction (call);
else
targetm.goacc.reduction (call);
rescan = true;
break;
case IFN_UNIQUE:
{
enum ifn_unique_kind kind

View File

@ -1670,6 +1670,17 @@ The default hook returns false, if there are no RTL expanders for them.",
bool, (gcall *call, const int *dims, bool is_fork),
default_goacc_fork_join)
DEFHOOK
(reduction,
"This hook is used by the oacc_transform pass to expand calls to the\n\
@var{GOACC_REDUCTION} internal function, into a sequence of gimple\n\
instructions. @var{call} is the gimple statement containing the call to\n\
the function. This hook removes statement @var{call} after the\n\
expanded sequence has been inserted. This hook is also responsible\n\
for allocating any storage for reductions when necessary.",
void, (gcall *call),
default_goacc_reduction)
HOOK_VECTOR_END (goacc)
/* Functions relating to vectorization. */

View File

@ -111,6 +111,7 @@ extern void default_destroy_cost_data (void *);
/* OpenACC hooks. */
extern bool default_goacc_validate_dims (tree, int [], int);
extern bool default_goacc_fork_join (gcall *, const int [], bool);
extern void default_goacc_reduction (gcall *);
/* These are here, and not in hooks.[ch], because not all users of
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */