OpenMP offloading to NVPTX: middle-end changes

* internal-fn.c (expand_GOMP_SIMT_LANE): New.
	(expand_GOMP_SIMT_VF): New.
	(expand_GOMP_SIMT_LAST_LANE): New.
	(expand_GOMP_SIMT_ORDERED_PRED): New.
	(expand_GOMP_SIMT_VOTE_ANY): New.
	(expand_GOMP_SIMT_XCHG_BFLY): New.
	(expand_GOMP_SIMT_XCHG_IDX): New.
	* internal-fn.def (GOMP_SIMT_LANE): New.
	(GOMP_SIMT_VF): New.
	(GOMP_SIMT_LAST_LANE): New.
	(GOMP_SIMT_ORDERED_PRED): New.
	(GOMP_SIMT_VOTE_ANY): New.
	(GOMP_SIMT_XCHG_BFLY): New.
	(GOMP_SIMT_XCHG_IDX): New.
	* omp-low.c (omp_maybe_offloaded_ctx): New, outlined from...
	(create_omp_child_function): ...here.  Set "omp target entrypoint"
	or "omp declare target" attribute based on is_gimple_omp_offloaded.
	(omp_max_simt_vf): New.  Use it...
	(omp_max_vf): ...here.
	(lower_rec_input_clauses): Add reduction lowering for SIMT execution.
	(lower_lastprivate_clauses): Likewise, for "lastprivate" lowering.
	(lower_omp_ordered): Likewise, for "ordered" lowering.
	(expand_omp_simd): Add SIMT transforms.
	(pass_data_lower_omp): Add PROP_gimple_lomp_dev.
	(execute_omp_device_lower): New.
	(pass_data_omp_device_lower): New.
	(pass_omp_device_lower): New pass.
	(make_pass_omp_device_lower): New.
	* passes.def (pass_omp_device_lower): Position new pass.
	* tree-pass.h (PROP_gimple_lomp_dev): Define.
	(make_pass_omp_device_lower): Declare.
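
For context, a sketch of the kind of OpenMP construct whose device-side lowering these changes address; the concrete clauses, array size, and build flags below are illustrative assumptions and are not taken from the patch itself.

/* Illustrative only: a target region with a SIMD loop whose reduction and
   lastprivate clauses exercise the new SIMT lowering (per-lane
   privatization, butterfly reductions, last-lane broadcast).
   Build with something like: gcc -O2 -fopenmp -foffload=nvptx-none  */
#include <stdio.h>

int main (void)
{
  double a[1024], s = 0.0;
  int last = -1;
  for (int i = 0; i < 1024; i++)
    a[i] = i * 0.5;

#pragma omp target teams distribute parallel for simd \
        map(to: a[0:1024]) map(tofrom: s, last) \
        reduction(+: s) lastprivate(last)
  for (int i = 0; i < 1024; i++)
    {
      s += a[i];
      last = i;
    }

  printf ("%f %d\n", s, last);
  return 0;
}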

From-SVN: r242710
Author: Alexander Monakov <amonakov@ispras.ru>
Date:   2016-11-22 19:57:29 +03:00 (committed by Alexander Monakov)
Commit: 9669b00bfb (parent 9435cd52b3)
6 changed files with 554 additions and 64 deletions

gcc/ChangeLog

@@ -1,3 +1,37 @@
2016-11-22 Alexander Monakov <amonakov@ispras.ru>
* internal-fn.c (expand_GOMP_SIMT_LANE): New.
(expand_GOMP_SIMT_VF): New.
(expand_GOMP_SIMT_LAST_LANE): New.
(expand_GOMP_SIMT_ORDERED_PRED): New.
(expand_GOMP_SIMT_VOTE_ANY): New.
(expand_GOMP_SIMT_XCHG_BFLY): New.
(expand_GOMP_SIMT_XCHG_IDX): New.
* internal-fn.def (GOMP_SIMT_LANE): New.
(GOMP_SIMT_VF): New.
(GOMP_SIMT_LAST_LANE): New.
(GOMP_SIMT_ORDERED_PRED): New.
(GOMP_SIMT_VOTE_ANY): New.
(GOMP_SIMT_XCHG_BFLY): New.
(GOMP_SIMT_XCHG_IDX): New.
* omp-low.c (omp_maybe_offloaded_ctx): New, outlined from...
(create_omp_child_function): ...here. Set "omp target entrypoint"
or "omp declare target" attribute based on is_gimple_omp_offloaded.
(omp_max_simt_vf): New. Use it...
(omp_max_vf): ...here.
(lower_rec_input_clauses): Add reduction lowering for SIMT execution.
(lower_lastprivate_clauses): Likewise, for "lastprivate" lowering.
(lower_omp_ordered): Likewise, for "ordered" lowering.
(expand_omp_simd): Add SIMT transforms.
(pass_data_lower_omp): Add PROP_gimple_lomp_dev.
(execute_omp_device_lower): New.
(pass_data_omp_device_lower): New.
(pass_omp_device_lower): New pass.
(make_pass_omp_device_lower): New.
* passes.def (pass_omp_device_lower): Position new pass.
* tree-pass.h (PROP_gimple_lomp_dev): Define.
(make_pass_omp_device_lower): Declare.
2016-11-22 Jakub Jelinek <jakub@redhat.com>
PR target/78451

gcc/internal-fn.c

@@ -158,6 +158,132 @@ expand_ANNOTATE (internal_fn, gcall *)
gcc_unreachable ();
}
/* Lane index on SIMT targets: thread index in the warp on NVPTX. On targets
without SIMT execution this should be expanded in omp_device_lower pass. */
static void
expand_GOMP_SIMT_LANE (internal_fn, gcall *stmt)
{
tree lhs = gimple_call_lhs (stmt);
if (!lhs)
return;
rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
gcc_assert (targetm.have_omp_simt_lane ());
emit_insn (targetm.gen_omp_simt_lane (target));
}
/* This should get expanded in omp_device_lower pass. */
static void
expand_GOMP_SIMT_VF (internal_fn, gcall *)
{
gcc_unreachable ();
}
/* Lane index of the first SIMT lane that supplies a non-zero argument.
This is a SIMT counterpart to GOMP_SIMD_LAST_LANE, used to represent the
lane that executed the last iteration for handling OpenMP lastprivate. */
static void
expand_GOMP_SIMT_LAST_LANE (internal_fn, gcall *stmt)
{
tree lhs = gimple_call_lhs (stmt);
if (!lhs)
return;
rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
rtx cond = expand_normal (gimple_call_arg (stmt, 0));
machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
struct expand_operand ops[2];
create_output_operand (&ops[0], target, mode);
create_input_operand (&ops[1], cond, mode);
gcc_assert (targetm.have_omp_simt_last_lane ());
expand_insn (targetm.code_for_omp_simt_last_lane, 2, ops);
}
/* Non-transparent predicate used in SIMT lowering of OpenMP "ordered". */
static void
expand_GOMP_SIMT_ORDERED_PRED (internal_fn, gcall *stmt)
{
tree lhs = gimple_call_lhs (stmt);
if (!lhs)
return;
rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
rtx ctr = expand_normal (gimple_call_arg (stmt, 0));
machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
struct expand_operand ops[2];
create_output_operand (&ops[0], target, mode);
create_input_operand (&ops[1], ctr, mode);
gcc_assert (targetm.have_omp_simt_ordered ());
expand_insn (targetm.code_for_omp_simt_ordered, 2, ops);
}
/* "Or" boolean reduction across SIMT lanes: return non-zero in all lanes if
any lane supplies a non-zero argument. */
static void
expand_GOMP_SIMT_VOTE_ANY (internal_fn, gcall *stmt)
{
tree lhs = gimple_call_lhs (stmt);
if (!lhs)
return;
rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
rtx cond = expand_normal (gimple_call_arg (stmt, 0));
machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
struct expand_operand ops[2];
create_output_operand (&ops[0], target, mode);
create_input_operand (&ops[1], cond, mode);
gcc_assert (targetm.have_omp_simt_vote_any ());
expand_insn (targetm.code_for_omp_simt_vote_any, 2, ops);
}
/* Exchange between SIMT lanes with a "butterfly" pattern: source lane index
is destination lane index XOR given offset. */
static void
expand_GOMP_SIMT_XCHG_BFLY (internal_fn, gcall *stmt)
{
tree lhs = gimple_call_lhs (stmt);
if (!lhs)
return;
rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
rtx src = expand_normal (gimple_call_arg (stmt, 0));
rtx idx = expand_normal (gimple_call_arg (stmt, 1));
machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
struct expand_operand ops[3];
create_output_operand (&ops[0], target, mode);
create_input_operand (&ops[1], src, mode);
create_input_operand (&ops[2], idx, SImode);
gcc_assert (targetm.have_omp_simt_xchg_bfly ());
expand_insn (targetm.code_for_omp_simt_xchg_bfly, 3, ops);
}
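
A rough host-side illustration of the exchange semantics described in the comment above: every lane receives the value held by the lane whose index differs by the given XOR offset. The fixed warp size of 32 and the sample data are assumptions for illustration; this is not the NVPTX expansion itself.

/* Host-side sketch of the "butterfly" exchange: dst[lane] = src[lane ^ offset].  */
#include <stdio.h>
#define WARP_SIZE 32

static void
simt_xchg_bfly (const int *src, int *dst, unsigned offset)
{
  for (unsigned lane = 0; lane < WARP_SIZE; lane++)
    dst[lane] = src[lane ^ offset];
}

int main (void)
{
  int v[WARP_SIZE], t[WARP_SIZE];
  for (unsigned lane = 0; lane < WARP_SIZE; lane++)
    v[lane] = lane + 1;
  /* One butterfly step with offset 16: lane 0 sees lane 16's value.  */
  simt_xchg_bfly (v, t, 16);
  printf ("lane 0 received %d\n", t[0]);   /* prints 17 */
  return 0;
}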
/* Exchange between SIMT lanes according to given source lane index. */
static void
expand_GOMP_SIMT_XCHG_IDX (internal_fn, gcall *stmt)
{
tree lhs = gimple_call_lhs (stmt);
if (!lhs)
return;
rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
rtx src = expand_normal (gimple_call_arg (stmt, 0));
rtx idx = expand_normal (gimple_call_arg (stmt, 1));
machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
struct expand_operand ops[3];
create_output_operand (&ops[0], target, mode);
create_input_operand (&ops[1], src, mode);
create_input_operand (&ops[2], idx, SImode);
gcc_assert (targetm.have_omp_simt_xchg_idx ());
expand_insn (targetm.code_for_omp_simt_xchg_idx, 3, ops);
}
/* This should get expanded in adjust_simduid_builtins. */
static void

gcc/internal-fn.def

@@ -141,6 +141,13 @@ DEF_INTERNAL_INT_FN (FFS, ECF_CONST, ffs, unary)
DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary)
DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary)
DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_ORDERED_PRED, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_VOTE_ANY, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_XCHG_BFLY, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_XCHG_IDX, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMD_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMD_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMD_LAST_LANE, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)

gcc/omp-low.c

@@ -2427,6 +2427,20 @@ cilk_for_check_loop_diff_type (tree type)
}
}
/* Return true if CTX may belong to offloaded code: either if current function
is offloaded, or any enclosing context corresponds to a target region. */
static bool
omp_maybe_offloaded_ctx (omp_context *ctx)
{
if (cgraph_node::get (current_function_decl)->offloadable)
return true;
for (; ctx; ctx = ctx->outer)
if (is_gimple_omp_offloaded (ctx->stmt))
return true;
return false;
}
/* Build a decl for the omp child function. It'll not contain a body
yet, just the bare decl. */
@@ -2475,28 +2489,24 @@ create_omp_child_function (omp_context *ctx, bool task_copy)
DECL_CONTEXT (decl) = NULL_TREE;
DECL_INITIAL (decl) = make_node (BLOCK);
BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
if (cgraph_node::get (current_function_decl)->offloadable)
cgraph_node::get_create (decl)->offloadable = 1;
else
if (omp_maybe_offloaded_ctx (ctx))
{
omp_context *octx;
for (octx = ctx; octx; octx = octx->outer)
if (is_gimple_omp_offloaded (octx->stmt))
{
cgraph_node::get_create (decl)->offloadable = 1;
if (ENABLE_OFFLOADING)
g->have_offload = true;
break;
}
cgraph_node::get_create (decl)->offloadable = 1;
if (ENABLE_OFFLOADING)
g->have_offload = true;
}
if (cgraph_node::get_create (decl)->offloadable
&& !lookup_attribute ("omp declare target",
DECL_ATTRIBUTES (current_function_decl)))
DECL_ATTRIBUTES (decl)
= tree_cons (get_identifier ("omp target entrypoint"),
NULL_TREE, DECL_ATTRIBUTES (decl));
{
const char *target_attr = (is_gimple_omp_offloaded (ctx->stmt)
? "omp target entrypoint"
: "omp declare target");
DECL_ATTRIBUTES (decl)
= tree_cons (get_identifier (target_attr),
NULL_TREE, DECL_ATTRIBUTES (decl));
}
t = build_decl (DECL_SOURCE_LOCATION (decl),
RESULT_DECL, NULL_TREE, void_type_node);
@@ -4264,6 +4274,25 @@ omp_clause_aligned_alignment (tree clause)
return build_int_cst (integer_type_node, al);
}
/* Return maximum SIMT width if offloading may target SIMT hardware. */
static int
omp_max_simt_vf (void)
{
if (!optimize)
return 0;
if (ENABLE_OFFLOADING)
for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c; )
{
if (!strncmp (c, "nvptx", strlen ("nvptx")))
return 32;
else if ((c = strchr (c, ',')))
c++;
}
return 0;
}
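
A stand-alone sketch of the scan above, for clarity: OFFLOAD_TARGET_NAMES is treated as a comma-separated list, and a SIMT width of 32 is reported if any entry starts with "nvptx". The example value passed in main is hypothetical.

/* Mirrors the loop in omp_max_simt_vf: walk a comma-separated list and
   return 32 on the first entry whose name starts with "nvptx".  */
#include <stdio.h>
#include <string.h>

static int
max_simt_vf (const char *names)
{
  for (const char *c = names; c; )
    {
      if (!strncmp (c, "nvptx", strlen ("nvptx")))
        return 32;
      else if ((c = strchr (c, ',')))
        c++;
    }
  return 0;
}

int main (void)
{
  printf ("%d\n", max_simt_vf ("hsa,nvptx-none"));   /* prints 32 */
  return 0;
}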
/* Return maximum possible vectorization factor for the target. */
static int
@@ -4277,16 +4306,18 @@ omp_max_vf (void)
|| global_options_set.x_flag_tree_vectorize)))
return 1;
int vf = 1;
int vs = targetm.vectorize.autovectorize_vector_sizes ();
if (vs)
vf = 1 << floor_log2 (vs);
else
{
vs = 1 << floor_log2 (vs);
return vs;
machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
vf = GET_MODE_NUNITS (vqimode);
}
machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
return GET_MODE_NUNITS (vqimode);
return 1;
int svf = omp_max_simt_vf ();
return MAX (vf, svf);
}
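
A tiny worked example of the MAX at the end of omp_max_vf: if the host's own vectorization factor were 16 while nvptx offloading is configured (so omp_max_simt_vf () returns 32), the function now returns 32. The host width of 16 is an assumed value.

#include <stdio.h>

int main (void)
{
  int vf = 16;    /* assumed host autovectorization factor */
  int svf = 32;   /* omp_max_simt_vf () when nvptx offloading is configured */
  printf ("omp_max_vf -> %d\n", vf > svf ? vf : svf);   /* prints 32 */
  return 0;
}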
/* Helper function of lower_rec_input_clauses, used for #pragma omp simd
@@ -4374,10 +4405,13 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
int pass;
bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
&& gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD);
bool maybe_simt
= is_simd && omp_maybe_offloaded_ctx (ctx) && omp_max_simt_vf () > 1;
int max_vf = 0;
tree lane = NULL_TREE, idx = NULL_TREE;
tree simt_lane = NULL_TREE;
tree ivar = NULL_TREE, lvar = NULL_TREE;
gimple_seq llist[2] = { NULL, NULL };
gimple_seq llist[3] = { };
copyin_seq = NULL;
@@ -5251,6 +5285,16 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimplify_assign (unshare_expr (ivar), x, &llist[0]);
if (maybe_simt)
{
if (!simt_lane)
simt_lane = create_tmp_var (unsigned_type_node);
x = build_call_expr_internal_loc
(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_BFLY,
TREE_TYPE (ivar), 2, ivar, simt_lane);
x = build2 (code, TREE_TYPE (ivar), ivar, x);
gimplify_assign (ivar, x, &llist[2]);
}
x = build2 (code, TREE_TYPE (ref), ref, ivar);
ref = build_outer_var_ref (var, ctx);
gimplify_assign (ref, x, &llist[1]);
@@ -5303,6 +5347,39 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
g = gimple_build_assign (lane, INTEGER_CST,
build_int_cst (unsigned_type_node, 0));
gimple_seq_add_stmt (ilist, g);
/* Emit reductions across SIMT lanes in log_2(simt_vf) steps. */
if (llist[2])
{
tree simt_vf = create_tmp_var (unsigned_type_node);
g = gimple_build_call_internal (IFN_GOMP_SIMT_VF, 0);
gimple_call_set_lhs (g, simt_vf);
gimple_seq_add_stmt (dlist, g);
tree t = build_int_cst (unsigned_type_node, 1);
g = gimple_build_assign (simt_lane, INTEGER_CST, t);
gimple_seq_add_stmt (dlist, g);
t = build_int_cst (unsigned_type_node, 0);
g = gimple_build_assign (idx, INTEGER_CST, t);
gimple_seq_add_stmt (dlist, g);
tree body = create_artificial_label (UNKNOWN_LOCATION);
tree header = create_artificial_label (UNKNOWN_LOCATION);
tree end = create_artificial_label (UNKNOWN_LOCATION);
gimple_seq_add_stmt (dlist, gimple_build_goto (header));
gimple_seq_add_stmt (dlist, gimple_build_label (body));
gimple_seq_add_seq (dlist, llist[2]);
g = gimple_build_assign (simt_lane, LSHIFT_EXPR, simt_lane, integer_one_node);
gimple_seq_add_stmt (dlist, g);
gimple_seq_add_stmt (dlist, gimple_build_label (header));
g = gimple_build_cond (LT_EXPR, simt_lane, simt_vf, body, end);
gimple_seq_add_stmt (dlist, g);
gimple_seq_add_stmt (dlist, gimple_build_label (end));
}
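
A host-side sketch of the loop built above: starting from per-lane partial results, each step exchanges values with the lane at XOR distance simt_lane (GOMP_SIMT_XCHG_BFLY) and applies the reduction operator, doubling simt_lane until it reaches the SIMT width. The width of 32 and the '+' operator are assumptions for illustration.

/* Butterfly all-reduce in log2(VF) steps; after the loop every lane holds
   the full reduction result.  */
#include <stdio.h>
#define VF 32

int main (void)
{
  int val[VF], xchg[VF];
  for (unsigned lane = 0; lane < VF; lane++)
    val[lane] = lane + 1;                       /* per-lane partial results */

  for (unsigned simt_lane = 1; simt_lane < VF; simt_lane <<= 1)
    {
      for (unsigned lane = 0; lane < VF; lane++)
        xchg[lane] = val[lane ^ simt_lane];     /* GOMP_SIMT_XCHG_BFLY */
      for (unsigned lane = 0; lane < VF; lane++)
        val[lane] += xchg[lane];                /* reduction operator */
    }

  printf ("%d\n", val[0]);   /* every lane now holds 1 + 2 + ... + 32 = 528 */
  return 0;
}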
for (int i = 0; i < 2; i++)
if (llist[i])
{
@@ -5389,7 +5466,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
{
tree x, c, label = NULL, orig_clauses = clauses;
bool par_clauses = false;
tree simduid = NULL, lastlane = NULL;
tree simduid = NULL, lastlane = NULL, simtcond = NULL, simtlast = NULL;
/* Early exit if there are no lastprivate or linear clauses. */
for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
@@ -5416,6 +5493,16 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
par_clauses = true;
}
bool maybe_simt = false;
if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
&& gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
{
maybe_simt = omp_maybe_offloaded_ctx (ctx) && omp_max_simt_vf () > 1;
simduid = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMDUID_);
if (simduid)
simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
}
if (predicate)
{
gcond *stmt;
@@ -5427,20 +5514,27 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
arm2 = TREE_OPERAND (predicate, 1);
gimplify_expr (&arm1, stmt_list, NULL, is_gimple_val, fb_rvalue);
gimplify_expr (&arm2, stmt_list, NULL, is_gimple_val, fb_rvalue);
stmt = gimple_build_cond (TREE_CODE (predicate), arm1, arm2,
label_true, label);
if (maybe_simt)
{
c = build2 (TREE_CODE (predicate), boolean_type_node, arm1, arm2);
c = fold_convert (integer_type_node, c);
simtcond = create_tmp_var (integer_type_node);
gimplify_assign (simtcond, c, stmt_list);
gcall *g = gimple_build_call_internal (IFN_GOMP_SIMT_VOTE_ANY,
1, simtcond);
c = create_tmp_var (integer_type_node);
gimple_call_set_lhs (g, c);
gimple_seq_add_stmt (stmt_list, g);
stmt = gimple_build_cond (NE_EXPR, c, integer_zero_node,
label_true, label);
}
else
stmt = gimple_build_cond (TREE_CODE (predicate), arm1, arm2,
label_true, label);
gimple_seq_add_stmt (stmt_list, stmt);
gimple_seq_add_stmt (stmt_list, gimple_build_label (label_true));
}
if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
&& gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
{
simduid = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMDUID_);
if (simduid)
simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
}
for (c = clauses; c ;)
{
tree var, new_var;
@@ -5491,6 +5585,24 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
new_var = build4 (ARRAY_REF, TREE_TYPE (val),
TREE_OPERAND (val, 0), lastlane,
NULL_TREE, NULL_TREE);
if (maybe_simt)
{
gcall *g;
if (simtlast == NULL)
{
simtlast = create_tmp_var (unsigned_type_node);
g = gimple_build_call_internal
(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
gimple_call_set_lhs (g, simtlast);
gimple_seq_add_stmt (stmt_list, g);
}
x = build_call_expr_internal_loc
(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
TREE_TYPE (new_var), 2, new_var, simtlast);
new_var = unshare_expr (new_var);
gimplify_assign (new_var, x, stmt_list);
new_var = unshare_expr (new_var);
}
}
}
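
A host-side sketch of the SIMT lastprivate handling put together here: GOMP_SIMT_LAST_LANE picks the first lane whose "I executed the last iteration" predicate is non-zero, and GOMP_SIMT_XCHG_IDX broadcasts that lane's value to every lane. The width of 32 and lane 7 as the winning lane are illustrative assumptions.

#include <stdio.h>
#define VF 32

int main (void)
{
  int executed_last[VF], value[VF];
  for (unsigned lane = 0; lane < VF; lane++)
    {
      executed_last[lane] = (lane == 7);   /* assume lane 7 ran the last iteration */
      value[lane] = 100 + lane;
    }

  unsigned simtlast = 0;
  for (unsigned lane = 0; lane < VF; lane++)   /* GOMP_SIMT_LAST_LANE */
    if (executed_last[lane])
      {
        simtlast = lane;
        break;
      }

  for (unsigned lane = 0; lane < VF; lane++)   /* GOMP_SIMT_XCHG_IDX */
    value[lane] = value[simtlast];

  printf ("%d\n", value[0]);   /* prints 107 */
  return 0;
}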
@@ -10564,12 +10676,23 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
edge e, ne;
tree *counts = NULL;
int i;
int safelen_int = INT_MAX;
tree safelen = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
OMP_CLAUSE_SAFELEN);
tree simduid = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
OMP_CLAUSE__SIMDUID_);
tree n1, n2;
if (safelen)
{
safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
if (TREE_CODE (safelen) != INTEGER_CST)
safelen_int = 0;
else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
safelen_int = tree_to_uhwi (safelen);
if (safelen_int == 1)
safelen_int = 0;
}
type = TREE_TYPE (fd->loop.v);
entry_bb = region->entry;
cont_bb = region->cont;
@@ -10623,20 +10746,53 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
n2 = OMP_CLAUSE_DECL (innerc);
expand_omp_build_assign (&gsi, fd->loop.v,
fold_convert (type, n1));
}
tree step = fd->loop.step;
bool offloaded = cgraph_node::get (current_function_decl)->offloadable;
for (struct omp_region *rgn = region; !offloaded && rgn; rgn = rgn->outer)
offloaded = rgn->type == GIMPLE_OMP_TARGET;
bool is_simt = offloaded && omp_max_simt_vf () > 1 && safelen_int > 1;
tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
if (is_simt)
{
cfun->curr_properties &= ~PROP_gimple_lomp_dev;
simt_lane = create_tmp_var (unsigned_type_node);
gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
gimple_call_set_lhs (g, simt_lane);
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
fold_convert (TREE_TYPE (step), simt_lane));
n1 = fold_convert (type, n1);
if (POINTER_TYPE_P (type))
n1 = fold_build_pointer_plus (n1, offset);
else
n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
/* Collapsed loops not handled for SIMT yet: limit to one lane only. */
if (fd->collapse > 1)
simt_maxlane = build_one_cst (unsigned_type_node);
else if (safelen_int < omp_max_simt_vf ())
simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
tree vf
= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
unsigned_type_node, 0);
if (simt_maxlane)
vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
vf = fold_convert (TREE_TYPE (step), vf);
step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
}
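
A host-side sketch of the iteration schedule this transform sets up: lane L starts at n1 + L * step and advances by vf * step, so the lanes interleave over the iteration space and every logical iteration runs exactly once. The bounds, unit step, and vf of 32 are assumptions for illustration.

#include <stdio.h>
#define VF 32

int main (void)
{
  int n1 = 0, n2 = 100, step = 1;
  long sum = 0;
  for (unsigned lane = 0; lane < VF; lane++)
    for (int v = n1 + (int) lane * step; v < n2; v += VF * step)
      sum += v;                      /* loop body: each iteration runs once */
  printf ("%ld\n", sum);             /* 0 + 1 + ... + 99 = 4950 */
  return 0;
}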
expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
if (fd->collapse > 1)
{
if (gimple_omp_for_combined_into_p (fd->for_stmt))
{
gsi_prev (&gsi);
expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
gsi_next (&gsi);
}
}
else
{
expand_omp_build_assign (&gsi, fd->loop.v,
fold_convert (type, fd->loop.n1));
if (fd->collapse > 1)
else
for (i = 0; i < fd->collapse; i++)
{
tree itype = TREE_TYPE (fd->loops[i].v);
@@ -10645,7 +10801,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
expand_omp_build_assign (&gsi, fd->loops[i].v, t);
}
}
}
/* Remove the GIMPLE_OMP_FOR statement. */
gsi_remove (&gsi, true);
@@ -10658,9 +10814,9 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
if (POINTER_TYPE_P (type))
t = fold_build_pointer_plus (fd->loop.v, fd->loop.step);
t = fold_build_pointer_plus (fd->loop.v, step);
else
t = fold_build2 (PLUS_EXPR, type, fd->loop.v, fd->loop.step);
t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
expand_omp_build_assign (&gsi, fd->loop.v, t);
if (fd->collapse > 1)
@@ -10734,6 +10890,18 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
gimple_regimplify_operands (cond_stmt, &gsi);
}
/* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
if (is_simt)
{
gsi = gsi_start_bb (l2_bb);
step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
if (POINTER_TYPE_P (type))
t = fold_build_pointer_plus (fd->loop.v, step);
else
t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
expand_omp_build_assign (&gsi, fd->loop.v, t);
}
/* Remove GIMPLE_OMP_RETURN. */
gsi = gsi_last_bb (exit_bb);
gsi_remove (&gsi, true);
@@ -10763,30 +10931,29 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
ne->probability = REG_BR_PROB_BASE / 8;
set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
if (simt_maxlane)
{
cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
NULL_TREE, NULL_TREE);
gsi = gsi_last_bb (entry_bb);
gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
l2_dom_bb = entry_bb;
}
set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
if (!broken_loop)
{
struct loop *loop = alloc_loop ();
loop->header = l1_bb;
loop->latch = cont_bb;
add_loop (loop, l1_bb->loop_father);
if (safelen == NULL_TREE)
loop->safelen = INT_MAX;
else
{
safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
if (TREE_CODE (safelen) != INTEGER_CST)
loop->safelen = 0;
else if (!tree_fits_uhwi_p (safelen)
|| tree_to_uhwi (safelen) > INT_MAX)
loop->safelen = INT_MAX;
else
loop->safelen = tree_to_uhwi (safelen);
if (loop->safelen == 1)
loop->safelen = 0;
}
loop->safelen = safelen_int;
if (simduid)
{
loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
@@ -13951,7 +14118,6 @@ expand_omp (struct omp_region *region)
}
}
/* Helper for build_omp_regions. Scan the dominator tree starting at
block BB. PARENT is the region that contains BB. If SINGLE_TREE is
true, the function ends once a single tree is built (otherwise, whole
@@ -14834,12 +15000,14 @@ static void
lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
tree block;
gimple *stmt = gsi_stmt (*gsi_p);
gimple *stmt = gsi_stmt (*gsi_p), *g;
gomp_ordered *ord_stmt = as_a <gomp_ordered *> (stmt);
gcall *x;
gbind *bind;
bool simd = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
OMP_CLAUSE_SIMD);
bool maybe_simt
= simd && omp_maybe_offloaded_ctx (ctx) && omp_max_simt_vf () > 1;
bool threads = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
OMP_CLAUSE_THREADS);
@@ -14873,11 +15041,56 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
0);
gimple_bind_add_stmt (bind, x);
tree counter = NULL_TREE, test = NULL_TREE, body = NULL_TREE;
if (maybe_simt)
{
counter = create_tmp_var (integer_type_node);
g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
gimple_call_set_lhs (g, counter);
gimple_bind_add_stmt (bind, g);
body = create_artificial_label (UNKNOWN_LOCATION);
test = create_artificial_label (UNKNOWN_LOCATION);
gimple_bind_add_stmt (bind, gimple_build_label (body));
tree simt_pred = create_tmp_var (integer_type_node);
g = gimple_build_call_internal (IFN_GOMP_SIMT_ORDERED_PRED, 1, counter);
gimple_call_set_lhs (g, simt_pred);
gimple_bind_add_stmt (bind, g);
tree t = create_artificial_label (UNKNOWN_LOCATION);
g = gimple_build_cond (EQ_EXPR, simt_pred, integer_zero_node, t, test);
gimple_bind_add_stmt (bind, g);
gimple_bind_add_stmt (bind, gimple_build_label (t));
}
lower_omp (gimple_omp_body_ptr (stmt), ctx);
gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
gimple_bind_add_seq (bind, gimple_omp_body (stmt));
gimple_omp_set_body (stmt, NULL);
if (maybe_simt)
{
gimple_bind_add_stmt (bind, gimple_build_label (test));
g = gimple_build_assign (counter, MINUS_EXPR, counter, integer_one_node);
gimple_bind_add_stmt (bind, g);
tree c = build2 (GE_EXPR, boolean_type_node, counter, integer_zero_node);
tree nonneg = create_tmp_var (integer_type_node);
gimple_seq tseq = NULL;
gimplify_assign (nonneg, fold_convert (integer_type_node, c), &tseq);
gimple_bind_add_seq (bind, tseq);
g = gimple_build_call_internal (IFN_GOMP_SIMT_VOTE_ANY, 1, nonneg);
gimple_call_set_lhs (g, nonneg);
gimple_bind_add_stmt (bind, g);
tree end = create_artificial_label (UNKNOWN_LOCATION);
g = gimple_build_cond (NE_EXPR, nonneg, integer_zero_node, body, end);
gimple_bind_add_stmt (bind, g);
gimple_bind_add_stmt (bind, gimple_build_label (end));
}
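
A host-side sketch of the SIMT "ordered" lowering assembled above: each lane initializes a counter to its lane number, runs the ordered body only when its counter reaches zero, then decrements, and GOMP_SIMT_VOTE_ANY keeps the loop alive while any counter is still non-negative, which serializes the body in lane order. A width of 8 is used here only to keep the output short.

#include <stdio.h>
#define VF 8

int main (void)
{
  int counter[VF];
  for (unsigned lane = 0; lane < VF; lane++)
    counter[lane] = lane;                      /* GOMP_SIMT_LANE */

  int any_nonneg = 1;
  while (any_nonneg)
    {
      for (unsigned lane = 0; lane < VF; lane++)
        if (counter[lane] == 0)                /* GOMP_SIMT_ORDERED_PRED */
          printf ("lane %u runs the ordered body\n", lane);
      any_nonneg = 0;                          /* GOMP_SIMT_VOTE_ANY */
      for (unsigned lane = 0; lane < VF; lane++)
        if (--counter[lane] >= 0)
          any_nonneg = 1;
    }
  return 0;
}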
if (simd)
x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_END, 1,
build_int_cst (NULL_TREE, threads));
@@ -17998,7 +18211,7 @@ const pass_data pass_data_lower_omp =
OPTGROUP_NONE, /* optinfo_flags */
TV_NONE, /* tv_id */
PROP_gimple_any, /* properties_required */
PROP_gimple_lomp, /* properties_provided */
PROP_gimple_lomp | PROP_gimple_lomp_dev, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
@@ -19930,6 +20143,113 @@ make_pass_oacc_device_lower (gcc::context *ctxt)
{
return new pass_oacc_device_lower (ctxt);
}
/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
internal functions on non-SIMT targets, and likewise some SIMD internal
functions on SIMT targets. */
static unsigned int
execute_omp_device_lower ()
{
int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
basic_block bb;
gimple_stmt_iterator gsi;
FOR_EACH_BB_FN (bb, cfun)
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
continue;
tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
switch (gimple_call_internal_fn (stmt))
{
case IFN_GOMP_SIMT_LANE:
case IFN_GOMP_SIMT_LAST_LANE:
rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
break;
case IFN_GOMP_SIMT_VF:
rhs = build_int_cst (type, vf);
break;
case IFN_GOMP_SIMT_ORDERED_PRED:
rhs = vf == 1 ? integer_zero_node : NULL_TREE;
if (rhs || !lhs)
unlink_stmt_vdef (stmt);
break;
case IFN_GOMP_SIMT_VOTE_ANY:
case IFN_GOMP_SIMT_XCHG_BFLY:
case IFN_GOMP_SIMT_XCHG_IDX:
rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
break;
case IFN_GOMP_SIMD_LANE:
case IFN_GOMP_SIMD_LAST_LANE:
rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
break;
case IFN_GOMP_SIMD_VF:
rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
break;
default:
continue;
}
if (lhs && !rhs)
continue;
stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
gsi_replace (&gsi, stmt, false);
}
if (vf != 1)
cfun->has_force_vectorize_loops = false;
return 0;
}
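
A small illustration of the folding policy implemented above for two representative internal functions, comparing a host compilation (vf == 1) with an nvptx accel compilation where targetm.simt.vf () is assumed to return 32.

#include <stdio.h>

static void
show (int vf)
{
  /* GOMP_SIMT_VF always folds to the constant vf.  */
  printf ("vf=%2d: GOMP_SIMT_VF -> %d, ", vf, vf);
  /* GOMP_SIMT_LANE folds to 0 only when vf == 1; otherwise the call is
     kept and expanded to RTL by the backend.  */
  if (vf == 1)
    printf ("GOMP_SIMT_LANE -> 0\n");
  else
    printf ("GOMP_SIMT_LANE kept for RTL expansion\n");
}

int main (void)
{
  show (1);    /* host compilation */
  show (32);   /* nvptx accel compilation (assumed width) */
  return 0;
}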
namespace {
const pass_data pass_data_omp_device_lower =
{
GIMPLE_PASS, /* type */
"ompdevlow", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_NONE, /* tv_id */
PROP_cfg, /* properties_required */
PROP_gimple_lomp_dev, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_update_ssa, /* todo_flags_finish */
};
class pass_omp_device_lower : public gimple_opt_pass
{
public:
pass_omp_device_lower (gcc::context *ctxt)
: gimple_opt_pass (pass_data_omp_device_lower, ctxt)
{}
/* opt_pass methods: */
virtual bool gate (function *ARG_UNUSED (fun))
{
/* FIXME: this should use PROP_gimple_lomp_dev. */
#ifdef ACCEL_COMPILER
return true;
#else
return ENABLE_OFFLOADING && (flag_openmp || in_lto_p);
#endif
}
virtual unsigned int execute (function *)
{
return execute_omp_device_lower ();
}
}; // class pass_omp_device_lower
} // anon namespace
gimple_opt_pass *
make_pass_omp_device_lower (gcc::context *ctxt)
{
return new pass_omp_device_lower (ctxt);
}
/* "omp declare target link" handling pass. */

gcc/passes.def

@@ -183,6 +183,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_fixup_cfg);
NEXT_PASS (pass_lower_eh_dispatch);
NEXT_PASS (pass_oacc_device_lower);
NEXT_PASS (pass_omp_device_lower);
NEXT_PASS (pass_omp_target_link);
NEXT_PASS (pass_all_optimizations);
PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations)

gcc/tree-pass.h

@@ -222,6 +222,7 @@ protected:
of math functions; the
current choices have
been optimized. */
#define PROP_gimple_lomp_dev (1 << 16) /* done omp_device_lower */
#define PROP_trees \
(PROP_gimple_any | PROP_gimple_lcf | PROP_gimple_leh | PROP_gimple_lomp)
@@ -417,6 +418,7 @@ extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_omp_target_link (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_oacc_device_lower (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_omp_device_lower (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_object_sizes (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt);