OpenMP/PTX privatization in SIMD regions

	* config/nvptx/nvptx-protos.h (nvptx_output_simt_enter): Declare.
	(nvptx_output_simt_exit): Declare.
	* config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Use
	cfun->machine->unisimt_location.  Handle NULL unisimt_predicate.
	(init_softstack_frame): Move initialization of crtl->is_leaf to...
	(nvptx_declare_function_name): ...here.  Emit declaration of local
	memory space buffer for omp_simt_enter insn.
	(nvptx_output_unisimt_switch): New.
	(nvptx_output_softstack_switch): New.
	(nvptx_output_simt_enter): New.
	(nvptx_output_simt_exit): New.
	* config/nvptx/nvptx.h (struct machine_function): New fields
	has_simtreg, unisimt_location, simt_stack_size, simt_stack_align.
	* config/nvptx/nvptx.md (UNSPECV_SIMT_ENTER): New unspec.
	(UNSPECV_SIMT_EXIT): Ditto.
	(omp_simt_enter_insn): New insn.
	(omp_simt_enter): New expansion.
	(omp_simt_exit): New insn.
	* config/nvptx/nvptx.opt (msoft-stack-reserve-local): New option.

	* internal-fn.c (expand_GOMP_SIMT_ENTER): New.
	(expand_GOMP_SIMT_ENTER_ALLOC): New.
	(expand_GOMP_SIMT_EXIT): New.
	* internal-fn.def (GOMP_SIMT_ENTER): New internal function.
	(GOMP_SIMT_ENTER_ALLOC): Ditto.
	(GOMP_SIMT_EXIT): Ditto.
	* target-insns.def (omp_simt_enter): New insn.
	(omp_simt_exit): Ditto.
	* omp-low.c (struct omplow_simd_context): New fields simt_eargs,
	simt_dlist.
	(lower_rec_simd_input_clauses): Implement SIMT privatization.
	(lower_rec_input_clauses): Likewise.
	(lower_lastprivate_clauses): Handle SIMT privatization.

	* omp-offload.c: Include langhooks.h, tree-nested.h, stor-layout.h.
	(ompdevlow_adjust_simt_enter): New.
	(find_simtpriv_var_op): New.
	(execute_omp_device_lower): Handle IFN_GOMP_SIMT_ENTER,
	IFN_GOMP_SIMT_ENTER_ALLOC, IFN_GOMP_SIMT_EXIT.

	* tree-inline.h (struct copy_body_data): New field dst_simt_vars.
	* tree-inline.c (expand_call_inline): Handle SIMT privatization.
	(copy_decl_for_dup_finish): Ditto.

	* tree-ssa.c (execute_update_addresses_taken): Handle GOMP_SIMT_ENTER.

From-SVN: r246550
This commit is contained in:
Alexander Monakov 2017-03-28 20:24:57 +03:00 committed by Alexander Monakov
parent cf47453061
commit 0c6b03b515
14 changed files with 573 additions and 63 deletions

View File

@ -1,3 +1,51 @@
2017-03-28 Alexander Monakov <amonakov@ispras.ru>
* config/nvptx/nvptx-protos.h (nvptx_output_simt_enter): Declare.
(nvptx_output_simt_exit): Declare.
* config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Use
cfun->machine->unisimt_location. Handle NULL unisimt_predicate.
(init_softstack_frame): Move initialization of crtl->is_leaf to...
(nvptx_declare_function_name): ...here. Emit declaration of local
memory space buffer for omp_simt_enter insn.
(nvptx_output_unisimt_switch): New.
(nvptx_output_softstack_switch): New.
(nvptx_output_simt_enter): New.
(nvptx_output_simt_exit): New.
* config/nvptx/nvptx.h (struct machine_function): New fields
has_simtreg, unisimt_location, simt_stack_size, simt_stack_align.
* config/nvptx/nvptx.md (UNSPECV_SIMT_ENTER): New unspec.
(UNSPECV_SIMT_EXIT): Ditto.
(omp_simt_enter_insn): New insn.
(omp_simt_enter): New expansion.
(omp_simt_exit): New insn.
* config/nvptx/nvptx.opt (msoft-stack-reserve-local): New option.
* internal-fn.c (expand_GOMP_SIMT_ENTER): New.
(expand_GOMP_SIMT_ENTER_ALLOC): New.
(expand_GOMP_SIMT_EXIT): New.
* internal-fn.def (GOMP_SIMT_ENTER): New internal function.
(GOMP_SIMT_ENTER_ALLOC): Ditto.
(GOMP_SIMT_EXIT): Ditto.
* target-insns.def (omp_simt_enter): New insn.
(omp_simt_exit): Ditto.
* omp-low.c (struct omplow_simd_context): New fields simt_eargs,
simt_dlist.
(lower_rec_simd_input_clauses): Implement SIMT privatization.
(lower_rec_input_clauses): Likewise.
(lower_lastprivate_clauses): Handle SIMT privatization.
* omp-offload.c: Include langhooks.h, tree-nested.h, stor-layout.h.
(ompdevlow_adjust_simt_enter): New.
(find_simtpriv_var_op): New.
(execute_omp_device_lower): Handle IFN_GOMP_SIMT_ENTER,
IFN_GOMP_SIMT_ENTER_ALLOC, IFN_GOMP_SIMT_EXIT.
* tree-inline.h (struct copy_body_data): New field dst_simt_vars.
* tree-inline.c (expand_call_inline): Handle SIMT privatization.
(copy_decl_for_dup_finish): Ditto.
* tree-ssa.c (execute_update_addresses_taken): Handle GOMP_SIMT_ENTER.
2017-03-28 Uros Bizjak <ubizjak@gmail.com>
PR target/53383

View File

@ -53,5 +53,7 @@ extern const char *nvptx_output_mov_insn (rtx, rtx);
extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
extern const char *nvptx_output_return (void);
extern const char *nvptx_output_set_softstack (unsigned);
extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
extern const char *nvptx_output_simt_exit (rtx);
#endif
#endif

View File

@ -1048,11 +1048,6 @@ init_softstack_frame (FILE *file, unsigned alignment, HOST_WIDE_INT size)
fprintf (file, "\t\tsub.u%d %s, %s, " HOST_WIDE_INT_PRINT_DEC ";\n",
bits, reg_stack, reg_frame, size);
/* Usually 'crtl->is_leaf' is computed during register allocator
initialization (which is not done on NVPTX) or for pressure-sensitive
optimizations. Initialize it here, except if already set. */
if (!crtl->is_leaf)
crtl->is_leaf = leaf_function_p ();
if (!crtl->is_leaf)
fprintf (file, "\t\tst.shared.u%d [%s], %s;\n",
bits, reg_sspslot, reg_stack);
@ -1080,24 +1075,29 @@ nvptx_init_axis_predicate (FILE *file, int regno, const char *name)
static void
nvptx_init_unisimt_predicate (FILE *file)
{
cfun->machine->unisimt_location = gen_reg_rtx (Pmode);
int loc = REGNO (cfun->machine->unisimt_location);
int bits = POINTER_SIZE;
int master = REGNO (cfun->machine->unisimt_master);
int pred = REGNO (cfun->machine->unisimt_predicate);
fprintf (file, "\t.reg.u%d %%r%d;\n", bits, loc);
fprintf (file, "\t{\n");
fprintf (file, "\t\t.reg.u32 %%ustmp0;\n");
fprintf (file, "\t\t.reg.u%d %%ustmp1;\n", bits);
fprintf (file, "\t\t.reg.u%d %%ustmp2;\n", bits);
fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.y;\n");
fprintf (file, "\t\tmul%s.u32 %%ustmp1, %%ustmp0, 4;\n",
bits == 64 ? ".wide" : ".lo");
fprintf (file, "\t\tmov.u%d %%ustmp2, __nvptx_uni;\n", bits);
fprintf (file, "\t\tadd.u%d %%ustmp2, %%ustmp2, %%ustmp1;\n", bits);
fprintf (file, "\t\tld.shared.u32 %%r%d, [%%ustmp2];\n", master);
fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.x;\n");
/* Compute 'master lane index' as 'tid.x & __nvptx_uni[tid.y]'. */
fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
/* Compute predicate as 'tid.x == master'. */
fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp0;\n", pred, master);
fprintf (file, "\t\tmov.u%d %%r%d, __nvptx_uni;\n", bits, loc);
fprintf (file, "\t\tadd.u%d %%r%d, %%r%d, %%ustmp1;\n", bits, loc, loc);
if (cfun->machine->unisimt_predicate)
{
int master = REGNO (cfun->machine->unisimt_master);
int pred = REGNO (cfun->machine->unisimt_predicate);
fprintf (file, "\t\tld.shared.u32 %%r%d, [%%r%d];\n", master, loc);
fprintf (file, "\t\tmov.u32 %%ustmp0, %%laneid;\n");
/* Compute 'master lane index' as 'laneid & __nvptx_uni[tid.y]'. */
fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
/* Compute predicate as 'laneid == master'. */
fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp0;\n", pred, master);
}
fprintf (file, "\t}\n");
need_unisimt_decl = true;
}
@ -1224,6 +1224,12 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
fprintf (file, "%s", s.str().c_str());
/* Usually 'crtl->is_leaf' is computed during register allocator
initialization (which is not done on NVPTX) or for pressure-sensitive
optimizations. Initialize it here, except if already set. */
if (!crtl->is_leaf)
crtl->is_leaf = leaf_function_p ();
HOST_WIDE_INT sz = get_frame_size ();
bool need_frameptr = sz || cfun->machine->has_chain;
int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
@ -1240,9 +1246,28 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
init_frame (file, FRAME_POINTER_REGNUM, alignment,
ROUND_UP (sz, GET_MODE_SIZE (DImode)));
}
else if (need_frameptr || cfun->machine->has_varadic || cfun->calls_alloca)
else if (need_frameptr || cfun->machine->has_varadic || cfun->calls_alloca
|| (cfun->machine->has_simtreg && !crtl->is_leaf))
init_softstack_frame (file, alignment, sz);
if (cfun->machine->has_simtreg)
{
unsigned HOST_WIDE_INT &simtsz = cfun->machine->simt_stack_size;
unsigned HOST_WIDE_INT &align = cfun->machine->simt_stack_align;
align = MAX (align, GET_MODE_SIZE (DImode));
if (!crtl->is_leaf || cfun->calls_alloca)
simtsz = HOST_WIDE_INT_M1U;
if (simtsz == HOST_WIDE_INT_M1U)
simtsz = nvptx_softstack_size;
if (cfun->machine->has_softstack)
simtsz += POINTER_SIZE / 8;
simtsz = ROUND_UP (simtsz, GET_MODE_SIZE (DImode));
if (align > GET_MODE_SIZE (DImode))
simtsz += align - GET_MODE_SIZE (DImode);
if (simtsz)
fprintf (file, "\t.local.align 8 .b8 %%simtstack_ar["
HOST_WIDE_INT_PRINT_DEC "];\n", simtsz);
}
/* Declare the pseudos we have as ptx registers. */
int maxregs = max_reg_num ();
for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
@ -1267,10 +1292,112 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
if (cfun->machine->axis_predicate[1])
nvptx_init_axis_predicate (file,
REGNO (cfun->machine->axis_predicate[1]), "x");
if (cfun->machine->unisimt_predicate)
if (cfun->machine->unisimt_predicate
|| (cfun->machine->has_simtreg && !crtl->is_leaf))
nvptx_init_unisimt_predicate (file);
}
/* Output code for switching uniform-simt state. ENTERING indicates whether
we are entering or leaving non-uniform execution region. */
static void
nvptx_output_unisimt_switch (FILE *file, bool entering)
{
if (crtl->is_leaf && !cfun->machine->unisimt_predicate)
return;
fprintf (file, "\t{\n");
fprintf (file, "\t\t.reg.u32 %%ustmp2;\n");
fprintf (file, "\t\tmov.u32 %%ustmp2, %d;\n", entering ? -1 : 0);
if (!crtl->is_leaf)
{
int loc = REGNO (cfun->machine->unisimt_location);
fprintf (file, "\t\tst.shared.u32 [%%r%d], %%ustmp2;\n", loc);
}
if (cfun->machine->unisimt_predicate)
{
int master = REGNO (cfun->machine->unisimt_master);
int pred = REGNO (cfun->machine->unisimt_predicate);
fprintf (file, "\t\tmov.u32 %%ustmp2, %%laneid;\n");
fprintf (file, "\t\tmov.u32 %%r%d, %s;\n",
master, entering ? "%ustmp2" : "0");
fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp2;\n", pred, master);
}
fprintf (file, "\t}\n");
}
/* Output code for allocating per-lane storage and switching soft-stack pointer.
   ENTERING indicates whether we are entering or leaving non-uniform execution.
   PTR is the register pointing to allocated storage, it is assigned to on
   entering and used to restore state on leaving.  SIZE and ALIGN are used only
   on entering (callers pass NULL_RTX for both when leaving).

   PTR must be a pseudo register.  Nothing is emitted for a leaf function
   whose recorded simt_stack_size is zero.  */

static void
nvptx_output_softstack_switch (FILE *file, bool entering,
			       rtx ptr, rtx size, rtx align)
{
  gcc_assert (REG_P (ptr) && !HARD_REGISTER_P (ptr));
  if (crtl->is_leaf && !cfun->machine->simt_stack_size)
    return;
  int bits = POINTER_SIZE, regno = REGNO (ptr);
  fprintf (file, "\t{\n");
  if (entering)
    {
      /* Start from the address simt_stack_size bytes past the beginning of
	 %simtstack_ar and carve the allocation downwards from there.  */
      fprintf (file, "\t\tcvta.local.u%d %%r%d, %%simtstack_ar + "
	       HOST_WIDE_INT_PRINT_DEC ";\n", bits, regno,
	       cfun->machine->simt_stack_size);
      fprintf (file, "\t\tsub.u%d %%r%d, %%r%d, ", bits, regno, regno);
      /* A constant size is rounded up to a multiple of the DImode size; a
	 run-time size is subtracted as is.  */
      if (CONST_INT_P (size))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 ROUND_UP (UINTVAL (size), GET_MODE_SIZE (DImode)));
      else
	output_reg (file, REGNO (size), VOIDmode);
      fputs (";\n", file);
      /* Mask the pointer down to ALIGN when the size is not a constant or
	 when ALIGN exceeds the DImode size.  UINTVAL yields an
	 unsigned HOST_WIDE_INT, so it must be printed with the
	 HOST_WIDE_INT format macro rather than "%d".  */
      if (!CONST_INT_P (size) || UINTVAL (align) > GET_MODE_SIZE (DImode))
	fprintf (file, "\t\tand.u%d %%r%d, %%r%d, -"
		 HOST_WIDE_INT_PRINT_DEC ";\n",
		 bits, regno, regno, UINTVAL (align));
    }
  if (cfun->machine->has_softstack)
    {
      const char *reg_stack = reg_names[STACK_POINTER_REGNUM];
      if (entering)
	{
	  /* Save the current stack pointer in the pointer-sized slot just
	     below PTR and make the stack pointer refer to that slot.  */
	  fprintf (file, "\t\tst.u%d [%%r%d + -%d], %s;\n",
		   bits, regno, bits / 8, reg_stack);
	  fprintf (file, "\t\tsub.u%d %s, %%r%d, %d;\n",
		   bits, reg_stack, regno, bits / 8);
	}
      else
	{
	  /* Reload the stack pointer saved by the matching entry code.  */
	  fprintf (file, "\t\tld.u%d %s, [%%r%d + -%d];\n",
		   bits, reg_stack, regno, bits / 8);
	}
      nvptx_output_set_softstack (REGNO (stack_pointer_rtx));
    }
  fprintf (file, "\t}\n");
}
/* Emit, directly into the assembly output file, the code that begins a
   non-uniform execution region: first the uniform-simt state switch, then
   the per-lane allocation described by SIZE and ALIGN, whose address is
   placed in register DEST.  The returned template is empty because all
   output has already been written.  */

const char *
nvptx_output_simt_enter (rtx dest, rtx size, rtx align)
{
  FILE *out = asm_out_file;

  nvptx_output_unisimt_switch (out, /*entering=*/true);
  nvptx_output_softstack_switch (out, /*entering=*/true, dest, size, align);

  return "";
}
/* Emit, directly into the assembly output file, the code that ends a
   non-uniform execution region.  SRC is the register that received the
   per-lane storage pointer from the omp_simt_enter insn; size and alignment
   are not needed when leaving, so NULL_RTX is passed for both.  The returned
   template is empty because all output has already been written.  */

const char *
nvptx_output_simt_exit (rtx src)
{
  FILE *out = asm_out_file;

  nvptx_output_unisimt_switch (out, /*entering=*/false);
  nvptx_output_softstack_switch (out, /*entering=*/false, src,
				 NULL_RTX, NULL_RTX);

  return "";
}
/* Output instruction that sets soft stack pointer in shared memory to the
value in register given by SRC_REGNO. */

View File

@ -213,12 +213,18 @@ struct GTY(()) machine_function
bool has_varadic; /* Current function has a varadic call. */
bool has_chain; /* Current function has outgoing static chain. */
bool has_softstack; /* Current function has a soft stack frame. */
bool has_simtreg; /* Current function has an OpenMP SIMD region. */
int num_args; /* Number of args of current call. */
int return_mode; /* Return mode of current fn.
(machine_mode not defined yet.) */
rtx axis_predicate[2]; /* Neutering predicates. */
rtx unisimt_master; /* 'Master lane index' for -muniform-simt. */
rtx unisimt_predicate; /* Predicate for -muniform-simt. */
rtx unisimt_location; /* Mask location for -muniform-simt. */
/* The following two fields hold the maximum size resp. alignment required
for per-lane storage in OpenMP SIMD regions. */
unsigned HOST_WIDE_INT simt_stack_size;
unsigned HOST_WIDE_INT simt_stack_align;
};
#endif

View File

@ -63,6 +63,9 @@
UNSPECV_JOIN
UNSPECV_NOUNROLL
UNSPECV_SIMT_ENTER
UNSPECV_SIMT_EXIT
])
(define_attr "subregs_ok" "false,true"
@ -1184,6 +1187,42 @@
;; Patterns for OpenMP SIMD-via-SIMT lowering
;; Begin a non-uniform execution region.  Operand 0 is the register that
;; receives the per-lane storage pointer; operands 1 and 2 are the requested
;; size and alignment.  The assembly text is written by
;; nvptx_output_simt_enter, which is called with the three operands.
(define_insn "omp_simt_enter_insn"
[(set (match_operand 0 "nvptx_register_operand" "=R")
(unspec_volatile [(match_operand 1 "nvptx_nonmemory_operand" "Ri")
(match_operand 2 "nvptx_nonmemory_operand" "Ri")]
UNSPECV_SIMT_ENTER))]
""
{
return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
})
;; Expander for the omp_simt_enter named pattern.  Before emitting
;; omp_simt_enter_insn it records per-function bookkeeping in cfun->machine:
;; simt_stack_size becomes HOST_WIDE_INT_M1U when the size (operand 1) is not
;; a CONST_INT, otherwise the maximum of its old value and operand 1;
;; simt_stack_align becomes the maximum of its old value and operand 2;
;; has_simtreg is set to true.
(define_expand "omp_simt_enter"
[(match_operand 0 "nvptx_register_operand" "=R")
(match_operand 1 "nvptx_nonmemory_operand" "Ri")
(match_operand 2 "const_int_operand" "n")]
""
{
if (!CONST_INT_P (operands[1]))
cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
else
cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
cfun->machine->simt_stack_size);
cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
cfun->machine->simt_stack_align);
cfun->machine->has_simtreg = true;
emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2]));
DONE;
})
;; End a non-uniform execution region.  Operand 0 is the register holding the
;; per-lane storage pointer; the assembly text is written by
;; nvptx_output_simt_exit, which is called with that operand.
(define_insn "omp_simt_exit"
[(unspec_volatile [(match_operand 0 "nvptx_register_operand" "R")]
UNSPECV_SIMT_EXIT)]
""
{
return nvptx_output_simt_exit (operands[0]);
})
;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
(define_insn "omp_simt_lane"
[(set (match_operand:SI 0 "nvptx_register_operand" "")

View File

@ -37,6 +37,10 @@ msoft-stack
Target Report Mask(SOFT_STACK)
Use custom stacks instead of local memory for automatic storage.
msoft-stack-reserve-local
Target Report Joined RejectNegative UInteger Var(nvptx_softstack_size) Init(128)
Specify size of .local memory used for stack when the exact amount is not known.
muniform-simt
Target Report Mask(UNIFORM_SIMT)
Generate code that can keep local state uniform across all lanes.

View File

@ -166,6 +166,48 @@ expand_GOMP_USE_SIMT (internal_fn, gcall *)
gcc_unreachable ();
}
/* RTL expander for IFN_GOMP_SIMT_ENTER.  This should get expanded in
   omp_device_lower pass, so no such call is expected to reach RTL expansion
   and arriving here is treated as an internal error.  */
static void
expand_GOMP_SIMT_ENTER (internal_fn, gcall *)
{
gcc_unreachable ();
}
/* Allocate per-lane storage and begin non-uniform execution region.

   STMT is the IFN_GOMP_SIMT_ENTER_ALLOC call; its two arguments are the
   size and the alignment of the storage.  The resulting pointer is stored
   to the call's LHS, or to a fresh Pmode pseudo if the call has no LHS.
   The target must provide the omp_simt_enter pattern.  */

static void
expand_GOMP_SIMT_ENTER_ALLOC (internal_fn, gcall *stmt)
{
  rtx target;
  tree lhs = gimple_call_lhs (stmt);
  if (lhs)
    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
  else
    target = gen_reg_rtx (Pmode);
  rtx size = expand_normal (gimple_call_arg (stmt, 0));
  rtx align = expand_normal (gimple_call_arg (stmt, 1));
  struct expand_operand ops[3];
  create_output_operand (&ops[0], target, Pmode);
  create_input_operand (&ops[1], size, Pmode);
  create_input_operand (&ops[2], align, Pmode);
  gcc_assert (targetm.have_omp_simt_enter ());
  expand_insn (targetm.code_for_omp_simt_enter, 3, ops);
  /* TARGET is only a suggestion for the output operand: expand_insn may
     have legitimized it into a different rtx.  Copy the result back so
     that the value is not lost in that case.  */
  if (!rtx_equal_p (target, ops[0].value))
    emit_move_insn (target, ops[0].value);
}
/* Expand an IFN_GOMP_SIMT_EXIT call STMT: release the per-lane storage
   designated by the call's only argument and end the non-uniform execution
   region.  The call must not have an LHS, and the target must provide the
   omp_simt_exit pattern.  */

static void
expand_GOMP_SIMT_EXIT (internal_fn, gcall *stmt)
{
  gcc_checking_assert (!gimple_call_lhs (stmt));

  struct expand_operand storage_op;
  rtx storage = expand_normal (gimple_call_arg (stmt, 0));
  create_input_operand (&storage_op, storage, Pmode);

  gcc_assert (targetm.have_omp_simt_exit ());
  expand_insn (targetm.code_for_omp_simt_exit, 1, &storage_op);
}
/* Lane index on SIMT targets: thread index in the warp on NVPTX. On targets
without SIMT execution this should be expanded in omp_device_lower pass. */

View File

@ -142,6 +142,9 @@ DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary)
DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary)
DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)

View File

@ -3457,6 +3457,8 @@ omp_clause_aligned_alignment (tree clause)
struct omplow_simd_context {
tree idx;
tree lane;
vec<tree, va_heap> simt_eargs;
gimple_seq simt_dlist;
int max_vf;
bool is_simt;
};
@ -3492,18 +3494,39 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
if (sctx->max_vf == 1)
return false;
tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
tree avar = create_tmp_var_raw (atype);
if (TREE_ADDRESSABLE (new_var))
TREE_ADDRESSABLE (avar) = 1;
DECL_ATTRIBUTES (avar)
= tree_cons (get_identifier ("omp simd array"), NULL,
DECL_ATTRIBUTES (avar));
gimple_add_tmp_var (avar);
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
NULL_TREE, NULL_TREE);
lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
NULL_TREE, NULL_TREE);
if (sctx->is_simt)
{
if (is_gimple_reg (new_var))
{
ivar = lvar = new_var;
return true;
}
tree type = TREE_TYPE (new_var), ptype = build_pointer_type (type);
ivar = lvar = create_tmp_var (type);
TREE_ADDRESSABLE (ivar) = 1;
DECL_ATTRIBUTES (ivar) = tree_cons (get_identifier ("omp simt private"),
NULL, DECL_ATTRIBUTES (ivar));
sctx->simt_eargs.safe_push (build1 (ADDR_EXPR, ptype, ivar));
tree clobber = build_constructor (type, NULL);
TREE_THIS_VOLATILE (clobber) = 1;
gimple *g = gimple_build_assign (ivar, clobber);
gimple_seq_add_stmt (&sctx->simt_dlist, g);
}
else
{
tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
tree avar = create_tmp_var_raw (atype);
if (TREE_ADDRESSABLE (new_var))
TREE_ADDRESSABLE (avar) = 1;
DECL_ATTRIBUTES (avar)
= tree_cons (get_identifier ("omp simd array"), NULL,
DECL_ATTRIBUTES (avar));
gimple_add_tmp_var (avar);
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
NULL_TREE, NULL_TREE);
lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
NULL_TREE, NULL_TREE);
}
if (DECL_P (new_var))
{
SET_DECL_VALUE_EXPR (new_var, lvar);
@ -3547,8 +3570,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
&& gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD);
omplow_simd_context sctx = omplow_simd_context ();
tree simt_lane = NULL_TREE;
tree ivar = NULL_TREE, lvar = NULL_TREE;
tree simt_lane = NULL_TREE, simtrec = NULL_TREE;
tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE;
gimple_seq llist[3] = { };
copyin_seq = NULL;
@ -3581,6 +3604,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
continue;
}
/* Add a placeholder for simduid. */
if (sctx.is_simt && sctx.max_vf != 1)
sctx.simt_eargs.safe_push (NULL_TREE);
/* Do all the fixed sized types in the first pass, and the variable sized
types in the second pass. This makes sure that the scalar arguments to
the variable sized types are processed before we use them in the
@ -4468,21 +4495,43 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
}
}
if (sctx.lane)
if (sctx.max_vf == 1)
sctx.is_simt = false;
if (sctx.lane || sctx.is_simt)
{
tree uid = create_tmp_var (ptr_type_node, "simduid");
uid = create_tmp_var (ptr_type_node, "simduid");
/* Don't want uninit warnings on simduid, it is always uninitialized,
but we use it not for the value, but for the DECL_UID only. */
TREE_NO_WARNING (uid) = 1;
c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
OMP_CLAUSE__SIMDUID__DECL (c) = uid;
OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
gimple_omp_for_set_clauses (ctx->stmt, c);
}
/* Emit calls denoting privatized variables and initializing a pointer to
structure that holds private variables as fields after ompdevlow pass. */
if (sctx.is_simt)
{
sctx.simt_eargs[0] = uid;
gimple *g
= gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, sctx.simt_eargs);
gimple_call_set_lhs (g, uid);
gimple_seq_add_stmt (ilist, g);
sctx.simt_eargs.release ();
simtrec = create_tmp_var (ptr_type_node, ".omp_simt");
g = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 1, uid);
gimple_call_set_lhs (g, simtrec);
gimple_seq_add_stmt (ilist, g);
}
if (sctx.lane)
{
gimple *g
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
gimple_call_set_lhs (g, sctx.lane);
gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
OMP_CLAUSE__SIMDUID__DECL (c) = uid;
OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
gimple_omp_for_set_clauses (ctx->stmt, c);
g = gimple_build_assign (sctx.lane, INTEGER_CST,
build_int_cst (unsigned_type_node, 0));
gimple_seq_add_stmt (ilist, g);
@ -4545,6 +4594,13 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimple_seq_add_stmt (seq, gimple_build_label (end));
}
}
if (sctx.is_simt)
{
gimple_seq_add_seq (dlist, sctx.simt_dlist);
gimple *g
= gimple_build_call_internal (IFN_GOMP_SIMT_EXIT, 1, simtrec);
gimple_seq_add_stmt (dlist, g);
}
/* The copyin sequence is not to be executed by the main thread, since
that would result in self-copies. Perhaps not visible to scalars,
@ -4715,7 +4771,8 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
{
tree val = DECL_VALUE_EXPR (new_var);
if (TREE_CODE (val) == ARRAY_REF
if (!maybe_simt
&& TREE_CODE (val) == ARRAY_REF
&& VAR_P (TREE_OPERAND (val, 0))
&& lookup_attribute ("omp simd array",
DECL_ATTRIBUTES (TREE_OPERAND (val,
@ -4734,24 +4791,26 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
new_var = build4 (ARRAY_REF, TREE_TYPE (val),
TREE_OPERAND (val, 0), lastlane,
NULL_TREE, NULL_TREE);
if (maybe_simt)
}
else if (maybe_simt
&& VAR_P (val)
&& lookup_attribute ("omp simt private",
DECL_ATTRIBUTES (val)))
{
if (simtlast == NULL)
{
gcall *g;
if (simtlast == NULL)
{
simtlast = create_tmp_var (unsigned_type_node);
g = gimple_build_call_internal
(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
gimple_call_set_lhs (g, simtlast);
gimple_seq_add_stmt (stmt_list, g);
}
x = build_call_expr_internal_loc
(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
TREE_TYPE (new_var), 2, new_var, simtlast);
new_var = unshare_expr (new_var);
gimplify_assign (new_var, x, stmt_list);
new_var = unshare_expr (new_var);
simtlast = create_tmp_var (unsigned_type_node);
gcall *g = gimple_build_call_internal
(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
gimple_call_set_lhs (g, simtlast);
gimple_seq_add_stmt (stmt_list, g);
}
x = build_call_expr_internal_loc
(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
TREE_TYPE (val), 2, val, simtlast);
new_var = unshare_expr (new_var);
gimplify_assign (new_var, x, stmt_list);
new_var = unshare_expr (new_var);
}
}

View File

@ -33,12 +33,15 @@ along with GCC; see the file COPYING3. If not see
#include "diagnostic-core.h"
#include "fold-const.h"
#include "internal-fn.h"
#include "langhooks.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-nested.h"
#include "stor-layout.h"
#include "common/common-target.h"
#include "omp-general.h"
#include "omp-offload.h"
@ -1669,6 +1672,92 @@ make_pass_oacc_device_lower (gcc::context *ctxt)
return new pass_oacc_device_lower (ctxt);
}
/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and neutralize the preceding
GOMP_SIMT_ENTER call identifying the privatized variables, which are
turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
The GOMP_SIMT_ENTER call is replaced by a plain assignment of its first
argument to the simduid, and the GOMP_SIMT_ENTER_ALLOC call is replaced by
a two-argument form carrying the size and alignment of the structure.
Set *REGIMPLIFY to true, except if no privatized variables were seen. */
static void
ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
{
gimple *alloc_stmt = gsi_stmt (*gsi);
tree simtrec = gimple_call_lhs (alloc_stmt);
/* The ALLOC call's argument is the simduid SSA name; its defining statement
must be the matching GOMP_SIMT_ENTER call. */
tree simduid = gimple_call_arg (alloc_stmt, 0);
gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
/* Create an artificial, nameless record type that will hold the privatized
variables, and retype SIMTREC as a pointer to it. */
tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
TREE_ADDRESSABLE (rectype) = 1;
TREE_TYPE (simtrec) = build_pointer_type (rectype);
/* Argument 0 is skipped; each remaining argument is either
null_pointer_node (ignored) or the address of a privatized variable. */
for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
{
tree *argp = gimple_call_arg_ptr (enter_stmt, i);
if (*argp == null_pointer_node)
continue;
gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
&& VAR_P (TREE_OPERAND (*argp, 0)));
tree var = TREE_OPERAND (*argp, 0);
/* Make a field mirroring the variable's name, type, alignment and
volatility. */
tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
DECL_NAME (var), TREE_TYPE (var));
SET_DECL_ALIGN (field, DECL_ALIGN (var));
DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
insert_field_into_struct (rectype, field);
/* Give VAR the value expression (*SIMTREC).FIELD. */
tree t = build_simple_mem_ref (simtrec);
t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
SET_DECL_VALUE_EXPR (var, t);
DECL_HAS_VALUE_EXPR_P (var) = 1;
*regimplify = true;
}
/* All fields are in place: lay the record out and read back its size and
alignment in bytes. */
layout_type (rectype);
tree size = TYPE_SIZE_UNIT (rectype);
tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
/* Replace the one-argument ALLOC call by the (size, align) form, keeping
SIMTREC as its LHS. */
alloc_stmt
= gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
gimple_call_set_lhs (alloc_stmt, simtrec);
gsi_replace (gsi, alloc_stmt, false);
/* Replace the ENTER call by an assignment of its first argument to
SIMDUID. */
gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
gsi_replace (&enter_gsi, enter_stmt, false);
/* If SIMTREC has a single immediate use it must be the GOMP_SIMT_EXIT call;
insert a clobber of *SIMTREC right before it. Otherwise SIMTREC is
expected to have no uses at all. */
use_operand_p use;
gimple *exit_stmt;
if (single_imm_use (simtrec, &use, &exit_stmt))
{
gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
tree clobber = build_constructor (rectype, NULL);
TREE_THIS_VOLATILE (clobber) = 1;
exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
}
else
gcc_checking_assert (has_zero_uses (simtrec));
}
/* Operand callback for walk_gimple_stmt.  Return *TP, and clear
   *WALK_SUBTREES, when *TP is a variable that has a DECL_VALUE_EXPR and
   carries the "omp simt private" attribute; return NULL_TREE otherwise.  */

static tree
find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
{
  tree decl = *tp;

  /* VAR_P is tested first: the remaining checks are only applied to
     variable declarations.  */
  if (!VAR_P (decl) || !DECL_HAS_VALUE_EXPR_P (decl))
    return NULL_TREE;

  if (!lookup_attribute ("omp simt private", DECL_ATTRIBUTES (decl)))
    return NULL_TREE;

  *walk_subtrees = 0;
  return decl;
}
/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
@ -1679,6 +1768,7 @@ static unsigned int
execute_omp_device_lower ()
{
int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
bool regimplify = false;
basic_block bb;
gimple_stmt_iterator gsi;
FOR_EACH_BB_FN (bb, cfun)
@ -1694,6 +1784,20 @@ execute_omp_device_lower ()
case IFN_GOMP_USE_SIMT:
rhs = vf == 1 ? integer_zero_node : integer_one_node;
break;
case IFN_GOMP_SIMT_ENTER:
rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
goto simtreg_enter_exit;
case IFN_GOMP_SIMT_ENTER_ALLOC:
if (vf != 1)
ompdevlow_adjust_simt_enter (&gsi, &regimplify);
rhs = vf == 1 ? null_pointer_node : NULL_TREE;
goto simtreg_enter_exit;
case IFN_GOMP_SIMT_EXIT:
simtreg_enter_exit:
if (vf != 1)
continue;
unlink_stmt_vdef (stmt);
break;
case IFN_GOMP_SIMT_LANE:
case IFN_GOMP_SIMT_LAST_LANE:
rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
@ -1726,6 +1830,16 @@ execute_omp_device_lower ()
stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
gsi_replace (&gsi, stmt, false);
}
if (regimplify)
FOR_EACH_BB_REVERSE_FN (bb, cfun)
for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
{
if (gimple_clobber_p (gsi_stmt (gsi)))
gsi_remove (&gsi, true);
else
gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
}
if (vf != 1)
cfun->has_force_vectorize_loops = false;
return 0;

View File

@ -68,6 +68,8 @@ DEF_TARGET_INSN (oacc_dim_pos, (rtx x0, rtx x1))
DEF_TARGET_INSN (oacc_dim_size, (rtx x0, rtx x1))
DEF_TARGET_INSN (oacc_fork, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (oacc_join, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (omp_simt_enter, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (omp_simt_exit, (rtx x0))
DEF_TARGET_INSN (omp_simt_lane, (rtx x0))
DEF_TARGET_INSN (omp_simt_last_lane, (rtx x0, rtx x1))
DEF_TARGET_INSN (omp_simt_ordered, (rtx x0, rtx x1))

View File

@ -4395,6 +4395,11 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
gcall *call_stmt;
unsigned int i;
unsigned int prop_mask, src_properties;
struct function *dst_cfun;
tree simduid;
use_operand_p use;
gimple *simtenter_stmt = NULL;
vec<tree> *simtvars_save;
/* The gimplifier uses input_location in too many places, such as
internal_get_tmp_var (). */
@ -4598,15 +4603,26 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
id->src_cfun = DECL_STRUCT_FUNCTION (fn);
id->call_stmt = call_stmt;
/* When inlining into an OpenMP SIMD-on-SIMT loop, arrange for new automatic
variables to be added to IFN_GOMP_SIMT_ENTER argument list. */
dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
simtvars_save = id->dst_simt_vars;
if (!(dst_cfun->curr_properties & PROP_gimple_lomp_dev)
&& (simduid = bb->loop_father->simduid) != NULL_TREE
&& (simduid = ssa_default_def (dst_cfun, simduid)) != NULL_TREE
&& single_imm_use (simduid, &use, &simtenter_stmt)
&& is_gimple_call (simtenter_stmt)
&& gimple_call_internal_p (simtenter_stmt, IFN_GOMP_SIMT_ENTER))
vec_alloc (id->dst_simt_vars, 0);
else
id->dst_simt_vars = NULL;
/* If the src function contains an IFN_VA_ARG, then so will the dst
function after inlining. Likewise for IFN_GOMP_USE_SIMT. */
prop_mask = PROP_gimple_lva | PROP_gimple_lomp_dev;
src_properties = id->src_cfun->curr_properties & prop_mask;
if (src_properties != prop_mask)
{
struct function *dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
dst_cfun->curr_properties &= src_properties | ~prop_mask;
}
dst_cfun->curr_properties &= src_properties | ~prop_mask;
gcc_assert (!id->src_cfun->after_inlining);
@ -4740,6 +4756,27 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
if (cfun->gimple_df)
pt_solution_reset (&cfun->gimple_df->escaped);
/* Add new automatic variables to IFN_GOMP_SIMT_ENTER arguments. */
if (id->dst_simt_vars && id->dst_simt_vars->length () > 0)
{
size_t nargs = gimple_call_num_args (simtenter_stmt);
vec<tree> *vars = id->dst_simt_vars;
auto_vec<tree> newargs (nargs + vars->length ());
for (size_t i = 0; i < nargs; i++)
newargs.quick_push (gimple_call_arg (simtenter_stmt, i));
for (tree *pvar = vars->begin (); pvar != vars->end (); pvar++)
{
tree ptrtype = build_pointer_type (TREE_TYPE (*pvar));
newargs.quick_push (build1 (ADDR_EXPR, ptrtype, *pvar));
}
gcall *g = gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, newargs);
gimple_call_set_lhs (g, gimple_call_lhs (simtenter_stmt));
gimple_stmt_iterator gsi = gsi_for_stmt (simtenter_stmt);
gsi_replace (&gsi, g, false);
}
vec_free (id->dst_simt_vars);
id->dst_simt_vars = simtvars_save;
/* Clean up. */
if (id->debug_map)
{
@ -5463,9 +5500,19 @@ copy_decl_for_dup_finish (copy_body_data *id, tree decl, tree copy)
function. */
;
else
/* Ordinary automatic local variables are now in the scope of the
new function. */
DECL_CONTEXT (copy) = id->dst_fn;
{
/* Ordinary automatic local variables are now in the scope of the
new function. */
DECL_CONTEXT (copy) = id->dst_fn;
if (VAR_P (copy) && id->dst_simt_vars && !is_gimple_reg (copy))
{
if (!lookup_attribute ("omp simt private", DECL_ATTRIBUTES (copy)))
DECL_ATTRIBUTES (copy)
= tree_cons (get_identifier ("omp simt private"), NULL,
DECL_ATTRIBUTES (copy));
id->dst_simt_vars->safe_push (copy);
}
}
return copy;
}

View File

@ -145,6 +145,10 @@ struct copy_body_data
equivalents in the function into which it is being inlined. */
hash_map<dependence_hash, unsigned short> *dependence_map;
/* A list of addressable local variables remapped into the caller
when inlining a call within an OpenMP SIMD-on-SIMT loop. */
vec<tree> *dst_simt_vars;
/* Cilk keywords currently need to replace some variables that
ordinary nested functions do not. */
bool remap_var_for_cilk;

View File

@ -1654,7 +1654,8 @@ execute_update_addresses_taken (void)
gimple_ior_addresses_taken (addresses_taken, stmt);
gimple_call_set_arg (stmt, 1, arg);
}
else if (is_asan_mark_p (stmt))
else if (is_asan_mark_p (stmt)
|| gimple_call_internal_p (stmt, IFN_GOMP_SIMT_ENTER))
;
else
gimple_ior_addresses_taken (addresses_taken, stmt);
@ -1940,6 +1941,18 @@ execute_update_addresses_taken (void)
continue;
}
}
else if (gimple_call_internal_p (stmt, IFN_GOMP_SIMT_ENTER))
for (i = 1; i < gimple_call_num_args (stmt); i++)
{
tree *argp = gimple_call_arg_ptr (stmt, i);
if (*argp == null_pointer_node)
continue;
gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
&& VAR_P (TREE_OPERAND (*argp, 0)));
tree var = TREE_OPERAND (*argp, 0);
if (bitmap_bit_p (suitable_for_renaming, DECL_UID (var)))
*argp = null_pointer_node;
}
for (i = 0; i < gimple_call_num_args (stmt); ++i)
{
tree *argp = gimple_call_arg_ptr (stmt, i);