OpenMP/PTX privatization in SIMD regions
* config/nvptx/nvptx-protos.h (nvptx_output_simt_enter): Declare. (nvptx_output_simt_exit): Declare. * config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Use cfun->machine->unisimt_location. Handle NULL unisimt_predicate. (init_softstack_frame): Move initialization of crtl->is_leaf to... (nvptx_declare_function_name): ...here. Emit declaration of local memory space buffer for omp_simt_enter insn. (nvptx_output_unisimt_switch): New. (nvptx_output_softstack_switch): New. (nvptx_output_simt_enter): New. (nvptx_output_simt_exit): New. * config/nvptx/nvptx.h (struct machine_function): New fields has_simtreg, unisimt_location, simt_stack_size, simt_stack_align. * config/nvptx/nvptx.md (UNSPECV_SIMT_ENTER): New unspec. (UNSPECV_SIMT_EXIT): Ditto. (omp_simt_enter_insn): New insn. (omp_simt_enter): New expansion. (omp_simt_exit): New insn. * config/nvptx/nvptx.opt (msoft-stack-reserve-local): New option. * internal-fn.c (expand_GOMP_SIMT_ENTER): New. (expand_GOMP_SIMT_ENTER_ALLOC): New. (expand_GOMP_SIMT_EXIT): New. * internal-fn.def (GOMP_SIMT_ENTER): New internal function. (GOMP_SIMT_ENTER_ALLOC): Ditto. (GOMP_SIMT_EXIT): Ditto. * target-insns.def (omp_simt_enter): New insn. (omp_simt_exit): Ditto. * omp-low.c (struct omplow_simd_context): New fields simt_eargs, simt_dlist. (lower_rec_simd_input_clauses): Implement SIMT privatization. (lower_rec_input_clauses): Likewise. (lower_lastprivate_clauses): Handle SIMT privatization. * omp-offload.c: Include langhooks.h, tree-nested.h, stor-layout.h. (ompdevlow_adjust_simt_enter): New. (find_simtpriv_var_op): New. (execute_omp_device_lower): Handle IFN_GOMP_SIMT_ENTER, IFN_GOMP_SIMT_ENTER_ALLOC, IFN_GOMP_SIMT_EXIT. * tree-inline.h (struct copy_body_data): New field dst_simt_vars. * tree-inline.c (expand_call_inline): Handle SIMT privatization. (copy_decl_for_dup_finish): Ditto. * tree-ssa.c (execute_update_addresses_taken): Handle GOMP_SIMT_ENTER. From-SVN: r246550
This commit is contained in:
parent
cf47453061
commit
0c6b03b515
|
@ -1,3 +1,51 @@
|
|||
2017-03-28 Alexander Monakov <amonakov@ispras.ru>
|
||||
|
||||
* config/nvptx/nvptx-protos.h (nvptx_output_simt_enter): Declare.
|
||||
(nvptx_output_simt_exit): Declare.
|
||||
* config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Use
|
||||
cfun->machine->unisimt_location. Handle NULL unisimt_predicate.
|
||||
(init_softstack_frame): Move initialization of crtl->is_leaf to...
|
||||
(nvptx_declare_function_name): ...here. Emit declaration of local
|
||||
memory space buffer for omp_simt_enter insn.
|
||||
(nvptx_output_unisimt_switch): New.
|
||||
(nvptx_output_softstack_switch): New.
|
||||
(nvptx_output_simt_enter): New.
|
||||
(nvptx_output_simt_exit): New.
|
||||
* config/nvptx/nvptx.h (struct machine_function): New fields
|
||||
has_simtreg, unisimt_location, simt_stack_size, simt_stack_align.
|
||||
* config/nvptx/nvptx.md (UNSPECV_SIMT_ENTER): New unspec.
|
||||
(UNSPECV_SIMT_EXIT): Ditto.
|
||||
(omp_simt_enter_insn): New insn.
|
||||
(omp_simt_enter): New expansion.
|
||||
(omp_simt_exit): New insn.
|
||||
* config/nvptx/nvptx.opt (msoft-stack-reserve-local): New option.
|
||||
|
||||
* internal-fn.c (expand_GOMP_SIMT_ENTER): New.
|
||||
(expand_GOMP_SIMT_ENTER_ALLOC): New.
|
||||
(expand_GOMP_SIMT_EXIT): New.
|
||||
* internal-fn.def (GOMP_SIMT_ENTER): New internal function.
|
||||
(GOMP_SIMT_ENTER_ALLOC): Ditto.
|
||||
(GOMP_SIMT_EXIT): Ditto.
|
||||
* target-insns.def (omp_simt_enter): New insn.
|
||||
(omp_simt_exit): Ditto.
|
||||
* omp-low.c (struct omplow_simd_context): New fields simt_eargs,
|
||||
simt_dlist.
|
||||
(lower_rec_simd_input_clauses): Implement SIMT privatization.
|
||||
(lower_rec_input_clauses): Likewise.
|
||||
(lower_lastprivate_clauses): Handle SIMT privatization.
|
||||
|
||||
* omp-offload.c: Include langhooks.h, tree-nested.h, stor-layout.h.
|
||||
(ompdevlow_adjust_simt_enter): New.
|
||||
(find_simtpriv_var_op): New.
|
||||
(execute_omp_device_lower): Handle IFN_GOMP_SIMT_ENTER,
|
||||
IFN_GOMP_SIMT_ENTER_ALLOC, IFN_GOMP_SIMT_EXIT.
|
||||
|
||||
* tree-inline.h (struct copy_body_data): New field dst_simt_vars.
|
||||
* tree-inline.c (expand_call_inline): Handle SIMT privatization.
|
||||
(copy_decl_for_dup_finish): Ditto.
|
||||
|
||||
* tree-ssa.c (execute_update_addresses_taken): Handle GOMP_SIMT_ENTER.
|
||||
|
||||
2017-03-28 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/53383
|
||||
|
|
|
@ -53,5 +53,7 @@ extern const char *nvptx_output_mov_insn (rtx, rtx);
|
|||
extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
|
||||
extern const char *nvptx_output_return (void);
|
||||
extern const char *nvptx_output_set_softstack (unsigned);
|
||||
extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
|
||||
extern const char *nvptx_output_simt_exit (rtx);
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -1048,11 +1048,6 @@ init_softstack_frame (FILE *file, unsigned alignment, HOST_WIDE_INT size)
|
|||
fprintf (file, "\t\tsub.u%d %s, %s, " HOST_WIDE_INT_PRINT_DEC ";\n",
|
||||
bits, reg_stack, reg_frame, size);
|
||||
|
||||
/* Usually 'crtl->is_leaf' is computed during register allocator
|
||||
initialization (which is not done on NVPTX) or for pressure-sensitive
|
||||
optimizations. Initialize it here, except if already set. */
|
||||
if (!crtl->is_leaf)
|
||||
crtl->is_leaf = leaf_function_p ();
|
||||
if (!crtl->is_leaf)
|
||||
fprintf (file, "\t\tst.shared.u%d [%s], %s;\n",
|
||||
bits, reg_sspslot, reg_stack);
|
||||
|
@ -1080,24 +1075,29 @@ nvptx_init_axis_predicate (FILE *file, int regno, const char *name)
|
|||
static void
|
||||
nvptx_init_unisimt_predicate (FILE *file)
|
||||
{
|
||||
cfun->machine->unisimt_location = gen_reg_rtx (Pmode);
|
||||
int loc = REGNO (cfun->machine->unisimt_location);
|
||||
int bits = POINTER_SIZE;
|
||||
int master = REGNO (cfun->machine->unisimt_master);
|
||||
int pred = REGNO (cfun->machine->unisimt_predicate);
|
||||
fprintf (file, "\t.reg.u%d %%r%d;\n", bits, loc);
|
||||
fprintf (file, "\t{\n");
|
||||
fprintf (file, "\t\t.reg.u32 %%ustmp0;\n");
|
||||
fprintf (file, "\t\t.reg.u%d %%ustmp1;\n", bits);
|
||||
fprintf (file, "\t\t.reg.u%d %%ustmp2;\n", bits);
|
||||
fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.y;\n");
|
||||
fprintf (file, "\t\tmul%s.u32 %%ustmp1, %%ustmp0, 4;\n",
|
||||
bits == 64 ? ".wide" : ".lo");
|
||||
fprintf (file, "\t\tmov.u%d %%ustmp2, __nvptx_uni;\n", bits);
|
||||
fprintf (file, "\t\tadd.u%d %%ustmp2, %%ustmp2, %%ustmp1;\n", bits);
|
||||
fprintf (file, "\t\tld.shared.u32 %%r%d, [%%ustmp2];\n", master);
|
||||
fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.x;\n");
|
||||
/* Compute 'master lane index' as 'tid.x & __nvptx_uni[tid.y]'. */
|
||||
fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
|
||||
/* Compute predicate as 'tid.x == master'. */
|
||||
fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp0;\n", pred, master);
|
||||
fprintf (file, "\t\tmov.u%d %%r%d, __nvptx_uni;\n", bits, loc);
|
||||
fprintf (file, "\t\tadd.u%d %%r%d, %%r%d, %%ustmp1;\n", bits, loc, loc);
|
||||
if (cfun->machine->unisimt_predicate)
|
||||
{
|
||||
int master = REGNO (cfun->machine->unisimt_master);
|
||||
int pred = REGNO (cfun->machine->unisimt_predicate);
|
||||
fprintf (file, "\t\tld.shared.u32 %%r%d, [%%r%d];\n", master, loc);
|
||||
fprintf (file, "\t\tmov.u32 %%ustmp0, %%laneid;\n");
|
||||
/* Compute 'master lane index' as 'laneid & __nvptx_uni[tid.y]'. */
|
||||
fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
|
||||
/* Compute predicate as 'laneid == master'. */
|
||||
fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp0;\n", pred, master);
|
||||
}
|
||||
fprintf (file, "\t}\n");
|
||||
need_unisimt_decl = true;
|
||||
}
|
||||
|
@ -1224,6 +1224,12 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
|
|||
|
||||
fprintf (file, "%s", s.str().c_str());
|
||||
|
||||
/* Usually 'crtl->is_leaf' is computed during register allocator
|
||||
initialization (which is not done on NVPTX) or for pressure-sensitive
|
||||
optimizations. Initialize it here, except if already set. */
|
||||
if (!crtl->is_leaf)
|
||||
crtl->is_leaf = leaf_function_p ();
|
||||
|
||||
HOST_WIDE_INT sz = get_frame_size ();
|
||||
bool need_frameptr = sz || cfun->machine->has_chain;
|
||||
int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
|
||||
|
@ -1240,9 +1246,28 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
|
|||
init_frame (file, FRAME_POINTER_REGNUM, alignment,
|
||||
ROUND_UP (sz, GET_MODE_SIZE (DImode)));
|
||||
}
|
||||
else if (need_frameptr || cfun->machine->has_varadic || cfun->calls_alloca)
|
||||
else if (need_frameptr || cfun->machine->has_varadic || cfun->calls_alloca
|
||||
|| (cfun->machine->has_simtreg && !crtl->is_leaf))
|
||||
init_softstack_frame (file, alignment, sz);
|
||||
|
||||
if (cfun->machine->has_simtreg)
|
||||
{
|
||||
unsigned HOST_WIDE_INT &simtsz = cfun->machine->simt_stack_size;
|
||||
unsigned HOST_WIDE_INT &align = cfun->machine->simt_stack_align;
|
||||
align = MAX (align, GET_MODE_SIZE (DImode));
|
||||
if (!crtl->is_leaf || cfun->calls_alloca)
|
||||
simtsz = HOST_WIDE_INT_M1U;
|
||||
if (simtsz == HOST_WIDE_INT_M1U)
|
||||
simtsz = nvptx_softstack_size;
|
||||
if (cfun->machine->has_softstack)
|
||||
simtsz += POINTER_SIZE / 8;
|
||||
simtsz = ROUND_UP (simtsz, GET_MODE_SIZE (DImode));
|
||||
if (align > GET_MODE_SIZE (DImode))
|
||||
simtsz += align - GET_MODE_SIZE (DImode);
|
||||
if (simtsz)
|
||||
fprintf (file, "\t.local.align 8 .b8 %%simtstack_ar["
|
||||
HOST_WIDE_INT_PRINT_DEC "];\n", simtsz);
|
||||
}
|
||||
/* Declare the pseudos we have as ptx registers. */
|
||||
int maxregs = max_reg_num ();
|
||||
for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
|
||||
|
@ -1267,10 +1292,112 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
|
|||
if (cfun->machine->axis_predicate[1])
|
||||
nvptx_init_axis_predicate (file,
|
||||
REGNO (cfun->machine->axis_predicate[1]), "x");
|
||||
if (cfun->machine->unisimt_predicate)
|
||||
if (cfun->machine->unisimt_predicate
|
||||
|| (cfun->machine->has_simtreg && !crtl->is_leaf))
|
||||
nvptx_init_unisimt_predicate (file);
|
||||
}
|
||||
|
||||
/* Output code for switching uniform-simt state. ENTERING indicates whether
|
||||
we are entering or leaving non-uniform execution region. */
|
||||
|
||||
static void
|
||||
nvptx_output_unisimt_switch (FILE *file, bool entering)
|
||||
{
|
||||
if (crtl->is_leaf && !cfun->machine->unisimt_predicate)
|
||||
return;
|
||||
fprintf (file, "\t{\n");
|
||||
fprintf (file, "\t\t.reg.u32 %%ustmp2;\n");
|
||||
fprintf (file, "\t\tmov.u32 %%ustmp2, %d;\n", entering ? -1 : 0);
|
||||
if (!crtl->is_leaf)
|
||||
{
|
||||
int loc = REGNO (cfun->machine->unisimt_location);
|
||||
fprintf (file, "\t\tst.shared.u32 [%%r%d], %%ustmp2;\n", loc);
|
||||
}
|
||||
if (cfun->machine->unisimt_predicate)
|
||||
{
|
||||
int master = REGNO (cfun->machine->unisimt_master);
|
||||
int pred = REGNO (cfun->machine->unisimt_predicate);
|
||||
fprintf (file, "\t\tmov.u32 %%ustmp2, %%laneid;\n");
|
||||
fprintf (file, "\t\tmov.u32 %%r%d, %s;\n",
|
||||
master, entering ? "%ustmp2" : "0");
|
||||
fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp2;\n", pred, master);
|
||||
}
|
||||
fprintf (file, "\t}\n");
|
||||
}
|
||||
|
||||
/* Output code for allocating per-lane storage and switching soft-stack pointer.
|
||||
ENTERING indicates whether we are entering or leaving non-uniform execution.
|
||||
PTR is the register pointing to allocated storage, it is assigned to on
|
||||
entering and used to restore state on leaving. SIZE and ALIGN are used only
|
||||
on entering. */
|
||||
|
||||
static void
|
||||
nvptx_output_softstack_switch (FILE *file, bool entering,
|
||||
rtx ptr, rtx size, rtx align)
|
||||
{
|
||||
gcc_assert (REG_P (ptr) && !HARD_REGISTER_P (ptr));
|
||||
if (crtl->is_leaf && !cfun->machine->simt_stack_size)
|
||||
return;
|
||||
int bits = POINTER_SIZE, regno = REGNO (ptr);
|
||||
fprintf (file, "\t{\n");
|
||||
if (entering)
|
||||
{
|
||||
fprintf (file, "\t\tcvta.local.u%d %%r%d, %%simtstack_ar + "
|
||||
HOST_WIDE_INT_PRINT_DEC ";\n", bits, regno,
|
||||
cfun->machine->simt_stack_size);
|
||||
fprintf (file, "\t\tsub.u%d %%r%d, %%r%d, ", bits, regno, regno);
|
||||
if (CONST_INT_P (size))
|
||||
fprintf (file, HOST_WIDE_INT_PRINT_DEC,
|
||||
ROUND_UP (UINTVAL (size), GET_MODE_SIZE (DImode)));
|
||||
else
|
||||
output_reg (file, REGNO (size), VOIDmode);
|
||||
fputs (";\n", file);
|
||||
if (!CONST_INT_P (size) || UINTVAL (align) > GET_MODE_SIZE (DImode))
|
||||
fprintf (file, "\t\tand.u%d %%r%d, %%r%d, -%d;\n",
|
||||
bits, regno, regno, UINTVAL (align));
|
||||
}
|
||||
if (cfun->machine->has_softstack)
|
||||
{
|
||||
const char *reg_stack = reg_names[STACK_POINTER_REGNUM];
|
||||
if (entering)
|
||||
{
|
||||
fprintf (file, "\t\tst.u%d [%%r%d + -%d], %s;\n",
|
||||
bits, regno, bits / 8, reg_stack);
|
||||
fprintf (file, "\t\tsub.u%d %s, %%r%d, %d;\n",
|
||||
bits, reg_stack, regno, bits / 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf (file, "\t\tld.u%d %s, [%%r%d + -%d];\n",
|
||||
bits, reg_stack, regno, bits / 8);
|
||||
}
|
||||
nvptx_output_set_softstack (REGNO (stack_pointer_rtx));
|
||||
}
|
||||
fprintf (file, "\t}\n");
|
||||
}
|
||||
|
||||
/* Output code to enter non-uniform execution region. DEST is a register
|
||||
to hold a per-lane allocation given by SIZE and ALIGN. */
|
||||
|
||||
const char *
|
||||
nvptx_output_simt_enter (rtx dest, rtx size, rtx align)
|
||||
{
|
||||
nvptx_output_unisimt_switch (asm_out_file, true);
|
||||
nvptx_output_softstack_switch (asm_out_file, true, dest, size, align);
|
||||
return "";
|
||||
}
|
||||
|
||||
/* Output code to leave non-uniform execution region. SRC is the register
|
||||
holding per-lane storage previously allocated by omp_simt_enter insn. */
|
||||
|
||||
const char *
|
||||
nvptx_output_simt_exit (rtx src)
|
||||
{
|
||||
nvptx_output_unisimt_switch (asm_out_file, false);
|
||||
nvptx_output_softstack_switch (asm_out_file, false, src, NULL_RTX, NULL_RTX);
|
||||
return "";
|
||||
}
|
||||
|
||||
/* Output instruction that sets soft stack pointer in shared memory to the
|
||||
value in register given by SRC_REGNO. */
|
||||
|
||||
|
|
|
@ -213,12 +213,18 @@ struct GTY(()) machine_function
|
|||
bool has_varadic; /* Current function has a varadic call. */
|
||||
bool has_chain; /* Current function has outgoing static chain. */
|
||||
bool has_softstack; /* Current function has a soft stack frame. */
|
||||
bool has_simtreg; /* Current function has an OpenMP SIMD region. */
|
||||
int num_args; /* Number of args of current call. */
|
||||
int return_mode; /* Return mode of current fn.
|
||||
(machine_mode not defined yet.) */
|
||||
rtx axis_predicate[2]; /* Neutering predicates. */
|
||||
rtx unisimt_master; /* 'Master lane index' for -muniform-simt. */
|
||||
rtx unisimt_predicate; /* Predicate for -muniform-simt. */
|
||||
rtx unisimt_location; /* Mask location for -muniform-simt. */
|
||||
/* The following two fields hold the maximum size resp. alignment required
|
||||
for per-lane storage in OpenMP SIMD regions. */
|
||||
unsigned HOST_WIDE_INT simt_stack_size;
|
||||
unsigned HOST_WIDE_INT simt_stack_align;
|
||||
};
|
||||
#endif
|
||||
|
||||
|
|
|
@ -63,6 +63,9 @@
|
|||
UNSPECV_JOIN
|
||||
|
||||
UNSPECV_NOUNROLL
|
||||
|
||||
UNSPECV_SIMT_ENTER
|
||||
UNSPECV_SIMT_EXIT
|
||||
])
|
||||
|
||||
(define_attr "subregs_ok" "false,true"
|
||||
|
@ -1184,6 +1187,42 @@
|
|||
|
||||
;; Patterns for OpenMP SIMD-via-SIMT lowering
|
||||
|
||||
;; Enter a non-uniform execution region.  Operand 0 is the register that
;; receives the result; operands 1 and 2 are forwarded, in that order, as the
;; size and alignment arguments of nvptx_output_simt_enter, which prints the
;; whole sequence itself.
(define_insn "omp_simt_enter_insn"
|
||||
[(set (match_operand 0 "nvptx_register_operand" "=R")
|
||||
(unspec_volatile [(match_operand 1 "nvptx_nonmemory_operand" "Ri")
|
||||
(match_operand 2 "nvptx_nonmemory_operand" "Ri")]
|
||||
UNSPECV_SIMT_ENTER))]
|
||||
""
|
||||
{
|
||||
return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
|
||||
})
|
||||
|
||||
;; Expander for the omp_simt_enter named pattern.  Besides emitting
;; omp_simt_enter_insn, it records in cfun->machine the largest size and
;; alignment requested so far and marks the function as having a SIMT region.
(define_expand "omp_simt_enter"
|
||||
[(match_operand 0 "nvptx_register_operand" "=R")
|
||||
(match_operand 1 "nvptx_nonmemory_operand" "Ri")
|
||||
(match_operand 2 "const_int_operand" "n")]
|
||||
""
|
||||
{
/* A size that is not a compile-time constant is recorded as the
   HOST_WIDE_INT_M1U sentinel; otherwise keep the running maximum.  */
if (!CONST_INT_P (operands[1]))
|
||||
cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
|
||||
else
|
||||
cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
|
||||
cfun->machine->simt_stack_size);
|
||||
/* Operand 2 is always a const_int, so its value can be read directly.  */
cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
|
||||
cfun->machine->simt_stack_align);
|
||||
cfun->machine->has_simtreg = true;
|
||||
emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Leave a non-uniform execution region.  Operand 0 is the register passed
;; to nvptx_output_simt_exit, which prints the whole sequence itself.
(define_insn "omp_simt_exit"
|
||||
[(unspec_volatile [(match_operand 0 "nvptx_register_operand" "R")]
|
||||
UNSPECV_SIMT_EXIT)]
|
||||
""
|
||||
{
|
||||
return nvptx_output_simt_exit (operands[0]);
|
||||
})
|
||||
|
||||
;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
|
||||
(define_insn "omp_simt_lane"
|
||||
[(set (match_operand:SI 0 "nvptx_register_operand" "")
|
||||
|
|
|
@ -37,6 +37,10 @@ msoft-stack
|
|||
Target Report Mask(SOFT_STACK)
|
||||
Use custom stacks instead of local memory for automatic storage.
|
||||
|
||||
msoft-stack-reserve-local
|
||||
Target Report Joined RejectNegative UInteger Var(nvptx_softstack_size) Init(128)
|
||||
Specify size of .local memory used for stack when the exact amount is not known.
|
||||
|
||||
muniform-simt
|
||||
Target Report Mask(UNIFORM_SIMT)
|
||||
Generate code that can keep local state uniform across all lanes.
|
||||
|
|
|
@ -166,6 +166,48 @@ expand_GOMP_USE_SIMT (internal_fn, gcall *)
|
|||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* RTL expander for IFN_GOMP_SIMT_ENTER.  Calls to this internal function
   should get expanded in omp_device_lower pass, so reaching RTL expansion
   with one still present is treated as an internal error.  */
|
||||
|
||||
static void
|
||||
expand_GOMP_SIMT_ENTER (internal_fn, gcall *)
|
||||
{
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Allocate per-lane storage and begin non-uniform execution region. */
|
||||
|
||||
static void
|
||||
expand_GOMP_SIMT_ENTER_ALLOC (internal_fn, gcall *stmt)
|
||||
{
|
||||
rtx target;
|
||||
tree lhs = gimple_call_lhs (stmt);
|
||||
if (lhs)
|
||||
target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
|
||||
else
|
||||
target = gen_reg_rtx (Pmode);
|
||||
rtx size = expand_normal (gimple_call_arg (stmt, 0));
|
||||
rtx align = expand_normal (gimple_call_arg (stmt, 1));
|
||||
struct expand_operand ops[3];
|
||||
create_output_operand (&ops[0], target, Pmode);
|
||||
create_input_operand (&ops[1], size, Pmode);
|
||||
create_input_operand (&ops[2], align, Pmode);
|
||||
gcc_assert (targetm.have_omp_simt_enter ());
|
||||
expand_insn (targetm.code_for_omp_simt_enter, 3, ops);
|
||||
}
|
||||
|
||||
/* Deallocate per-lane storage and leave non-uniform execution region. */
|
||||
|
||||
static void
|
||||
expand_GOMP_SIMT_EXIT (internal_fn, gcall *stmt)
|
||||
{
|
||||
gcc_checking_assert (!gimple_call_lhs (stmt));
|
||||
rtx arg = expand_normal (gimple_call_arg (stmt, 0));
|
||||
struct expand_operand ops[1];
|
||||
create_input_operand (&ops[0], arg, Pmode);
|
||||
gcc_assert (targetm.have_omp_simt_exit ());
|
||||
expand_insn (targetm.code_for_omp_simt_exit, 1, ops);
|
||||
}
|
||||
|
||||
/* Lane index on SIMT targets: thread index in the warp on NVPTX. On targets
|
||||
without SIMT execution this should be expanded in omp_device_lower pass. */
|
||||
|
||||
|
|
|
@ -142,6 +142,9 @@ DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary)
|
|||
DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary)
|
||||
|
||||
DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||
|
|
133
gcc/omp-low.c
133
gcc/omp-low.c
|
@ -3457,6 +3457,8 @@ omp_clause_aligned_alignment (tree clause)
|
|||
/* State carried through lower_rec_input_clauses and
   lower_rec_simd_input_clauses while privatizing variables of one simd
   construct.  */
struct omplow_simd_context {
|
||||
/* Index used for the per-iteration ARRAY_REF into an "omp simd array".  */
tree idx;
|
||||
/* Index used for the ARRAY_REF that becomes the variable's value expr.  */
tree lane;
|
||||
/* Arguments for the IFN_GOMP_SIMT_ENTER call: slot 0 is a placeholder for
   simduid, followed by addresses of "omp simt private" variables.  */
vec<tree, va_heap> simt_eargs;
|
||||
/* Clobbers of the SIMT-private variables, appended to the destructor list
   ahead of the IFN_GOMP_SIMT_EXIT call.  */
gimple_seq simt_dlist;
|
||||
/* Maximum vectorization factor; a value of 1 disables privatization.  */
int max_vf;
|
||||
/* True when privatizing for SIMT execution rather than via simd arrays.  */
bool is_simt;
|
||||
};
|
||||
|
@ -3492,18 +3494,39 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
|
|||
if (sctx->max_vf == 1)
|
||||
return false;
|
||||
|
||||
tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
|
||||
tree avar = create_tmp_var_raw (atype);
|
||||
if (TREE_ADDRESSABLE (new_var))
|
||||
TREE_ADDRESSABLE (avar) = 1;
|
||||
DECL_ATTRIBUTES (avar)
|
||||
= tree_cons (get_identifier ("omp simd array"), NULL,
|
||||
DECL_ATTRIBUTES (avar));
|
||||
gimple_add_tmp_var (avar);
|
||||
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
|
||||
NULL_TREE, NULL_TREE);
|
||||
lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
|
||||
NULL_TREE, NULL_TREE);
|
||||
if (sctx->is_simt)
|
||||
{
|
||||
if (is_gimple_reg (new_var))
|
||||
{
|
||||
ivar = lvar = new_var;
|
||||
return true;
|
||||
}
|
||||
tree type = TREE_TYPE (new_var), ptype = build_pointer_type (type);
|
||||
ivar = lvar = create_tmp_var (type);
|
||||
TREE_ADDRESSABLE (ivar) = 1;
|
||||
DECL_ATTRIBUTES (ivar) = tree_cons (get_identifier ("omp simt private"),
|
||||
NULL, DECL_ATTRIBUTES (ivar));
|
||||
sctx->simt_eargs.safe_push (build1 (ADDR_EXPR, ptype, ivar));
|
||||
tree clobber = build_constructor (type, NULL);
|
||||
TREE_THIS_VOLATILE (clobber) = 1;
|
||||
gimple *g = gimple_build_assign (ivar, clobber);
|
||||
gimple_seq_add_stmt (&sctx->simt_dlist, g);
|
||||
}
|
||||
else
|
||||
{
|
||||
tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
|
||||
tree avar = create_tmp_var_raw (atype);
|
||||
if (TREE_ADDRESSABLE (new_var))
|
||||
TREE_ADDRESSABLE (avar) = 1;
|
||||
DECL_ATTRIBUTES (avar)
|
||||
= tree_cons (get_identifier ("omp simd array"), NULL,
|
||||
DECL_ATTRIBUTES (avar));
|
||||
gimple_add_tmp_var (avar);
|
||||
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
|
||||
NULL_TREE, NULL_TREE);
|
||||
lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
|
||||
NULL_TREE, NULL_TREE);
|
||||
}
|
||||
if (DECL_P (new_var))
|
||||
{
|
||||
SET_DECL_VALUE_EXPR (new_var, lvar);
|
||||
|
@ -3547,8 +3570,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
|
||||
&& gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD);
|
||||
omplow_simd_context sctx = omplow_simd_context ();
|
||||
tree simt_lane = NULL_TREE;
|
||||
tree ivar = NULL_TREE, lvar = NULL_TREE;
|
||||
tree simt_lane = NULL_TREE, simtrec = NULL_TREE;
|
||||
tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE;
|
||||
gimple_seq llist[3] = { };
|
||||
|
||||
copyin_seq = NULL;
|
||||
|
@ -3581,6 +3604,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
continue;
|
||||
}
|
||||
|
||||
/* Add a placeholder for simduid. */
|
||||
if (sctx.is_simt && sctx.max_vf != 1)
|
||||
sctx.simt_eargs.safe_push (NULL_TREE);
|
||||
|
||||
/* Do all the fixed sized types in the first pass, and the variable sized
|
||||
types in the second pass. This makes sure that the scalar arguments to
|
||||
the variable sized types are processed before we use them in the
|
||||
|
@ -4468,21 +4495,43 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
}
|
||||
}
|
||||
|
||||
if (sctx.lane)
|
||||
if (sctx.max_vf == 1)
|
||||
sctx.is_simt = false;
|
||||
|
||||
if (sctx.lane || sctx.is_simt)
|
||||
{
|
||||
tree uid = create_tmp_var (ptr_type_node, "simduid");
|
||||
uid = create_tmp_var (ptr_type_node, "simduid");
|
||||
/* Don't want uninit warnings on simduid, it is always uninitialized,
|
||||
but we use it not for the value, but for the DECL_UID only. */
|
||||
TREE_NO_WARNING (uid) = 1;
|
||||
c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
|
||||
OMP_CLAUSE__SIMDUID__DECL (c) = uid;
|
||||
OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
|
||||
gimple_omp_for_set_clauses (ctx->stmt, c);
|
||||
}
|
||||
/* Emit calls denoting privatized variables and initializing a pointer to
|
||||
structure that holds private variables as fields after ompdevlow pass. */
|
||||
if (sctx.is_simt)
|
||||
{
|
||||
sctx.simt_eargs[0] = uid;
|
||||
gimple *g
|
||||
= gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, sctx.simt_eargs);
|
||||
gimple_call_set_lhs (g, uid);
|
||||
gimple_seq_add_stmt (ilist, g);
|
||||
sctx.simt_eargs.release ();
|
||||
|
||||
simtrec = create_tmp_var (ptr_type_node, ".omp_simt");
|
||||
g = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 1, uid);
|
||||
gimple_call_set_lhs (g, simtrec);
|
||||
gimple_seq_add_stmt (ilist, g);
|
||||
}
|
||||
if (sctx.lane)
|
||||
{
|
||||
gimple *g
|
||||
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
|
||||
gimple_call_set_lhs (g, sctx.lane);
|
||||
gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
|
||||
gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
|
||||
c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
|
||||
OMP_CLAUSE__SIMDUID__DECL (c) = uid;
|
||||
OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
|
||||
gimple_omp_for_set_clauses (ctx->stmt, c);
|
||||
g = gimple_build_assign (sctx.lane, INTEGER_CST,
|
||||
build_int_cst (unsigned_type_node, 0));
|
||||
gimple_seq_add_stmt (ilist, g);
|
||||
|
@ -4545,6 +4594,13 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
|||
gimple_seq_add_stmt (seq, gimple_build_label (end));
|
||||
}
|
||||
}
|
||||
if (sctx.is_simt)
|
||||
{
|
||||
gimple_seq_add_seq (dlist, sctx.simt_dlist);
|
||||
gimple *g
|
||||
= gimple_build_call_internal (IFN_GOMP_SIMT_EXIT, 1, simtrec);
|
||||
gimple_seq_add_stmt (dlist, g);
|
||||
}
|
||||
|
||||
/* The copyin sequence is not to be executed by the main thread, since
|
||||
that would result in self-copies. Perhaps not visible to scalars,
|
||||
|
@ -4715,7 +4771,8 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
|
|||
if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
|
||||
{
|
||||
tree val = DECL_VALUE_EXPR (new_var);
|
||||
if (TREE_CODE (val) == ARRAY_REF
|
||||
if (!maybe_simt
|
||||
&& TREE_CODE (val) == ARRAY_REF
|
||||
&& VAR_P (TREE_OPERAND (val, 0))
|
||||
&& lookup_attribute ("omp simd array",
|
||||
DECL_ATTRIBUTES (TREE_OPERAND (val,
|
||||
|
@ -4734,24 +4791,26 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
|
|||
new_var = build4 (ARRAY_REF, TREE_TYPE (val),
|
||||
TREE_OPERAND (val, 0), lastlane,
|
||||
NULL_TREE, NULL_TREE);
|
||||
if (maybe_simt)
|
||||
}
|
||||
else if (maybe_simt
|
||||
&& VAR_P (val)
|
||||
&& lookup_attribute ("omp simt private",
|
||||
DECL_ATTRIBUTES (val)))
|
||||
{
|
||||
if (simtlast == NULL)
|
||||
{
|
||||
gcall *g;
|
||||
if (simtlast == NULL)
|
||||
{
|
||||
simtlast = create_tmp_var (unsigned_type_node);
|
||||
g = gimple_build_call_internal
|
||||
(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
|
||||
gimple_call_set_lhs (g, simtlast);
|
||||
gimple_seq_add_stmt (stmt_list, g);
|
||||
}
|
||||
x = build_call_expr_internal_loc
|
||||
(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
|
||||
TREE_TYPE (new_var), 2, new_var, simtlast);
|
||||
new_var = unshare_expr (new_var);
|
||||
gimplify_assign (new_var, x, stmt_list);
|
||||
new_var = unshare_expr (new_var);
|
||||
simtlast = create_tmp_var (unsigned_type_node);
|
||||
gcall *g = gimple_build_call_internal
|
||||
(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
|
||||
gimple_call_set_lhs (g, simtlast);
|
||||
gimple_seq_add_stmt (stmt_list, g);
|
||||
}
|
||||
x = build_call_expr_internal_loc
|
||||
(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
|
||||
TREE_TYPE (val), 2, val, simtlast);
|
||||
new_var = unshare_expr (new_var);
|
||||
gimplify_assign (new_var, x, stmt_list);
|
||||
new_var = unshare_expr (new_var);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,12 +33,15 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "diagnostic-core.h"
|
||||
#include "fold-const.h"
|
||||
#include "internal-fn.h"
|
||||
#include "langhooks.h"
|
||||
#include "gimplify.h"
|
||||
#include "gimple-iterator.h"
|
||||
#include "gimplify-me.h"
|
||||
#include "gimple-walk.h"
|
||||
#include "tree-cfg.h"
|
||||
#include "tree-into-ssa.h"
|
||||
#include "tree-nested.h"
|
||||
#include "stor-layout.h"
|
||||
#include "common/common-target.h"
|
||||
#include "omp-general.h"
|
||||
#include "omp-offload.h"
|
||||
|
@ -1669,6 +1672,92 @@ make_pass_oacc_device_lower (gcc::context *ctxt)
|
|||
return new pass_oacc_device_lower (ctxt);
|
||||
}
|
||||
|
||||
|
||||
/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
|
||||
GOMP_SIMT_ENTER call identifying the privatized variables, which are
|
||||
turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
|
||||
Set *REGIMPLIFY to true, except if no privatized variables were seen. */
|
||||
|
||||
static void
|
||||
ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
|
||||
{
|
||||
gimple *alloc_stmt = gsi_stmt (*gsi);
|
||||
tree simtrec = gimple_call_lhs (alloc_stmt);
|
||||
tree simduid = gimple_call_arg (alloc_stmt, 0);
|
||||
gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
|
||||
gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
|
||||
tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
|
||||
TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
|
||||
TREE_ADDRESSABLE (rectype) = 1;
|
||||
TREE_TYPE (simtrec) = build_pointer_type (rectype);
|
||||
for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
|
||||
{
|
||||
tree *argp = gimple_call_arg_ptr (enter_stmt, i);
|
||||
if (*argp == null_pointer_node)
|
||||
continue;
|
||||
gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
|
||||
&& VAR_P (TREE_OPERAND (*argp, 0)));
|
||||
tree var = TREE_OPERAND (*argp, 0);
|
||||
|
||||
tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
|
||||
DECL_NAME (var), TREE_TYPE (var));
|
||||
SET_DECL_ALIGN (field, DECL_ALIGN (var));
|
||||
DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
|
||||
TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
|
||||
|
||||
insert_field_into_struct (rectype, field);
|
||||
|
||||
tree t = build_simple_mem_ref (simtrec);
|
||||
t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
|
||||
TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
|
||||
SET_DECL_VALUE_EXPR (var, t);
|
||||
DECL_HAS_VALUE_EXPR_P (var) = 1;
|
||||
*regimplify = true;
|
||||
}
|
||||
layout_type (rectype);
|
||||
tree size = TYPE_SIZE_UNIT (rectype);
|
||||
tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
|
||||
|
||||
alloc_stmt
|
||||
= gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
|
||||
gimple_call_set_lhs (alloc_stmt, simtrec);
|
||||
gsi_replace (gsi, alloc_stmt, false);
|
||||
gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
|
||||
enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
|
||||
gsi_replace (&enter_gsi, enter_stmt, false);
|
||||
|
||||
use_operand_p use;
|
||||
gimple *exit_stmt;
|
||||
if (single_imm_use (simtrec, &use, &exit_stmt))
|
||||
{
|
||||
gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
|
||||
gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
|
||||
tree clobber = build_constructor (rectype, NULL);
|
||||
TREE_THIS_VOLATILE (clobber) = 1;
|
||||
exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
|
||||
gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
|
||||
}
|
||||
else
|
||||
gcc_checking_assert (has_zero_uses (simtrec));
|
||||
}
|
||||
|
||||
/* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
|
||||
|
||||
static tree
|
||||
find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
|
||||
{
|
||||
tree t = *tp;
|
||||
|
||||
if (VAR_P (t)
|
||||
&& DECL_HAS_VALUE_EXPR_P (t)
|
||||
&& lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
|
||||
{
|
||||
*walk_subtrees = 0;
|
||||
return t;
|
||||
}
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
|
||||
VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
|
||||
LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
|
||||
|
@ -1679,6 +1768,7 @@ static unsigned int
|
|||
execute_omp_device_lower ()
|
||||
{
|
||||
int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
|
||||
bool regimplify = false;
|
||||
basic_block bb;
|
||||
gimple_stmt_iterator gsi;
|
||||
FOR_EACH_BB_FN (bb, cfun)
|
||||
|
@ -1694,6 +1784,20 @@ execute_omp_device_lower ()
|
|||
case IFN_GOMP_USE_SIMT:
|
||||
rhs = vf == 1 ? integer_zero_node : integer_one_node;
|
||||
break;
|
||||
case IFN_GOMP_SIMT_ENTER:
|
||||
rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
|
||||
goto simtreg_enter_exit;
|
||||
case IFN_GOMP_SIMT_ENTER_ALLOC:
|
||||
if (vf != 1)
|
||||
ompdevlow_adjust_simt_enter (&gsi, &regimplify);
|
||||
rhs = vf == 1 ? null_pointer_node : NULL_TREE;
|
||||
goto simtreg_enter_exit;
|
||||
case IFN_GOMP_SIMT_EXIT:
|
||||
simtreg_enter_exit:
|
||||
if (vf != 1)
|
||||
continue;
|
||||
unlink_stmt_vdef (stmt);
|
||||
break;
|
||||
case IFN_GOMP_SIMT_LANE:
|
||||
case IFN_GOMP_SIMT_LAST_LANE:
|
||||
rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
|
||||
|
@ -1726,6 +1830,16 @@ execute_omp_device_lower ()
|
|||
stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
|
||||
gsi_replace (&gsi, stmt, false);
|
||||
}
|
||||
if (regimplify)
|
||||
FOR_EACH_BB_REVERSE_FN (bb, cfun)
|
||||
for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
|
||||
if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
|
||||
{
|
||||
if (gimple_clobber_p (gsi_stmt (gsi)))
|
||||
gsi_remove (&gsi, true);
|
||||
else
|
||||
gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
|
||||
}
|
||||
if (vf != 1)
|
||||
cfun->has_force_vectorize_loops = false;
|
||||
return 0;
|
||||
|
|
|
@ -68,6 +68,8 @@ DEF_TARGET_INSN (oacc_dim_pos, (rtx x0, rtx x1))
|
|||
DEF_TARGET_INSN (oacc_dim_size, (rtx x0, rtx x1))
|
||||
DEF_TARGET_INSN (oacc_fork, (rtx x0, rtx x1, rtx x2))
|
||||
DEF_TARGET_INSN (oacc_join, (rtx x0, rtx x1, rtx x2))
|
||||
DEF_TARGET_INSN (omp_simt_enter, (rtx x0, rtx x1, rtx x2))
|
||||
DEF_TARGET_INSN (omp_simt_exit, (rtx x0))
|
||||
DEF_TARGET_INSN (omp_simt_lane, (rtx x0))
|
||||
DEF_TARGET_INSN (omp_simt_last_lane, (rtx x0, rtx x1))
|
||||
DEF_TARGET_INSN (omp_simt_ordered, (rtx x0, rtx x1))
|
||||
|
|
|
@ -4395,6 +4395,11 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
|
|||
gcall *call_stmt;
|
||||
unsigned int i;
|
||||
unsigned int prop_mask, src_properties;
|
||||
struct function *dst_cfun;
|
||||
tree simduid;
|
||||
use_operand_p use;
|
||||
gimple *simtenter_stmt = NULL;
|
||||
vec<tree> *simtvars_save;
|
||||
|
||||
/* The gimplifier uses input_location in too many places, such as
|
||||
internal_get_tmp_var (). */
|
||||
|
@ -4598,15 +4603,26 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
|
|||
id->src_cfun = DECL_STRUCT_FUNCTION (fn);
|
||||
id->call_stmt = call_stmt;
|
||||
|
||||
/* When inlining into an OpenMP SIMD-on-SIMT loop, arrange for new automatic
|
||||
variables to be added to IFN_GOMP_SIMT_ENTER argument list. */
|
||||
dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
|
||||
simtvars_save = id->dst_simt_vars;
|
||||
if (!(dst_cfun->curr_properties & PROP_gimple_lomp_dev)
|
||||
&& (simduid = bb->loop_father->simduid) != NULL_TREE
|
||||
&& (simduid = ssa_default_def (dst_cfun, simduid)) != NULL_TREE
|
||||
&& single_imm_use (simduid, &use, &simtenter_stmt)
|
||||
&& is_gimple_call (simtenter_stmt)
|
||||
&& gimple_call_internal_p (simtenter_stmt, IFN_GOMP_SIMT_ENTER))
|
||||
vec_alloc (id->dst_simt_vars, 0);
|
||||
else
|
||||
id->dst_simt_vars = NULL;
|
||||
|
||||
/* If the src function contains an IFN_VA_ARG, then so will the dst
|
||||
function after inlining. Likewise for IFN_GOMP_USE_SIMT. */
|
||||
prop_mask = PROP_gimple_lva | PROP_gimple_lomp_dev;
|
||||
src_properties = id->src_cfun->curr_properties & prop_mask;
|
||||
if (src_properties != prop_mask)
|
||||
{
|
||||
struct function *dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
|
||||
dst_cfun->curr_properties &= src_properties | ~prop_mask;
|
||||
}
|
||||
dst_cfun->curr_properties &= src_properties | ~prop_mask;
|
||||
|
||||
gcc_assert (!id->src_cfun->after_inlining);
|
||||
|
||||
|
@ -4740,6 +4756,27 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
|
|||
if (cfun->gimple_df)
|
||||
pt_solution_reset (&cfun->gimple_df->escaped);
|
||||
|
||||
/* Add new automatic variables to IFN_GOMP_SIMT_ENTER arguments. */
|
||||
if (id->dst_simt_vars && id->dst_simt_vars->length () > 0)
|
||||
{
|
||||
size_t nargs = gimple_call_num_args (simtenter_stmt);
|
||||
vec<tree> *vars = id->dst_simt_vars;
|
||||
auto_vec<tree> newargs (nargs + vars->length ());
|
||||
for (size_t i = 0; i < nargs; i++)
|
||||
newargs.quick_push (gimple_call_arg (simtenter_stmt, i));
|
||||
for (tree *pvar = vars->begin (); pvar != vars->end (); pvar++)
|
||||
{
|
||||
tree ptrtype = build_pointer_type (TREE_TYPE (*pvar));
|
||||
newargs.quick_push (build1 (ADDR_EXPR, ptrtype, *pvar));
|
||||
}
|
||||
gcall *g = gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, newargs);
|
||||
gimple_call_set_lhs (g, gimple_call_lhs (simtenter_stmt));
|
||||
gimple_stmt_iterator gsi = gsi_for_stmt (simtenter_stmt);
|
||||
gsi_replace (&gsi, g, false);
|
||||
}
|
||||
vec_free (id->dst_simt_vars);
|
||||
id->dst_simt_vars = simtvars_save;
|
||||
|
||||
/* Clean up. */
|
||||
if (id->debug_map)
|
||||
{
|
||||
|
@ -5463,9 +5500,19 @@ copy_decl_for_dup_finish (copy_body_data *id, tree decl, tree copy)
|
|||
function. */
|
||||
;
|
||||
else
|
||||
/* Ordinary automatic local variables are now in the scope of the
|
||||
new function. */
|
||||
DECL_CONTEXT (copy) = id->dst_fn;
|
||||
{
|
||||
/* Ordinary automatic local variables are now in the scope of the
|
||||
new function. */
|
||||
DECL_CONTEXT (copy) = id->dst_fn;
|
||||
if (VAR_P (copy) && id->dst_simt_vars && !is_gimple_reg (copy))
|
||||
{
|
||||
if (!lookup_attribute ("omp simt private", DECL_ATTRIBUTES (copy)))
|
||||
DECL_ATTRIBUTES (copy)
|
||||
= tree_cons (get_identifier ("omp simt private"), NULL,
|
||||
DECL_ATTRIBUTES (copy));
|
||||
id->dst_simt_vars->safe_push (copy);
|
||||
}
|
||||
}
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
|
|
@ -145,6 +145,10 @@ struct copy_body_data
|
|||
equivalents in the function into which it is being inlined. */
|
||||
hash_map<dependence_hash, unsigned short> *dependence_map;
|
||||
|
||||
/* A list of addressable local variables remapped into the caller
|
||||
when inlining a call within an OpenMP SIMD-on-SIMT loop. */
|
||||
vec<tree> *dst_simt_vars;
|
||||
|
||||
/* Cilk keywords currently need to replace some variables that
|
||||
ordinary nested functions do not. */
|
||||
bool remap_var_for_cilk;
|
||||
|
|
|
@ -1654,7 +1654,8 @@ execute_update_addresses_taken (void)
|
|||
gimple_ior_addresses_taken (addresses_taken, stmt);
|
||||
gimple_call_set_arg (stmt, 1, arg);
|
||||
}
|
||||
else if (is_asan_mark_p (stmt))
|
||||
else if (is_asan_mark_p (stmt)
|
||||
|| gimple_call_internal_p (stmt, IFN_GOMP_SIMT_ENTER))
|
||||
;
|
||||
else
|
||||
gimple_ior_addresses_taken (addresses_taken, stmt);
|
||||
|
@ -1940,6 +1941,18 @@ execute_update_addresses_taken (void)
|
|||
continue;
|
||||
}
|
||||
}
|
||||
else if (gimple_call_internal_p (stmt, IFN_GOMP_SIMT_ENTER))
|
||||
for (i = 1; i < gimple_call_num_args (stmt); i++)
|
||||
{
|
||||
tree *argp = gimple_call_arg_ptr (stmt, i);
|
||||
if (*argp == null_pointer_node)
|
||||
continue;
|
||||
gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
|
||||
&& VAR_P (TREE_OPERAND (*argp, 0)));
|
||||
tree var = TREE_OPERAND (*argp, 0);
|
||||
if (bitmap_bit_p (suitable_for_renaming, DECL_UID (var)))
|
||||
*argp = null_pointer_node;
|
||||
}
|
||||
for (i = 0; i < gimple_call_num_args (stmt); ++i)
|
||||
{
|
||||
tree *argp = gimple_call_arg_ptr (stmt, i);
|
||||
|
|
Loading…
Reference in New Issue