OpenMP/PTX privatization in SIMD regions
* config/nvptx/nvptx-protos.h (nvptx_output_simt_enter): Declare. (nvptx_output_simt_exit): Declare. * config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Use cfun->machine->unisimt_location. Handle NULL unisimt_predicate. (init_softstack_frame): Move initialization of crtl->is_leaf to... (nvptx_declare_function_name): ...here. Emit declaration of local memory space buffer for omp_simt_enter insn. (nvptx_output_unisimt_switch): New. (nvptx_output_softstack_switch): New. (nvptx_output_simt_enter): New. (nvptx_output_simt_exit): New. * config/nvptx/nvptx.h (struct machine_function): New fields has_simtreg, unisimt_location, simt_stack_size, simt_stack_align. * config/nvptx/nvptx.md (UNSPECV_SIMT_ENTER): New unspec. (UNSPECV_SIMT_EXIT): Ditto. (omp_simt_enter_insn): New insn. (omp_simt_enter): New expansion. (omp_simt_exit): New insn. * config/nvptx/nvptx.opt (msoft-stack-reserve-local): New option. * internal-fn.c (expand_GOMP_SIMT_ENTER): New. (expand_GOMP_SIMT_ENTER_ALLOC): New. (expand_GOMP_SIMT_EXIT): New. * internal-fn.def (GOMP_SIMT_ENTER): New internal function. (GOMP_SIMT_ENTER_ALLOC): Ditto. (GOMP_SIMT_EXIT): Ditto. * target-insns.def (omp_simt_enter): New insn. (omp_simt_exit): Ditto. * omp-low.c (struct omplow_simd_context): New fields simt_eargs, simt_dlist. (lower_rec_simd_input_clauses): Implement SIMT privatization. (lower_rec_input_clauses): Likewise. (lower_lastprivate_clauses): Handle SIMT privatization. * omp-offload.c: Include langhooks.h, tree-nested.h, stor-layout.h. (ompdevlow_adjust_simt_enter): New. (find_simtpriv_var_op): New. (execute_omp_device_lower): Handle IFN_GOMP_SIMT_ENTER, IFN_GOMP_SIMT_ENTER_ALLOC, IFN_GOMP_SIMT_EXIT. * tree-inline.h (struct copy_body_data): New field dst_simt_vars. * tree-inline.c (expand_call_inline): Handle SIMT privatization. (copy_decl_for_dup_finish): Ditto. * tree-ssa.c (execute_update_addresses_taken): Handle GOMP_SIMT_ENTER. From-SVN: r246550
This commit is contained in:
parent
cf47453061
commit
0c6b03b515
|
@ -1,3 +1,51 @@
|
||||||
|
2017-03-28 Alexander Monakov <amonakov@ispras.ru>
|
||||||
|
|
||||||
|
* config/nvptx/nvptx-protos.h (nvptx_output_simt_enter): Declare.
|
||||||
|
(nvptx_output_simt_exit): Declare.
|
||||||
|
* config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Use
|
||||||
|
cfun->machine->unisimt_location. Handle NULL unisimt_predicate.
|
||||||
|
(init_softstack_frame): Move initialization of crtl->is_leaf to...
|
||||||
|
(nvptx_declare_function_name): ...here. Emit declaration of local
|
||||||
|
memory space buffer for omp_simt_enter insn.
|
||||||
|
(nvptx_output_unisimt_switch): New.
|
||||||
|
(nvptx_output_softstack_switch): New.
|
||||||
|
(nvptx_output_simt_enter): New.
|
||||||
|
(nvptx_output_simt_exit): New.
|
||||||
|
* config/nvptx/nvptx.h (struct machine_function): New fields
|
||||||
|
has_simtreg, unisimt_location, simt_stack_size, simt_stack_align.
|
||||||
|
* config/nvptx/nvptx.md (UNSPECV_SIMT_ENTER): New unspec.
|
||||||
|
(UNSPECV_SIMT_EXIT): Ditto.
|
||||||
|
(omp_simt_enter_insn): New insn.
|
||||||
|
(omp_simt_enter): New expansion.
|
||||||
|
(omp_simt_exit): New insn.
|
||||||
|
* config/nvptx/nvptx.opt (msoft-stack-reserve-local): New option.
|
||||||
|
|
||||||
|
* internal-fn.c (expand_GOMP_SIMT_ENTER): New.
|
||||||
|
(expand_GOMP_SIMT_ENTER_ALLOC): New.
|
||||||
|
(expand_GOMP_SIMT_EXIT): New.
|
||||||
|
* internal-fn.def (GOMP_SIMT_ENTER): New internal function.
|
||||||
|
(GOMP_SIMT_ENTER_ALLOC): Ditto.
|
||||||
|
(GOMP_SIMT_EXIT): Ditto.
|
||||||
|
* target-insns.def (omp_simt_enter): New insn.
|
||||||
|
(omp_simt_exit): Ditto.
|
||||||
|
* omp-low.c (struct omplow_simd_context): New fields simt_eargs,
|
||||||
|
simt_dlist.
|
||||||
|
(lower_rec_simd_input_clauses): Implement SIMT privatization.
|
||||||
|
(lower_rec_input_clauses): Likewise.
|
||||||
|
(lower_lastprivate_clauses): Handle SIMT privatization.
|
||||||
|
|
||||||
|
* omp-offload.c: Include langhooks.h, tree-nested.h, stor-layout.h.
|
||||||
|
(ompdevlow_adjust_simt_enter): New.
|
||||||
|
(find_simtpriv_var_op): New.
|
||||||
|
(execute_omp_device_lower): Handle IFN_GOMP_SIMT_ENTER,
|
||||||
|
IFN_GOMP_SIMT_ENTER_ALLOC, IFN_GOMP_SIMT_EXIT.
|
||||||
|
|
||||||
|
* tree-inline.h (struct copy_body_data): New field dst_simt_vars.
|
||||||
|
* tree-inline.c (expand_call_inline): Handle SIMT privatization.
|
||||||
|
(copy_decl_for_dup_finish): Ditto.
|
||||||
|
|
||||||
|
* tree-ssa.c (execute_update_addresses_taken): Handle GOMP_SIMT_ENTER.
|
||||||
|
|
||||||
2017-03-28 Uros Bizjak <ubizjak@gmail.com>
|
2017-03-28 Uros Bizjak <ubizjak@gmail.com>
|
||||||
|
|
||||||
PR target/53383
|
PR target/53383
|
||||||
|
|
|
@ -53,5 +53,7 @@ extern const char *nvptx_output_mov_insn (rtx, rtx);
|
||||||
extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
|
extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
|
||||||
extern const char *nvptx_output_return (void);
|
extern const char *nvptx_output_return (void);
|
||||||
extern const char *nvptx_output_set_softstack (unsigned);
|
extern const char *nvptx_output_set_softstack (unsigned);
|
||||||
|
extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
|
||||||
|
extern const char *nvptx_output_simt_exit (rtx);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1048,11 +1048,6 @@ init_softstack_frame (FILE *file, unsigned alignment, HOST_WIDE_INT size)
|
||||||
fprintf (file, "\t\tsub.u%d %s, %s, " HOST_WIDE_INT_PRINT_DEC ";\n",
|
fprintf (file, "\t\tsub.u%d %s, %s, " HOST_WIDE_INT_PRINT_DEC ";\n",
|
||||||
bits, reg_stack, reg_frame, size);
|
bits, reg_stack, reg_frame, size);
|
||||||
|
|
||||||
/* Usually 'crtl->is_leaf' is computed during register allocator
|
|
||||||
initialization (which is not done on NVPTX) or for pressure-sensitive
|
|
||||||
optimizations. Initialize it here, except if already set. */
|
|
||||||
if (!crtl->is_leaf)
|
|
||||||
crtl->is_leaf = leaf_function_p ();
|
|
||||||
if (!crtl->is_leaf)
|
if (!crtl->is_leaf)
|
||||||
fprintf (file, "\t\tst.shared.u%d [%s], %s;\n",
|
fprintf (file, "\t\tst.shared.u%d [%s], %s;\n",
|
||||||
bits, reg_sspslot, reg_stack);
|
bits, reg_sspslot, reg_stack);
|
||||||
|
@ -1080,24 +1075,29 @@ nvptx_init_axis_predicate (FILE *file, int regno, const char *name)
|
||||||
static void
|
static void
|
||||||
nvptx_init_unisimt_predicate (FILE *file)
|
nvptx_init_unisimt_predicate (FILE *file)
|
||||||
{
|
{
|
||||||
|
cfun->machine->unisimt_location = gen_reg_rtx (Pmode);
|
||||||
|
int loc = REGNO (cfun->machine->unisimt_location);
|
||||||
int bits = POINTER_SIZE;
|
int bits = POINTER_SIZE;
|
||||||
int master = REGNO (cfun->machine->unisimt_master);
|
fprintf (file, "\t.reg.u%d %%r%d;\n", bits, loc);
|
||||||
int pred = REGNO (cfun->machine->unisimt_predicate);
|
|
||||||
fprintf (file, "\t{\n");
|
fprintf (file, "\t{\n");
|
||||||
fprintf (file, "\t\t.reg.u32 %%ustmp0;\n");
|
fprintf (file, "\t\t.reg.u32 %%ustmp0;\n");
|
||||||
fprintf (file, "\t\t.reg.u%d %%ustmp1;\n", bits);
|
fprintf (file, "\t\t.reg.u%d %%ustmp1;\n", bits);
|
||||||
fprintf (file, "\t\t.reg.u%d %%ustmp2;\n", bits);
|
|
||||||
fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.y;\n");
|
fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.y;\n");
|
||||||
fprintf (file, "\t\tmul%s.u32 %%ustmp1, %%ustmp0, 4;\n",
|
fprintf (file, "\t\tmul%s.u32 %%ustmp1, %%ustmp0, 4;\n",
|
||||||
bits == 64 ? ".wide" : ".lo");
|
bits == 64 ? ".wide" : ".lo");
|
||||||
fprintf (file, "\t\tmov.u%d %%ustmp2, __nvptx_uni;\n", bits);
|
fprintf (file, "\t\tmov.u%d %%r%d, __nvptx_uni;\n", bits, loc);
|
||||||
fprintf (file, "\t\tadd.u%d %%ustmp2, %%ustmp2, %%ustmp1;\n", bits);
|
fprintf (file, "\t\tadd.u%d %%r%d, %%r%d, %%ustmp1;\n", bits, loc, loc);
|
||||||
fprintf (file, "\t\tld.shared.u32 %%r%d, [%%ustmp2];\n", master);
|
if (cfun->machine->unisimt_predicate)
|
||||||
fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.x;\n");
|
{
|
||||||
/* Compute 'master lane index' as 'tid.x & __nvptx_uni[tid.y]'. */
|
int master = REGNO (cfun->machine->unisimt_master);
|
||||||
|
int pred = REGNO (cfun->machine->unisimt_predicate);
|
||||||
|
fprintf (file, "\t\tld.shared.u32 %%r%d, [%%r%d];\n", master, loc);
|
||||||
|
fprintf (file, "\t\tmov.u32 %%ustmp0, %%laneid;\n");
|
||||||
|
/* Compute 'master lane index' as 'laneid & __nvptx_uni[tid.y]'. */
|
||||||
fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
|
fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
|
||||||
/* Compute predicate as 'tid.x == master'. */
|
/* Compute predicate as 'laneid == master'. */
|
||||||
fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp0;\n", pred, master);
|
fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp0;\n", pred, master);
|
||||||
|
}
|
||||||
fprintf (file, "\t}\n");
|
fprintf (file, "\t}\n");
|
||||||
need_unisimt_decl = true;
|
need_unisimt_decl = true;
|
||||||
}
|
}
|
||||||
|
@ -1224,6 +1224,12 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
|
||||||
|
|
||||||
fprintf (file, "%s", s.str().c_str());
|
fprintf (file, "%s", s.str().c_str());
|
||||||
|
|
||||||
|
/* Usually 'crtl->is_leaf' is computed during register allocator
|
||||||
|
initialization (which is not done on NVPTX) or for pressure-sensitive
|
||||||
|
optimizations. Initialize it here, except if already set. */
|
||||||
|
if (!crtl->is_leaf)
|
||||||
|
crtl->is_leaf = leaf_function_p ();
|
||||||
|
|
||||||
HOST_WIDE_INT sz = get_frame_size ();
|
HOST_WIDE_INT sz = get_frame_size ();
|
||||||
bool need_frameptr = sz || cfun->machine->has_chain;
|
bool need_frameptr = sz || cfun->machine->has_chain;
|
||||||
int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
|
int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
|
||||||
|
@ -1240,9 +1246,28 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
|
||||||
init_frame (file, FRAME_POINTER_REGNUM, alignment,
|
init_frame (file, FRAME_POINTER_REGNUM, alignment,
|
||||||
ROUND_UP (sz, GET_MODE_SIZE (DImode)));
|
ROUND_UP (sz, GET_MODE_SIZE (DImode)));
|
||||||
}
|
}
|
||||||
else if (need_frameptr || cfun->machine->has_varadic || cfun->calls_alloca)
|
else if (need_frameptr || cfun->machine->has_varadic || cfun->calls_alloca
|
||||||
|
|| (cfun->machine->has_simtreg && !crtl->is_leaf))
|
||||||
init_softstack_frame (file, alignment, sz);
|
init_softstack_frame (file, alignment, sz);
|
||||||
|
|
||||||
|
if (cfun->machine->has_simtreg)
|
||||||
|
{
|
||||||
|
unsigned HOST_WIDE_INT &simtsz = cfun->machine->simt_stack_size;
|
||||||
|
unsigned HOST_WIDE_INT &align = cfun->machine->simt_stack_align;
|
||||||
|
align = MAX (align, GET_MODE_SIZE (DImode));
|
||||||
|
if (!crtl->is_leaf || cfun->calls_alloca)
|
||||||
|
simtsz = HOST_WIDE_INT_M1U;
|
||||||
|
if (simtsz == HOST_WIDE_INT_M1U)
|
||||||
|
simtsz = nvptx_softstack_size;
|
||||||
|
if (cfun->machine->has_softstack)
|
||||||
|
simtsz += POINTER_SIZE / 8;
|
||||||
|
simtsz = ROUND_UP (simtsz, GET_MODE_SIZE (DImode));
|
||||||
|
if (align > GET_MODE_SIZE (DImode))
|
||||||
|
simtsz += align - GET_MODE_SIZE (DImode);
|
||||||
|
if (simtsz)
|
||||||
|
fprintf (file, "\t.local.align 8 .b8 %%simtstack_ar["
|
||||||
|
HOST_WIDE_INT_PRINT_DEC "];\n", simtsz);
|
||||||
|
}
|
||||||
/* Declare the pseudos we have as ptx registers. */
|
/* Declare the pseudos we have as ptx registers. */
|
||||||
int maxregs = max_reg_num ();
|
int maxregs = max_reg_num ();
|
||||||
for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
|
for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
|
||||||
|
@ -1267,10 +1292,112 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
|
||||||
if (cfun->machine->axis_predicate[1])
|
if (cfun->machine->axis_predicate[1])
|
||||||
nvptx_init_axis_predicate (file,
|
nvptx_init_axis_predicate (file,
|
||||||
REGNO (cfun->machine->axis_predicate[1]), "x");
|
REGNO (cfun->machine->axis_predicate[1]), "x");
|
||||||
if (cfun->machine->unisimt_predicate)
|
if (cfun->machine->unisimt_predicate
|
||||||
|
|| (cfun->machine->has_simtreg && !crtl->is_leaf))
|
||||||
nvptx_init_unisimt_predicate (file);
|
nvptx_init_unisimt_predicate (file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Output code for switching uniform-simt state. ENTERING indicates whether
|
||||||
|
we are entering or leaving non-uniform execution region. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
nvptx_output_unisimt_switch (FILE *file, bool entering)
|
||||||
|
{
|
||||||
|
if (crtl->is_leaf && !cfun->machine->unisimt_predicate)
|
||||||
|
return;
|
||||||
|
fprintf (file, "\t{\n");
|
||||||
|
fprintf (file, "\t\t.reg.u32 %%ustmp2;\n");
|
||||||
|
fprintf (file, "\t\tmov.u32 %%ustmp2, %d;\n", entering ? -1 : 0);
|
||||||
|
if (!crtl->is_leaf)
|
||||||
|
{
|
||||||
|
int loc = REGNO (cfun->machine->unisimt_location);
|
||||||
|
fprintf (file, "\t\tst.shared.u32 [%%r%d], %%ustmp2;\n", loc);
|
||||||
|
}
|
||||||
|
if (cfun->machine->unisimt_predicate)
|
||||||
|
{
|
||||||
|
int master = REGNO (cfun->machine->unisimt_master);
|
||||||
|
int pred = REGNO (cfun->machine->unisimt_predicate);
|
||||||
|
fprintf (file, "\t\tmov.u32 %%ustmp2, %%laneid;\n");
|
||||||
|
fprintf (file, "\t\tmov.u32 %%r%d, %s;\n",
|
||||||
|
master, entering ? "%ustmp2" : "0");
|
||||||
|
fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp2;\n", pred, master);
|
||||||
|
}
|
||||||
|
fprintf (file, "\t}\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Output code for allocating per-lane storage and switching soft-stack pointer.
|
||||||
|
ENTERING indicates whether we are entering or leaving non-uniform execution.
|
||||||
|
PTR is the register pointing to allocated storage, it is assigned to on
|
||||||
|
entering and used to restore state on leaving. SIZE and ALIGN are used only
|
||||||
|
on entering. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
nvptx_output_softstack_switch (FILE *file, bool entering,
|
||||||
|
rtx ptr, rtx size, rtx align)
|
||||||
|
{
|
||||||
|
gcc_assert (REG_P (ptr) && !HARD_REGISTER_P (ptr));
|
||||||
|
if (crtl->is_leaf && !cfun->machine->simt_stack_size)
|
||||||
|
return;
|
||||||
|
int bits = POINTER_SIZE, regno = REGNO (ptr);
|
||||||
|
fprintf (file, "\t{\n");
|
||||||
|
if (entering)
|
||||||
|
{
|
||||||
|
fprintf (file, "\t\tcvta.local.u%d %%r%d, %%simtstack_ar + "
|
||||||
|
HOST_WIDE_INT_PRINT_DEC ";\n", bits, regno,
|
||||||
|
cfun->machine->simt_stack_size);
|
||||||
|
fprintf (file, "\t\tsub.u%d %%r%d, %%r%d, ", bits, regno, regno);
|
||||||
|
if (CONST_INT_P (size))
|
||||||
|
fprintf (file, HOST_WIDE_INT_PRINT_DEC,
|
||||||
|
ROUND_UP (UINTVAL (size), GET_MODE_SIZE (DImode)));
|
||||||
|
else
|
||||||
|
output_reg (file, REGNO (size), VOIDmode);
|
||||||
|
fputs (";\n", file);
|
||||||
|
if (!CONST_INT_P (size) || UINTVAL (align) > GET_MODE_SIZE (DImode))
|
||||||
|
fprintf (file, "\t\tand.u%d %%r%d, %%r%d, -%d;\n",
|
||||||
|
bits, regno, regno, UINTVAL (align));
|
||||||
|
}
|
||||||
|
if (cfun->machine->has_softstack)
|
||||||
|
{
|
||||||
|
const char *reg_stack = reg_names[STACK_POINTER_REGNUM];
|
||||||
|
if (entering)
|
||||||
|
{
|
||||||
|
fprintf (file, "\t\tst.u%d [%%r%d + -%d], %s;\n",
|
||||||
|
bits, regno, bits / 8, reg_stack);
|
||||||
|
fprintf (file, "\t\tsub.u%d %s, %%r%d, %d;\n",
|
||||||
|
bits, reg_stack, regno, bits / 8);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fprintf (file, "\t\tld.u%d %s, [%%r%d + -%d];\n",
|
||||||
|
bits, reg_stack, regno, bits / 8);
|
||||||
|
}
|
||||||
|
nvptx_output_set_softstack (REGNO (stack_pointer_rtx));
|
||||||
|
}
|
||||||
|
fprintf (file, "\t}\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Output code to enter non-uniform execution region. DEST is a register
|
||||||
|
to hold a per-lane allocation given by SIZE and ALIGN. */
|
||||||
|
|
||||||
|
const char *
|
||||||
|
nvptx_output_simt_enter (rtx dest, rtx size, rtx align)
|
||||||
|
{
|
||||||
|
nvptx_output_unisimt_switch (asm_out_file, true);
|
||||||
|
nvptx_output_softstack_switch (asm_out_file, true, dest, size, align);
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Output code to leave non-uniform execution region. SRC is the register
|
||||||
|
holding per-lane storage previously allocated by omp_simt_enter insn. */
|
||||||
|
|
||||||
|
const char *
|
||||||
|
nvptx_output_simt_exit (rtx src)
|
||||||
|
{
|
||||||
|
nvptx_output_unisimt_switch (asm_out_file, false);
|
||||||
|
nvptx_output_softstack_switch (asm_out_file, false, src, NULL_RTX, NULL_RTX);
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
/* Output instruction that sets soft stack pointer in shared memory to the
|
/* Output instruction that sets soft stack pointer in shared memory to the
|
||||||
value in register given by SRC_REGNO. */
|
value in register given by SRC_REGNO. */
|
||||||
|
|
||||||
|
|
|
@ -213,12 +213,18 @@ struct GTY(()) machine_function
|
||||||
bool has_varadic; /* Current function has a varadic call. */
|
bool has_varadic; /* Current function has a varadic call. */
|
||||||
bool has_chain; /* Current function has outgoing static chain. */
|
bool has_chain; /* Current function has outgoing static chain. */
|
||||||
bool has_softstack; /* Current function has a soft stack frame. */
|
bool has_softstack; /* Current function has a soft stack frame. */
|
||||||
|
bool has_simtreg; /* Current function has an OpenMP SIMD region. */
|
||||||
int num_args; /* Number of args of current call. */
|
int num_args; /* Number of args of current call. */
|
||||||
int return_mode; /* Return mode of current fn.
|
int return_mode; /* Return mode of current fn.
|
||||||
(machine_mode not defined yet.) */
|
(machine_mode not defined yet.) */
|
||||||
rtx axis_predicate[2]; /* Neutering predicates. */
|
rtx axis_predicate[2]; /* Neutering predicates. */
|
||||||
rtx unisimt_master; /* 'Master lane index' for -muniform-simt. */
|
rtx unisimt_master; /* 'Master lane index' for -muniform-simt. */
|
||||||
rtx unisimt_predicate; /* Predicate for -muniform-simt. */
|
rtx unisimt_predicate; /* Predicate for -muniform-simt. */
|
||||||
|
rtx unisimt_location; /* Mask location for -muniform-simt. */
|
||||||
|
/* The following two fields hold the maximum size resp. alignment required
|
||||||
|
for per-lane storage in OpenMP SIMD regions. */
|
||||||
|
unsigned HOST_WIDE_INT simt_stack_size;
|
||||||
|
unsigned HOST_WIDE_INT simt_stack_align;
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -63,6 +63,9 @@
|
||||||
UNSPECV_JOIN
|
UNSPECV_JOIN
|
||||||
|
|
||||||
UNSPECV_NOUNROLL
|
UNSPECV_NOUNROLL
|
||||||
|
|
||||||
|
UNSPECV_SIMT_ENTER
|
||||||
|
UNSPECV_SIMT_EXIT
|
||||||
])
|
])
|
||||||
|
|
||||||
(define_attr "subregs_ok" "false,true"
|
(define_attr "subregs_ok" "false,true"
|
||||||
|
@ -1184,6 +1187,42 @@
|
||||||
|
|
||||||
;; Patterns for OpenMP SIMD-via-SIMT lowering
|
;; Patterns for OpenMP SIMD-via-SIMT lowering
|
||||||
|
|
||||||
|
;; Begin a non-uniform execution region.  Operand 0 is the register set by
;; the insn; operands 1 and 2 are handed to nvptx_output_simt_enter as the
;; size and alignment arguments.  All assembly text is produced by that
;; function.
(define_insn "omp_simt_enter_insn"
  [(set (match_operand 0 "nvptx_register_operand" "=R")
        (unspec_volatile [(match_operand 1 "nvptx_nonmemory_operand" "Ri")
                          (match_operand 2 "nvptx_nonmemory_operand" "Ri")]
           UNSPECV_SIMT_ENTER))]
  ""
{
  rtx dest = operands[0];
  rtx size = operands[1];
  rtx align = operands[2];
  return nvptx_output_simt_enter (dest, size, align);
})
|
||||||
|
|
||||||
|
;; Expander for omp_simt_enter.  Besides emitting omp_simt_enter_insn it
;; records in cfun->machine the largest size and alignment requested so far
;; and marks the function as containing a SIMT region.  A size that is not
;; a compile-time constant is recorded as HOST_WIDE_INT_M1U.
(define_expand "omp_simt_enter"
  [(match_operand 0 "nvptx_register_operand" "=R")
   (match_operand 1 "nvptx_nonmemory_operand" "Ri")
   (match_operand 2 "const_int_operand" "n")]
  ""
{
  struct machine_function *mf = cfun->machine;

  mf->simt_stack_size
    = (CONST_INT_P (operands[1])
       ? MAX (UINTVAL (operands[1]), mf->simt_stack_size)
       : HOST_WIDE_INT_M1U);
  mf->simt_stack_align = MAX (UINTVAL (operands[2]), mf->simt_stack_align);
  mf->has_simtreg = true;

  emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2]));
  DONE;
})
|
||||||
|
|
||||||
|
;; End a non-uniform execution region.  Operand 0 is the register that is
;; passed to nvptx_output_simt_exit, which produces all assembly text.
(define_insn "omp_simt_exit"
  [(unspec_volatile [(match_operand 0 "nvptx_register_operand" "R")]
     UNSPECV_SIMT_EXIT)]
  ""
{
  rtx storage = operands[0];
  return nvptx_output_simt_exit (storage);
})
|
||||||
|
|
||||||
;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
|
;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
|
||||||
(define_insn "omp_simt_lane"
|
(define_insn "omp_simt_lane"
|
||||||
[(set (match_operand:SI 0 "nvptx_register_operand" "")
|
[(set (match_operand:SI 0 "nvptx_register_operand" "")
|
||||||
|
|
|
@ -37,6 +37,10 @@ msoft-stack
|
||||||
Target Report Mask(SOFT_STACK)
|
Target Report Mask(SOFT_STACK)
|
||||||
Use custom stacks instead of local memory for automatic storage.
|
Use custom stacks instead of local memory for automatic storage.
|
||||||
|
|
||||||
|
msoft-stack-reserve-local
|
||||||
|
Target Report Joined RejectNegative UInteger Var(nvptx_softstack_size) Init(128)
|
||||||
|
Specify size of .local memory used for stack when the exact amount is not known.
|
||||||
|
|
||||||
muniform-simt
|
muniform-simt
|
||||||
Target Report Mask(UNIFORM_SIMT)
|
Target Report Mask(UNIFORM_SIMT)
|
||||||
Generate code that can keep local state uniform across all lanes.
|
Generate code that can keep local state uniform across all lanes.
|
||||||
|
|
|
@ -166,6 +166,48 @@ expand_GOMP_USE_SIMT (internal_fn, gcall *)
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Placeholder RTL expander for IFN_GOMP_SIMT_ENTER.  Calls to this internal
   function should already have been expanded in the omp_device_lower pass,
   so reaching this expander is treated as unreachable.  */
|
||||||
|

||||||
|
static void
|
||||||
|
expand_GOMP_SIMT_ENTER (internal_fn, gcall *)
|
||||||
|
{
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate per-lane storage and begin non-uniform execution region. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
expand_GOMP_SIMT_ENTER_ALLOC (internal_fn, gcall *stmt)
|
||||||
|
{
|
||||||
|
rtx target;
|
||||||
|
tree lhs = gimple_call_lhs (stmt);
|
||||||
|
if (lhs)
|
||||||
|
target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
|
||||||
|
else
|
||||||
|
target = gen_reg_rtx (Pmode);
|
||||||
|
rtx size = expand_normal (gimple_call_arg (stmt, 0));
|
||||||
|
rtx align = expand_normal (gimple_call_arg (stmt, 1));
|
||||||
|
struct expand_operand ops[3];
|
||||||
|
create_output_operand (&ops[0], target, Pmode);
|
||||||
|
create_input_operand (&ops[1], size, Pmode);
|
||||||
|
create_input_operand (&ops[2], align, Pmode);
|
||||||
|
gcc_assert (targetm.have_omp_simt_enter ());
|
||||||
|
expand_insn (targetm.code_for_omp_simt_enter, 3, ops);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Deallocate per-lane storage and leave non-uniform execution region. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
expand_GOMP_SIMT_EXIT (internal_fn, gcall *stmt)
|
||||||
|
{
|
||||||
|
gcc_checking_assert (!gimple_call_lhs (stmt));
|
||||||
|
rtx arg = expand_normal (gimple_call_arg (stmt, 0));
|
||||||
|
struct expand_operand ops[1];
|
||||||
|
create_input_operand (&ops[0], arg, Pmode);
|
||||||
|
gcc_assert (targetm.have_omp_simt_exit ());
|
||||||
|
expand_insn (targetm.code_for_omp_simt_exit, 1, ops);
|
||||||
|
}
|
||||||
|
|
||||||
/* Lane index on SIMT targets: thread index in the warp on NVPTX. On targets
|
/* Lane index on SIMT targets: thread index in the warp on NVPTX. On targets
|
||||||
without SIMT execution this should be expanded in omp_device_lower pass. */
|
without SIMT execution this should be expanded in omp_device_lower pass. */
|
||||||
|
|
||||||
|
|
|
@ -142,6 +142,9 @@ DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary)
|
||||||
DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary)
|
DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary)
|
||||||
|
|
||||||
DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
|
DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
|
DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
|
DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
|
|
|
@ -3457,6 +3457,8 @@ omp_clause_aligned_alignment (tree clause)
|
||||||
struct omplow_simd_context {
|
struct omplow_simd_context {
|
||||||
tree idx;
|
tree idx;
|
||||||
tree lane;
|
tree lane;
|
||||||
|
vec<tree, va_heap> simt_eargs;
|
||||||
|
gimple_seq simt_dlist;
|
||||||
int max_vf;
|
int max_vf;
|
||||||
bool is_simt;
|
bool is_simt;
|
||||||
};
|
};
|
||||||
|
@ -3492,6 +3494,26 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
|
||||||
if (sctx->max_vf == 1)
|
if (sctx->max_vf == 1)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if (sctx->is_simt)
|
||||||
|
{
|
||||||
|
if (is_gimple_reg (new_var))
|
||||||
|
{
|
||||||
|
ivar = lvar = new_var;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
tree type = TREE_TYPE (new_var), ptype = build_pointer_type (type);
|
||||||
|
ivar = lvar = create_tmp_var (type);
|
||||||
|
TREE_ADDRESSABLE (ivar) = 1;
|
||||||
|
DECL_ATTRIBUTES (ivar) = tree_cons (get_identifier ("omp simt private"),
|
||||||
|
NULL, DECL_ATTRIBUTES (ivar));
|
||||||
|
sctx->simt_eargs.safe_push (build1 (ADDR_EXPR, ptype, ivar));
|
||||||
|
tree clobber = build_constructor (type, NULL);
|
||||||
|
TREE_THIS_VOLATILE (clobber) = 1;
|
||||||
|
gimple *g = gimple_build_assign (ivar, clobber);
|
||||||
|
gimple_seq_add_stmt (&sctx->simt_dlist, g);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
|
tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
|
||||||
tree avar = create_tmp_var_raw (atype);
|
tree avar = create_tmp_var_raw (atype);
|
||||||
if (TREE_ADDRESSABLE (new_var))
|
if (TREE_ADDRESSABLE (new_var))
|
||||||
|
@ -3504,6 +3526,7 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
|
||||||
NULL_TREE, NULL_TREE);
|
NULL_TREE, NULL_TREE);
|
||||||
lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
|
lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
|
||||||
NULL_TREE, NULL_TREE);
|
NULL_TREE, NULL_TREE);
|
||||||
|
}
|
||||||
if (DECL_P (new_var))
|
if (DECL_P (new_var))
|
||||||
{
|
{
|
||||||
SET_DECL_VALUE_EXPR (new_var, lvar);
|
SET_DECL_VALUE_EXPR (new_var, lvar);
|
||||||
|
@ -3547,8 +3570,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
||||||
bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
|
bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
|
||||||
&& gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD);
|
&& gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD);
|
||||||
omplow_simd_context sctx = omplow_simd_context ();
|
omplow_simd_context sctx = omplow_simd_context ();
|
||||||
tree simt_lane = NULL_TREE;
|
tree simt_lane = NULL_TREE, simtrec = NULL_TREE;
|
||||||
tree ivar = NULL_TREE, lvar = NULL_TREE;
|
tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE;
|
||||||
gimple_seq llist[3] = { };
|
gimple_seq llist[3] = { };
|
||||||
|
|
||||||
copyin_seq = NULL;
|
copyin_seq = NULL;
|
||||||
|
@ -3581,6 +3604,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Add a placeholder for simduid. */
|
||||||
|
if (sctx.is_simt && sctx.max_vf != 1)
|
||||||
|
sctx.simt_eargs.safe_push (NULL_TREE);
|
||||||
|
|
||||||
/* Do all the fixed sized types in the first pass, and the variable sized
|
/* Do all the fixed sized types in the first pass, and the variable sized
|
||||||
types in the second pass. This makes sure that the scalar arguments to
|
types in the second pass. This makes sure that the scalar arguments to
|
||||||
the variable sized types are processed before we use them in the
|
the variable sized types are processed before we use them in the
|
||||||
|
@ -4468,21 +4495,43 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sctx.lane)
|
if (sctx.max_vf == 1)
|
||||||
|
sctx.is_simt = false;
|
||||||
|
|
||||||
|
if (sctx.lane || sctx.is_simt)
|
||||||
{
|
{
|
||||||
tree uid = create_tmp_var (ptr_type_node, "simduid");
|
uid = create_tmp_var (ptr_type_node, "simduid");
|
||||||
/* Don't want uninit warnings on simduid, it is always uninitialized,
|
/* Don't want uninit warnings on simduid, it is always uninitialized,
|
||||||
but we use it not for the value, but for the DECL_UID only. */
|
but we use it not for the value, but for the DECL_UID only. */
|
||||||
TREE_NO_WARNING (uid) = 1;
|
TREE_NO_WARNING (uid) = 1;
|
||||||
|
c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
|
||||||
|
OMP_CLAUSE__SIMDUID__DECL (c) = uid;
|
||||||
|
OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
|
||||||
|
gimple_omp_for_set_clauses (ctx->stmt, c);
|
||||||
|
}
|
||||||
|
/* Emit calls denoting privatized variables and initializing a pointer to
|
||||||
|
structure that holds private variables as fields after ompdevlow pass. */
|
||||||
|
if (sctx.is_simt)
|
||||||
|
{
|
||||||
|
sctx.simt_eargs[0] = uid;
|
||||||
|
gimple *g
|
||||||
|
= gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, sctx.simt_eargs);
|
||||||
|
gimple_call_set_lhs (g, uid);
|
||||||
|
gimple_seq_add_stmt (ilist, g);
|
||||||
|
sctx.simt_eargs.release ();
|
||||||
|
|
||||||
|
simtrec = create_tmp_var (ptr_type_node, ".omp_simt");
|
||||||
|
g = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 1, uid);
|
||||||
|
gimple_call_set_lhs (g, simtrec);
|
||||||
|
gimple_seq_add_stmt (ilist, g);
|
||||||
|
}
|
||||||
|
if (sctx.lane)
|
||||||
|
{
|
||||||
gimple *g
|
gimple *g
|
||||||
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
|
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
|
||||||
gimple_call_set_lhs (g, sctx.lane);
|
gimple_call_set_lhs (g, sctx.lane);
|
||||||
gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
|
gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
|
||||||
gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
|
gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
|
||||||
c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
|
|
||||||
OMP_CLAUSE__SIMDUID__DECL (c) = uid;
|
|
||||||
OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
|
|
||||||
gimple_omp_for_set_clauses (ctx->stmt, c);
|
|
||||||
g = gimple_build_assign (sctx.lane, INTEGER_CST,
|
g = gimple_build_assign (sctx.lane, INTEGER_CST,
|
||||||
build_int_cst (unsigned_type_node, 0));
|
build_int_cst (unsigned_type_node, 0));
|
||||||
gimple_seq_add_stmt (ilist, g);
|
gimple_seq_add_stmt (ilist, g);
|
||||||
|
@ -4545,6 +4594,13 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|
||||||
gimple_seq_add_stmt (seq, gimple_build_label (end));
|
gimple_seq_add_stmt (seq, gimple_build_label (end));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (sctx.is_simt)
|
||||||
|
{
|
||||||
|
gimple_seq_add_seq (dlist, sctx.simt_dlist);
|
||||||
|
gimple *g
|
||||||
|
= gimple_build_call_internal (IFN_GOMP_SIMT_EXIT, 1, simtrec);
|
||||||
|
gimple_seq_add_stmt (dlist, g);
|
||||||
|
}
|
||||||
|
|
||||||
/* The copyin sequence is not to be executed by the main thread, since
|
/* The copyin sequence is not to be executed by the main thread, since
|
||||||
that would result in self-copies. Perhaps not visible to scalars,
|
that would result in self-copies. Perhaps not visible to scalars,
|
||||||
|
@ -4715,7 +4771,8 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
|
||||||
if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
|
if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
|
||||||
{
|
{
|
||||||
tree val = DECL_VALUE_EXPR (new_var);
|
tree val = DECL_VALUE_EXPR (new_var);
|
||||||
if (TREE_CODE (val) == ARRAY_REF
|
if (!maybe_simt
|
||||||
|
&& TREE_CODE (val) == ARRAY_REF
|
||||||
&& VAR_P (TREE_OPERAND (val, 0))
|
&& VAR_P (TREE_OPERAND (val, 0))
|
||||||
&& lookup_attribute ("omp simd array",
|
&& lookup_attribute ("omp simd array",
|
||||||
DECL_ATTRIBUTES (TREE_OPERAND (val,
|
DECL_ATTRIBUTES (TREE_OPERAND (val,
|
||||||
|
@ -4734,26 +4791,28 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
|
||||||
new_var = build4 (ARRAY_REF, TREE_TYPE (val),
|
new_var = build4 (ARRAY_REF, TREE_TYPE (val),
|
||||||
TREE_OPERAND (val, 0), lastlane,
|
TREE_OPERAND (val, 0), lastlane,
|
||||||
NULL_TREE, NULL_TREE);
|
NULL_TREE, NULL_TREE);
|
||||||
if (maybe_simt)
|
}
|
||||||
|
else if (maybe_simt
|
||||||
|
&& VAR_P (val)
|
||||||
|
&& lookup_attribute ("omp simt private",
|
||||||
|
DECL_ATTRIBUTES (val)))
|
||||||
{
|
{
|
||||||
gcall *g;
|
|
||||||
if (simtlast == NULL)
|
if (simtlast == NULL)
|
||||||
{
|
{
|
||||||
simtlast = create_tmp_var (unsigned_type_node);
|
simtlast = create_tmp_var (unsigned_type_node);
|
||||||
g = gimple_build_call_internal
|
gcall *g = gimple_build_call_internal
|
||||||
(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
|
(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
|
||||||
gimple_call_set_lhs (g, simtlast);
|
gimple_call_set_lhs (g, simtlast);
|
||||||
gimple_seq_add_stmt (stmt_list, g);
|
gimple_seq_add_stmt (stmt_list, g);
|
||||||
}
|
}
|
||||||
x = build_call_expr_internal_loc
|
x = build_call_expr_internal_loc
|
||||||
(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
|
(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
|
||||||
TREE_TYPE (new_var), 2, new_var, simtlast);
|
TREE_TYPE (val), 2, val, simtlast);
|
||||||
new_var = unshare_expr (new_var);
|
new_var = unshare_expr (new_var);
|
||||||
gimplify_assign (new_var, x, stmt_list);
|
gimplify_assign (new_var, x, stmt_list);
|
||||||
new_var = unshare_expr (new_var);
|
new_var = unshare_expr (new_var);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
|
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
|
||||||
&& OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
|
&& OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
|
||||||
|
|
|
@ -33,12 +33,15 @@ along with GCC; see the file COPYING3. If not see
|
||||||
#include "diagnostic-core.h"
|
#include "diagnostic-core.h"
|
||||||
#include "fold-const.h"
|
#include "fold-const.h"
|
||||||
#include "internal-fn.h"
|
#include "internal-fn.h"
|
||||||
|
#include "langhooks.h"
|
||||||
#include "gimplify.h"
|
#include "gimplify.h"
|
||||||
#include "gimple-iterator.h"
|
#include "gimple-iterator.h"
|
||||||
#include "gimplify-me.h"
|
#include "gimplify-me.h"
|
||||||
#include "gimple-walk.h"
|
#include "gimple-walk.h"
|
||||||
#include "tree-cfg.h"
|
#include "tree-cfg.h"
|
||||||
#include "tree-into-ssa.h"
|
#include "tree-into-ssa.h"
|
||||||
|
#include "tree-nested.h"
|
||||||
|
#include "stor-layout.h"
|
||||||
#include "common/common-target.h"
|
#include "common/common-target.h"
|
||||||
#include "omp-general.h"
|
#include "omp-general.h"
|
||||||
#include "omp-offload.h"
|
#include "omp-offload.h"
|
||||||
|
@ -1669,6 +1672,92 @@ make_pass_oacc_device_lower (gcc::context *ctxt)
|
||||||
return new pass_oacc_device_lower (ctxt);
|
return new pass_oacc_device_lower (ctxt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
|
||||||
|
GOMP_SIMT_ENTER call identifying the privatized variables, which are
|
||||||
|
turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
|
||||||
|
Set *REGIMPLIFY to true, except if no privatized variables were seen. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
|
||||||
|
{
|
||||||
|
gimple *alloc_stmt = gsi_stmt (*gsi);
|
||||||
|
tree simtrec = gimple_call_lhs (alloc_stmt);
|
||||||
|
tree simduid = gimple_call_arg (alloc_stmt, 0);
|
||||||
|
gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
|
||||||
|
gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
|
||||||
|
tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
|
||||||
|
TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
|
||||||
|
TREE_ADDRESSABLE (rectype) = 1;
|
||||||
|
TREE_TYPE (simtrec) = build_pointer_type (rectype);
|
||||||
|
for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
|
||||||
|
{
|
||||||
|
tree *argp = gimple_call_arg_ptr (enter_stmt, i);
|
||||||
|
if (*argp == null_pointer_node)
|
||||||
|
continue;
|
||||||
|
gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
|
||||||
|
&& VAR_P (TREE_OPERAND (*argp, 0)));
|
||||||
|
tree var = TREE_OPERAND (*argp, 0);
|
||||||
|
|
||||||
|
tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
|
||||||
|
DECL_NAME (var), TREE_TYPE (var));
|
||||||
|
SET_DECL_ALIGN (field, DECL_ALIGN (var));
|
||||||
|
DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
|
||||||
|
TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
|
||||||
|
|
||||||
|
insert_field_into_struct (rectype, field);
|
||||||
|
|
||||||
|
tree t = build_simple_mem_ref (simtrec);
|
||||||
|
t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
|
||||||
|
TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
|
||||||
|
SET_DECL_VALUE_EXPR (var, t);
|
||||||
|
DECL_HAS_VALUE_EXPR_P (var) = 1;
|
||||||
|
*regimplify = true;
|
||||||
|
}
|
||||||
|
layout_type (rectype);
|
||||||
|
tree size = TYPE_SIZE_UNIT (rectype);
|
||||||
|
tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
|
||||||
|
|
||||||
|
alloc_stmt
|
||||||
|
= gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
|
||||||
|
gimple_call_set_lhs (alloc_stmt, simtrec);
|
||||||
|
gsi_replace (gsi, alloc_stmt, false);
|
||||||
|
gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
|
||||||
|
enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
|
||||||
|
gsi_replace (&enter_gsi, enter_stmt, false);
|
||||||
|
|
||||||
|
use_operand_p use;
|
||||||
|
gimple *exit_stmt;
|
||||||
|
if (single_imm_use (simtrec, &use, &exit_stmt))
|
||||||
|
{
|
||||||
|
gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
|
||||||
|
gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
|
||||||
|
tree clobber = build_constructor (rectype, NULL);
|
||||||
|
TREE_THIS_VOLATILE (clobber) = 1;
|
||||||
|
exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
|
||||||
|
gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
gcc_checking_assert (has_zero_uses (simtrec));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
|
||||||
|
|
||||||
|
static tree
|
||||||
|
find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
|
||||||
|
{
|
||||||
|
tree t = *tp;
|
||||||
|
|
||||||
|
if (VAR_P (t)
|
||||||
|
&& DECL_HAS_VALUE_EXPR_P (t)
|
||||||
|
&& lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
|
||||||
|
{
|
||||||
|
*walk_subtrees = 0;
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
return NULL_TREE;
|
||||||
|
}
|
||||||
|
|
||||||
/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
|
/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
|
||||||
VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
|
VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
|
||||||
LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
|
LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
|
||||||
|
@ -1679,6 +1768,7 @@ static unsigned int
|
||||||
execute_omp_device_lower ()
|
execute_omp_device_lower ()
|
||||||
{
|
{
|
||||||
int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
|
int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
|
||||||
|
bool regimplify = false;
|
||||||
basic_block bb;
|
basic_block bb;
|
||||||
gimple_stmt_iterator gsi;
|
gimple_stmt_iterator gsi;
|
||||||
FOR_EACH_BB_FN (bb, cfun)
|
FOR_EACH_BB_FN (bb, cfun)
|
||||||
|
@ -1694,6 +1784,20 @@ execute_omp_device_lower ()
|
||||||
case IFN_GOMP_USE_SIMT:
|
case IFN_GOMP_USE_SIMT:
|
||||||
rhs = vf == 1 ? integer_zero_node : integer_one_node;
|
rhs = vf == 1 ? integer_zero_node : integer_one_node;
|
||||||
break;
|
break;
|
||||||
|
case IFN_GOMP_SIMT_ENTER:
|
||||||
|
rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
|
||||||
|
goto simtreg_enter_exit;
|
||||||
|
case IFN_GOMP_SIMT_ENTER_ALLOC:
|
||||||
|
if (vf != 1)
|
||||||
|
ompdevlow_adjust_simt_enter (&gsi, &regimplify);
|
||||||
|
rhs = vf == 1 ? null_pointer_node : NULL_TREE;
|
||||||
|
goto simtreg_enter_exit;
|
||||||
|
case IFN_GOMP_SIMT_EXIT:
|
||||||
|
simtreg_enter_exit:
|
||||||
|
if (vf != 1)
|
||||||
|
continue;
|
||||||
|
unlink_stmt_vdef (stmt);
|
||||||
|
break;
|
||||||
case IFN_GOMP_SIMT_LANE:
|
case IFN_GOMP_SIMT_LANE:
|
||||||
case IFN_GOMP_SIMT_LAST_LANE:
|
case IFN_GOMP_SIMT_LAST_LANE:
|
||||||
rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
|
rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
|
||||||
|
@ -1726,6 +1830,16 @@ execute_omp_device_lower ()
|
||||||
stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
|
stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
|
||||||
gsi_replace (&gsi, stmt, false);
|
gsi_replace (&gsi, stmt, false);
|
||||||
}
|
}
|
||||||
|
if (regimplify)
|
||||||
|
FOR_EACH_BB_REVERSE_FN (bb, cfun)
|
||||||
|
for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
|
||||||
|
if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
|
||||||
|
{
|
||||||
|
if (gimple_clobber_p (gsi_stmt (gsi)))
|
||||||
|
gsi_remove (&gsi, true);
|
||||||
|
else
|
||||||
|
gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
|
||||||
|
}
|
||||||
if (vf != 1)
|
if (vf != 1)
|
||||||
cfun->has_force_vectorize_loops = false;
|
cfun->has_force_vectorize_loops = false;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -68,6 +68,8 @@ DEF_TARGET_INSN (oacc_dim_pos, (rtx x0, rtx x1))
|
||||||
DEF_TARGET_INSN (oacc_dim_size, (rtx x0, rtx x1))
|
DEF_TARGET_INSN (oacc_dim_size, (rtx x0, rtx x1))
|
||||||
DEF_TARGET_INSN (oacc_fork, (rtx x0, rtx x1, rtx x2))
|
DEF_TARGET_INSN (oacc_fork, (rtx x0, rtx x1, rtx x2))
|
||||||
DEF_TARGET_INSN (oacc_join, (rtx x0, rtx x1, rtx x2))
|
DEF_TARGET_INSN (oacc_join, (rtx x0, rtx x1, rtx x2))
|
||||||
|
DEF_TARGET_INSN (omp_simt_enter, (rtx x0, rtx x1, rtx x2))
|
||||||
|
DEF_TARGET_INSN (omp_simt_exit, (rtx x0))
|
||||||
DEF_TARGET_INSN (omp_simt_lane, (rtx x0))
|
DEF_TARGET_INSN (omp_simt_lane, (rtx x0))
|
||||||
DEF_TARGET_INSN (omp_simt_last_lane, (rtx x0, rtx x1))
|
DEF_TARGET_INSN (omp_simt_last_lane, (rtx x0, rtx x1))
|
||||||
DEF_TARGET_INSN (omp_simt_ordered, (rtx x0, rtx x1))
|
DEF_TARGET_INSN (omp_simt_ordered, (rtx x0, rtx x1))
|
||||||
|
|
|
@ -4395,6 +4395,11 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
|
||||||
gcall *call_stmt;
|
gcall *call_stmt;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
unsigned int prop_mask, src_properties;
|
unsigned int prop_mask, src_properties;
|
||||||
|
struct function *dst_cfun;
|
||||||
|
tree simduid;
|
||||||
|
use_operand_p use;
|
||||||
|
gimple *simtenter_stmt = NULL;
|
||||||
|
vec<tree> *simtvars_save;
|
||||||
|
|
||||||
/* The gimplifier uses input_location in too many places, such as
|
/* The gimplifier uses input_location in too many places, such as
|
||||||
internal_get_tmp_var (). */
|
internal_get_tmp_var (). */
|
||||||
|
@ -4598,15 +4603,26 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
|
||||||
id->src_cfun = DECL_STRUCT_FUNCTION (fn);
|
id->src_cfun = DECL_STRUCT_FUNCTION (fn);
|
||||||
id->call_stmt = call_stmt;
|
id->call_stmt = call_stmt;
|
||||||
|
|
||||||
|
/* When inlining into an OpenMP SIMD-on-SIMT loop, arrange for new automatic
|
||||||
|
variables to be added to IFN_GOMP_SIMT_ENTER argument list. */
|
||||||
|
dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
|
||||||
|
simtvars_save = id->dst_simt_vars;
|
||||||
|
if (!(dst_cfun->curr_properties & PROP_gimple_lomp_dev)
|
||||||
|
&& (simduid = bb->loop_father->simduid) != NULL_TREE
|
||||||
|
&& (simduid = ssa_default_def (dst_cfun, simduid)) != NULL_TREE
|
||||||
|
&& single_imm_use (simduid, &use, &simtenter_stmt)
|
||||||
|
&& is_gimple_call (simtenter_stmt)
|
||||||
|
&& gimple_call_internal_p (simtenter_stmt, IFN_GOMP_SIMT_ENTER))
|
||||||
|
vec_alloc (id->dst_simt_vars, 0);
|
||||||
|
else
|
||||||
|
id->dst_simt_vars = NULL;
|
||||||
|
|
||||||
/* If the src function contains an IFN_VA_ARG, then so will the dst
|
/* If the src function contains an IFN_VA_ARG, then so will the dst
|
||||||
function after inlining. Likewise for IFN_GOMP_USE_SIMT. */
|
function after inlining. Likewise for IFN_GOMP_USE_SIMT. */
|
||||||
prop_mask = PROP_gimple_lva | PROP_gimple_lomp_dev;
|
prop_mask = PROP_gimple_lva | PROP_gimple_lomp_dev;
|
||||||
src_properties = id->src_cfun->curr_properties & prop_mask;
|
src_properties = id->src_cfun->curr_properties & prop_mask;
|
||||||
if (src_properties != prop_mask)
|
if (src_properties != prop_mask)
|
||||||
{
|
|
||||||
struct function *dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
|
|
||||||
dst_cfun->curr_properties &= src_properties | ~prop_mask;
|
dst_cfun->curr_properties &= src_properties | ~prop_mask;
|
||||||
}
|
|
||||||
|
|
||||||
gcc_assert (!id->src_cfun->after_inlining);
|
gcc_assert (!id->src_cfun->after_inlining);
|
||||||
|
|
||||||
|
@ -4740,6 +4756,27 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
|
||||||
if (cfun->gimple_df)
|
if (cfun->gimple_df)
|
||||||
pt_solution_reset (&cfun->gimple_df->escaped);
|
pt_solution_reset (&cfun->gimple_df->escaped);
|
||||||
|
|
||||||
|
/* Add new automatic variables to IFN_GOMP_SIMT_ENTER arguments. */
|
||||||
|
if (id->dst_simt_vars && id->dst_simt_vars->length () > 0)
|
||||||
|
{
|
||||||
|
size_t nargs = gimple_call_num_args (simtenter_stmt);
|
||||||
|
vec<tree> *vars = id->dst_simt_vars;
|
||||||
|
auto_vec<tree> newargs (nargs + vars->length ());
|
||||||
|
for (size_t i = 0; i < nargs; i++)
|
||||||
|
newargs.quick_push (gimple_call_arg (simtenter_stmt, i));
|
||||||
|
for (tree *pvar = vars->begin (); pvar != vars->end (); pvar++)
|
||||||
|
{
|
||||||
|
tree ptrtype = build_pointer_type (TREE_TYPE (*pvar));
|
||||||
|
newargs.quick_push (build1 (ADDR_EXPR, ptrtype, *pvar));
|
||||||
|
}
|
||||||
|
gcall *g = gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, newargs);
|
||||||
|
gimple_call_set_lhs (g, gimple_call_lhs (simtenter_stmt));
|
||||||
|
gimple_stmt_iterator gsi = gsi_for_stmt (simtenter_stmt);
|
||||||
|
gsi_replace (&gsi, g, false);
|
||||||
|
}
|
||||||
|
vec_free (id->dst_simt_vars);
|
||||||
|
id->dst_simt_vars = simtvars_save;
|
||||||
|
|
||||||
/* Clean up. */
|
/* Clean up. */
|
||||||
if (id->debug_map)
|
if (id->debug_map)
|
||||||
{
|
{
|
||||||
|
@ -5463,9 +5500,19 @@ copy_decl_for_dup_finish (copy_body_data *id, tree decl, tree copy)
|
||||||
function. */
|
function. */
|
||||||
;
|
;
|
||||||
else
|
else
|
||||||
|
{
|
||||||
/* Ordinary automatic local variables are now in the scope of the
|
/* Ordinary automatic local variables are now in the scope of the
|
||||||
new function. */
|
new function. */
|
||||||
DECL_CONTEXT (copy) = id->dst_fn;
|
DECL_CONTEXT (copy) = id->dst_fn;
|
||||||
|
if (VAR_P (copy) && id->dst_simt_vars && !is_gimple_reg (copy))
|
||||||
|
{
|
||||||
|
if (!lookup_attribute ("omp simt private", DECL_ATTRIBUTES (copy)))
|
||||||
|
DECL_ATTRIBUTES (copy)
|
||||||
|
= tree_cons (get_identifier ("omp simt private"), NULL,
|
||||||
|
DECL_ATTRIBUTES (copy));
|
||||||
|
id->dst_simt_vars->safe_push (copy);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
|
|
|
@ -145,6 +145,10 @@ struct copy_body_data
|
||||||
equivalents in the function into which it is being inlined. */
|
equivalents in the function into which it is being inlined. */
|
||||||
hash_map<dependence_hash, unsigned short> *dependence_map;
|
hash_map<dependence_hash, unsigned short> *dependence_map;
|
||||||
|
|
||||||
|
/* A list of addressable local variables remapped into the caller
|
||||||
|
when inlining a call within an OpenMP SIMD-on-SIMT loop. */
|
||||||
|
vec<tree> *dst_simt_vars;
|
||||||
|
|
||||||
/* Cilk keywords currently need to replace some variables that
|
/* Cilk keywords currently need to replace some variables that
|
||||||
ordinary nested functions do not. */
|
ordinary nested functions do not. */
|
||||||
bool remap_var_for_cilk;
|
bool remap_var_for_cilk;
|
||||||
|
|
|
@ -1654,7 +1654,8 @@ execute_update_addresses_taken (void)
|
||||||
gimple_ior_addresses_taken (addresses_taken, stmt);
|
gimple_ior_addresses_taken (addresses_taken, stmt);
|
||||||
gimple_call_set_arg (stmt, 1, arg);
|
gimple_call_set_arg (stmt, 1, arg);
|
||||||
}
|
}
|
||||||
else if (is_asan_mark_p (stmt))
|
else if (is_asan_mark_p (stmt)
|
||||||
|
|| gimple_call_internal_p (stmt, IFN_GOMP_SIMT_ENTER))
|
||||||
;
|
;
|
||||||
else
|
else
|
||||||
gimple_ior_addresses_taken (addresses_taken, stmt);
|
gimple_ior_addresses_taken (addresses_taken, stmt);
|
||||||
|
@ -1940,6 +1941,18 @@ execute_update_addresses_taken (void)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (gimple_call_internal_p (stmt, IFN_GOMP_SIMT_ENTER))
|
||||||
|
for (i = 1; i < gimple_call_num_args (stmt); i++)
|
||||||
|
{
|
||||||
|
tree *argp = gimple_call_arg_ptr (stmt, i);
|
||||||
|
if (*argp == null_pointer_node)
|
||||||
|
continue;
|
||||||
|
gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
|
||||||
|
&& VAR_P (TREE_OPERAND (*argp, 0)));
|
||||||
|
tree var = TREE_OPERAND (*argp, 0);
|
||||||
|
if (bitmap_bit_p (suitable_for_renaming, DECL_UID (var)))
|
||||||
|
*argp = null_pointer_node;
|
||||||
|
}
|
||||||
for (i = 0; i < gimple_call_num_args (stmt); ++i)
|
for (i = 0; i < gimple_call_num_args (stmt); ++i)
|
||||||
{
|
{
|
||||||
tree *argp = gimple_call_arg_ptr (stmt, i);
|
tree *argp = gimple_call_arg_ptr (stmt, i);
|
||||||
|
|
Loading…
Reference in New Issue