re PR tree-optimization/48765 (ICE in vect_transform_stmt)

PR tree-optimization/48765
        * tree-vectorizer.h (vect_make_slp_decision): Return bool.
        * tree-vect-loop.c (vect_analyze_loop_operations): Add new
        argument to indicate if loop aware SLP is being used.  Scan
        the statements and update the vectorization factor
        according to the type of
        vectorization before statement analysis.
        (vect_analyze_loop_2): Get a return value from
        vect_make_slp_decision, pass it to 
        vect_analyze_loop_operations.
        (vectorizable_reduction): Set number of copies to 1 in case of
        pure SLP statement.
        * tree-vect-stmts.c (vectorizable_conversion,
        vectorizable_assignment, vectorizable_shift,
        vectorizable_operation, vectorizable_type_demotion,
        vectorizable_type_promotion, vectorizable_store,
        vectorizable_load): Likewise.
        (vectorizable_condition): Move the check that it is not SLP
        vectorization before the number of copies check.
        * tree-vect-slp.c (vect_make_slp_decision): Return TRUE if
        decided to vectorize the loop using SLP.

From-SVN: r173132
This commit is contained in:
Ira Rosen 2011-04-28 19:50:28 +00:00 committed by Ira Rosen
parent 45540bcf24
commit 437f4a0024
7 changed files with 166 additions and 42 deletions

View File

@ -1,3 +1,25 @@
2011-04-28 Ira Rosen <ira.rosen@linaro.org>
PR tree-optimization/48765
* tree-vectorizer.h (vect_make_slp_decision): Return bool.
* tree-vect-loop.c (vect_analyze_loop_operations): Add new argument
to indicate if loop aware SLP is being used. Scan the statements
and update the vectorization factor according to the type of
vectorization before statement analysis.
(vect_analyze_loop_2): Get a return value from vect_make_slp_decision,
pass it to vect_analyze_loop_operations.
(vectorizable_reduction): Set number of copies to 1 in case of pure
SLP statement.
* tree-vect-stmts.c (vectorizable_conversion,
vectorizable_assignment, vectorizable_shift,
vectorizable_operation, vectorizable_type_demotion,
vectorizable_type_promotion, vectorizable_store, vectorizable_load):
Likewise.
(vectorizable_condition): Move the check that it is not SLP
vectorization before the number of copies check.
* tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided
to vectorize the loop using SLP.
2011-04-28 Jakub Jelinek <jakub@redhat.com>
PR middle-end/48597

View File

@ -1,3 +1,8 @@
2011-04-28 Ira Rosen <ira.rosen@linaro.org>
PR tree-optimization/48765
* gcc.dg/vect/pr48765.c: New.
2011-04-28 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
PR tree-optimization/48775

View File

@ -0,0 +1,82 @@
/* { dg-do compile { target powerpc*-*-* } } */
/* { dg-options "-m64 -O3 -mcpu=power6" } */
/* Register classes used by this reduced test case.  LIM_REG_CLASSES is
   the last enumerator and therefore equals the number of classes that
   precede it.  */
enum reg_class
{
  NO_REGS,
  AP_REG,
  XRF_REGS,
  GENERAL_REGS,
  AGRF_REGS,
  XGRF_REGS,
  ALL_REGS,
  LIM_REG_CLASSES
};
/* Machine modes used by this reduced test case.  MAX_MACHINE_MODE is the
   last enumerator and therefore equals the number of modes that precede
   it.  */
enum machine_mode
{
  VOIDmode,
  QImode,
  HImode,
  PSImode,
  SImode,
  PDImode,
  DImode,
  TImode,
  OImode,
  QFmode,
  HFmode,
  TQFmode,
  SFmode,
  DFmode,
  XFmode,
  TFmode,
  SCmode,
  DCmode,
  XCmode,
  TCmode,
  CQImode,
  CHImode,
  CSImode,
  CDImode,
  CTImode,
  COImode,
  BLKmode,
  CCmode,
  CCEVENmode,
  MAX_MACHINE_MODE
};
/* Minimal RTL object for this test: only an 8-bit `mode' bit-field is
   kept.  `rtx' is a pointer to it.  */
typedef struct rtx_def {
  int mode:8;
} *rtx;
/* Table of rtx pointers, indexed by register number in
   stupid_life_analysis.  */
extern rtx *regno_reg_rtx;
/* A HARD_REG_SET is an array of (64 + 32 - 1) / 32 == 2 unsigned ints.
   stupid_find_reg treats it as a bit set with 32 bits per element.  */
typedef unsigned int HARD_REG_ELT_TYPE;
typedef HARD_REG_ELT_TYPE HARD_REG_SET[((64 + 32 - 1) / 32)];
/* Order in which stupid_find_reg scans the 64 register numbers.  */
extern int reg_alloc_order[64];
/* Exclusive upper bound of the loop in stupid_life_analysis.  */
extern int max_regno;
/* Per-register tables, indexed by register number.  */
extern int *reg_n_calls_crossed;
extern short *reg_renumber;
static int *reg_where_dead;
static int *reg_where_born;
/* Maps the loop index in stupid_life_analysis to a register number.  */
static int *reg_order;
static char *regs_change_size;
/* One HARD_REG_SET per insn index; read by stupid_find_reg.  */
static HARD_REG_SET *after_insn_hard_regs;
/* Prototype for the old-style definition further down.  */
static int stupid_find_reg (int, enum reg_class, enum machine_mode, int, int,
int);
/* For every index I from 68 up to (but not including) max_regno, look up
   register R = reg_order[I] and store the result of stupid_find_reg in
   reg_renumber[R].

   Old-style (K&R) definition: only F is given a type; NREGS and FILE
   default to int.  None of the three parameters is used in the body.
   reg_preferred_class is called without a visible declaration.

   NOTE(review): this looks like a reduced reproducer for PR 48765, so the
   code is presumably meant to stay exactly as is -- confirm before
   cleaning it up.  */
void
stupid_life_analysis (f, nregs, file)
rtx f;
{
register int i;
/* The start index is ((64)) + 3 + 1 == 68.  */
for (i = (((64)) + 3) + 1; i < max_regno; i++)
{
register int r = reg_order[i];
/* Always true here: LIM_REG_CLASSES is 7 in enum reg_class.  */
if ((int) LIM_REG_CLASSES > 1)
reg_renumber[r] =
stupid_find_reg (reg_n_calls_crossed[r], reg_preferred_class (r),
((regno_reg_rtx[r])->mode), reg_where_born[r],
reg_where_dead[r], regs_change_size[r]);
}
}
/* OR the hard register sets of insns BORN_INSN .. DEAD_INSN - 1 into a
   local set, then scan the registers in reg_alloc_order.

   Old-style (K&R) definition: CALL_PRESERVED, CLASS and MODE are declared
   explicitly; BORN_INSN, DEAD_INSN and CHANGES_SIZE default to int.
   CALL_PRESERVED, CLASS, MODE and CHANGES_SIZE are not used in the body.

   Returns REGNO only from the inner `if'; every other path falls off the
   end of this non-void function without a return statement.

   NOTE(review): the uninitialized reads flagged below are presumably
   left over from test-case reduction; the test is compile-only -- confirm
   before "fixing" them.  */
static int
stupid_find_reg (call_preserved, class, mode, born_insn, dead_insn,
changes_size)
int call_preserved;
enum reg_class class;
enum machine_mode mode;
{
register int i, ins;
/* USED is never initialized before being OR-ed into below; THIS_REG is
   never referenced.  */
HARD_REG_SET used, this_reg;
/* The do/while (0) statement is the body of this `for' loop.  */
for (ins = born_insn; ins < dead_insn; ins++)
do
{
register HARD_REG_ELT_TYPE *scan_tp_ = (used), *scan_fp_ =
(after_insn_hard_regs[ins]);
/* Element-wise OR of after_insn_hard_regs[ins] into USED.  */
for (i = 0; i < ((64 + 32 - 1) / 32); i++)
*scan_tp_++ |= *scan_fp_++;
}
while (0);
for (i = 0; i < 64; i++)
{
int regno = reg_alloc_order[i];
/* Test the bit for REGNO in USED (32 bits per element).  */
if (((used)[(regno) / ((unsigned) 32)] &
(((HARD_REG_ELT_TYPE) (1)) << ((regno) % ((unsigned) 32)))))
{
/* J is compared without ever being assigned.  */
register int j;
if (j == regno)
return regno;
}
}
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -1146,7 +1146,7 @@ vect_get_cost (enum vect_cost_for_stmt type_of_cost)
Scan the loop stmts and make sure they are all vectorizable. */
static bool
vect_analyze_loop_operations (loop_vec_info loop_vinfo)
vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
@ -1167,6 +1167,40 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
if (slp)
{
/* If all the stmts in the loop can be SLPed, we perform only SLP, and
vectorization factor of the loop is the unrolling factor required by
the SLP instances. If that unrolling factor is 1, we say, that we
perform pure SLP on loop - cross iteration parallelism is not
exploited. */
for (i = 0; i < nbbs; i++)
{
basic_block bb = bbs[i];
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
gcc_assert (stmt_info);
if ((STMT_VINFO_RELEVANT_P (stmt_info)
|| VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
&& !PURE_SLP_STMT (stmt_info))
/* STMT needs both SLP and loop-based vectorization. */
only_slp_in_loop = false;
}
}
if (only_slp_in_loop)
vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
else
vectorization_factor = least_common_multiple (vectorization_factor,
LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Updating vectorization factor to %d ",
vectorization_factor);
}
for (i = 0; i < nbbs; i++)
{
@ -1272,18 +1306,8 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
gcc_assert (stmt_info);
if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
if ((STMT_VINFO_RELEVANT_P (stmt_info)
|| VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
&& !PURE_SLP_STMT (stmt_info))
/* STMT needs both SLP and loop-based vectorization. */
only_slp_in_loop = false;
}
} /* bbs */
@ -1303,18 +1327,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
return false;
}
/* If all the stmts in the loop can be SLPed, we perform only SLP, and
vectorization factor of the loop is the unrolling factor required by the
SLP instances. If that unrolling factor is 1, we say, that we perform
pure SLP on loop - cross iteration parallelism is not exploited. */
if (only_slp_in_loop)
vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
else
vectorization_factor = least_common_multiple (vectorization_factor,
LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump,
@ -1410,7 +1422,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
{
bool ok, dummy;
bool ok, dummy, slp = false;
int max_vf = MAX_VECTORIZATION_FACTOR;
int min_vf = 2;
@ -1524,7 +1536,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
if (ok)
{
/* Decide which possible SLP instances to SLP. */
vect_make_slp_decision (loop_vinfo);
slp = vect_make_slp_decision (loop_vinfo);
/* Find stmts that need to be both vectorized and SLPed. */
vect_detect_hybrid_slp (loop_vinfo);
@ -1533,7 +1545,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
/* Scan all the operations in the loop and make sure they are
vectorizable. */
ok = vect_analyze_loop_operations (loop_vinfo);
ok = vect_analyze_loop_operations (loop_vinfo, slp);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
@ -4136,7 +4148,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
if (slp_node)
if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)

View File

@ -1351,9 +1351,10 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
/* For each possible SLP instance decide whether to SLP it and calculate overall
unrolling factor needed to SLP the loop. */
unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
least one instance. */
void
bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
unsigned int i, unrolling_factor = 1;
@ -1382,6 +1383,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
decided_to_slp, unrolling_factor);
return (decided_to_slp > 0);
}

View File

@ -1747,7 +1747,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
if (slp_node)
if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
/* Sanity check: make sure that at least one copy of the vectorized stmt
@ -1940,7 +1940,7 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
if (slp_node)
if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@ -2149,7 +2149,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
if (slp_node)
if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@ -2497,7 +2497,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
if (slp_node)
if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@ -2895,7 +2895,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
if (slp_node)
if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
@ -3175,7 +3175,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
if (slp_node)
if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@ -3358,7 +3358,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
if (slp)
if (slp || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@ -3851,7 +3851,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
if (slp)
if (slp || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@ -4457,6 +4457,10 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
/* FORNOW: unsupported in basic block SLP. */
gcc_assert (loop_vinfo);
/* FORNOW: SLP not supported. */
if (STMT_SLP_TYPE (stmt_info))
return false;
gcc_assert (ncopies >= 1);
if (reduc_index && ncopies > 1)
return false; /* FORNOW */
@ -4469,10 +4473,6 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
&& reduc_def))
return false;
/* FORNOW: SLP not supported. */
if (STMT_SLP_TYPE (stmt_info))
return false;
/* FORNOW: not yet supported. */
if (STMT_VINFO_LIVE_P (stmt_info))
{

View File

@ -870,7 +870,7 @@ extern bool vect_transform_slp_perm_load (gimple, VEC (tree, heap) *,
extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
extern bool vect_analyze_slp (loop_vec_info, bb_vec_info);
extern void vect_make_slp_decision (loop_vec_info);
extern bool vect_make_slp_decision (loop_vec_info);
extern void vect_detect_hybrid_slp (loop_vec_info);
extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **,
VEC (tree,heap) **, int);