tree-optimization/100089 - avoid leaving scalar if-converted code around
This avoids leaving scalar if-converted code around for the case of BB vectorizing an if-converted loop body when using the very-cheap cost model. In this case we scan not vectorized scalar stmts in the basic-block vectorized for COND_EXPRs and force the vectorization to be marked as not profitable. The patch also makes sure to always consider all BB vectorization subgraphs together for costing purposes when vectorizing an if-converted loop body. 2021-08-24 Richard Biener <rguenther@suse.de> PR tree-optimization/100089 * tree-vectorizer.h (vect_slp_bb): Rename to ... (vect_slp_if_converted_bb): ... this and get the original loop as new argument. * tree-vectorizer.c (try_vectorize_loop_1): Revert previous fix, pass original loop to vect_slp_if_converted_bb. * tree-vect-slp.c (vect_bb_vectorization_profitable_p): If orig_loop was passed scan the not vectorized stmts for COND_EXPRs and force not profitable if found. (vect_slp_region): Pass down all SLP instances to costing if orig_loop was specified. (vect_slp_bbs): Pass through orig_loop. (vect_slp_bb): Rename to ... (vect_slp_if_converted_bb): ... this and get the original loop as new argument. (vect_slp_function): Adjust.
This commit is contained in:
parent
809330ab84
commit
9216ee6d11
@ -5233,7 +5233,8 @@ li_cost_vec_cmp (const void *a_, const void *b_)
|
||||
|
||||
static bool
|
||||
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
|
||||
vec<slp_instance> slp_instances)
|
||||
vec<slp_instance> slp_instances,
|
||||
loop_p orig_loop)
|
||||
{
|
||||
slp_instance instance;
|
||||
int i;
|
||||
@ -5270,6 +5271,30 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
|
||||
vector_costs.safe_splice (instance->cost_vec);
|
||||
instance->cost_vec.release ();
|
||||
}
|
||||
/* When we're vectorizing an if-converted loop body with the
|
||||
very-cheap cost model make sure we vectorized all if-converted
|
||||
code. */
|
||||
bool force_not_profitable = false;
|
||||
if (orig_loop && flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP)
|
||||
{
|
||||
gcc_assert (bb_vinfo->bbs.length () == 1);
|
||||
for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]);
|
||||
!gsi_end_p (gsi); gsi_next (&gsi))
|
||||
{
|
||||
/* The costing above left us with DCEable vectorized scalar
|
||||
stmts having the visited flag set. */
|
||||
if (gimple_visited_p (gsi_stmt (gsi)))
|
||||
continue;
|
||||
|
||||
if (gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi)))
|
||||
if (gimple_assign_rhs_code (ass) == COND_EXPR)
|
||||
{
|
||||
force_not_profitable = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Unset visited flag. */
|
||||
stmt_info_for_cost *cost;
|
||||
FOR_EACH_VEC_ELT (scalar_costs, i, cost)
|
||||
@ -5394,9 +5419,14 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dump_enabled_p () && force_not_profitable)
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"not profitable because of unprofitable if-converted "
|
||||
"scalar code\n");
|
||||
|
||||
scalar_costs.release ();
|
||||
vector_costs.release ();
|
||||
return true;
|
||||
return !force_not_profitable;
|
||||
}
|
||||
|
||||
/* qsort comparator for lane defs. */
|
||||
@ -5810,7 +5840,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
|
||||
|
||||
static bool
|
||||
vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
|
||||
vec<int> *dataref_groups, unsigned int n_stmts)
|
||||
vec<int> *dataref_groups, unsigned int n_stmts,
|
||||
loop_p orig_loop)
|
||||
{
|
||||
bb_vec_info bb_vinfo;
|
||||
auto_vector_modes vector_modes;
|
||||
@ -5859,7 +5890,9 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
|
||||
vect_location = instance->location ();
|
||||
if (!unlimited_cost_model (NULL)
|
||||
&& !vect_bb_vectorization_profitable_p
|
||||
(bb_vinfo, instance->subgraph_entries))
|
||||
(bb_vinfo,
|
||||
orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo)
|
||||
: instance->subgraph_entries, orig_loop))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
@ -5877,7 +5910,9 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
|
||||
"using SLP\n");
|
||||
vectorized = true;
|
||||
|
||||
vect_schedule_slp (bb_vinfo, instance->subgraph_entries);
|
||||
vect_schedule_slp (bb_vinfo,
|
||||
orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo)
|
||||
: instance->subgraph_entries);
|
||||
|
||||
unsigned HOST_WIDE_INT bytes;
|
||||
if (dump_enabled_p ())
|
||||
@ -5892,6 +5927,11 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
|
||||
"basic block part vectorized using "
|
||||
"variable length vectors\n");
|
||||
}
|
||||
|
||||
/* When we're called from loop vectorization we're considering
|
||||
all subgraphs at once. */
|
||||
if (orig_loop)
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -5959,7 +5999,7 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
|
||||
true if anything in the basic-block was vectorized. */
|
||||
|
||||
static bool
|
||||
vect_slp_bbs (const vec<basic_block> &bbs)
|
||||
vect_slp_bbs (const vec<basic_block> &bbs, loop_p orig_loop)
|
||||
{
|
||||
vec<data_reference_p> datarefs = vNULL;
|
||||
auto_vec<int> dataref_groups;
|
||||
@ -5989,18 +6029,20 @@ vect_slp_bbs (const vec<basic_block> &bbs)
|
||||
++current_group;
|
||||
}
|
||||
|
||||
return vect_slp_region (bbs, datarefs, &dataref_groups, insns);
|
||||
return vect_slp_region (bbs, datarefs, &dataref_groups, insns, orig_loop);
|
||||
}
|
||||
|
||||
/* Main entry for the BB vectorizer. Analyze and transform BB, returns
|
||||
true if anything in the basic-block was vectorized. */
|
||||
/* Special entry for the BB vectorizer. Analyze and transform a single
|
||||
if-converted BB with ORIG_LOOPs body being the not if-converted
|
||||
representation. Returns true if anything in the basic-block was
|
||||
vectorized. */
|
||||
|
||||
bool
|
||||
vect_slp_bb (basic_block bb)
|
||||
vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop)
|
||||
{
|
||||
auto_vec<basic_block> bbs;
|
||||
bbs.safe_push (bb);
|
||||
return vect_slp_bbs (bbs);
|
||||
return vect_slp_bbs (bbs, orig_loop);
|
||||
}
|
||||
|
||||
/* Main entry for the BB vectorizer. Analyze and transform BB, returns
|
||||
@ -6051,7 +6093,7 @@ vect_slp_function (function *fun)
|
||||
|
||||
if (split && !bbs.is_empty ())
|
||||
{
|
||||
r |= vect_slp_bbs (bbs);
|
||||
r |= vect_slp_bbs (bbs, NULL);
|
||||
bbs.truncate (0);
|
||||
bbs.quick_push (bb);
|
||||
}
|
||||
@ -6069,13 +6111,13 @@ vect_slp_function (function *fun)
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"splitting region at control altering "
|
||||
"definition %G", last);
|
||||
r |= vect_slp_bbs (bbs);
|
||||
r |= vect_slp_bbs (bbs, NULL);
|
||||
bbs.truncate (0);
|
||||
}
|
||||
}
|
||||
|
||||
if (!bbs.is_empty ())
|
||||
r |= vect_slp_bbs (bbs);
|
||||
r |= vect_slp_bbs (bbs, NULL);
|
||||
|
||||
free (rpo);
|
||||
|
||||
|
@ -1033,10 +1033,7 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
|
||||
only non-if-converted parts took part in BB vectorization. */
|
||||
if (flag_tree_slp_vectorize != 0
|
||||
&& loop_vectorized_call
|
||||
&& ! loop->inner
|
||||
/* This would purely be a workaround and should be removed
|
||||
once PR100089 is fixed. */
|
||||
&& flag_vect_cost_model != VECT_COST_MODEL_VERY_CHEAP)
|
||||
&& ! loop->inner)
|
||||
{
|
||||
basic_block bb = loop->header;
|
||||
bool require_loop_vectorize = false;
|
||||
@ -1062,12 +1059,17 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
|
||||
gimple_set_uid (stmt, -1);
|
||||
gimple_set_visited (stmt, false);
|
||||
}
|
||||
if (!require_loop_vectorize && vect_slp_bb (bb))
|
||||
if (!require_loop_vectorize)
|
||||
{
|
||||
fold_loop_internal_call (loop_vectorized_call,
|
||||
boolean_true_node);
|
||||
loop_vectorized_call = NULL;
|
||||
ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
|
||||
tree arg = gimple_call_arg (loop_vectorized_call, 1);
|
||||
class loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
|
||||
if (vect_slp_if_converted_bb (bb, scalar_loop))
|
||||
{
|
||||
fold_loop_internal_call (loop_vectorized_call,
|
||||
boolean_true_node);
|
||||
loop_vectorized_call = NULL;
|
||||
ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* If outer loop vectorization fails for LOOP_VECTORIZED guarded
|
||||
|
@ -2087,7 +2087,7 @@ extern void vect_gather_slp_loads (vec_info *);
|
||||
extern void vect_get_slp_defs (slp_tree, vec<tree> *);
|
||||
extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
|
||||
unsigned n = -1U);
|
||||
extern bool vect_slp_bb (basic_block);
|
||||
extern bool vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop);
|
||||
extern bool vect_slp_function (function *);
|
||||
extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
|
||||
extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
|
||||
|
Loading…
x
Reference in New Issue
Block a user