tree-vectorizer.h (struct stmt_info_for_cost): Add where member.
2018-05-16 Richard Biener <rguenther@suse.de> * tree-vectorizer.h (struct stmt_info_for_cost): Add where member. (dump_stmt_cost): Declare. (add_stmt_cost): Dump cost we add. (add_stmt_costs): New function. (vect_model_simple_cost, vect_model_store_cost, vect_model_load_cost): No longer exported. (vect_analyze_stmt): Adjust prototype. (vectorizable_condition): Likewise. (vectorizable_live_operation): Likewise. (vectorizable_reduction): Likewise. (vectorizable_induction): Likewise. * tree-vect-loop.c (vect_analyze_loop_operations): Create local cost vector to pass to vectorizable_ and record afterwards. (vect_model_reduction_cost): Take cost vector argument and adjust. (vect_model_induction_cost): Likewise. (vectorizable_reduction): Likewise. (vectorizable_induction): Likewise. (vectorizable_live_operation): Likewise. * tree-vect-slp.c (vect_create_new_slp_node): Initialize SLP_TREE_NUMBER_OF_VEC_STMTS. (vect_analyze_slp_cost_1): Remove. (vect_analyze_slp_cost): Likewise. (vect_slp_analyze_node_operations): Take visited args and a target cost vector. Avoid processing already visited stmt sets. (vect_slp_analyze_operations): Use a local cost vector to gather costs and register those of non-discarded instances. (vect_bb_vectorization_profitable_p): Use add_stmt_costs. (vect_schedule_slp_instance): Remove copying of SLP_TREE_NUMBER_OF_VEC_STMTS. Instead assert that it is not zero. * tree-vect-stmts.c (record_stmt_cost): Remove path directly adding cost. Record cost entry location. (vect_prologue_cost_for_slp_op): Function to compute cost of a constant or invariant generated for SLP vect in the prologue, split out from vect_analyze_slp_cost_1. (vect_model_simple_cost): Make static. Adjust for SLP costing. (vect_model_promotion_demotion_cost): Likewise. (vect_model_store_cost): Likewise, make static. (vect_model_load_cost): Likewise. (vectorizable_bswap): Add cost vector arg and adjust. (vectorizable_call): Likewise. (vectorizable_simd_clone_call): Likewise. 
(vectorizable_conversion): Likewise. (vectorizable_assignment): Likewise. (vectorizable_shift): Likewise. (vectorizable_operation): Likewise. (vectorizable_store): Likewise. (vectorizable_load): Likewise. (vectorizable_condition): Likewise. (vectorizable_comparison): Likewise. (can_vectorize_live_stmts): Likewise. (vect_analyze_stmt): Likewise. (vect_transform_stmt): Adjust calls to vectorizable_*. * tree-vectorizer.c: Include gimple-pretty-print.h. (dump_stmt_cost): New function. From-SVN: r260289
This commit is contained in:
parent
311eb8168e
commit
68435eb293
@ -1,3 +1,61 @@
|
||||
2018-05-16 Richard Biener <rguenther@suse.de>
|
||||
|
||||
* tree-vectorizer.h (struct stmt_info_for_cost): Add where member.
|
||||
(dump_stmt_cost): Declare.
|
||||
(add_stmt_cost): Dump cost we add.
|
||||
(add_stmt_costs): New function.
|
||||
(vect_model_simple_cost, vect_model_store_cost, vect_model_load_cost):
|
||||
No longer exported.
|
||||
(vect_analyze_stmt): Adjust prototype.
|
||||
(vectorizable_condition): Likewise.
|
||||
(vectorizable_live_operation): Likewise.
|
||||
(vectorizable_reduction): Likewise.
|
||||
(vectorizable_induction): Likewise.
|
||||
* tree-vect-loop.c (vect_analyze_loop_operations): Create local
|
||||
cost vector to pass to vectorizable_ and record afterwards.
|
||||
(vect_model_reduction_cost): Take cost vector argument and adjust.
|
||||
(vect_model_induction_cost): Likewise.
|
||||
(vectorizable_reduction): Likewise.
|
||||
(vectorizable_induction): Likewise.
|
||||
(vectorizable_live_operation): Likewise.
|
||||
* tree-vect-slp.c (vect_create_new_slp_node): Initialize
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS.
|
||||
(vect_analyze_slp_cost_1): Remove.
|
||||
(vect_analyze_slp_cost): Likewise.
|
||||
(vect_slp_analyze_node_operations): Take visited args and
|
||||
a target cost vector. Avoid processing already visited stmt sets.
|
||||
(vect_slp_analyze_operations): Use a local cost vector to gather
|
||||
costs and register those of non-discarded instances.
|
||||
(vect_bb_vectorization_profitable_p): Use add_stmt_costs.
|
||||
(vect_schedule_slp_instance): Remove copying of
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS. Instead assert that it is not
|
||||
zero.
|
||||
* tree-vect-stmts.c (record_stmt_cost): Remove path directly
|
||||
adding cost. Record cost entry location.
|
||||
(vect_prologue_cost_for_slp_op): Function to compute cost of
|
||||
a constant or invariant generated for SLP vect in the prologue,
|
||||
split out from vect_analyze_slp_cost_1.
|
||||
(vect_model_simple_cost): Make static. Adjust for SLP costing.
|
||||
(vect_model_promotion_demotion_cost): Likewise.
|
||||
(vect_model_store_cost): Likewise, make static.
|
||||
(vect_model_load_cost): Likewise.
|
||||
(vectorizable_bswap): Add cost vector arg and adjust.
|
||||
(vectorizable_call): Likewise.
|
||||
(vectorizable_simd_clone_call): Likewise.
|
||||
(vectorizable_conversion): Likewise.
|
||||
(vectorizable_assignment): Likewise.
|
||||
(vectorizable_shift): Likewise.
|
||||
(vectorizable_operation): Likewise.
|
||||
(vectorizable_store): Likewise.
|
||||
(vectorizable_load): Likewise.
|
||||
(vectorizable_condition): Likewise.
|
||||
(vectorizable_comparison): Likewise.
|
||||
(can_vectorize_live_stmts): Likewise.
|
||||
(vect_analyze_stmt): Likewise.
|
||||
(vect_transform_stmt): Adjust calls to vectorizable_*.
|
||||
* tree-vectorizer.c: Include gimple-pretty-print.h.
|
||||
(dump_stmt_cost): New function.
|
||||
|
||||
2018-05-16 Richard Biener <rguenther@suse.de>
|
||||
|
||||
* params.def (PARAM_DSE_MAX_ALIAS_QUERIES_PER_STORE): New param.
|
||||
|
@ -1528,6 +1528,9 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"=== vect_analyze_loop_operations ===\n");
|
||||
|
||||
stmt_vector_for_cost cost_vec;
|
||||
cost_vec.create (2);
|
||||
|
||||
for (i = 0; i < nbbs; i++)
|
||||
{
|
||||
basic_block bb = bbs[i];
|
||||
@ -1613,18 +1616,20 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
|
||||
need_to_vectorize = true;
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
|
||||
&& ! PURE_SLP_STMT (stmt_info))
|
||||
ok = vectorizable_induction (phi, NULL, NULL, NULL);
|
||||
ok = vectorizable_induction (phi, NULL, NULL, NULL, &cost_vec);
|
||||
else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|
||||
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
|
||||
&& ! PURE_SLP_STMT (stmt_info))
|
||||
ok = vectorizable_reduction (phi, NULL, NULL, NULL, NULL);
|
||||
ok = vectorizable_reduction (phi, NULL, NULL, NULL, NULL,
|
||||
&cost_vec);
|
||||
}
|
||||
|
||||
/* SLP PHIs are tested by vect_slp_analyze_node_operations. */
|
||||
if (ok
|
||||
&& STMT_VINFO_LIVE_P (stmt_info)
|
||||
&& !PURE_SLP_STMT (stmt_info))
|
||||
ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL);
|
||||
ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL,
|
||||
&cost_vec);
|
||||
|
||||
if (!ok)
|
||||
{
|
||||
@ -1644,11 +1649,15 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
|
||||
{
|
||||
gimple *stmt = gsi_stmt (si);
|
||||
if (!gimple_clobber_p (stmt)
|
||||
&& !vect_analyze_stmt (stmt, &need_to_vectorize, NULL, NULL))
|
||||
&& !vect_analyze_stmt (stmt, &need_to_vectorize, NULL, NULL,
|
||||
&cost_vec))
|
||||
return false;
|
||||
}
|
||||
} /* bbs */
|
||||
|
||||
add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec);
|
||||
cost_vec.release ();
|
||||
|
||||
/* All operations in the loop are either irrelevant (deal with loop
|
||||
control, or dead), or only used outside the loop and can be moved
|
||||
out of the loop (e.g. invariants, inductions). The loop can be
|
||||
@ -3840,7 +3849,7 @@ have_whole_vector_shift (machine_mode mode)
|
||||
|
||||
static void
|
||||
vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
|
||||
int ncopies)
|
||||
int ncopies, stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
int prologue_cost = 0, epilogue_cost = 0, inside_cost;
|
||||
enum tree_code code;
|
||||
@ -3850,15 +3859,9 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
|
||||
machine_mode mode;
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
struct loop *loop = NULL;
|
||||
void *target_cost_data;
|
||||
|
||||
if (loop_vinfo)
|
||||
{
|
||||
loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
|
||||
}
|
||||
else
|
||||
target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
|
||||
loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
|
||||
/* Condition reductions generate two reductions in the loop. */
|
||||
vect_reduction_type reduction_type
|
||||
@ -3883,18 +3886,18 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
|
||||
|
||||
if (reduction_type == EXTRACT_LAST_REDUCTION || reduc_fn != IFN_LAST)
|
||||
/* Count one reduction-like operation per vector. */
|
||||
inside_cost = add_stmt_cost (target_cost_data, ncopies, vec_to_scalar,
|
||||
stmt_info, 0, vect_body);
|
||||
inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar,
|
||||
stmt_info, 0, vect_body);
|
||||
else
|
||||
{
|
||||
/* Use NELEMENTS extracts and NELEMENTS scalar ops. */
|
||||
unsigned int nelements = ncopies * vect_nunits_for_cost (vectype);
|
||||
inside_cost = add_stmt_cost (target_cost_data, nelements,
|
||||
vec_to_scalar, stmt_info, 0,
|
||||
vect_body);
|
||||
inside_cost += add_stmt_cost (target_cost_data, nelements,
|
||||
scalar_stmt, stmt_info, 0,
|
||||
vect_body);
|
||||
inside_cost = record_stmt_cost (cost_vec, nelements,
|
||||
vec_to_scalar, stmt_info, 0,
|
||||
vect_body);
|
||||
inside_cost += record_stmt_cost (cost_vec, nelements,
|
||||
scalar_stmt, stmt_info, 0,
|
||||
vect_body);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -3904,13 +3907,13 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
|
||||
initial result of the data reduction, initial value of the index
|
||||
reduction. */
|
||||
int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1;
|
||||
prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
|
||||
scalar_to_vec, stmt_info, 0,
|
||||
vect_prologue);
|
||||
prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
|
||||
scalar_to_vec, stmt_info, 0,
|
||||
vect_prologue);
|
||||
|
||||
/* Cost of reduction op inside loop. */
|
||||
inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
|
||||
stmt_info, 0, vect_body);
|
||||
inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
|
||||
stmt_info, 0, vect_body);
|
||||
}
|
||||
|
||||
/* Determine cost of epilogue code.
|
||||
@ -3925,41 +3928,41 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
|
||||
if (reduction_type == COND_REDUCTION)
|
||||
{
|
||||
/* An EQ stmt and a COND_EXPR stmt. */
|
||||
epilogue_cost += add_stmt_cost (target_cost_data, 2,
|
||||
vector_stmt, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
epilogue_cost += record_stmt_cost (cost_vec, 2,
|
||||
vector_stmt, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
/* Reduction of the max index and a reduction of the found
|
||||
values. */
|
||||
epilogue_cost += add_stmt_cost (target_cost_data, 2,
|
||||
vec_to_scalar, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
epilogue_cost += record_stmt_cost (cost_vec, 2,
|
||||
vec_to_scalar, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
/* A broadcast of the max value. */
|
||||
epilogue_cost += add_stmt_cost (target_cost_data, 1,
|
||||
scalar_to_vec, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
epilogue_cost += record_stmt_cost (cost_vec, 1,
|
||||
scalar_to_vec, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
}
|
||||
else
|
||||
{
|
||||
epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
|
||||
stmt_info, 0, vect_epilogue);
|
||||
epilogue_cost += add_stmt_cost (target_cost_data, 1,
|
||||
vec_to_scalar, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
|
||||
stmt_info, 0, vect_epilogue);
|
||||
epilogue_cost += record_stmt_cost (cost_vec, 1,
|
||||
vec_to_scalar, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
}
|
||||
}
|
||||
else if (reduction_type == COND_REDUCTION)
|
||||
{
|
||||
unsigned estimated_nunits = vect_nunits_for_cost (vectype);
|
||||
/* Extraction of scalar elements. */
|
||||
epilogue_cost += add_stmt_cost (target_cost_data,
|
||||
2 * estimated_nunits,
|
||||
vec_to_scalar, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
epilogue_cost += record_stmt_cost (cost_vec,
|
||||
2 * estimated_nunits,
|
||||
vec_to_scalar, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
/* Scalar max reductions via COND_EXPR / MAX_EXPR. */
|
||||
epilogue_cost += add_stmt_cost (target_cost_data,
|
||||
2 * estimated_nunits - 3,
|
||||
scalar_stmt, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
epilogue_cost += record_stmt_cost (cost_vec,
|
||||
2 * estimated_nunits - 3,
|
||||
scalar_stmt, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
}
|
||||
else if (reduction_type == EXTRACT_LAST_REDUCTION
|
||||
|| reduction_type == FOLD_LEFT_REDUCTION)
|
||||
@ -3986,21 +3989,21 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
|
||||
{
|
||||
/* Final reduction via vector shifts and the reduction operator.
|
||||
Also requires scalar extract. */
|
||||
epilogue_cost += add_stmt_cost (target_cost_data,
|
||||
exact_log2 (nelements) * 2,
|
||||
vector_stmt, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
epilogue_cost += add_stmt_cost (target_cost_data, 1,
|
||||
vec_to_scalar, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
epilogue_cost += record_stmt_cost (cost_vec,
|
||||
exact_log2 (nelements) * 2,
|
||||
vector_stmt, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
epilogue_cost += record_stmt_cost (cost_vec, 1,
|
||||
vec_to_scalar, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
}
|
||||
else
|
||||
/* Use extracts and reduction op for final reduction. For N
|
||||
elements, we have N extracts and N-1 reduction ops. */
|
||||
epilogue_cost += add_stmt_cost (target_cost_data,
|
||||
nelements + nelements - 1,
|
||||
vector_stmt, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
epilogue_cost += record_stmt_cost (cost_vec,
|
||||
nelements + nelements - 1,
|
||||
vector_stmt, stmt_info, 0,
|
||||
vect_epilogue);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4017,22 +4020,21 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
|
||||
Models cost for induction operations. */
|
||||
|
||||
static void
|
||||
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
|
||||
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
|
||||
unsigned inside_cost, prologue_cost;
|
||||
|
||||
if (PURE_SLP_STMT (stmt_info))
|
||||
return;
|
||||
|
||||
/* loop cost for vec_loop. */
|
||||
inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
|
||||
stmt_info, 0, vect_body);
|
||||
inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
|
||||
stmt_info, 0, vect_body);
|
||||
|
||||
/* prologue cost for vec_init and vec_step. */
|
||||
prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
|
||||
stmt_info, 0, vect_prologue);
|
||||
prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
|
||||
stmt_info, 0, vect_prologue);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
@ -6124,7 +6126,8 @@ is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
|
||||
bool
|
||||
vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
gimple **vec_stmt, slp_tree slp_node,
|
||||
slp_instance slp_node_instance)
|
||||
slp_instance slp_node_instance,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
tree vec_dest;
|
||||
tree scalar_dest;
|
||||
@ -6633,7 +6636,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
/* Only call during the analysis stage, otherwise we'll lose
|
||||
STMT_VINFO_TYPE. */
|
||||
if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
|
||||
ops[reduc_index], 0, NULL))
|
||||
ops[reduc_index], 0, NULL,
|
||||
cost_vec))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
@ -7055,7 +7059,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
if (!vec_stmt) /* transformation not required. */
|
||||
{
|
||||
if (first_p)
|
||||
vect_model_reduction_cost (stmt_info, reduc_fn, ncopies);
|
||||
vect_model_reduction_cost (stmt_info, reduc_fn, ncopies, cost_vec);
|
||||
if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
|
||||
{
|
||||
if (reduction_type != FOLD_LEFT_REDUCTION
|
||||
@ -7109,7 +7113,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
{
|
||||
gcc_assert (!slp_node);
|
||||
return vectorizable_condition (stmt, gsi, vec_stmt,
|
||||
NULL, reduc_index, NULL);
|
||||
NULL, reduc_index, NULL, NULL);
|
||||
}
|
||||
|
||||
/* Create the destination vector */
|
||||
@ -7142,7 +7146,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
gcc_assert (!slp_node);
|
||||
vectorizable_condition (stmt, gsi, vec_stmt,
|
||||
PHI_RESULT (phis[0]),
|
||||
reduc_index, NULL);
|
||||
reduc_index, NULL, NULL);
|
||||
/* Multiple types are not supported for condition. */
|
||||
break;
|
||||
}
|
||||
@ -7327,7 +7331,8 @@ vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code)
|
||||
bool
|
||||
vectorizable_induction (gimple *phi,
|
||||
gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
|
||||
gimple **vec_stmt, slp_tree slp_node)
|
||||
gimple **vec_stmt, slp_tree slp_node,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (phi);
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
@ -7448,7 +7453,7 @@ vectorizable_induction (gimple *phi,
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"=== vectorizable_induction ===\n");
|
||||
vect_model_induction_cost (stmt_info, ncopies);
|
||||
vect_model_induction_cost (stmt_info, ncopies, cost_vec);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -7882,7 +7887,8 @@ bool
|
||||
vectorizable_live_operation (gimple *stmt,
|
||||
gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
|
||||
slp_tree slp_node, int slp_index,
|
||||
gimple **vec_stmt)
|
||||
gimple **vec_stmt,
|
||||
stmt_vector_for_cost *)
|
||||
{
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
|
@ -112,6 +112,7 @@ vect_create_new_slp_node (vec<gimple *> scalar_stmts)
|
||||
node = XNEW (struct _slp_tree);
|
||||
SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
|
||||
SLP_TREE_VEC_STMTS (node).create (0);
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
|
||||
SLP_TREE_CHILDREN (node).create (nops);
|
||||
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
|
||||
SLP_TREE_TWO_OPERATORS (node) = false;
|
||||
@ -1084,6 +1085,10 @@ bst_traits::equal (value_type existing, value_type candidate)
|
||||
typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
|
||||
static scalar_stmts_set_t *bst_fail;
|
||||
|
||||
typedef hash_map <vec <gimple *>, slp_tree,
|
||||
simple_hashmap_traits <bst_traits, slp_tree> >
|
||||
scalar_stmts_to_slp_tree_map_t;
|
||||
|
||||
static slp_tree
|
||||
vect_build_slp_tree_2 (vec_info *vinfo,
|
||||
vec<gimple *> stmts, unsigned int group_size,
|
||||
@ -1819,285 +1824,6 @@ vect_find_last_scalar_stmt_in_slp (slp_tree node)
|
||||
return last;
|
||||
}
|
||||
|
||||
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
|
||||
|
||||
static void
|
||||
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
|
||||
stmt_vector_for_cost *prologue_cost_vec,
|
||||
stmt_vector_for_cost *body_cost_vec,
|
||||
unsigned ncopies_for_cost,
|
||||
scalar_stmts_set_t* visited)
|
||||
{
|
||||
unsigned i, j;
|
||||
slp_tree child;
|
||||
gimple *stmt;
|
||||
stmt_vec_info stmt_info;
|
||||
tree lhs;
|
||||
|
||||
/* If we already costed the exact same set of scalar stmts we're done.
|
||||
We share the generated vector stmts for those. */
|
||||
if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
|
||||
return;
|
||||
|
||||
visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());
|
||||
|
||||
/* Recurse down the SLP tree. */
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
|
||||
vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
|
||||
body_cost_vec, ncopies_for_cost, visited);
|
||||
|
||||
/* Look at the first scalar stmt to determine the cost. */
|
||||
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
|
||||
stmt_info = vinfo_for_stmt (stmt);
|
||||
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
{
|
||||
vect_memory_access_type memory_access_type
|
||||
= (STMT_VINFO_STRIDED_P (stmt_info)
|
||||
? VMAT_STRIDED_SLP
|
||||
: VMAT_CONTIGUOUS);
|
||||
if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
|
||||
vect_model_store_cost (stmt_info, ncopies_for_cost,
|
||||
memory_access_type, VLS_STORE,
|
||||
node, prologue_cost_vec, body_cost_vec);
|
||||
else
|
||||
{
|
||||
gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
|
||||
if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
|
||||
{
|
||||
/* If the load is permuted then the alignment is determined by
|
||||
the first group element not by the first scalar stmt DR. */
|
||||
stmt = GROUP_FIRST_ELEMENT (stmt_info);
|
||||
stmt_info = vinfo_for_stmt (stmt);
|
||||
/* Record the cost for the permutation. */
|
||||
unsigned n_perms;
|
||||
vect_transform_slp_perm_load (node, vNULL, NULL,
|
||||
ncopies_for_cost, instance, true,
|
||||
&n_perms);
|
||||
record_stmt_cost (body_cost_vec, n_perms, vec_perm,
|
||||
stmt_info, 0, vect_body);
|
||||
unsigned assumed_nunits
|
||||
= vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
|
||||
/* And adjust the number of loads performed. This handles
|
||||
redundancies as well as loads that are later dead. */
|
||||
auto_sbitmap perm (GROUP_SIZE (stmt_info));
|
||||
bitmap_clear (perm);
|
||||
for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
|
||||
bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
|
||||
ncopies_for_cost = 0;
|
||||
bool load_seen = false;
|
||||
for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
|
||||
{
|
||||
if (i % assumed_nunits == 0)
|
||||
{
|
||||
if (load_seen)
|
||||
ncopies_for_cost++;
|
||||
load_seen = false;
|
||||
}
|
||||
if (bitmap_bit_p (perm, i))
|
||||
load_seen = true;
|
||||
}
|
||||
if (load_seen)
|
||||
ncopies_for_cost++;
|
||||
gcc_assert (ncopies_for_cost
|
||||
<= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
|
||||
+ assumed_nunits - 1) / assumed_nunits);
|
||||
poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
|
||||
ncopies_for_cost *= estimated_poly_value (uf);
|
||||
}
|
||||
/* Record the cost for the vector loads. */
|
||||
vect_model_load_cost (stmt_info, ncopies_for_cost,
|
||||
memory_access_type, node, prologue_cost_vec,
|
||||
body_cost_vec);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
|
||||
{
|
||||
/* ncopies_for_cost is the number of IVs we generate. */
|
||||
record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
|
||||
stmt_info, 0, vect_body);
|
||||
|
||||
/* Prologue cost for the initial values and step vector. */
|
||||
record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
|
||||
CONSTANT_CLASS_P
|
||||
(STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
|
||||
(stmt_info))
|
||||
? vector_load : vec_construct,
|
||||
stmt_info, 0, vect_prologue);
|
||||
record_stmt_cost (prologue_cost_vec, 1,
|
||||
CONSTANT_CLASS_P
|
||||
(STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
|
||||
? vector_load : vec_construct,
|
||||
stmt_info, 0, vect_prologue);
|
||||
|
||||
/* ??? No easy way to get at the actual number of vector stmts
|
||||
to be generated and thus the derived IVs. */
|
||||
}
|
||||
else
|
||||
{
|
||||
record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
|
||||
stmt_info, 0, vect_body);
|
||||
if (SLP_TREE_TWO_OPERATORS (node))
|
||||
{
|
||||
record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
|
||||
stmt_info, 0, vect_body);
|
||||
record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
|
||||
stmt_info, 0, vect_body);
|
||||
}
|
||||
}
|
||||
|
||||
/* Push SLP node def-type to stmts. */
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
|
||||
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
|
||||
|
||||
/* Scan operands and account for prologue cost of constants/externals.
|
||||
??? This over-estimates cost for multiple uses and should be
|
||||
re-engineered. */
|
||||
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
|
||||
lhs = gimple_get_lhs (stmt);
|
||||
for (i = 0; i < gimple_num_ops (stmt); ++i)
|
||||
{
|
||||
tree op = gimple_op (stmt, i);
|
||||
gimple *def_stmt;
|
||||
enum vect_def_type dt;
|
||||
if (!op || op == lhs)
|
||||
continue;
|
||||
if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)
|
||||
&& (dt == vect_constant_def || dt == vect_external_def))
|
||||
{
|
||||
/* Without looking at the actual initializer a vector of
|
||||
constants can be implemented as load from the constant pool.
|
||||
When all elements are the same we can use a splat. */
|
||||
tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
|
||||
unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
|
||||
unsigned num_vects_to_check;
|
||||
unsigned HOST_WIDE_INT const_nunits;
|
||||
unsigned nelt_limit;
|
||||
if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
|
||||
&& ! multiple_p (const_nunits, group_size))
|
||||
{
|
||||
num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
|
||||
nelt_limit = const_nunits;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If either the vector has variable length or the vectors
|
||||
are composed of repeated whole groups we only need to
|
||||
cost construction once. All vectors will be the same. */
|
||||
num_vects_to_check = 1;
|
||||
nelt_limit = group_size;
|
||||
}
|
||||
tree elt = NULL_TREE;
|
||||
unsigned nelt = 0;
|
||||
for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
|
||||
{
|
||||
unsigned si = j % group_size;
|
||||
if (nelt == 0)
|
||||
elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], i);
|
||||
/* ??? We're just tracking whether all operands of a single
|
||||
vector initializer are the same, ideally we'd check if
|
||||
we emitted the same one already. */
|
||||
else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], i))
|
||||
elt = NULL_TREE;
|
||||
nelt++;
|
||||
if (nelt == nelt_limit)
|
||||
{
|
||||
/* ??? We need to pass down stmt_info for a vector type
|
||||
even if it points to the wrong stmt. */
|
||||
record_stmt_cost (prologue_cost_vec, 1,
|
||||
dt == vect_external_def
|
||||
? (elt ? scalar_to_vec : vec_construct)
|
||||
: vector_load,
|
||||
stmt_info, 0, vect_prologue);
|
||||
nelt = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Restore stmt def-types. */
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
|
||||
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
|
||||
}
|
||||
|
||||
/* Compute the cost for the SLP instance INSTANCE. */
|
||||
|
||||
static void
|
||||
vect_analyze_slp_cost (slp_instance instance, void *data, scalar_stmts_set_t *visited)
|
||||
{
|
||||
stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
|
||||
unsigned ncopies_for_cost;
|
||||
stmt_info_for_cost *si;
|
||||
unsigned i;
|
||||
|
||||
/* Calculate the number of vector stmts to create based on the unrolling
|
||||
factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
|
||||
GROUP_SIZE / NUNITS otherwise). */
|
||||
unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
|
||||
slp_tree node = SLP_INSTANCE_TREE (instance);
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
|
||||
/* Get the estimated vectorization factor, which is always one for
|
||||
basic-block vectorization. */
|
||||
unsigned int assumed_vf;
|
||||
if (STMT_VINFO_LOOP_VINFO (stmt_info))
|
||||
assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
|
||||
else
|
||||
assumed_vf = 1;
|
||||
/* For reductions look at a reduction operand in case the reduction
|
||||
operation is widening like DOT_PROD or SAD. */
|
||||
tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
|
||||
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
{
|
||||
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
|
||||
switch (gimple_assign_rhs_code (stmt))
|
||||
{
|
||||
case DOT_PROD_EXPR:
|
||||
case SAD_EXPR:
|
||||
vectype_for_cost = get_vectype_for_scalar_type
|
||||
(TREE_TYPE (gimple_assign_rhs1 (stmt)));
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
}
|
||||
unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
|
||||
ncopies_for_cost = (least_common_multiple (assumed_nunits,
|
||||
group_size * assumed_vf)
|
||||
/ assumed_nunits);
|
||||
|
||||
prologue_cost_vec.create (10);
|
||||
body_cost_vec.create (10);
|
||||
vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
|
||||
&prologue_cost_vec, &body_cost_vec,
|
||||
ncopies_for_cost, visited);
|
||||
|
||||
/* Record the prologue costs, which were delayed until we were
|
||||
sure that SLP was successful. */
|
||||
FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
|
||||
{
|
||||
struct _stmt_vec_info *stmt_info
|
||||
= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
|
||||
(void) add_stmt_cost (data, si->count, si->kind, stmt_info,
|
||||
si->misalign, vect_prologue);
|
||||
}
|
||||
|
||||
/* Record the instance's instructions in the target cost model. */
|
||||
FOR_EACH_VEC_ELT (body_cost_vec, i, si)
|
||||
{
|
||||
struct _stmt_vec_info *stmt_info
|
||||
= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
|
||||
(void) add_stmt_cost (data, si->count, si->kind, stmt_info,
|
||||
si->misalign, vect_body);
|
||||
}
|
||||
|
||||
prologue_cost_vec.release ();
|
||||
body_cost_vec.release ();
|
||||
}
|
||||
|
||||
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
|
||||
one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
|
||||
the first GROUP1_SIZE stmts, since stores are consecutive), the second
|
||||
@ -2758,7 +2484,10 @@ _bb_vec_info::~_bb_vec_info ()
|
||||
|
||||
static bool
|
||||
vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
|
||||
slp_instance node_instance)
|
||||
slp_instance node_instance,
|
||||
scalar_stmts_to_slp_tree_map_t *visited,
|
||||
scalar_stmts_to_slp_tree_map_t *lvisited,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
bool dummy;
|
||||
int i, j;
|
||||
@ -2768,8 +2497,25 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
|
||||
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
|
||||
return true;
|
||||
|
||||
/* If we already analyzed the exact same set of scalar stmts we're done.
|
||||
We share the generated vector stmts for those. */
|
||||
slp_tree *leader;
|
||||
if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
|
||||
|| (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
|
||||
{
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS (node)
|
||||
= SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* The SLP graph is acyclic so not caching whether we failed or succeeded
|
||||
doesn't result in any issue since we throw away the lvisited set
|
||||
when we fail. */
|
||||
lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
|
||||
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
|
||||
if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
|
||||
visited, lvisited, cost_vec))
|
||||
return false;
|
||||
|
||||
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
|
||||
@ -2831,7 +2577,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
|
||||
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
|
||||
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
|
||||
= SLP_TREE_DEF_TYPE (child);
|
||||
bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
|
||||
bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
|
||||
/* Restore def-types. */
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
||||
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
|
||||
@ -2857,11 +2603,17 @@ vect_slp_analyze_operations (vec_info *vinfo)
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"=== vect_slp_analyze_operations ===\n");
|
||||
|
||||
scalar_stmts_to_slp_tree_map_t *visited
|
||||
= new scalar_stmts_to_slp_tree_map_t ();
|
||||
for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
|
||||
{
|
||||
scalar_stmts_to_slp_tree_map_t lvisited;
|
||||
stmt_vector_for_cost cost_vec;
|
||||
cost_vec.create (2);
|
||||
if (!vect_slp_analyze_node_operations (vinfo,
|
||||
SLP_INSTANCE_TREE (instance),
|
||||
instance))
|
||||
instance, visited, &lvisited,
|
||||
&cost_vec))
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"removing SLP instance operations starting from: ");
|
||||
@ -2870,19 +2622,19 @@ vect_slp_analyze_operations (vec_info *vinfo)
|
||||
(SLP_INSTANCE_TREE (instance))[0], 0);
|
||||
vect_free_slp_instance (instance);
|
||||
vinfo->slp_instances.ordered_remove (i);
|
||||
cost_vec.release ();
|
||||
}
|
||||
else
|
||||
i++;
|
||||
{
|
||||
for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
|
||||
x != lvisited.end(); ++x)
|
||||
visited->put ((*x).first.copy (), (*x).second);
|
||||
i++;
|
||||
|
||||
add_stmt_costs (vinfo->target_cost_data, &cost_vec);
|
||||
cost_vec.release ();
|
||||
}
|
||||
}
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"=== vect_analyze_slp_cost ===\n");
|
||||
|
||||
/* Compute the costs of the SLP instances. */
|
||||
scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
|
||||
for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
|
||||
vect_analyze_slp_cost (instance, vinfo->target_cost_data, visited);
|
||||
delete visited;
|
||||
|
||||
return !vinfo->slp_instances.is_empty ();
|
||||
@ -2989,15 +2741,7 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
|
||||
&life, &scalar_costs);
|
||||
}
|
||||
void *target_cost_data = init_cost (NULL);
|
||||
stmt_info_for_cost *si;
|
||||
FOR_EACH_VEC_ELT (scalar_costs, i, si)
|
||||
{
|
||||
struct _stmt_vec_info *stmt_info
|
||||
= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
|
||||
(void) add_stmt_cost (target_cost_data, si->count,
|
||||
si->kind, stmt_info, si->misalign,
|
||||
vect_body);
|
||||
}
|
||||
add_stmt_costs (target_cost_data, &scalar_costs);
|
||||
scalar_costs.release ();
|
||||
unsigned dummy;
|
||||
finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
|
||||
@ -4060,10 +3804,6 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
||||
return true;
|
||||
}
|
||||
|
||||
typedef hash_map <vec <gimple *>, slp_tree,
|
||||
simple_hashmap_traits <bst_traits, slp_tree> >
|
||||
scalar_stmts_to_slp_tree_map_t;
|
||||
|
||||
/* Vectorize SLP instance tree in postorder. */
|
||||
|
||||
static bool
|
||||
@ -4087,8 +3827,6 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
|
||||
if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
|
||||
{
|
||||
SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS (node)
|
||||
= SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -4110,6 +3848,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
|
||||
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
group_size = SLP_INSTANCE_GROUP_SIZE (instance);
|
||||
|
||||
gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
|
||||
if (!SLP_TREE_VEC_STMTS (node).exists ())
|
||||
SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
|
||||
|
||||
|
@ -97,19 +97,15 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
|
||||
if ((kind == vector_store || kind == unaligned_store)
|
||||
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info))
|
||||
kind = vector_scatter_store;
|
||||
if (body_cost_vec)
|
||||
{
|
||||
tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
|
||||
stmt_info_for_cost si = { count, kind,
|
||||
stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
|
||||
misalign };
|
||||
body_cost_vec->safe_push (si);
|
||||
return (unsigned)
|
||||
(builtin_vectorization_cost (kind, vectype, misalign) * count);
|
||||
}
|
||||
else
|
||||
return add_stmt_cost (stmt_info->vinfo->target_cost_data,
|
||||
count, kind, stmt_info, misalign, where);
|
||||
|
||||
stmt_info_for_cost si = { count, kind, where,
|
||||
stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
|
||||
misalign };
|
||||
body_cost_vec->safe_push (si);
|
||||
|
||||
tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
|
||||
return (unsigned)
|
||||
(builtin_vectorization_cost (kind, vectype, misalign) * count);
|
||||
}
|
||||
|
||||
/* Return a variable of type ELEM_TYPE[NELEMS]. */
|
||||
@ -819,6 +815,69 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Compute the prologue cost of materializing operand OPNO of the SLP
   node NODE as vector(s), for an operand that is invariant or constant.

   NODE supplies the scalar stmts whose operand OPNO is inspected.
   STMT_INFO is handed unchanged to record_stmt_cost for every entry.
   DT is the operand's def type: vect_external_def operands are costed
   as scalar_to_vec (all elements of a vector identical) or vec_construct,
   any other def type is costed as vector_load.
   COST_VEC receives the recorded entries, all tagged vect_prologue.

   Returns the sum of the values returned by record_stmt_cost.  */
|
||||
|
||||
static unsigned
|
||||
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
|
||||
unsigned opno, enum vect_def_type dt,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
/* The first scalar stmt's operand OPNO determines the vector type
   used below.  */
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
|
||||
tree op = gimple_op (stmt, opno);
|
||||
unsigned prologue_cost = 0;
|
||||
|
||||
/* Without looking at the actual initializer a vector of
|
||||
constants can be implemented as load from the constant pool.
|
||||
When all elements are the same we can use a splat. */
|
||||
tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
|
||||
unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
|
||||
unsigned num_vects_to_check;
|
||||
unsigned HOST_WIDE_INT const_nunits;
|
||||
unsigned nelt_limit;
|
||||
/* Constant-length vector that fails the multiple_p test against the
   group size: examine every vector stmt of NODE, one full vector
   (const_nunits elements) at a time.  */
if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
|
||||
&& ! multiple_p (const_nunits, group_size))
|
||||
{
|
||||
num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
|
||||
nelt_limit = const_nunits;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If either the vector has variable length or the vectors
|
||||
are composed of repeated whole groups we only need to
|
||||
cost construction once. All vectors will be the same. */
|
||||
num_vects_to_check = 1;
|
||||
nelt_limit = group_size;
|
||||
}
|
||||
/* ELT stays non-NULL only while every operand seen so far for the
   current vector is the same tree; NELT counts the operands gathered
   for the current vector.  */
tree elt = NULL_TREE;
|
||||
unsigned nelt = 0;
|
||||
/* Walk the scalar stmts cyclically (index J modulo GROUP_SIZE),
   NELT_LIMIT operands per vector.  */
for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
|
||||
{
|
||||
unsigned si = j % group_size;
|
||||
/* First operand of a new vector: remember it for comparison.  */
if (nelt == 0)
|
||||
elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
|
||||
/* ??? We're just tracking whether all operands of a single
|
||||
vector initializer are the same, ideally we'd check if
|
||||
we emitted the same one already. */
|
||||
else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si],
|
||||
opno))
|
||||
elt = NULL_TREE;
|
||||
nelt++;
|
||||
/* One vector's worth of operands collected: record a single cost
   entry for it and start gathering the next vector.  */
if (nelt == nelt_limit)
|
||||
{
|
||||
/* ??? We need to pass down stmt_info for a vector type
|
||||
even if it points to the wrong stmt. */
|
||||
prologue_cost += record_stmt_cost
|
||||
(cost_vec, 1,
|
||||
dt == vect_external_def
|
||||
? (elt ? scalar_to_vec : vec_construct)
|
||||
: vector_load,
|
||||
stmt_info, 0, vect_prologue);
|
||||
nelt = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return prologue_cost;
|
||||
}
|
||||
|
||||
/* Function vect_model_simple_cost.
|
||||
|
||||
@ -826,30 +885,61 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
||||
single op. Right now, this does not account for multiple insns that could
|
||||
be generated for the single vector op. We will handle that shortly. */
|
||||
|
||||
void
|
||||
static void
|
||||
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
enum vect_def_type *dt,
|
||||
int ndts,
|
||||
stmt_vector_for_cost *prologue_cost_vec,
|
||||
stmt_vector_for_cost *body_cost_vec)
|
||||
slp_tree node,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
int i;
|
||||
int inside_cost = 0, prologue_cost = 0;
|
||||
|
||||
/* The SLP costs were already calculated during SLP tree build. */
|
||||
gcc_assert (!PURE_SLP_STMT (stmt_info));
|
||||
gcc_assert (cost_vec != NULL);
|
||||
|
||||
/* Cost the "broadcast" of a scalar operand in to a vector operand.
|
||||
Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
|
||||
cost model. */
|
||||
for (i = 0; i < ndts; i++)
|
||||
if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
|
||||
prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
|
||||
stmt_info, 0, vect_prologue);
|
||||
/* ??? Somehow we need to fix this at the callers. */
|
||||
if (node)
|
||||
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
|
||||
|
||||
if (node)
|
||||
{
|
||||
/* Scan operands and account for prologue cost of constants/externals.
|
||||
??? This over-estimates cost for multiple uses and should be
|
||||
re-engineered. */
|
||||
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
|
||||
tree lhs = gimple_get_lhs (stmt);
|
||||
for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
|
||||
{
|
||||
tree op = gimple_op (stmt, i);
|
||||
gimple *def_stmt;
|
||||
enum vect_def_type dt;
|
||||
if (!op || op == lhs)
|
||||
continue;
|
||||
if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)
|
||||
&& (dt == vect_constant_def || dt == vect_external_def))
|
||||
prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
|
||||
i, dt, cost_vec);
|
||||
}
|
||||
}
|
||||
else
|
||||
/* Cost the "broadcast" of a scalar operand in to a vector operand.
|
||||
Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
|
||||
cost model. */
|
||||
for (int i = 0; i < ndts; i++)
|
||||
if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
|
||||
prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
|
||||
stmt_info, 0, vect_prologue);
|
||||
|
||||
/* Adjust for two-operator SLP nodes. */
|
||||
if (node && SLP_TREE_TWO_OPERATORS (node))
|
||||
{
|
||||
ncopies *= 2;
|
||||
inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
|
||||
stmt_info, 0, vect_body);
|
||||
}
|
||||
|
||||
/* Pass the inside-of-loop statements to the target-specific cost model. */
|
||||
inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
|
||||
stmt_info, 0, vect_body);
|
||||
inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
|
||||
stmt_info, 0, vect_body);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
@ -865,36 +955,26 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
|
||||
static void
|
||||
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
|
||||
enum vect_def_type *dt, int pwr)
|
||||
enum vect_def_type *dt, int pwr,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
int i, tmp;
|
||||
int inside_cost = 0, prologue_cost = 0;
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
|
||||
void *target_cost_data;
|
||||
|
||||
/* The SLP costs were already calculated during SLP tree build. */
|
||||
gcc_assert (!PURE_SLP_STMT (stmt_info));
|
||||
|
||||
if (loop_vinfo)
|
||||
target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
|
||||
else
|
||||
target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
|
||||
|
||||
for (i = 0; i < pwr + 1; i++)
|
||||
{
|
||||
tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
|
||||
(i + 1) : i;
|
||||
inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
|
||||
vec_promote_demote, stmt_info, 0,
|
||||
vect_body);
|
||||
inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
|
||||
vec_promote_demote, stmt_info, 0,
|
||||
vect_body);
|
||||
}
|
||||
|
||||
/* FORNOW: Assuming maximum 2 args per stmts. */
|
||||
for (i = 0; i < 2; i++)
|
||||
if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
|
||||
prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
|
||||
stmt_info, 0, vect_prologue);
|
||||
prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
|
||||
stmt_info, 0, vect_prologue);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
@ -907,21 +987,31 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
|
||||
Models cost for stores. In the case of grouped accesses, one access
|
||||
has the overhead of the grouped access attributed to it. */
|
||||
|
||||
void
|
||||
static void
|
||||
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
enum vect_def_type dt,
|
||||
vect_memory_access_type memory_access_type,
|
||||
vec_load_store_type vls_type, slp_tree slp_node,
|
||||
stmt_vector_for_cost *prologue_cost_vec,
|
||||
stmt_vector_for_cost *body_cost_vec)
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
unsigned int inside_cost = 0, prologue_cost = 0;
|
||||
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
|
||||
gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
|
||||
bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
|
||||
|
||||
/* ??? Somehow we need to fix this at the callers. */
|
||||
if (slp_node)
|
||||
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
||||
|
||||
if (vls_type == VLS_STORE_INVARIANT)
|
||||
prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
|
||||
stmt_info, 0, vect_prologue);
|
||||
{
|
||||
if (slp_node)
|
||||
prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
|
||||
1, dt, cost_vec);
|
||||
else
|
||||
prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
|
||||
stmt_info, 0, vect_prologue);
|
||||
}
|
||||
|
||||
/* Grouped stores update all elements in the group at once,
|
||||
so we want the DR for the first statement. */
|
||||
@ -947,7 +1037,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
needed permute. */
|
||||
int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
|
||||
int nstmts = ncopies * ceil_log2 (group_size) * group_size;
|
||||
inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
|
||||
inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
|
||||
stmt_info, 0, vect_body);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
@ -963,19 +1053,19 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
{
|
||||
/* N scalar stores plus extracting the elements. */
|
||||
unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
|
||||
inside_cost += record_stmt_cost (body_cost_vec,
|
||||
inside_cost += record_stmt_cost (cost_vec,
|
||||
ncopies * assumed_nunits,
|
||||
scalar_store, stmt_info, 0, vect_body);
|
||||
}
|
||||
else
|
||||
vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
|
||||
vect_get_store_cost (dr, ncopies, &inside_cost, cost_vec);
|
||||
|
||||
if (memory_access_type == VMAT_ELEMENTWISE
|
||||
|| memory_access_type == VMAT_STRIDED_SLP)
|
||||
{
|
||||
/* N scalar stores plus extracting the elements. */
|
||||
unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
|
||||
inside_cost += record_stmt_cost (body_cost_vec,
|
||||
inside_cost += record_stmt_cost (cost_vec,
|
||||
ncopies * assumed_nunits,
|
||||
vec_to_scalar, stmt_info, 0, vect_body);
|
||||
}
|
||||
@ -1047,18 +1137,70 @@ vect_get_store_cost (struct data_reference *dr, int ncopies,
|
||||
accesses are supported for loads, we also account for the costs of the
|
||||
access scheme chosen. */
|
||||
|
||||
void
|
||||
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
static void
|
||||
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
|
||||
vect_memory_access_type memory_access_type,
|
||||
slp_instance instance,
|
||||
slp_tree slp_node,
|
||||
stmt_vector_for_cost *prologue_cost_vec,
|
||||
stmt_vector_for_cost *body_cost_vec)
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
|
||||
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
|
||||
unsigned int inside_cost = 0, prologue_cost = 0;
|
||||
bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
|
||||
|
||||
gcc_assert (cost_vec);
|
||||
|
||||
/* ??? Somehow we need to fix this at the callers. */
|
||||
if (slp_node)
|
||||
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
||||
|
||||
if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
|
||||
{
|
||||
/* If the load is permuted then the alignment is determined by
|
||||
the first group element not by the first scalar stmt DR. */
|
||||
gimple *stmt = GROUP_FIRST_ELEMENT (stmt_info);
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
/* Record the cost for the permutation. */
|
||||
unsigned n_perms;
|
||||
unsigned assumed_nunits
|
||||
= vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
|
||||
unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
|
||||
vect_transform_slp_perm_load (slp_node, vNULL, NULL,
|
||||
slp_vf, instance, true,
|
||||
&n_perms);
|
||||
inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
|
||||
stmt_info, 0, vect_body);
|
||||
/* And adjust the number of loads performed. This handles
|
||||
redundancies as well as loads that are later dead. */
|
||||
auto_sbitmap perm (GROUP_SIZE (stmt_info));
|
||||
bitmap_clear (perm);
|
||||
for (unsigned i = 0;
|
||||
i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
|
||||
bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
|
||||
ncopies = 0;
|
||||
bool load_seen = false;
|
||||
for (unsigned i = 0; i < GROUP_SIZE (stmt_info); ++i)
|
||||
{
|
||||
if (i % assumed_nunits == 0)
|
||||
{
|
||||
if (load_seen)
|
||||
ncopies++;
|
||||
load_seen = false;
|
||||
}
|
||||
if (bitmap_bit_p (perm, i))
|
||||
load_seen = true;
|
||||
}
|
||||
if (load_seen)
|
||||
ncopies++;
|
||||
gcc_assert (ncopies
|
||||
<= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
|
||||
+ assumed_nunits - 1) / assumed_nunits);
|
||||
}
|
||||
|
||||
/* ??? Need to transition load permutation (and load cost) handling
|
||||
from vect_analyze_slp_cost_1 to here. */
|
||||
|
||||
/* Grouped loads read all elements in the group at once,
|
||||
so we want the DR for the first statement. */
|
||||
if (!slp_node && grouped_access_p)
|
||||
@ -1083,8 +1225,8 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
for each needed permute. */
|
||||
int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
|
||||
int nstmts = ncopies * ceil_log2 (group_size) * group_size;
|
||||
inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
|
||||
stmt_info, 0, vect_body);
|
||||
inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
|
||||
stmt_info, 0, vect_body);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
@ -1099,17 +1241,17 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
/* N scalar loads plus gathering them into a vector. */
|
||||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||
unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
|
||||
inside_cost += record_stmt_cost (body_cost_vec,
|
||||
inside_cost += record_stmt_cost (cost_vec,
|
||||
ncopies * assumed_nunits,
|
||||
scalar_load, stmt_info, 0, vect_body);
|
||||
}
|
||||
else
|
||||
vect_get_load_cost (dr, ncopies, first_stmt_p,
|
||||
&inside_cost, &prologue_cost,
|
||||
prologue_cost_vec, body_cost_vec, true);
|
||||
cost_vec, cost_vec, true);
|
||||
if (memory_access_type == VMAT_ELEMENTWISE
|
||||
|| memory_access_type == VMAT_STRIDED_SLP)
|
||||
inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
|
||||
inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
|
||||
stmt_info, 0, vect_body);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
@ -2851,7 +2993,8 @@ vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
|
||||
static bool
|
||||
vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
gimple **vec_stmt, slp_tree slp_node,
|
||||
tree vectype_in, enum vect_def_type *dt)
|
||||
tree vectype_in, enum vect_def_type *dt,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
tree op, vectype;
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
@ -2902,10 +3045,10 @@ vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
"\n");
|
||||
if (! slp_node)
|
||||
{
|
||||
add_stmt_cost (stmt_info->vinfo->target_cost_data,
|
||||
1, vector_stmt, stmt_info, 0, vect_prologue);
|
||||
add_stmt_cost (stmt_info->vinfo->target_cost_data,
|
||||
ncopies, vec_perm, stmt_info, 0, vect_body);
|
||||
record_stmt_cost (cost_vec,
|
||||
1, vector_stmt, stmt_info, 0, vect_prologue);
|
||||
record_stmt_cost (cost_vec,
|
||||
ncopies, vec_perm, stmt_info, 0, vect_body);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -2995,7 +3138,7 @@ simple_integer_narrowing (tree vectype_out, tree vectype_in,
|
||||
|
||||
static bool
|
||||
vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
||||
slp_tree slp_node)
|
||||
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
gcall *stmt;
|
||||
tree vec_dest;
|
||||
@ -3192,7 +3335,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
||||
|| gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
|
||||
|| gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
|
||||
return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
|
||||
vectype_in, dt);
|
||||
vectype_in, dt, cost_vec);
|
||||
else
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
@ -3219,13 +3362,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
|
||||
"\n");
|
||||
if (!slp_node)
|
||||
{
|
||||
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
|
||||
if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
|
||||
add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
|
||||
vec_promote_demote, stmt_info, 0, vect_body);
|
||||
}
|
||||
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
|
||||
if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
|
||||
record_stmt_cost (cost_vec, ncopies / 2,
|
||||
vec_promote_demote, stmt_info, 0, vect_body);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -3596,7 +3736,8 @@ simd_clone_subparts (tree vectype)
|
||||
|
||||
static bool
|
||||
vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
gimple **vec_stmt, slp_tree slp_node)
|
||||
gimple **vec_stmt, slp_tree slp_node,
|
||||
stmt_vector_for_cost *)
|
||||
{
|
||||
tree vec_dest;
|
||||
tree scalar_dest;
|
||||
@ -3898,7 +4039,7 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"=== vectorizable_simd_clone_call ===\n");
|
||||
/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
|
||||
/* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -4443,7 +4584,8 @@ vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
|
||||
|
||||
static bool
|
||||
vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
gimple **vec_stmt, slp_tree slp_node)
|
||||
gimple **vec_stmt, slp_tree slp_node,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
tree vec_dest;
|
||||
tree scalar_dest;
|
||||
@ -4742,20 +4884,20 @@ vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
|
||||
{
|
||||
STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
|
||||
if (!slp_node)
|
||||
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
|
||||
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
|
||||
cost_vec);
|
||||
}
|
||||
else if (modifier == NARROW)
|
||||
{
|
||||
STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
|
||||
if (!slp_node)
|
||||
vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
|
||||
vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
|
||||
cost_vec);
|
||||
}
|
||||
else
|
||||
{
|
||||
STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
|
||||
if (!slp_node)
|
||||
vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
|
||||
vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
|
||||
cost_vec);
|
||||
}
|
||||
interm_types.release ();
|
||||
return true;
|
||||
@ -5044,7 +5186,8 @@ vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
|
||||
static bool
|
||||
vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
gimple **vec_stmt, slp_tree slp_node)
|
||||
gimple **vec_stmt, slp_tree slp_node,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
tree vec_dest;
|
||||
tree scalar_dest;
|
||||
@ -5152,8 +5295,7 @@ vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"=== vectorizable_assignment ===\n");
|
||||
if (!slp_node)
|
||||
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
|
||||
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -5248,7 +5390,8 @@ vect_supportable_shift (enum tree_code code, tree scalar_type)
|
||||
|
||||
static bool
|
||||
vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
gimple **vec_stmt, slp_tree slp_node)
|
||||
gimple **vec_stmt, slp_tree slp_node,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
tree vec_dest;
|
||||
tree scalar_dest;
|
||||
@ -5517,8 +5660,7 @@ vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"=== vectorizable_shift ===\n");
|
||||
if (!slp_node)
|
||||
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
|
||||
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -5617,7 +5759,8 @@ vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
|
||||
static bool
|
||||
vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
gimple **vec_stmt, slp_tree slp_node)
|
||||
gimple **vec_stmt, slp_tree slp_node,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
tree vec_dest;
|
||||
tree scalar_dest;
|
||||
@ -5841,8 +5984,7 @@ vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"=== vectorizable_operation ===\n");
|
||||
if (!slp_node)
|
||||
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
|
||||
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -6068,7 +6210,7 @@ get_group_alias_ptr_type (gimple *first_stmt)
|
||||
|
||||
static bool
|
||||
vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
||||
slp_tree slp_node)
|
||||
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
tree data_ref;
|
||||
tree op;
|
||||
@ -6264,10 +6406,8 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
||||
memory_access_type, &gs_info);
|
||||
|
||||
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
|
||||
/* The SLP costs are calculated during SLP analysis. */
|
||||
if (!slp_node)
|
||||
vect_model_store_cost (stmt_info, ncopies, memory_access_type,
|
||||
vls_type, NULL, NULL, NULL);
|
||||
vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
|
||||
vls_type, slp_node, cost_vec);
|
||||
return true;
|
||||
}
|
||||
gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
|
||||
@ -7230,7 +7370,8 @@ hoist_defs_of_uses (gimple *stmt, struct loop *loop)
|
||||
|
||||
static bool
|
||||
vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
||||
slp_tree slp_node, slp_instance slp_node_instance)
|
||||
slp_tree slp_node, slp_instance slp_node_instance,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
tree scalar_dest;
|
||||
tree vec_dest = NULL;
|
||||
@ -7491,10 +7632,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
||||
memory_access_type, &gs_info);
|
||||
|
||||
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
|
||||
/* The SLP costs are calculated during SLP analysis. */
|
||||
if (! slp_node)
|
||||
vect_model_load_cost (stmt_info, ncopies, memory_access_type,
|
||||
NULL, NULL, NULL);
|
||||
vect_model_load_cost (stmt_info, ncopies, memory_access_type,
|
||||
slp_node_instance, slp_node, cost_vec);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -8550,7 +8689,7 @@ vect_is_simple_cond (tree cond, vec_info *vinfo,
|
||||
bool
|
||||
vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
gimple **vec_stmt, tree reduc_def, int reduc_index,
|
||||
slp_tree slp_node)
|
||||
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
tree scalar_dest = NULL_TREE;
|
||||
tree vec_dest = NULL_TREE;
|
||||
@ -8704,7 +8843,6 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
|
||||
if (!vec_stmt)
|
||||
{
|
||||
STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
|
||||
if (bitop1 != NOP_EXPR)
|
||||
{
|
||||
machine_mode mode = TYPE_MODE (comp_vectype);
|
||||
@ -8725,8 +8863,9 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
if (expand_vec_cond_expr_p (vectype, comp_vectype,
|
||||
cond_code))
|
||||
{
|
||||
if (!slp_node)
|
||||
vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
|
||||
STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
|
||||
vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
|
||||
cost_vec);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -8954,7 +9093,7 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
static bool
|
||||
vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
gimple **vec_stmt, tree reduc_def,
|
||||
slp_tree slp_node)
|
||||
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
tree lhs, rhs1, rhs2;
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
@ -9089,12 +9228,11 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
|
||||
if (!vec_stmt)
|
||||
{
|
||||
STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
|
||||
if (!slp_node)
|
||||
vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
|
||||
dts, ndts, NULL, NULL);
|
||||
if (bitop1 == NOP_EXPR)
|
||||
return expand_vec_cmp_expr_p (vectype, mask_type, code);
|
||||
{
|
||||
if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
machine_mode mode = TYPE_MODE (vectype);
|
||||
@ -9110,8 +9248,12 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
|
||||
vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
|
||||
dts, ndts, slp_node, cost_vec);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Transform. */
|
||||
@ -9221,7 +9363,8 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
|
||||
static bool
|
||||
can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
slp_tree slp_node, gimple **vec_stmt)
|
||||
slp_tree slp_node, gimple **vec_stmt,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
if (slp_node)
|
||||
{
|
||||
@ -9232,12 +9375,13 @@ can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
|
||||
if (STMT_VINFO_LIVE_P (slp_stmt_info)
|
||||
&& !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
|
||||
vec_stmt))
|
||||
vec_stmt, cost_vec))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
|
||||
&& !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
|
||||
&& !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
|
||||
cost_vec))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -9247,7 +9391,7 @@ can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
|
||||
bool
|
||||
vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
|
||||
slp_instance node_instance)
|
||||
slp_instance node_instance, stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
|
||||
@ -9327,7 +9471,7 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
|
||||
}
|
||||
|
||||
if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
|
||||
node_instance))
|
||||
node_instance, cost_vec))
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -9352,7 +9496,8 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
|
||||
}
|
||||
|
||||
if (!vect_analyze_stmt (pattern_def_stmt,
|
||||
need_to_vectorize, node, node_instance))
|
||||
need_to_vectorize, node, node_instance,
|
||||
cost_vec))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -9404,31 +9549,35 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
|
||||
if (!bb_vinfo
|
||||
&& (STMT_VINFO_RELEVANT_P (stmt_info)
|
||||
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
|
||||
ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
|
||||
|| vectorizable_conversion (stmt, NULL, NULL, node)
|
||||
|| vectorizable_shift (stmt, NULL, NULL, node)
|
||||
|| vectorizable_operation (stmt, NULL, NULL, node)
|
||||
|| vectorizable_assignment (stmt, NULL, NULL, node)
|
||||
|| vectorizable_load (stmt, NULL, NULL, node, NULL)
|
||||
|| vectorizable_call (stmt, NULL, NULL, node)
|
||||
|| vectorizable_store (stmt, NULL, NULL, node)
|
||||
|| vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
|
||||
|| vectorizable_induction (stmt, NULL, NULL, node)
|
||||
|| vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
|
||||
|| vectorizable_comparison (stmt, NULL, NULL, NULL, node));
|
||||
ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
|
||||
|| vectorizable_call (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_store (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
|
||||
cost_vec)
|
||||
|| vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
|
||||
|| vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
|
||||
else
|
||||
{
|
||||
if (bb_vinfo)
|
||||
ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
|
||||
|| vectorizable_conversion (stmt, NULL, NULL, node)
|
||||
|| vectorizable_shift (stmt, NULL, NULL, node)
|
||||
|| vectorizable_operation (stmt, NULL, NULL, node)
|
||||
|| vectorizable_assignment (stmt, NULL, NULL, node)
|
||||
|| vectorizable_load (stmt, NULL, NULL, node, NULL)
|
||||
|| vectorizable_call (stmt, NULL, NULL, node)
|
||||
|| vectorizable_store (stmt, NULL, NULL, node)
|
||||
|| vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
|
||||
|| vectorizable_comparison (stmt, NULL, NULL, NULL, node));
|
||||
ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_load (stmt, NULL, NULL, node, node_instance,
|
||||
cost_vec)
|
||||
|| vectorizable_call (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_store (stmt, NULL, NULL, node, cost_vec)
|
||||
|| vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
|
||||
cost_vec)
|
||||
|| vectorizable_comparison (stmt, NULL, NULL, NULL, node,
|
||||
cost_vec));
|
||||
}
|
||||
|
||||
if (!ok)
|
||||
@ -9444,13 +9593,11 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (bb_vinfo)
|
||||
return true;
|
||||
|
||||
/* Stmts that are (also) "live" (i.e. - that are used out of the loop)
|
||||
need extra handling, except for vectorizable reductions. */
|
||||
if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
|
||||
&& !can_vectorize_live_stmts (stmt, NULL, node, NULL))
|
||||
if (!bb_vinfo
|
||||
&& STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
|
||||
&& !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
@ -9493,38 +9640,38 @@ vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
case type_demotion_vec_info_type:
|
||||
case type_promotion_vec_info_type:
|
||||
case type_conversion_vec_info_type:
|
||||
done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
|
||||
done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
case induc_vec_info_type:
|
||||
done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
|
||||
done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
case shift_vec_info_type:
|
||||
done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
|
||||
done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
case op_vec_info_type:
|
||||
done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
|
||||
done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
case assignment_vec_info_type:
|
||||
done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
|
||||
done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
case load_vec_info_type:
|
||||
done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
|
||||
slp_node_instance);
|
||||
slp_node_instance, NULL);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
case store_vec_info_type:
|
||||
done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
|
||||
done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
|
||||
gcc_assert (done);
|
||||
if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
|
||||
{
|
||||
@ -9543,28 +9690,28 @@ vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
break;
|
||||
|
||||
case condition_vec_info_type:
|
||||
done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
|
||||
done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
case comparison_vec_info_type:
|
||||
done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
|
||||
done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
case call_vec_info_type:
|
||||
done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
|
||||
done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
|
||||
stmt = gsi_stmt (*gsi);
|
||||
break;
|
||||
|
||||
case call_simd_clone_vec_info_type:
|
||||
done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
|
||||
done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
|
||||
stmt = gsi_stmt (*gsi);
|
||||
break;
|
||||
|
||||
case reduc_vec_info_type:
|
||||
done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
|
||||
slp_node_instance);
|
||||
slp_node_instance, NULL);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
@ -9626,7 +9773,7 @@ vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
being vectorized. */
|
||||
if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
|
||||
{
|
||||
done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
|
||||
done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
|
||||
gcc_assert (done);
|
||||
}
|
||||
|
||||
|
@ -78,6 +78,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tree-scalar-evolution.h"
|
||||
#include "stringpool.h"
|
||||
#include "attribs.h"
|
||||
#include "gimple-pretty-print.h"
|
||||
|
||||
|
||||
/* Loop or bb location. */
|
||||
@ -85,6 +86,96 @@ source_location vect_location;
|
||||
|
||||
/* Vector mapping GIMPLE stmt to stmt_vec_info. */
|
||||
vec<stmt_vec_info> stmt_vec_info_vec;
|
||||
|
||||
/* Dump a cost entry according to args to F. */
|
||||
|
||||
void
|
||||
dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
|
||||
stmt_vec_info stmt_info, int misalign,
|
||||
enum vect_cost_model_location where)
|
||||
{
|
||||
fprintf (f, "%p ", data);
|
||||
if (stmt_info)
|
||||
{
|
||||
print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
|
||||
fprintf (f, " ");
|
||||
}
|
||||
else
|
||||
fprintf (f, "<unknown> ");
|
||||
fprintf (f, "%d times ", count);
|
||||
const char *ks = "unknown";
|
||||
switch (kind)
|
||||
{
|
||||
case scalar_stmt:
|
||||
ks = "scalar_stmt";
|
||||
break;
|
||||
case scalar_load:
|
||||
ks = "scalar_load";
|
||||
break;
|
||||
case scalar_store:
|
||||
ks = "scalar_store";
|
||||
break;
|
||||
case vector_stmt:
|
||||
ks = "vector_stmt";
|
||||
break;
|
||||
case vector_load:
|
||||
ks = "vector_load";
|
||||
break;
|
||||
case vector_gather_load:
|
||||
ks = "vector_gather_load";
|
||||
break;
|
||||
case unaligned_load:
|
||||
ks = "unaligned_load";
|
||||
break;
|
||||
case unaligned_store:
|
||||
ks = "unaligned_store";
|
||||
break;
|
||||
case vector_store:
|
||||
ks = "unaligned_store";
|
||||
break;
|
||||
case vector_scatter_store:
|
||||
ks = "unaligned_store";
|
||||
break;
|
||||
case vec_to_scalar:
|
||||
ks = "unaligned_store";
|
||||
break;
|
||||
case scalar_to_vec:
|
||||
ks = "unaligned_store";
|
||||
break;
|
||||
case cond_branch_not_taken:
|
||||
ks = "unaligned_store";
|
||||
break;
|
||||
case cond_branch_taken:
|
||||
ks = "unaligned_store";
|
||||
break;
|
||||
case vec_perm:
|
||||
ks = "unaligned_store";
|
||||
break;
|
||||
case vec_promote_demote:
|
||||
ks = "unaligned_store";
|
||||
break;
|
||||
case vec_construct:
|
||||
ks = "unaligned_store";
|
||||
break;
|
||||
}
|
||||
fprintf (f, "%s ", ks);
|
||||
if (kind == unaligned_load || kind == unaligned_store)
|
||||
fprintf (f, "(misalign %d) ", misalign);
|
||||
const char *ws = "unknown";
|
||||
switch (where)
|
||||
{
|
||||
case vect_prologue:
|
||||
ws = "prologue";
|
||||
break;
|
||||
case vect_body:
|
||||
ws = "body";
|
||||
break;
|
||||
case vect_epilogue:
|
||||
ws = "epilogue";
|
||||
break;
|
||||
}
|
||||
fprintf (f, "in %s\n", ws);
|
||||
}
|
||||
|
||||
/* For mapping simduid to vectorization factor. */
|
||||
|
||||
|
@ -94,6 +94,7 @@ enum vect_reduction_type {
|
||||
/* One recorded cost entry.  add_stmt_costs forwards each field of an
   entry to add_stmt_cost.  */
struct stmt_info_for_cost {
|
||||
int count; /* Passed as the COUNT argument of add_stmt_cost.  */
|
||||
enum vect_cost_for_stmt kind; /* Kind of cost being recorded.  */
|
||||
enum vect_cost_model_location where; /* dump_stmt_cost names prologue, body and epilogue.  */
|
||||
gimple *stmt; /* May be null; add_stmt_costs then passes a null stmt_vec_info.  */
|
||||
int misalign; /* Passed as the MISALIGN argument of add_stmt_cost.  */
|
||||
};
|
||||
@ -1171,6 +1172,9 @@ init_cost (struct loop *loop_info)
|
||||
return targetm.vectorize.init_cost (loop_info);
|
||||
}
|
||||
|
||||
extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
|
||||
stmt_vec_info, int, enum vect_cost_model_location);
|
||||
|
||||
/* Alias targetm.vectorize.add_stmt_cost. */
|
||||
|
||||
static inline unsigned
|
||||
@ -1178,6 +1182,8 @@ add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
|
||||
stmt_vec_info stmt_info, int misalign,
|
||||
enum vect_cost_model_location where)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign, where);
|
||||
return targetm.vectorize.add_stmt_cost (data, count, kind,
|
||||
stmt_info, misalign, where);
|
||||
}
|
||||
@ -1199,6 +1205,17 @@ destroy_cost_data (void *data)
|
||||
targetm.vectorize.destroy_cost_data (data);
|
||||
}
|
||||
|
||||
inline void
|
||||
add_stmt_costs (void *data, stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
stmt_info_for_cost *cost;
|
||||
unsigned i;
|
||||
FOR_EACH_VEC_ELT (*cost_vec, i, cost)
|
||||
add_stmt_cost (data, cost->count, cost->kind,
|
||||
cost->stmt ? vinfo_for_stmt (cost->stmt) : NULL,
|
||||
cost->misalign, cost->where);
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Info on data references alignment. */
|
||||
/*-----------------------------------------------------------------*/
|
||||
@ -1422,16 +1439,6 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
|
||||
int *, vec<tree> *);
|
||||
extern stmt_vec_info new_stmt_vec_info (gimple *stmt, vec_info *);
|
||||
extern void free_stmt_vec_info (gimple *stmt);
|
||||
extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
|
||||
int, stmt_vector_for_cost *,
|
||||
stmt_vector_for_cost *);
|
||||
extern void vect_model_store_cost (stmt_vec_info, int, vect_memory_access_type,
|
||||
vec_load_store_type, slp_tree,
|
||||
stmt_vector_for_cost *,
|
||||
stmt_vector_for_cost *);
|
||||
extern void vect_model_load_cost (stmt_vec_info, int, vect_memory_access_type,
|
||||
slp_tree, stmt_vector_for_cost *,
|
||||
stmt_vector_for_cost *);
|
||||
extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
|
||||
enum vect_cost_for_stmt, stmt_vec_info,
|
||||
int, enum vect_cost_model_location);
|
||||
@ -1452,9 +1459,11 @@ extern tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree);
|
||||
extern bool vect_transform_stmt (gimple *, gimple_stmt_iterator *,
|
||||
bool *, slp_tree, slp_instance);
|
||||
extern void vect_remove_stores (gimple *);
|
||||
extern bool vect_analyze_stmt (gimple *, bool *, slp_tree, slp_instance);
|
||||
extern bool vect_analyze_stmt (gimple *, bool *, slp_tree, slp_instance,
|
||||
stmt_vector_for_cost *);
|
||||
extern bool vectorizable_condition (gimple *, gimple_stmt_iterator *,
|
||||
gimple **, tree, int, slp_tree);
|
||||
gimple **, tree, int, slp_tree,
|
||||
stmt_vector_for_cost *);
|
||||
extern void vect_get_load_cost (struct data_reference *, int, bool,
|
||||
unsigned int *, unsigned int *,
|
||||
stmt_vector_for_cost *,
|
||||
@ -1539,11 +1548,14 @@ extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
|
||||
extern struct loop *vect_transform_loop (loop_vec_info);
|
||||
extern loop_vec_info vect_analyze_loop_form (struct loop *);
|
||||
extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *,
|
||||
slp_tree, int, gimple **);
|
||||
slp_tree, int, gimple **,
|
||||
stmt_vector_for_cost *);
|
||||
extern bool vectorizable_reduction (gimple *, gimple_stmt_iterator *,
|
||||
gimple **, slp_tree, slp_instance);
|
||||
gimple **, slp_tree, slp_instance,
|
||||
stmt_vector_for_cost *);
|
||||
extern bool vectorizable_induction (gimple *, gimple_stmt_iterator *,
|
||||
gimple **, slp_tree);
|
||||
gimple **, slp_tree,
|
||||
stmt_vector_for_cost *);
|
||||
extern tree get_initial_def_for_reduction (gimple *, tree, tree *);
|
||||
extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
|
||||
extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
|
||||
|
Loading…
x
Reference in New Issue
Block a user