tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec member.

2015-05-28  Richard Biener  <rguenther@suse.de>

	* tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec
	member.
	(SLP_INSTANCE_BODY_COST_VEC): Remove.
	(vect_update_slp_costs_according_to_vf): Likewise.
	(vect_slp_analyze_operations): Update prototype.
	* tree-vect-loop.c (vect_analyze_loop_2): Remove call to
	vect_update_slp_costs_according_to_vf, adjust.
	* tree-vect-slp.c (vect_free_slp_instance): Adjust.
	(vect_analyze_slp_cost_1): Likewise.
	(vect_analyze_slp_cost): Likewise.  Properly deal with
	widening reduction ops.  Commit body costs.
	(vect_analyze_slp_instance): Adjust.  Do not analyze SLP
	cost for loops from here.
	(vect_slp_analyze_operations): But do it from here when
	the vectorization factor is known and stmts are analyzed.
	(vect_bb_vectorization_profitable_p): Simplify.
	(vect_slp_analyze_bb_1): Do not compute SLP cost here.
	(vect_update_slp_costs_according_to_vf): Remove.

From-SVN: r223798
This commit is contained in:
Richard Biener 2015-05-28 07:15:57 +00:00 committed by Richard Biener
parent 428b381275
commit 1a4b99c172
4 changed files with 78 additions and 107 deletions

View File

@ -1,3 +1,24 @@
2015-05-28  Richard Biener  <rguenther@suse.de>

	* tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec
	member.
	(SLP_INSTANCE_BODY_COST_VEC): Remove.
	(vect_update_slp_costs_according_to_vf): Likewise.
	(vect_slp_analyze_operations): Update prototype.
	* tree-vect-loop.c (vect_analyze_loop_2): Remove call to
	vect_update_slp_costs_according_to_vf, adjust.
	* tree-vect-slp.c (vect_free_slp_instance): Adjust.
	(vect_analyze_slp_cost_1): Likewise.
	(vect_analyze_slp_cost): Likewise.  Properly deal with
	widening reduction ops.  Commit body costs.
	(vect_analyze_slp_instance): Adjust.  Do not analyze SLP
	cost for loops from here.
	(vect_slp_analyze_operations): But do it from here when
	the vectorization factor is known and stmts are analyzed.
	(vect_bb_vectorization_profitable_p): Simplify.
	(vect_slp_analyze_bb_1): Do not compute SLP cost here.
	(vect_update_slp_costs_according_to_vf): Remove.

2015-05-27  Magnus Granberg  <zorry@gentoo.org>
	    H.J. Lu  <hongjiu.lu@intel.com>

View File

@ -1814,15 +1814,12 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
/* Update the vectorization factor based on the SLP decision. */
vect_update_vf_for_slp (loop_vinfo);
/* Once VF is set, SLP costs should be updated since the number of
created vector stmts depends on VF. */
vect_update_slp_costs_according_to_vf (loop_vinfo);
/* Analyze operations in the SLP instances. Note this may
remove unsupported SLP instances which makes the above
SLP kind detection invalid. */
unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
return false;
}

View File

@ -130,7 +130,6 @@ vect_free_slp_instance (slp_instance instance)
{
vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
SLP_INSTANCE_LOADS (instance).release ();
SLP_INSTANCE_BODY_COST_VEC (instance).release ();
free (instance);
}
@ -1546,13 +1545,11 @@ vect_find_last_scalar_stmt_in_slp (slp_tree node)
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
slp_instance instance, slp_tree node,
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
stmt_vector_for_cost *prologue_cost_vec,
stmt_vector_for_cost *body_cost_vec,
unsigned ncopies_for_cost)
{
stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
unsigned i;
slp_tree child;
gimple stmt, s;
@ -1563,9 +1560,8 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
/* Recurse down the SLP tree. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (child)
vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
instance, child, prologue_cost_vec,
ncopies_for_cost);
vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
body_cost_vec, ncopies_for_cost);
/* Look at the first scalar stmt to determine the cost. */
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@ -1622,7 +1618,8 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
enum vect_def_type dt;
if (!op || op == lhs)
continue;
if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
STMT_VINFO_BB_VINFO (stmt_info),
&def_stmt, &def, &dt))
{
/* Without looking at the actual initializer a vector of
@ -1642,8 +1639,7 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
/* Compute the cost for the SLP instance INSTANCE. */
static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
slp_instance instance, unsigned nunits)
vect_analyze_slp_cost (slp_instance instance, void *data)
{
stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
unsigned ncopies_for_cost;
@ -1654,20 +1650,38 @@ vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
GROUP_SIZE / NUNITS otherwise). */
unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
slp_tree node = SLP_INSTANCE_TREE (instance);
stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
/* Adjust the group_size by the vectorization factor which is always one
for basic-block vectorization. */
if (STMT_VINFO_LOOP_VINFO (stmt_info))
group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
/* For reductions look at a reduction operand in case the reduction
operation is widening like DOT_PROD or SAD. */
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
{
gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
switch (gimple_assign_rhs_code (stmt))
{
case DOT_PROD_EXPR:
case SAD_EXPR:
nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
(TREE_TYPE (gimple_assign_rhs1 (stmt))));
break;
default:;
}
}
ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
prologue_cost_vec.create (10);
body_cost_vec.create (10);
SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
instance, SLP_INSTANCE_TREE (instance),
&prologue_cost_vec, ncopies_for_cost);
vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
&prologue_cost_vec, &body_cost_vec,
ncopies_for_cost);
/* Record the prologue costs, which were delayed until we were
sure that SLP was successful. Unlike the body costs, we know
the final values now regardless of the loop vectorization factor. */
void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
: BB_VINFO_TARGET_COST_DATA (bb_vinfo));
sure that SLP was successful. */
FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
{
struct _stmt_vec_info *stmt_info
@ -1676,7 +1690,17 @@ vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
si->misalign, vect_prologue);
}
/* Record the instance's instructions in the target cost model. */
FOR_EACH_VEC_ELT (body_cost_vec, i, si)
{
struct _stmt_vec_info *stmt_info
= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
(void) add_stmt_cost (data, si->count, si->kind, stmt_info,
si->misalign, vect_body);
}
prologue_cost_vec.release ();
body_cost_vec.release ();
}
/* Analyze an SLP instance starting from a group of grouped stores. Call
@ -1811,7 +1835,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
SLP_INSTANCE_TREE (new_instance) = node;
SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
SLP_INSTANCE_LOADS (new_instance) = loads;
/* Compute the load permutation. */
@ -1863,13 +1886,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
if (loop_vinfo)
{
/* Compute the costs of this SLP instance. Delay this for BB
vectorization as we don't have vector types computed yet. */
vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
new_instance, TYPE_VECTOR_SUBPARTS (vectype));
LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
}
LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
else
BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
@ -2237,7 +2254,7 @@ vect_slp_analyze_node_operations (slp_tree node)
operations are supported. */
bool
vect_slp_analyze_operations (vec<slp_instance> slp_instances)
vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
{
slp_instance instance;
int i;
@ -2259,7 +2276,11 @@ vect_slp_analyze_operations (vec<slp_instance> slp_instances)
slp_instances.ordered_remove (i);
}
else
i++;
{
/* Compute the costs of the SLP instance. */
vect_analyze_slp_cost (instance, data);
i++;
}
}
if (!slp_instances.length ())
@ -2342,26 +2363,9 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
slp_instance instance;
int i, j;
int i;
unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
stmt_vec_info stmt_info = NULL;
stmt_vector_for_cost body_cost_vec;
stmt_info_for_cost *ci;
/* Calculate vector costs. */
FOR_EACH_VEC_ELT (slp_instances, i, instance)
{
body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
{
stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
(void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
stmt_info, ci->misalign, vect_body);
}
}
/* Calculate scalar cost. */
FOR_EACH_VEC_ELT (slp_instances, i, instance)
@ -2519,7 +2523,8 @@ vect_slp_analyze_bb_1 (basic_block bb)
return NULL;
}
if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo)))
if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@ -2529,15 +2534,6 @@ vect_slp_analyze_bb_1 (basic_block bb)
return NULL;
}
/* Compute the costs of the SLP instances. */
FOR_EACH_VEC_ELT (slp_instances, i, instance)
{
gimple stmt = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0];
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
vect_analyze_slp_cost (NULL, bb_vinfo,
instance, TYPE_VECTOR_SUBPARTS (vectype));
}
/* Cost model: check if the vectorization is worthwhile. */
if (!unlimited_cost_model (NULL)
&& !vect_bb_vectorization_profitable_p (bb_vinfo))
@ -2616,45 +2612,6 @@ vect_slp_analyze_bb (basic_block bb)
}
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
the number of created vector stmts depends on the unrolling factor).
However, the actual number of vector stmts for every SLP node depends on
VF which is set later in vect_analyze_operations (). Hence, SLP costs
should be updated. In this function we assume that the inside costs
calculated in vect_model_xxx_cost are linear in ncopies. */
void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
slp_instance instance;
stmt_vector_for_cost body_cost_vec;
stmt_info_for_cost *si;
void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_update_slp_costs_according_to_vf ===\n");
FOR_EACH_VEC_ELT (slp_instances, i, instance)
{
/* We assume that costs are linear in ncopies. */
int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
/* Record the instance's instructions in the target cost model.
This was delayed until here because the count of instructions
isn't known beforehand. */
body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
FOR_EACH_VEC_ELT (body_cost_vec, j, si)
(void) add_stmt_cost (data, si->count * ncopies, si->kind,
vinfo_for_stmt (si->stmt), si->misalign,
vect_body);
}
}
/* For constant and loop invariant defs of SLP_NODE this function returns
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of

View File

@ -128,9 +128,6 @@ typedef struct _slp_instance {
/* The unrolling factor required to vectorize this SLP instance. */
unsigned int unrolling_factor;
/* Vectorization costs associated with SLP instance. */
stmt_vector_for_cost body_cost_vec;
/* The group of nodes that contain loads of this SLP instance. */
vec<slp_tree> loads;
} *slp_instance;
@ -140,7 +137,6 @@ typedef struct _slp_instance {
#define SLP_INSTANCE_TREE(S) (S)->root
#define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size
#define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor
#define SLP_INSTANCE_BODY_COST_VEC(S) (S)->body_cost_vec
#define SLP_INSTANCE_LOADS(S) (S)->loads
#define SLP_TREE_CHILDREN(S) (S)->children
@ -1114,9 +1110,9 @@ extern void vect_free_slp_instance (slp_instance);
extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
gimple_stmt_iterator *, int,
slp_instance, bool);
extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances);
extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances,
void *);
extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
extern bool vect_analyze_slp (loop_vec_info, bb_vec_info, unsigned);
extern bool vect_make_slp_decision (loop_vec_info);
extern void vect_detect_hybrid_slp (loop_vec_info);