tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec member.
2015-05-28 Richard Biener <rguenther@suse.de> * tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec member. (SLP_INSTANCE_BODY_COST_VEC): Remove. (vect_update_slp_costs_according_to_vf): Likewise. (vect_slp_analyze_operations): Update prototype. * tree-vect-loop.c (vect_analyze_loop_2): Remove call to vect_update_slp_costs_according_to_vf, adjust. * tree-vect-slp.c (vect_free_slp_instance): Adjust. (vect_analyze_slp_cost_1): Likewise. (vect_analyze_slp_cost): Likewise. Properly deal with widening reduction ops. Commit body costs. (vect_analyze_slp_instance): Adjust. Do not analyze SLP cost for loops from here. (vect_slp_analyze_operations): But do it from here when the vectorization factor is known and stmts are analyzed. (vect_bb_vectorization_profitable_p): Simplify. (vect_slp_analyze_bb_1): Do not compute SLP cost here. (vect_update_slp_costs_according_to_vf): Remove. From-SVN: r223798
This commit is contained in:
parent
428b381275
commit
1a4b99c172
|
@ -1,3 +1,24 @@
|
|||
2015-05-28 Richard Biener <rguenther@suse.de>
|
||||
|
||||
* tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec
|
||||
member.
|
||||
(SLP_INSTANCE_BODY_COST_VEC): Remove.
|
||||
(vect_update_slp_costs_according_to_vf): Likewise.
|
||||
(vect_slp_analyze_operations): Update prototype.
|
||||
* tree-vect-loop.c (vect_analyze_loop_2): Remove call to
|
||||
vect_update_slp_costs_according_to_vf, adjust.
|
||||
* tree-vect-slp.c (vect_free_slp_instance): Adjust.
|
||||
(vect_analyze_slp_cost_1): Likewise.
|
||||
(vect_analyze_slp_cost): Likewise. Properly deal with
|
||||
widening reduction ops. Commit body costs.
|
||||
(vect_analyze_slp_instance): Adjust. Do not analyze SLP
|
||||
cost for loops from here.
|
||||
(vect_slp_analyze_operations): But do it from here when
|
||||
the vectorization factor is known and stmts are analyzed.
|
||||
(vect_bb_vectorization_profitable_p): Simplify.
|
||||
(vect_slp_analyze_bb_1): Do not compute SLP cost here.
|
||||
(vect_update_slp_costs_according_to_vf): Remove.
|
||||
|
||||
2015-05-27 Magnus Granberg <zorry@gentoo.org>
|
||||
H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
|
|
|
@ -1814,15 +1814,12 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
|
|||
/* Update the vectorization factor based on the SLP decision. */
|
||||
vect_update_vf_for_slp (loop_vinfo);
|
||||
|
||||
/* Once VF is set, SLP costs should be updated since the number of
|
||||
created vector stmts depends on VF. */
|
||||
vect_update_slp_costs_according_to_vf (loop_vinfo);
|
||||
|
||||
/* Analyze operations in the SLP instances. Note this may
|
||||
remove unsupported SLP instances which makes the above
|
||||
SLP kind detection invalid. */
|
||||
unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
|
||||
vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
|
||||
vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
|
||||
LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
|
||||
if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -130,7 +130,6 @@ vect_free_slp_instance (slp_instance instance)
|
|||
{
|
||||
vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
|
||||
SLP_INSTANCE_LOADS (instance).release ();
|
||||
SLP_INSTANCE_BODY_COST_VEC (instance).release ();
|
||||
free (instance);
|
||||
}
|
||||
|
||||
|
@ -1546,13 +1545,11 @@ vect_find_last_scalar_stmt_in_slp (slp_tree node)
|
|||
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
|
||||
|
||||
static void
|
||||
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
||||
slp_instance instance, slp_tree node,
|
||||
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
|
||||
stmt_vector_for_cost *prologue_cost_vec,
|
||||
stmt_vector_for_cost *body_cost_vec,
|
||||
unsigned ncopies_for_cost)
|
||||
{
|
||||
stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
|
||||
|
||||
unsigned i;
|
||||
slp_tree child;
|
||||
gimple stmt, s;
|
||||
|
@ -1563,9 +1560,8 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
/* Recurse down the SLP tree. */
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
if (child)
|
||||
vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
|
||||
instance, child, prologue_cost_vec,
|
||||
ncopies_for_cost);
|
||||
vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
|
||||
body_cost_vec, ncopies_for_cost);
|
||||
|
||||
/* Look at the first scalar stmt to determine the cost. */
|
||||
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
|
||||
|
@ -1622,7 +1618,8 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
enum vect_def_type dt;
|
||||
if (!op || op == lhs)
|
||||
continue;
|
||||
if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
|
||||
if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
|
||||
STMT_VINFO_BB_VINFO (stmt_info),
|
||||
&def_stmt, &def, &dt))
|
||||
{
|
||||
/* Without looking at the actual initializer a vector of
|
||||
|
@ -1642,8 +1639,7 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
/* Compute the cost for the SLP instance INSTANCE. */
|
||||
|
||||
static void
|
||||
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
||||
slp_instance instance, unsigned nunits)
|
||||
vect_analyze_slp_cost (slp_instance instance, void *data)
|
||||
{
|
||||
stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
|
||||
unsigned ncopies_for_cost;
|
||||
|
@ -1654,20 +1650,38 @@ vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
|
||||
GROUP_SIZE / NUNITS otherwise). */
|
||||
unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
|
||||
slp_tree node = SLP_INSTANCE_TREE (instance);
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
|
||||
/* Adjust the group_size by the vectorization factor which is always one
|
||||
for basic-block vectorization. */
|
||||
if (STMT_VINFO_LOOP_VINFO (stmt_info))
|
||||
group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
|
||||
unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
|
||||
/* For reductions look at a reduction operand in case the reduction
|
||||
operation is widening like DOT_PROD or SAD. */
|
||||
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
{
|
||||
gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
|
||||
switch (gimple_assign_rhs_code (stmt))
|
||||
{
|
||||
case DOT_PROD_EXPR:
|
||||
case SAD_EXPR:
|
||||
nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
|
||||
(TREE_TYPE (gimple_assign_rhs1 (stmt))));
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
}
|
||||
ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
|
||||
|
||||
prologue_cost_vec.create (10);
|
||||
body_cost_vec.create (10);
|
||||
SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
|
||||
vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
|
||||
instance, SLP_INSTANCE_TREE (instance),
|
||||
&prologue_cost_vec, ncopies_for_cost);
|
||||
vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
|
||||
&prologue_cost_vec, &body_cost_vec,
|
||||
ncopies_for_cost);
|
||||
|
||||
/* Record the prologue costs, which were delayed until we were
|
||||
sure that SLP was successful. Unlike the body costs, we know
|
||||
the final values now regardless of the loop vectorization factor. */
|
||||
void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
|
||||
: BB_VINFO_TARGET_COST_DATA (bb_vinfo));
|
||||
sure that SLP was successful. */
|
||||
FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
|
||||
{
|
||||
struct _stmt_vec_info *stmt_info
|
||||
|
@ -1676,7 +1690,17 @@ vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
si->misalign, vect_prologue);
|
||||
}
|
||||
|
||||
/* Record the instance's instructions in the target cost model. */
|
||||
FOR_EACH_VEC_ELT (body_cost_vec, i, si)
|
||||
{
|
||||
struct _stmt_vec_info *stmt_info
|
||||
= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
|
||||
(void) add_stmt_cost (data, si->count, si->kind, stmt_info,
|
||||
si->misalign, vect_body);
|
||||
}
|
||||
|
||||
prologue_cost_vec.release ();
|
||||
body_cost_vec.release ();
|
||||
}
|
||||
|
||||
/* Analyze an SLP instance starting from a group of grouped stores. Call
|
||||
|
@ -1811,7 +1835,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
SLP_INSTANCE_TREE (new_instance) = node;
|
||||
SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
|
||||
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
|
||||
SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
|
||||
SLP_INSTANCE_LOADS (new_instance) = loads;
|
||||
|
||||
/* Compute the load permutation. */
|
||||
|
@ -1863,13 +1886,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
|
||||
|
||||
if (loop_vinfo)
|
||||
{
|
||||
/* Compute the costs of this SLP instance. Delay this for BB
|
||||
vectorization as we don't have vector types computed yet. */
|
||||
vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
|
||||
new_instance, TYPE_VECTOR_SUBPARTS (vectype));
|
||||
LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
|
||||
}
|
||||
LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
|
||||
else
|
||||
BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
|
||||
|
||||
|
@ -2237,7 +2254,7 @@ vect_slp_analyze_node_operations (slp_tree node)
|
|||
operations are supported. */
|
||||
|
||||
bool
|
||||
vect_slp_analyze_operations (vec<slp_instance> slp_instances)
|
||||
vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
|
||||
{
|
||||
slp_instance instance;
|
||||
int i;
|
||||
|
@ -2259,7 +2276,11 @@ vect_slp_analyze_operations (vec<slp_instance> slp_instances)
|
|||
slp_instances.ordered_remove (i);
|
||||
}
|
||||
else
|
||||
i++;
|
||||
{
|
||||
/* Compute the costs of the SLP instance. */
|
||||
vect_analyze_slp_cost (instance, data);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!slp_instances.length ())
|
||||
|
@ -2342,26 +2363,9 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
|
|||
{
|
||||
vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
|
||||
slp_instance instance;
|
||||
int i, j;
|
||||
int i;
|
||||
unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
|
||||
unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
|
||||
void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
|
||||
stmt_vec_info stmt_info = NULL;
|
||||
stmt_vector_for_cost body_cost_vec;
|
||||
stmt_info_for_cost *ci;
|
||||
|
||||
/* Calculate vector costs. */
|
||||
FOR_EACH_VEC_ELT (slp_instances, i, instance)
|
||||
{
|
||||
body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
|
||||
|
||||
FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
|
||||
{
|
||||
stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
|
||||
(void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
|
||||
stmt_info, ci->misalign, vect_body);
|
||||
}
|
||||
}
|
||||
|
||||
/* Calculate scalar cost. */
|
||||
FOR_EACH_VEC_ELT (slp_instances, i, instance)
|
||||
|
@ -2519,7 +2523,8 @@ vect_slp_analyze_bb_1 (basic_block bb)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo)))
|
||||
if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
|
||||
BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
|
@ -2529,15 +2534,6 @@ vect_slp_analyze_bb_1 (basic_block bb)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* Compute the costs of the SLP instances. */
|
||||
FOR_EACH_VEC_ELT (slp_instances, i, instance)
|
||||
{
|
||||
gimple stmt = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0];
|
||||
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
|
||||
vect_analyze_slp_cost (NULL, bb_vinfo,
|
||||
instance, TYPE_VECTOR_SUBPARTS (vectype));
|
||||
}
|
||||
|
||||
/* Cost model: check if the vectorization is worthwhile. */
|
||||
if (!unlimited_cost_model (NULL)
|
||||
&& !vect_bb_vectorization_profitable_p (bb_vinfo))
|
||||
|
@ -2616,45 +2612,6 @@ vect_slp_analyze_bb (basic_block bb)
|
|||
}
|
||||
|
||||
|
||||
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
|
||||
the number of created vector stmts depends on the unrolling factor).
|
||||
However, the actual number of vector stmts for every SLP node depends on
|
||||
VF which is set later in vect_analyze_operations (). Hence, SLP costs
|
||||
should be updated. In this function we assume that the inside costs
|
||||
calculated in vect_model_xxx_cost are linear in ncopies. */
|
||||
|
||||
void
|
||||
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
|
||||
{
|
||||
unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||
vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
|
||||
slp_instance instance;
|
||||
stmt_vector_for_cost body_cost_vec;
|
||||
stmt_info_for_cost *si;
|
||||
void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"=== vect_update_slp_costs_according_to_vf ===\n");
|
||||
|
||||
FOR_EACH_VEC_ELT (slp_instances, i, instance)
|
||||
{
|
||||
/* We assume that costs are linear in ncopies. */
|
||||
int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
|
||||
|
||||
/* Record the instance's instructions in the target cost model.
|
||||
This was delayed until here because the count of instructions
|
||||
isn't known beforehand. */
|
||||
body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
|
||||
|
||||
FOR_EACH_VEC_ELT (body_cost_vec, j, si)
|
||||
(void) add_stmt_cost (data, si->count * ncopies, si->kind,
|
||||
vinfo_for_stmt (si->stmt), si->misalign,
|
||||
vect_body);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* For constant and loop invariant defs of SLP_NODE this function returns
|
||||
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
|
||||
OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
|
||||
|
|
|
@ -128,9 +128,6 @@ typedef struct _slp_instance {
|
|||
/* The unrolling factor required to vectorize this SLP instance. */
|
||||
unsigned int unrolling_factor;
|
||||
|
||||
/* Vectorization costs associated with SLP instance. */
|
||||
stmt_vector_for_cost body_cost_vec;
|
||||
|
||||
/* The group of nodes that contain loads of this SLP instance. */
|
||||
vec<slp_tree> loads;
|
||||
} *slp_instance;
|
||||
|
@ -140,7 +137,6 @@ typedef struct _slp_instance {
|
|||
#define SLP_INSTANCE_TREE(S) (S)->root
|
||||
#define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size
|
||||
#define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor
|
||||
#define SLP_INSTANCE_BODY_COST_VEC(S) (S)->body_cost_vec
|
||||
#define SLP_INSTANCE_LOADS(S) (S)->loads
|
||||
|
||||
#define SLP_TREE_CHILDREN(S) (S)->children
|
||||
|
@ -1114,9 +1110,9 @@ extern void vect_free_slp_instance (slp_instance);
|
|||
extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
|
||||
gimple_stmt_iterator *, int,
|
||||
slp_instance, bool);
|
||||
extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances);
|
||||
extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances,
|
||||
void *);
|
||||
extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
|
||||
extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
|
||||
extern bool vect_analyze_slp (loop_vec_info, bb_vec_info, unsigned);
|
||||
extern bool vect_make_slp_decision (loop_vec_info);
|
||||
extern void vect_detect_hybrid_slp (loop_vec_info);
|
||||
|
|
Loading…
Reference in New Issue