re PR middle-end/37150 (basic-block vectorization misses some unrolled loops)
2016-11-07 Richard Biener <rguenther@suse.de> PR tree-optimization/37150 * tree-vectorizer.h (vect_transform_slp_perm_load): Add n_perms parameter. * tree-vect-slp.c (vect_supported_load_permutation_p): Adjust. (vect_analyze_slp_cost_1): Account for the real number of permutations emitted and for dead loads. (vect_transform_slp_perm_load): Add n_perms parameter counting the number of emitted permutations. * tree-vect-stmts.c (vectorizable_load): Adjust. From-SVN: r241893
This commit is contained in:
parent
a0d3edf8a5
commit
29afecdf1b
@ -1,3 +1,15 @@
|
|||||||
|
2016-11-07 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
|
PR tree-optimization/37150
|
||||||
|
* tree-vectorizer.h (vect_transform_slp_perm_load): Add n_perms
|
||||||
|
parameter.
|
||||||
|
* tree-vect-slp.c (vect_supported_load_permutation_p): Adjust.
|
||||||
|
(vect_analyze_slp_cost_1): Account for the real number of
|
||||||
|
permutations emitted and for dead loads.
|
||||||
|
(vect_transform_slp_perm_load): Add n_perms parameter counting
|
||||||
|
the number of emitted permutations.
|
||||||
|
* tree-vect-stmts.c (vectorizable_load): Adjust.
|
||||||
|
|
||||||
2016-11-07 Richard Biener <rguenther@suse.de>
|
2016-11-07 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
PR tree-optimization/78189
|
PR tree-optimization/78189
|
||||||
|
@ -1461,8 +1461,9 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
|
|||||||
{
|
{
|
||||||
/* Verify the permutation can be generated. */
|
/* Verify the permutation can be generated. */
|
||||||
vec<tree> tem;
|
vec<tree> tem;
|
||||||
|
unsigned n_perms;
|
||||||
if (!vect_transform_slp_perm_load (node, tem, NULL,
|
if (!vect_transform_slp_perm_load (node, tem, NULL,
|
||||||
1, slp_instn, true))
|
1, slp_instn, true, &n_perms))
|
||||||
{
|
{
|
||||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
|
||||||
vect_location,
|
vect_location,
|
||||||
@ -1475,11 +1476,13 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* For loop vectorization verify we can generate the permutation. */
|
/* For loop vectorization verify we can generate the permutation. */
|
||||||
|
unsigned n_perms;
|
||||||
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
|
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
|
||||||
if (node->load_permutation.exists ()
|
if (node->load_permutation.exists ()
|
||||||
&& !vect_transform_slp_perm_load
|
&& !vect_transform_slp_perm_load
|
||||||
(node, vNULL, NULL,
|
(node, vNULL, NULL,
|
||||||
SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
|
SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
|
||||||
|
&n_perms))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -1548,14 +1551,38 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
|
|||||||
stmt = GROUP_FIRST_ELEMENT (stmt_info);
|
stmt = GROUP_FIRST_ELEMENT (stmt_info);
|
||||||
stmt_info = vinfo_for_stmt (stmt);
|
stmt_info = vinfo_for_stmt (stmt);
|
||||||
/* Record the cost for the permutation. */
|
/* Record the cost for the permutation. */
|
||||||
record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
|
unsigned n_perms;
|
||||||
|
vect_transform_slp_perm_load (node, vNULL, NULL,
|
||||||
|
ncopies_for_cost, instance, true,
|
||||||
|
&n_perms);
|
||||||
|
record_stmt_cost (body_cost_vec, n_perms, vec_perm,
|
||||||
stmt_info, 0, vect_body);
|
stmt_info, 0, vect_body);
|
||||||
/* And adjust the number of loads performed. */
|
|
||||||
unsigned nunits
|
unsigned nunits
|
||||||
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
|
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
|
||||||
ncopies_for_cost
|
/* And adjust the number of loads performed. This handles
|
||||||
= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
|
redundancies as well as loads that are later dead. */
|
||||||
+ nunits - 1) / nunits;
|
auto_sbitmap perm (GROUP_SIZE (stmt_info));
|
||||||
|
bitmap_clear (perm);
|
||||||
|
for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
|
||||||
|
bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
|
||||||
|
ncopies_for_cost = 0;
|
||||||
|
bool load_seen = false;
|
||||||
|
for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
|
||||||
|
{
|
||||||
|
if (i % nunits == 0)
|
||||||
|
{
|
||||||
|
if (load_seen)
|
||||||
|
ncopies_for_cost++;
|
||||||
|
load_seen = false;
|
||||||
|
}
|
||||||
|
if (bitmap_bit_p (perm, i))
|
||||||
|
load_seen = true;
|
||||||
|
}
|
||||||
|
if (load_seen)
|
||||||
|
ncopies_for_cost++;
|
||||||
|
gcc_assert (ncopies_for_cost
|
||||||
|
<= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
|
||||||
|
+ nunits - 1) / nunits);
|
||||||
ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
|
ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
|
||||||
}
|
}
|
||||||
/* Record the cost for the vector loads. */
|
/* Record the cost for the vector loads. */
|
||||||
@ -3402,7 +3429,8 @@ vect_create_mask_and_perm (gimple *stmt,
|
|||||||
bool
|
bool
|
||||||
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
||||||
gimple_stmt_iterator *gsi, int vf,
|
gimple_stmt_iterator *gsi, int vf,
|
||||||
slp_instance slp_node_instance, bool analyze_only)
|
slp_instance slp_node_instance, bool analyze_only,
|
||||||
|
unsigned *n_perms)
|
||||||
{
|
{
|
||||||
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
|
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
|
||||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||||
@ -3457,6 +3485,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
|||||||
int first_vec_index = -1;
|
int first_vec_index = -1;
|
||||||
int second_vec_index = -1;
|
int second_vec_index = -1;
|
||||||
bool noop_p = true;
|
bool noop_p = true;
|
||||||
|
*n_perms = 0;
|
||||||
|
|
||||||
for (int j = 0; j < unroll_factor; j++)
|
for (int j = 0; j < unroll_factor; j++)
|
||||||
{
|
{
|
||||||
@ -3513,6 +3542,9 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (! noop_p)
|
||||||
|
++*n_perms;
|
||||||
|
|
||||||
if (!analyze_only)
|
if (!analyze_only)
|
||||||
{
|
{
|
||||||
tree mask_vec = NULL_TREE;
|
tree mask_vec = NULL_TREE;
|
||||||
|
@ -6978,8 +6978,11 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (slp_perm)
|
if (slp_perm)
|
||||||
vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
|
{
|
||||||
slp_node_instance, false);
|
unsigned n_perms;
|
||||||
|
vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
|
||||||
|
slp_node_instance, false, &n_perms);
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -7497,8 +7500,10 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
|||||||
|
|
||||||
if (slp_perm)
|
if (slp_perm)
|
||||||
{
|
{
|
||||||
|
unsigned n_perms;
|
||||||
if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
|
if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
|
||||||
slp_node_instance, false))
|
slp_node_instance, false,
|
||||||
|
&n_perms))
|
||||||
{
|
{
|
||||||
dr_chain.release ();
|
dr_chain.release ();
|
||||||
return false;
|
return false;
|
||||||
|
@ -1166,7 +1166,7 @@ extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
|
|||||||
extern void vect_free_slp_instance (slp_instance);
|
extern void vect_free_slp_instance (slp_instance);
|
||||||
extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
|
extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
|
||||||
gimple_stmt_iterator *, int,
|
gimple_stmt_iterator *, int,
|
||||||
slp_instance, bool);
|
slp_instance, bool, unsigned *);
|
||||||
extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances,
|
extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances,
|
||||||
void *);
|
void *);
|
||||||
extern bool vect_schedule_slp (vec_info *);
|
extern bool vect_schedule_slp (vec_info *);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user