re PR tree-optimization/78205 (BB vectorization confused by too large load groups)

2016-11-08  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/78205
	* tree-vect-stmts.c (vectorizable_load): Move check whether
	we may run into gaps when BB vectorizing SLP permutations ...
	* tree-vect-slp.c (vect_supported_load_permutation_p): ...
	here where we can do a more precise check.

	* gcc.dg/vect/bb-slp-pr78205.c: New testcase.

From-SVN: r241956
This commit is contained in:
Richard Biener 2016-11-08 08:06:42 +00:00 committed by Richard Biener
parent ed053eb824
commit fe73a33284
5 changed files with 57 additions and 12 deletions

View File

@ -1,3 +1,11 @@
2016-11-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/78205
* tree-vect-stmts.c (vectorizable_load): Move check whether
we may run into gaps when BB vectorizing SLP permutations ...
* tree-vect-slp.c (vect_supported_load_permutation_p): ...
here where we can do a more precise check.
2016-11-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/78224

View File

@ -1,3 +1,8 @@
2016-11-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/78205
* gcc.dg/vect/bb-slp-pr78205.c: New testcase.
2016-11-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/78224

View File

@ -0,0 +1,25 @@
/* { dg-do compile } */
/* { dg-require-effective-target vect_double } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Globals used by the test: c is the five-element source that is read;
a and b each receive four elements and x receives two. */
double x[2], a[4], b[4], c[5];
/* Fill a, b and x from c using only straight-line scalar assignments.
Takes no arguments and returns nothing. */
void foo ()
{
/* Store group for a[]: the pair c[0], c[1] is written twice. */
a[0] = c[0];
a[1] = c[1];
a[2] = c[0];
a[3] = c[1];
/* Store group for b[]: the pair c[2], c[3] is written twice. */
b[0] = c[2];
b[1] = c[3];
b[2] = c[2];
b[3] = c[3];
/* Store group for x[]: both elements are copies of c[4], the last
element of c. */
x[0] = c[4];
x[1] = c[4];
}
/* We may not vectorize the store to x[] as the vector load feeding it
would access c out of bounds, but we do want to vectorize the other two
store groups. */
/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
/* { dg-final { scan-tree-dump-times "x\\\[\[0-1\]\\\] = " 2 "optimized" } } */

View File

@ -1459,6 +1459,25 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
SLP_TREE_LOAD_PERMUTATION (node).release ();
else
{
stmt_vec_info group_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
unsigned nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
unsigned k, maxk = 0;
FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
if (k > maxk)
maxk = k;
/* In BB vectorization we may not actually use a loaded vector
accessing elements in excess of GROUP_SIZE. */
if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"BB vectorization with gaps at the end of "
"a load is not supported\n");
return false;
}
/* Verify the permutation can be generated. */
vec<tree> tem;
unsigned n_perms;

View File

@ -6548,18 +6548,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
slp_perm = true;
/* ??? The following is overly pessimistic (as well as the loop
case above) in the case we can statically determine the excess
elements loaded are within the bounds of a decl that is accessed.
Likewise for BB vectorizations using masked loads is a possibility. */
if (bb_vinfo && slp_perm && group_size % nunits != 0)
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"BB vectorization with gaps at the end of a load "
"is not supported\n");
return false;
}
/* Invalidate assumptions made by dependence analysis when vectorization
on the unrolled body effectively re-orders stmts. */
if (!PURE_SLP_STMT (stmt_info)