re PR tree-optimization/78205 (BB vectorization confused by too large load groups)
2016-11-08  Richard Biener  <rguenther@suse.de>

PR tree-optimization/78205
* tree-vect-stmts.c (vectorizable_load): Move check whether we may run into gaps when BB vectorizing SLP permutations ...
* tree-vect-slp.c (vect_supported_load_permutation_p): ... here where we can do a more precise check.

* gcc.dg/vect/bb-slp-pr78205.c: New testcase.

From-SVN: r241956
This commit is contained in:
parent
ed053eb824
commit
fe73a33284
|
@ -1,3 +1,11 @@
|
|||
2016-11-08 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/78205
|
||||
* tree-vect-stmts.c (vectorizable_load): Move check whether
|
||||
we may run into gaps when BB vectorizing SLP permutations ...
|
||||
* tree-vect-slp.c (vect_supported_load_permutation_p): ...
|
||||
here where we can do a more precise check.
|
||||
|
||||
2016-11-08 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/78224
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2016-11-08 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/78205
|
||||
* gcc.dg/vect/bb-slp-pr78205.c: New testcase.
|
||||
|
||||
2016-11-08 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/78224
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_double } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
double x[2], a[4], b[4], c[5];
|
||||
|
||||
/* Test body for PR tree-optimization/78205.  Performs three groups of
   stores, each fed by loads from the global array c:
     - a[0..3] receives the pair c[0], c[1] twice,
     - b[0..3] receives the pair c[2], c[3] twice,
     - x[0..1] receives c[4] twice.
   c[4] is the last element of c (declared as c[5]).  The statement
   pattern is what the dg-final scans check, so it must not be
   reordered or simplified.  */
void foo ()
|
||||
{
|
||||
a[0] = c[0];  /* Store group 1: a[] from c[0..1], duplicated.  */
|
||||
a[1] = c[1];
|
||||
a[2] = c[0];
|
||||
a[3] = c[1];
|
||||
b[0] = c[2];  /* Store group 2: b[] from c[2..3], duplicated.  */
|
||||
b[1] = c[3];
|
||||
b[2] = c[2];
|
||||
b[3] = c[3];
|
||||
x[0] = c[4];  /* Store group 3: x[] from the last element of c.  */
|
||||
x[1] = c[4];
|
||||
}
|
||||
|
||||
/* We may not vectorize the store to x[] as doing so would access c out of bounds
|
||||
but we do want to vectorize the other two store groups. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
|
||||
/* { dg-final { scan-tree-dump-times "x\\\[\[0-1\]\\\] = " 2 "optimized" } } */
|
|
@ -1459,6 +1459,25 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
|
|||
SLP_TREE_LOAD_PERMUTATION (node).release ();
|
||||
else
|
||||
{
|
||||
stmt_vec_info group_info
|
||||
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
|
||||
group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
|
||||
unsigned nunits
|
||||
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
|
||||
unsigned k, maxk = 0;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
|
||||
if (k > maxk)
|
||||
maxk = k;
|
||||
/* In BB vectorization we may not actually use a loaded vector
|
||||
accessing elements in excess of GROUP_SIZE. */
|
||||
if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
|
||||
{
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"BB vectorization with gaps at the end of "
|
||||
"a load is not supported\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Verify the permutation can be generated. */
|
||||
vec<tree> tem;
|
||||
unsigned n_perms;
|
||||
|
|
|
@ -6548,18 +6548,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
|||
if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
|
||||
slp_perm = true;
|
||||
|
||||
/* ??? The following is overly pessimistic (as well as the loop
|
||||
case above) in the case we can statically determine the excess
|
||||
elements loaded are within the bounds of a decl that is accessed.
|
||||
Likewise, for BB vectorization, using masked loads is a possibility. */
|
||||
if (bb_vinfo && slp_perm && group_size % nunits != 0)
|
||||
{
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"BB vectorization with gaps at the end of a load "
|
||||
"is not supported\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Invalidate assumptions made by dependence analysis when vectorization
|
||||
on the unrolled body effectively re-orders stmts. */
|
||||
if (!PURE_SLP_STMT (stmt_info)
|
||||
|
|
Loading…
Reference in New Issue