tree-optimization/101207 - fix BB reduc permute elide with live stmts

This fixes breakage of live lane extracts from permuted loads whose
permutation we elide for BB reduction vectorization.  The un-permuting
is now handled the same way as in the regular eliding code: the reverse
permute is applied to both the scalar stmts and the load permutation.

2021-06-28  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/101207
	* tree-vect-slp.c (vect_optimize_slp): Do BB reduction
	permute eliding for load permutations properly.

	* gcc.dg/vect/bb-slp-pr101207.c: New testcase.
This commit is contained in:
Richard Biener 2021-06-28 09:42:58 +02:00
parent 0ad9d88a3d
commit 2ad71efb5d
2 changed files with 71 additions and 42 deletions

View File

@ -0,0 +1,25 @@
/* { dg-do run } */
/* { dg-additional-options "-ffast-math" } */
#include "tree-vect.h"
/* Input operands read by foo (); main () initializes both elements.  */
double a[2];
/* Results written by foo () and verified by main ().  */
double x, y;
/* Kernel under test.  Computes a difference and a sum of the same two
   elements of `a' and stores them to the globals x and y.  Note the
   operand order differs between the two statements (a[1], a[0] versus
   a[0], a[1]); keep the statements exactly as written, since their shape
   is what the test exercises.  The noipa attribute disables
   interprocedural optimization between foo () and its caller, so the
   stores in main () cannot be folded into this body.  */
void __attribute__((noipa)) foo ()
{
/* x = a[1] - a[0]  */
x = a[1] - a[0];
/* y = a[0] + a[1]  */
y = a[0] + a[1];
}
/* Test driver: seed the inputs, run foo () once and abort if either
   result differs from the expected value.  */
int main()
{
  /* Declared by tree-vect.h; presumably verifies that the target can
     execute vectorized code -- confirm against the header.  */
  check_vect ();

  a[0] = 0.;
  a[1] = 1.;
  foo ();

  /* With a = { 0, 1 }: a[1] - a[0] == 1 and a[0] + a[1] == 1.  */
  if (!(x == 1. && y == 1.))
    __builtin_abort ();

  return 0;
}

View File

@ -3921,6 +3921,52 @@ vect_optimize_slp (vec_info *vinfo)
}
}
/* Elide any permutations at BB reduction roots. */
if (is_a <bb_vec_info> (vinfo))
{
for (slp_instance instance : vinfo->slp_instances)
{
if (SLP_INSTANCE_KIND (instance) != slp_inst_kind_bb_reduc)
continue;
slp_tree old = SLP_INSTANCE_TREE (instance);
if (SLP_TREE_CODE (old) == VEC_PERM_EXPR
&& SLP_TREE_CHILDREN (old).length () == 1)
{
slp_tree child = SLP_TREE_CHILDREN (old)[0];
if (SLP_TREE_DEF_TYPE (child) == vect_external_def)
{
/* Preserve the special VEC_PERM we use to shield existing
vector defs from the rest. But make it a no-op. */
unsigned i = 0;
for (std::pair<unsigned, unsigned> &p
: SLP_TREE_LANE_PERMUTATION (old))
p.second = i++;
}
else
{
SLP_INSTANCE_TREE (instance) = child;
SLP_TREE_REF_COUNT (child)++;
vect_free_slp_tree (old);
}
}
else if (SLP_TREE_LOAD_PERMUTATION (old).exists ()
&& SLP_TREE_REF_COUNT (old) == 1
&& vertices[old->vertex].materialize)
{
/* ??? For loads the situation is more complex since
we can't modify the permute in place in case the
node is used multiple times. In fact for loads this
should be somehow handled in the propagation engine. */
/* Apply the reverse permutation to our stmts. */
int perm = vertices[old->vertex].get_perm_in ();
vect_slp_permute (perms[perm],
SLP_TREE_SCALAR_STMTS (old), true);
vect_slp_permute (perms[perm],
SLP_TREE_LOAD_PERMUTATION (old), true);
}
}
}
/* Free the perms vector used for propagation. */
while (!perms.is_empty ())
perms.pop ().release ();
@ -3987,48 +4033,6 @@ vect_optimize_slp (vec_info *vinfo)
}
}
}
/* And any permutations of BB reductions. */
if (is_a <bb_vec_info> (vinfo))
{
for (slp_instance instance : vinfo->slp_instances)
{
if (SLP_INSTANCE_KIND (instance) != slp_inst_kind_bb_reduc)
continue;
slp_tree old = SLP_INSTANCE_TREE (instance);
if (SLP_TREE_CODE (old) == VEC_PERM_EXPR
&& SLP_TREE_CHILDREN (old).length () == 1)
{
slp_tree child = SLP_TREE_CHILDREN (old)[0];
if (SLP_TREE_DEF_TYPE (child) == vect_external_def)
{
/* Preserve the special VEC_PERM we use to shield existing
vector defs from the rest. But make it a no-op. */
unsigned i = 0;
for (std::pair<unsigned, unsigned> &p
: SLP_TREE_LANE_PERMUTATION (old))
p.second = i++;
}
else
{
SLP_INSTANCE_TREE (instance) = child;
SLP_TREE_REF_COUNT (child)++;
vect_free_slp_tree (old);
}
}
else if (SLP_TREE_LOAD_PERMUTATION (old).exists ()
&& SLP_TREE_REF_COUNT (old) == 1)
{
/* ??? For loads the situation is more complex since
we can't modify the permute in place in case the
node is used multiple times. In fact for loads this
should be somehow handled in the propagation engine. */
auto fn = [] (const void *a, const void *b)
{ return *(const int *)a - *(const int *)b; };
SLP_TREE_LOAD_PERMUTATION (old).qsort (fn);
}
}
}
}
/* Gather loads reachable from the individual SLP graph entries. */