Implement constant-folding simplifications of reductions.

This patch addresses a code quality regression in GCC 12 by implementing
some constant folding/simplification transformations for REDUC_PLUS_EXPR
in match.pd.  The motivating example is gcc.dg/vect/pr89440.c which with
-O2 -ffast-math (with vectorization now enabled) gets optimized to:

float f (float x)
{
  vector(4) float vect_x_14.11;
  vector(4) float _2;
  float _32;

  _2 = {x_9(D), 0.0, 0.0, 0.0};
  vect_x_14.11_29 = _2 + { 1.0e+1, 2.6e+1, 4.2e+1, 5.8e+1 };
  _32 = .REDUC_PLUS (vect_x_14.11_29); [tail call]
  return _32;
}

With these proposed new transformations, we can simplify the
above code even further.

float f (float x)
{
  float _32;
  _32 = x_9(D) + 1.36e+2;
  return _32;
}

[which happens to match what we'd produce with -fno-tree-vectorize,
and with GCC 11].

2022-02-22  Roger Sayle  <roger@nextmovesoftware.com>
	    Richard Biener  <rguenther@suse.de>

gcc/ChangeLog
	* fold-const.cc (ctor_single_nonzero_element): New function to
	return the single non-zero element of a (vector) constructor.
	* fold-const.h (ctor_single_nonzero_element): Prototype here.
	* match.pd (reduc (constructor@0)): Simplify reductions of a
	constructor containing a single non-zero element.
	(reduc (@0 op VECTOR_CST) ->  (reduc @0) op CONST): Simplify
	reductions of vector operations of the same operator with
	constant vector operands.

gcc/testsuite/ChangeLog
	* gcc.dg/fold-reduc-1.c: New test case.
This commit is contained in:
Roger Sayle 2022-02-22 12:32:22 +00:00
parent 2f59f06761
commit 2ef0e75d0b
4 changed files with 58 additions and 0 deletions

View File

@ -16792,6 +16792,26 @@ address_compare (tree_code code, tree type, tree op0, tree op1,
return equal;
}
/* Examine every element of the CONSTRUCTOR T and return the value of the
   one element that is neither an integer zero nor a real zero.  Return
   NULL_TREE when T is not a CONSTRUCTOR, when no element is non-zero, or
   when more than one element is non-zero.  */
tree
ctor_single_nonzero_element (const_tree t)
{
  if (TREE_CODE (t) != CONSTRUCTOR)
    return NULL_TREE;

  tree found = NULL_TREE;
  constructor_elt *entry;
  unsigned HOST_WIDE_INT pos = 0;

  while (vec_safe_iterate (CONSTRUCTOR_ELTS (t), pos, &entry))
    {
      pos++;

      /* Zero-valued elements are ignored.  */
      tree val = entry->value;
      if (integer_zerop (val) || real_zerop (val))
        continue;

      /* A second non-zero element means there is no unique answer.  */
      if (found)
        return NULL_TREE;
      found = val;
    }

  return found;
}
#if CHECKING_P
namespace selftest {

View File

@ -225,6 +225,7 @@ extern const char *c_getstr (tree);
extern wide_int tree_nonzero_bits (const_tree);
extern int address_compare (tree_code, tree, tree, tree, tree &, tree &,
poly_int64 &, poly_int64 &, bool);
/* Return the single non-zero element of a CONSTRUCTOR, or NULL_TREE.  */
extern tree ctor_single_nonzero_element (const_tree);
/* Return OFF converted to a pointer offset type suitable as offset for
POINTER_PLUS_EXPR. Use location LOC for this conversion. */

View File

@ -7532,6 +7532,24 @@ and,
(BIT_FIELD_REF:elt_type @0 { size; } { pos; })
{ elt; })))))))
/* Fold reduction of a single nonzero element constructor.
   When ctor_single_nonzero_element reports exactly one element of the
   CONSTRUCTOR that is not an integer or real zero, a PLUS, IOR or XOR
   reduction of that vector is replaced by the element itself.  If @0 is
   an SSA_NAME the constructor is taken from rhs1 of its defining
   statement; otherwise @0 is used directly.  */
(for reduc (IFN_REDUC_PLUS IFN_REDUC_IOR IFN_REDUC_XOR)
(simplify (reduc (CONSTRUCTOR@0))
(with { tree ctor = (TREE_CODE (@0) == SSA_NAME
? gimple_assign_rhs1 (SSA_NAME_DEF_STMT (@0)) : @0);
tree elt = ctor_single_nonzero_element (ctor); }
/* The replacement is only made when neither signaling NaNs nor signed
   zeros are honored for the result type.
   NOTE(review): the type of ELT is not compared against TYPE here;
   confirm that constructors with vector-typed elements cannot reach
   this point.  */
(if (elt
&& !HONOR_SNANS (type)
&& !HONOR_SIGNED_ZEROS (type))
{ elt; }))))
/* Fold REDUC (@0 op VECTOR_CST) as REDUC (@0) op REDUC (VECTOR_CST).
   The REDUC and OP operator lists have the same length and are paired
   position by position (IFN_REDUC_PLUS with plus, IFN_REDUC_MAX with max,
   IFN_REDUC_FMAX with IFN_FMAX, and so on), so a reduction is only split
   across the element-wise operation of the same kind.  The constant
   operand @1 ends up in a reduction of its own whose argument is a
   VECTOR_CST, presumably so that it can be constant folded.
   NOTE(review): no type or overflow condition guards this pattern;
   confirm that none is required for the plus case.  */
(for reduc (IFN_REDUC_PLUS IFN_REDUC_MAX IFN_REDUC_MIN IFN_REDUC_FMAX
IFN_REDUC_FMIN IFN_REDUC_AND IFN_REDUC_IOR IFN_REDUC_XOR)
op (plus max min IFN_FMAX IFN_FMIN bit_and bit_ior bit_xor)
(simplify (reduc (op @0 VECTOR_CST@1))
(op (reduc:type @0) (reduc:type @1))))
(simplify
(vec_perm @0 @1 VECTOR_CST@2)
(with

View File

@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
/* The two nested loops run 4 * 4 = 16 iterations in total.  A is
   incremented by one before every addition, so the values 1 through 16
   are added to X and the function returns X + 136.  Note that the inner
   loop counter J is a float.  */
float foo (float x)
{
int i;
float j;
float a = 0;
for (i = 0; i < 4; ++i)
{
for (j = 0; j < 4; ++j)
{
a += 1;
x += a;
}
}
return x;
}
/* { dg-final { scan-tree-dump-not "REDUC_PLUS" "optimized"} } */