Implement constant-folding simplifications of reductions.
This patch addresses a code quality regression in GCC 12 by implementing
some constant folding/simplification transformations for REDUC_PLUS_EXPR
in match.pd.  The motivating example is gcc.dg/vect/pr89440.c which with
-O2 -ffast-math (with vectorization now enabled) gets optimized to:

float f (float x)
{
  vector(4) float vect_x_14.11;
  vector(4) float _2;
  float _32;

  _2 = {x_9(D), 0.0, 0.0, 0.0};
  vect_x_14.11_29 = _2 + { 1.0e+1, 2.6e+1, 4.2e+1, 5.8e+1 };
  _32 = .REDUC_PLUS (vect_x_14.11_29); [tail call]
  return _32;
}

With these proposed new transformations, we can simplify the above code
even further.

float f (float x)
{
  float _32;

  _32 = x_9(D) + 1.36e+2;
  return _32;
}

[which happens to match what we'd produce with -fno-tree-vectorize, and
with GCC 11].

2022-02-22  Roger Sayle  <roger@nextmovesoftware.com>
	    Richard Biener  <rguenther@suse.de>

gcc/ChangeLog
	* fold-const.cc (ctor_single_nonzero_element): New function to
	return the single non-zero element of a (vector) constructor.
	* fold-const.h (ctor_single_nonzero_element): Prototype here.
	* match.pd (reduc (constructor@0)): Simplify reductions of a
	constructor containing a single non-zero element.
	(reduc (@0 op VECTOR_CST) -> (reduc @0) op CONST): Simplify
	reductions of vector operations of the same operator with
	constant vector operands.

gcc/testsuite/ChangeLog
	* gcc.dg/fold-reduc-1.c: New test case.
This commit is contained in:
parent
2f59f06761
commit
2ef0e75d0b
@ -16792,6 +16792,26 @@ address_compare (tree_code code, tree type, tree op0, tree op1,
|
||||
return equal;
|
||||
}
|
||||
|
||||
/* Return the single non-zero element of a CONSTRUCTOR or NULL_TREE. */
|
||||
tree
|
||||
ctor_single_nonzero_element (const_tree t)
|
||||
{
|
||||
unsigned HOST_WIDE_INT idx;
|
||||
constructor_elt *ce;
|
||||
tree elt = NULL_TREE;
|
||||
|
||||
if (TREE_CODE (t) != CONSTRUCTOR)
|
||||
return NULL_TREE;
|
||||
for (idx = 0; vec_safe_iterate (CONSTRUCTOR_ELTS (t), idx, &ce); idx++)
|
||||
if (!integer_zerop (ce->value) && !real_zerop (ce->value))
|
||||
{
|
||||
if (elt)
|
||||
return NULL_TREE;
|
||||
elt = ce->value;
|
||||
}
|
||||
return elt;
|
||||
}
|
||||
|
||||
#if CHECKING_P
|
||||
|
||||
namespace selftest {
|
||||
|
@ -225,6 +225,7 @@ extern const char *c_getstr (tree);
|
||||
extern wide_int tree_nonzero_bits (const_tree);
|
||||
extern int address_compare (tree_code, tree, tree, tree, tree &, tree &,
|
||||
poly_int64 &, poly_int64 &, bool);
|
||||
extern tree ctor_single_nonzero_element (const_tree);
|
||||
|
||||
/* Return OFF converted to a pointer offset type suitable as offset for
|
||||
POINTER_PLUS_EXPR. Use location LOC for this conversion. */
|
||||
|
18
gcc/match.pd
18
gcc/match.pd
@ -7532,6 +7532,24 @@ and,
|
||||
(BIT_FIELD_REF:elt_type @0 { size; } { pos; })
|
||||
{ elt; })))))))
|
||||
|
||||
/* Fold reduction of a single nonzero element constructor.
   For the PLUS, IOR and XOR reductions, when ctor_single_nonzero_element
   finds exactly one non-zero element in the CONSTRUCTOR behind @0, the
   whole reduction is replaced by that element.  When @0 is an SSA_NAME
   the CONSTRUCTOR is read from the first rhs operand of its defining
   statement.  The fold is only applied when the result type honors
   neither signaling NaNs nor signed zeros.  */
|
||||
(for reduc (IFN_REDUC_PLUS IFN_REDUC_IOR IFN_REDUC_XOR)
|
||||
(simplify (reduc (CONSTRUCTOR@0))
|
||||
(with { tree ctor = (TREE_CODE (@0) == SSA_NAME
|
||||
? gimple_assign_rhs1 (SSA_NAME_DEF_STMT (@0)) : @0);
|
||||
tree elt = ctor_single_nonzero_element (ctor); }
|
||||
(if (elt
|
||||
&& !HONOR_SNANS (type)
|
||||
&& !HONOR_SIGNED_ZEROS (type))
|
||||
{ elt; }))))
|
||||
|
||||
/* Fold REDUC (@0 op VECTOR_CST) as REDUC (@0) op REDUC (VECTOR_CST).
   The REDUC and OP lists are walked together, so each reduction is
   paired with the scalar operation at the same position (REDUC_PLUS with
   plus, REDUC_MAX with max, and so on).  The result applies that scalar
   operation to two reductions, one of which has a constant vector as its
   only operand.
   NOTE(review): the pattern has no type, overflow or reassociation guard
   of its own; presumably it relies on these reductions only being
   generated where reassociating the operation is valid -- confirm.  */
|
||||
(for reduc (IFN_REDUC_PLUS IFN_REDUC_MAX IFN_REDUC_MIN IFN_REDUC_FMAX
|
||||
IFN_REDUC_FMIN IFN_REDUC_AND IFN_REDUC_IOR IFN_REDUC_XOR)
|
||||
op (plus max min IFN_FMAX IFN_FMIN bit_and bit_ior bit_xor)
|
||||
(simplify (reduc (op @0 VECTOR_CST@1))
|
||||
(op (reduc:type @0) (reduc:type @1))))
|
||||
|
||||
(simplify
|
||||
(vec_perm @0 @1 VECTOR_CST@2)
|
||||
(with
|
||||
|
19
gcc/testsuite/gcc.dg/fold-reduc-1.c
Normal file
19
gcc/testsuite/gcc.dg/fold-reduc-1.c
Normal file
@ -0,0 +1,19 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
|
||||
/* Add a running counter A to X over a 4 x 4 loop nest and return X.
   A is incremented by one before every addition, so across the sixteen
   inner iterations it takes the values 1..16 and the result is X + 136.
   NOTE(review): the inner induction variable J is a float; presumably
   this mirrors gcc.dg/vect/pr89440.c named in the commit message, so the
   loop shape should be left exactly as written -- confirm.  */
float foo (float x)
|
||||
{
|
||||
int i;
|
||||
float j;
|
||||
float a = 0;
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
for (j = 0; j < 4; ++j)
|
||||
{
|
||||
/* A is bumped first, so the first value added to X is 1, not 0.  */
a += 1;
|
||||
x += a;
|
||||
}
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "REDUC_PLUS" "optimized"} } */
|
Loading…
Reference in New Issue
Block a user