middle-end: Parity and popcount folding optimizations.
This patch implements several constant folding optimizations for __builtin_parity and friends. We canonicalize popcount(x)&1 as parity(x) in gimple, and potentially convert back again when we expand to RTL. parity(~x) is simplified to parity(x), which is true for all integer modes with an even number of bits. But probably most usefully, parity(x)^parity(y) can be simplified to a parity(x^y), requiring only a single libcall or popcount. This patch optimizes popcount and parity of an argument known to have at most a single bit set, to be that single bit. Hence, popcount(x&8) is simplified to (x>>3)&1. This generalizes the existing optimization of popcount(x&1) being simplified to x&1, which is cleaned up with this patch. 2020-07-28 Roger Sayle <roger@nextmovesoftware.com> Richard Biener <rguenther@suse.de> gcc/ChangeLog * match.pd (popcount(x)&1 -> parity(x)): New simplification. (parity(~x) -> parity(x)): New simplification. (parity(x)^parity(y) -> parity(x^y)): New simplification. (parity(x&1) -> x&1): New simplification. (popcount(x) -> x>>C): New simplification. gcc/testsuite/ChangeLog * gcc.dg/fold-popcount-5.c: New test. * gcc.dg/fold-parity-1.c: Likewise. * gcc.dg/fold-parity-2.c: Likewise. * gcc.dg/fold-parity-3.c: Likewise. * gcc.dg/fold-parity-4.c: Likewise. * gcc.dg/fold-parity-5.c: Likewise.
This commit is contained in:
parent
f3665bd111
commit
33bf56ddc6
52
gcc/match.pd
52
gcc/match.pd
|
@ -5964,25 +5964,51 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
(IFN_FMA @0 @1 @2))))
|
||||
|
||||
/* POPCOUNT simplifications. */
|
||||
(for popcount (BUILT_IN_POPCOUNT BUILT_IN_POPCOUNTL BUILT_IN_POPCOUNTLL
|
||||
BUILT_IN_POPCOUNTIMAX)
|
||||
/* popcount(X&1) is nop_expr(X&1). */
|
||||
(simplify
|
||||
(popcount @0)
|
||||
(if (tree_nonzero_bits (@0) == 1)
|
||||
(convert @0)))
|
||||
/* popcount(X) + popcount(Y) is popcount(X|Y) when X&Y must be zero. */
|
||||
(simplify
|
||||
(plus (popcount:s @0) (popcount:s @1))
|
||||
(if (wi::bit_and (tree_nonzero_bits (@0), tree_nonzero_bits (@1)) == 0)
|
||||
(popcount (bit_ior @0 @1))))
|
||||
/* popcount(X) == 0 is X == 0, and related (in)equalities. */
|
||||
/* popcount(X) + popcount(Y) is popcount(X|Y) when X&Y must be zero. */
|
||||
(simplify
|
||||
(plus (POPCOUNT:s @0) (POPCOUNT:s @1))
|
||||
(if (wi::bit_and (tree_nonzero_bits (@0), tree_nonzero_bits (@1)) == 0)
|
||||
(POPCOUNT (bit_ior @0 @1))))
|
||||
|
||||
/* popcount(X) == 0 is X == 0, and related (in)equalities. */
|
||||
(for popcount (POPCOUNT)
|
||||
(for cmp (le eq ne gt)
|
||||
rep (eq eq ne ne)
|
||||
(simplify
|
||||
(cmp (popcount @0) integer_zerop)
|
||||
(rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
|
||||
|
||||
/* Canonicalize POPCOUNT(x)&1 as PARITY(X). */
|
||||
(simplify
|
||||
(bit_and (POPCOUNT @0) integer_onep)
|
||||
(PARITY @0))
|
||||
|
||||
/* PARITY simplifications. */
|
||||
/* parity(~X) is parity(X). */
|
||||
(simplify
|
||||
(PARITY (bit_not @0))
|
||||
(PARITY @0))
|
||||
|
||||
/* parity(X)^parity(Y) is parity(X^Y). */
|
||||
(simplify
|
||||
(bit_xor (PARITY:s @0) (PARITY:s @1))
|
||||
(PARITY (bit_xor @0 @1)))
|
||||
|
||||
/* Common POPCOUNT/PARITY simplifications. */
|
||||
/* popcount(X&C1) is (X>>C2)&1 when C1 == 1<<C2. Same for parity(X&C1). */
|
||||
(for pfun (POPCOUNT PARITY)
|
||||
(simplify
|
||||
(pfun @0)
|
||||
(with { wide_int nz = tree_nonzero_bits (@0); }
|
||||
(switch
|
||||
(if (nz == 1)
|
||||
(convert @0))
|
||||
(if (wi::popcount (nz) == 1)
|
||||
(with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
|
||||
(convert (rshift:utype (convert:utype @0)
|
||||
{ build_int_cst (integer_type_node,
|
||||
wi::ctz (nz)); }))))))))
|
||||
|
||||
#if GIMPLE
|
||||
/* 64- and 32-bits branchless implementations of popcount are detected:
|
||||
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-original" } */
|
||||
|
||||
int foo(unsigned int x)
|
||||
{
|
||||
return __builtin_popcount(x) & 1;
|
||||
}
|
||||
|
||||
int fool(unsigned long x)
|
||||
{
|
||||
return __builtin_popcountl(x) & 1;
|
||||
}
|
||||
|
||||
int fooll(unsigned long long x)
|
||||
{
|
||||
return __builtin_popcountll(x) & 1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_popcount" 0 "original" } } */
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "original" } } */
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
int foo(unsigned int x)
|
||||
{
|
||||
return __builtin_parity(~x);
|
||||
}
|
||||
|
||||
int fool(unsigned long x)
|
||||
{
|
||||
return __builtin_parityl(~x);
|
||||
}
|
||||
|
||||
int fooll(unsigned long long x)
|
||||
{
|
||||
return __builtin_parityll(~x);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "~" 0 "optimized" } } */
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
int foo(unsigned int x)
|
||||
{
|
||||
return __builtin_parity(x&1);
|
||||
}
|
||||
|
||||
int fool(unsigned long x)
|
||||
{
|
||||
return __builtin_parityl(x&1);
|
||||
}
|
||||
|
||||
int fooll(unsigned long long x)
|
||||
{
|
||||
return __builtin_parityll(x&1);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_parity" 0 "optimized" } } */
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
int foo(unsigned int x, unsigned int y)
|
||||
{
|
||||
return __builtin_parity(x) ^ __builtin_parity(y);
|
||||
}
|
||||
|
||||
int fool(unsigned long x, unsigned long y)
|
||||
{
|
||||
return __builtin_parityl(x) ^ __builtin_parityl(y);
|
||||
}
|
||||
|
||||
int fooll(unsigned long long x, unsigned long long y)
|
||||
{
|
||||
return __builtin_parityll(x) ^ __builtin_parityll(y);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "optimized" } } */
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
int test_and4(unsigned int a)
|
||||
{
|
||||
return __builtin_parity(a&4);
|
||||
}
|
||||
|
||||
int test_and4l(unsigned long b)
|
||||
{
|
||||
return __builtin_parityl(b&4);
|
||||
}
|
||||
|
||||
int test_and4ll(unsigned long long c)
|
||||
{
|
||||
return __builtin_parityll(c&4);
|
||||
}
|
||||
|
||||
int test_shift(unsigned int d)
|
||||
{
|
||||
int bits = 8*sizeof(unsigned int)-1;
|
||||
return __builtin_parity(d<<31);
|
||||
}
|
||||
|
||||
int test_shiftl(unsigned long e)
|
||||
{
|
||||
int bits = 8*sizeof(unsigned long)-1;
|
||||
return __builtin_parityl(e<<bits);
|
||||
}
|
||||
|
||||
int test_shiftll(unsigned long long f)
|
||||
{
|
||||
int bits = 8*sizeof(unsigned long long)-1;
|
||||
return __builtin_parityll(f<<bits);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "parity" 0 "optimized" } } */
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
int test_and4(unsigned int a)
|
||||
{
|
||||
return __builtin_popcount(a&4);
|
||||
}
|
||||
|
||||
int test_and4l(unsigned long b)
|
||||
{
|
||||
return __builtin_popcountl(b&4);
|
||||
}
|
||||
|
||||
int test_and4ll(unsigned long long c)
|
||||
{
|
||||
return __builtin_popcountll(c&4);
|
||||
}
|
||||
|
||||
int test_shift(unsigned int d)
|
||||
{
|
||||
int bits = 8*sizeof(unsigned int)-1;
|
||||
return __builtin_popcount(d<<31);
|
||||
}
|
||||
|
||||
int test_shiftl(unsigned long e)
|
||||
{
|
||||
int bits = 8*sizeof(unsigned long)-1;
|
||||
return __builtin_popcountl(e<<bits);
|
||||
}
|
||||
|
||||
int test_shiftll(unsigned long long f)
|
||||
{
|
||||
int bits = 8*sizeof(unsigned long long)-1;
|
||||
return __builtin_popcountll(f<<bits);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "popcount" 0 "optimized" } } */
|
||||
|
Loading…
Reference in New Issue