From 6fc14f1963dfefead588a4cd8902d641ed69255c Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Tue, 9 Aug 2022 18:54:43 +0100 Subject: [PATCH] middle-end: Optimize ((X >> C1) & C2) != C3 for more cases. Following my middle-end patch for PR tree-optimization/94026, I'd promised Jeff Law that I'd clean up the dead code in fold-const.cc now that these optimizations are handled in match.pd. Alas, I discovered things aren't quite that simple, as the transformations I'd added avoided cases where C2 overlapped with the new bits introduced by the shift, but the original code handled any value of C2 provided that it had a single bit set (under the condition that C3 was always zero). This patch upgrades the transformations supported by match.pd to cover any values of C2 and C3, provided that C1 is a valid bit shift constant, for all three shift types (logical right, arithmetic right and left). This then makes the code in fold-const.cc fully redundant, and adds support for some new (corner) cases not previously handled. If the constant C1 is valid for the type's precision, the shift is now always eliminated (with C2 and C3 possibly updated to test the sign bit). Interestingly, the fold-const.cc code that I'm now deleting was originally added by me back in 2006 to resolve PR middle-end/21137. I've confirmed that those testcase(s) remain resolved with this patch (and I'll close 21137 in Bugzilla). This patch also implements most (but not all) of the examples mentioned in PR tree-optimization/98954, for which I have some follow-up patches. 2022-08-09 Roger Sayle Richard Biener gcc/ChangeLog PR middle-end/21137 PR tree-optimization/98954 * fold-const.cc (fold_binary_loc): Remove the transformations that optimize ((X >> C1) & C2) ==/!= 0. * match.pd (cmp (bit_and (lshift @0 @1) @2) @3): Remove wi::ctz check, and handle all values of INTEGER_CSTs @2 and @3. (cmp (bit_and (rshift @0 @1) @2) @3): Likewise, remove wi::clz checks, and handle all values of INTEGER_CSTs @2 and @3. 
gcc/testsuite/ChangeLog PR middle-end/21137 PR tree-optimization/98954 * gcc.dg/fold-eqandshift-4.c: New test case. --- gcc/fold-const.cc | 54 ------------------------ gcc/match.pd | 45 +++++++++++++++----- gcc/testsuite/gcc.dg/fold-eqandshift-4.c | 46 ++++++++++++++++++++ 3 files changed, 80 insertions(+), 65 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/fold-eqandshift-4.c diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 99021a82df4..4f4ec81c8d4 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -12204,60 +12204,6 @@ fold_binary_loc (location_t loc, enum tree_code code, tree type, } } - /* Fold ((X >> C1) & C2) == 0 and ((X >> C1) & C2) != 0 where - C1 is a valid shift constant, and C2 is a power of two, i.e. - a single bit. */ - if (TREE_CODE (arg0) == BIT_AND_EXPR - && integer_pow2p (TREE_OPERAND (arg0, 1)) - && integer_zerop (arg1)) - { - tree arg00 = TREE_OPERAND (arg0, 0); - STRIP_NOPS (arg00); - if (TREE_CODE (arg00) == RSHIFT_EXPR - && TREE_CODE (TREE_OPERAND (arg00, 1)) == INTEGER_CST) - { - tree itype = TREE_TYPE (arg00); - tree arg001 = TREE_OPERAND (arg00, 1); - prec = TYPE_PRECISION (itype); - - /* Check for a valid shift count. */ - if (wi::ltu_p (wi::to_wide (arg001), prec)) - { - tree arg01 = TREE_OPERAND (arg0, 1); - tree arg000 = TREE_OPERAND (arg00, 0); - unsigned HOST_WIDE_INT log2 = tree_log2 (arg01); - /* If (C2 << C1) doesn't overflow, then - ((X >> C1) & C2) != 0 can be rewritten as - (X & (C2 << C1)) != 0. */ - if ((log2 + TREE_INT_CST_LOW (arg001)) < prec) - { - tem = fold_build2_loc (loc, LSHIFT_EXPR, itype, - arg01, arg001); - tem = fold_build2_loc (loc, BIT_AND_EXPR, itype, - arg000, tem); - return fold_build2_loc (loc, code, type, tem, - fold_convert_loc (loc, itype, arg1)); - } - /* Otherwise, for signed (arithmetic) shifts, - ((X >> C1) & C2) != 0 is rewritten as X < 0, and - ((X >> C1) & C2) == 0 is rewritten as X >= 0. */ - else if (!TYPE_UNSIGNED (itype)) - return fold_build2_loc (loc, code == EQ_EXPR ? 
GE_EXPR - : LT_EXPR, - type, arg000, - build_int_cst (itype, 0)); - /* Otherwise, of unsigned (logical) shifts, - ((X >> C1) & C2) != 0 is rewritten as (X,false), and - ((X >> C1) & C2) == 0 is rewritten as (X,true). */ - else - return omit_one_operand_loc (loc, type, - code == EQ_EXPR ? integer_one_node - : integer_zero_node, - arg000); - } - } - } - /* If this is a comparison of a field, we may be able to simplify it. */ if ((TREE_CODE (arg0) == COMPONENT_REF || TREE_CODE (arg0) == BIT_FIELD_REF) diff --git a/gcc/match.pd b/gcc/match.pd index f82f94ad1fe..c22bc2ca1e8 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3601,21 +3601,44 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cmp (bit_and:s (lshift:s @0 INTEGER_CST@1) INTEGER_CST@2) INTEGER_CST@3) (if (tree_fits_shwi_p (@1) && tree_to_shwi (@1) > 0 - && tree_to_shwi (@1) < TYPE_PRECISION (TREE_TYPE (@0)) - && tree_to_shwi (@1) <= wi::ctz (wi::to_wide (@3))) - (with { wide_int c1 = wi::to_wide (@1); - wide_int c2 = wi::lrshift (wi::to_wide (@2), c1); - wide_int c3 = wi::lrshift (wi::to_wide (@3), c1); } - (cmp (bit_and @0 { wide_int_to_tree (TREE_TYPE (@0), c2); }) - { wide_int_to_tree (TREE_TYPE (@0), c3); })))) + && tree_to_shwi (@1) < TYPE_PRECISION (TREE_TYPE (@0))) + (if (tree_to_shwi (@1) > wi::ctz (wi::to_wide (@3))) + { constant_boolean_node (cmp == NE_EXPR, type); } + (with { wide_int c1 = wi::to_wide (@1); + wide_int c2 = wi::lrshift (wi::to_wide (@2), c1); + wide_int c3 = wi::lrshift (wi::to_wide (@3), c1); } + (cmp (bit_and @0 { wide_int_to_tree (TREE_TYPE (@0), c2); }) + { wide_int_to_tree (TREE_TYPE (@0), c3); }))))) (simplify (cmp (bit_and:s (rshift:s @0 INTEGER_CST@1) INTEGER_CST@2) INTEGER_CST@3) (if (tree_fits_shwi_p (@1) && tree_to_shwi (@1) > 0 - && tree_to_shwi (@1) < TYPE_PRECISION (TREE_TYPE (@0)) - && tree_to_shwi (@1) <= wi::clz (wi::to_wide (@2)) - && tree_to_shwi (@1) <= wi::clz (wi::to_wide (@3))) - (cmp (bit_and @0 (lshift @2 @1)) (lshift @3 @1))))) + && tree_to_shwi (@1) < TYPE_PRECISION 
(TREE_TYPE (@0))) + (with { tree t0 = TREE_TYPE (@0); + unsigned int prec = TYPE_PRECISION (t0); + wide_int c1 = wi::to_wide (@1); + wide_int c2 = wi::to_wide (@2); + wide_int c3 = wi::to_wide (@3); + wide_int sb = wi::set_bit_in_zero (prec - 1, prec); } + (if ((c2 & c3) != c3) + { constant_boolean_node (cmp == NE_EXPR, type); } + (if (TYPE_UNSIGNED (t0)) + (if ((c3 & wi::arshift (sb, c1 - 1)) != 0) + { constant_boolean_node (cmp == NE_EXPR, type); } + (cmp (bit_and @0 { wide_int_to_tree (t0, c2 << c1); }) + { wide_int_to_tree (t0, c3 << c1); })) + (with { wide_int smask = wi::arshift (sb, c1); } + (switch + (if ((c2 & smask) == 0) + (cmp (bit_and @0 { wide_int_to_tree (t0, c2 << c1); }) + { wide_int_to_tree (t0, c3 << c1); })) + (if ((c3 & smask) == 0) + (cmp (bit_and @0 { wide_int_to_tree (t0, (c2 << c1) | sb); }) + { wide_int_to_tree (t0, c3 << c1); })) + (if ((c2 & smask) != (c3 & smask)) + { constant_boolean_node (cmp == NE_EXPR, type); }) + (cmp (bit_and @0 { wide_int_to_tree (t0, (c2 << c1) | sb); }) + { wide_int_to_tree (t0, (c3 << c1) | sb); }))))))))) /* Fold (X << C1) & C2 into (X << C1) & (C2 | ((1 << C1) - 1)) (X >> C1) & C2 into (X >> C1) & (C2 | ~((type) -1 >> C1)) diff --git a/gcc/testsuite/gcc.dg/fold-eqandshift-4.c b/gcc/testsuite/gcc.dg/fold-eqandshift-4.c new file mode 100644 index 00000000000..42d5190703e --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-eqandshift-4.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int sr30eq00(char x) { return ((x >> 4) & 0x30) == 0; } +int sr30ne00(char x) { return ((x >> 4) & 0x30) != 0; } +int sr30eq20(char z) { return ((z >> 4) & 0x30) == 0x20; } +int sr30ne20(char z) { return ((z >> 4) & 0x30) != 0x20; } +int sr30eq30(char x) { return ((x >> 4) & 0x30) == 0x30; } +int sr30ne30(char x) { return ((x >> 4) & 0x30) != 0x30; } +int sr33eq33(char x) { return ((x >> 4) & 0x33) == 0x33; } +int sr33ne33(char x) { return ((x >> 4) & 0x33) != 0x33; } + +int 
ur30eq00(unsigned char z) { return ((z >> 4) & 0x30) == 0; } +int ur30ne00(unsigned char z) { return ((z >> 4) & 0x30) != 0; } +int ur30eq30(unsigned char z) { return ((z >> 4) & 0x30) == 0x30; } +int ur30ne30(unsigned char z) { return ((z >> 4) & 0x30) != 0x30; } +int ur33eq03(unsigned char x) { return ((x >> 4) & 0x33) == 0x03; } +int ur33ne03(unsigned char x) { return ((x >> 4) & 0x33) != 0x03; } +int ur33eq30(unsigned char z) { return ((z >> 4) & 0x33) == 0x30; } +int ur33ne30(unsigned char z) { return ((z >> 4) & 0x33) != 0x30; } +int ur33eq33(unsigned char z) { return ((z >> 4) & 0x33) == 0x33; } +int ur33ne33(unsigned char z) { return ((z >> 4) & 0x33) != 0x33; } + +int sl30eq00(char x) { return ((char)(x << 4) & 0x30) == 0; } +int sl30ne00(char x) { return ((char)(x << 4) & 0x30) != 0; } +int sl30eq20(char x) { return ((char)(x << 4) & 0x30) == 0x20; } +int sl30ne20(char x) { return ((char)(x << 4) & 0x30) != 0x20; } +int sl30eq30(char x) { return ((char)(x << 4) & 0x30) == 0x30; } +int sl30ne30(char x) { return ((char)(x << 4) & 0x30) != 0x30; } +int sl33eq00(char x) { return ((char)(x << 4) & 0x33) == 0; } +int sl33ne00(char x) { return ((char)(x << 4) & 0x33) != 0; } +int sl33eq03(char z) { return ((char)(z << 4) & 0x33) == 0x03; } +int sl33ne03(char z) { return ((char)(z << 4) & 0x33) != 0x03; } +int sl33eq30(char x) { return ((char)(x << 4) & 0x33) == 0x30; } +int sl33ne30(char x) { return ((char)(x << 4) & 0x33) != 0x30; } +int sl33eq33(char z) { return ((char)(z << 4) & 0x33) == 0x33; } +int sl33ne33(char z) { return ((char)(z << 4) & 0x33) != 0x33; } + +/* { dg-final { scan-tree-dump-not " >> " "optimized" } } */ +/* { dg-final { scan-tree-dump-not " << " "optimized" } } */ +/* { dg-final { scan-tree-dump-not "z_\[0-9\]\\(D\\)" "optimized" } } */ +/* { dg-final { scan-tree-dump-times "return \[01\]" 14 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "char z\\)" 14 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "x_\[0-9\]\\(D\\)" 
18 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "char x\\)" 18 "optimized" } } */ +