From d70720c2382e687e192a9d666e80acb41bfda856 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Thu, 4 Nov 2021 17:32:09 +0000 Subject: [PATCH] middle-end: convert negate + right shift into compare greater. This turns an inversion of the sign bit + arithmetic right shift into a comparison with 0. i.e. void fun1(int32_t *x, int n) { for (int i = 0; i < (n & -16); i++) x[i] = (-x[i]) >> 31; } now generates: .L3: ldr q0, [x0] cmgt v0.4s, v0.4s, #0 str q0, [x0], 16 cmp x0, x1 bne .L3 instead of: .L3: ldr q0, [x0] neg v0.4s, v0.4s sshr v0.4s, v0.4s, 31 str q0, [x0], 16 cmp x0, x1 bne .L3 gcc/ChangeLog: * match.pd: New negate+shift pattern. gcc/testsuite/ChangeLog: * gcc.dg/signbit-2.c: New test. * gcc.dg/signbit-3.c: New test. * gcc.dg/signbit-4.c: New test. * gcc.dg/signbit-5.c: New test. * gcc.dg/signbit-6.c: New test. * gcc.target/aarch64/signbit-1.c: New test. --- gcc/match.pd | 36 +++++++++- gcc/testsuite/gcc.dg/signbit-2.c | 19 ++++++ gcc/testsuite/gcc.dg/signbit-3.c | 13 ++++ gcc/testsuite/gcc.dg/signbit-4.c | 65 ++++++++++++++++++ gcc/testsuite/gcc.dg/signbit-5.c | 65 ++++++++++++++++++ gcc/testsuite/gcc.dg/signbit-6.c | 72 ++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/signbit-1.c | 20 ++++++ 7 files changed, 289 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/signbit-2.c create mode 100644 gcc/testsuite/gcc.dg/signbit-3.c create mode 100644 gcc/testsuite/gcc.dg/signbit-4.c create mode 100644 gcc/testsuite/gcc.dg/signbit-5.c create mode 100644 gcc/testsuite/gcc.dg/signbit-6.c create mode 100644 gcc/testsuite/gcc.target/aarch64/signbit-1.c diff --git a/gcc/match.pd b/gcc/match.pd index 651982c28fe..917833dd14b 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -37,7 +37,8 @@ along with GCC; see the file COPYING3. If not see integer_pow2p uniform_integer_cst_p HONOR_NANS - uniform_vector_p) + uniform_vector_p + expand_vec_cmp_expr_p) /* Operator lists. 
*/ (define_operator_list tcc_comparison @@ -831,6 +832,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) { tree utype = unsigned_type_for (type); } (convert (rshift (lshift (convert:utype @0) @2) @3)))))) +/* Fold (-x >> C) into -(x > 0) where C = precision(type) - 1. */ +(for cst (INTEGER_CST VECTOR_CST) + (simplify + (rshift (negate:s @0) cst@1) + (if (!TYPE_UNSIGNED (type) + && TYPE_OVERFLOW_UNDEFINED (type)) + (with { tree stype = TREE_TYPE (@1); + tree bt = truth_type_for (type); + tree zeros = build_zero_cst (type); + tree cst = NULL_TREE; } + (switch + /* Handle scalar case. */ + (if (INTEGRAL_TYPE_P (type) + /* If we apply the rule to the scalar type before vectorization + we will enforce the result of the comparison being a bool + which will require an extra AND on the result that will be + indistinguishable from when the user did actually want 0 + or 1 as the result so it can't be removed. */ + && canonicalize_math_after_vectorization_p () + && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (type) - 1)) + (negate (convert (gt @0 { zeros; })))) + /* Handle vector case. */ + (if (VECTOR_INTEGER_TYPE_P (type) + /* First check whether the target has the same mode for vector + comparison results as its operands do. */ + && TYPE_MODE (bt) == TYPE_MODE (type) + /* Then check to see if the target is able to expand the comparison + with the given type later on, otherwise we may ICE. */ + && expand_vec_cmp_expr_p (type, bt, GT_EXPR) + && (cst = uniform_integer_cst_p (@1)) != NULL + && wi::eq_p (wi::to_wide (cst), element_precision (type) - 1)) + (view_convert (gt:bt @0 { zeros; })))))))) + /* Fold (C1/X)*C2 into (C1*C2)/X. 
*/ (simplify (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2) diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c new file mode 100644 index 00000000000..fc0157cbc5c --- /dev/null +++ b/gcc/testsuite/gcc.dg/signbit-2.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */ + +#include <stdint.h> + +void fun1(int32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (-x[i]) >> 31; +} + +void fun2(int32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (-x[i]) >> 30; +} + +/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */ +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */ diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c new file mode 100644 index 00000000000..19e9c06c349 --- /dev/null +++ b/gcc/testsuite/gcc.dg/signbit-3.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */ + +#include <stdint.h> + +void fun1(int32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (-x[i]) >> 31; +} + +/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */ +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */ diff --git a/gcc/testsuite/gcc.dg/signbit-4.c b/gcc/testsuite/gcc.dg/signbit-4.c new file mode 100644 index 00000000000..bc459ba60a7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/signbit-4.c @@ -0,0 +1,65 @@ +/* { dg-do run } */ +/* { dg-options "-O1 -fwrapv" } */ + +#include <stdint.h> +#include <limits.h> +#include <stdio.h> + +#ifndef N +#define N 65 +#endif + +#ifndef TYPE +#define TYPE int32_t +#endif + +#ifndef DEBUG +#define DEBUG 1 +#endif + +#define BASE ((TYPE) -1 < 0 ? 
-126 : 4) + +__attribute__ ((noinline, noipa)) +void fun1(TYPE *x, int n) +{ + for (int i = 0; i < n; i++) + x[i] = (-x[i]) >> 31; +} + +__attribute__ ((noinline, noipa, optimize("O0"))) +void fun2(TYPE *x, int n) +{ + for (int i = 0; i < n; i++) + x[i] = (-x[i]) >> 31; +} + +int main () +{ + TYPE a[N]; + TYPE b[N]; + + a[0] = INT_MIN; + b[0] = INT_MIN; + + for (int i = 1; i < N; ++i) + { + a[i] = BASE + i * 13; + b[i] = BASE + i * 13; + if (DEBUG) + printf ("%d: 0x%x\n", i, a[i]); + } + + fun1 (a, N); + fun2 (b, N); + + for (int i = 0; i < N; ++i) + { + if (DEBUG) + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); + + if (a[i] != b[i]) + __builtin_abort (); + } + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/signbit-5.c b/gcc/testsuite/gcc.dg/signbit-5.c new file mode 100644 index 00000000000..22a92704773 --- /dev/null +++ b/gcc/testsuite/gcc.dg/signbit-5.c @@ -0,0 +1,65 @@ +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +#include <stdint.h> +#include <limits.h> +#include <stdio.h> + +#ifndef N +#define N 65 +#endif + +#ifndef TYPE +#define TYPE int32_t +#endif + +#ifndef DEBUG +#define DEBUG 1 +#endif + +#define BASE ((TYPE) -1 < 0 ? 
-126 : 4) + +__attribute__ ((noinline, noipa)) +void fun1(TYPE *x, int n) +{ + for (int i = 0; i < n; i++) + x[i] = (-x[i]) >> 31; +} + +__attribute__ ((noinline, noipa, optimize("O1"))) +void fun2(TYPE *x, int n) +{ + for (int i = 0; i < n; i++) + x[i] = (-x[i]) >> 31; +} + +int main () +{ + TYPE a[N]; + TYPE b[N]; + + a[0] = INT_MIN; + b[0] = INT_MIN; + + for (int i = 1; i < N; ++i) + { + a[i] = BASE + i * 13; + b[i] = BASE + i * 13; + if (DEBUG) + printf ("%d: 0x%x\n", i, a[i]); + } + + fun1 (a, N); + fun2 (b, N); + + for (int i = 0; i < N; ++i) + { + if (DEBUG) + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); + + if (a[i] != b[i]) + __builtin_abort (); + } + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/signbit-6.c b/gcc/testsuite/gcc.dg/signbit-6.c new file mode 100644 index 00000000000..da186624cfa --- /dev/null +++ b/gcc/testsuite/gcc.dg/signbit-6.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-options "-O1" } */ + +#include <stdint.h> +#include <limits.h> +#include <stdio.h> + +#ifndef N +#define N 65 +#endif + +#ifndef TYPE +#define TYPE int32_t +#endif + +#ifndef DEBUG +#define DEBUG 1 +#endif + +#define BASE ((TYPE) -1 < 0 ? 
-126 : 4) + +__attribute__ ((noinline, noipa)) +void fun1(TYPE *x, int n) +{ + for (int i = 0; i < n; i++) + x[i] = (-x[i]) >> 31; +} + +__attribute__ ((noinline, noipa, optimize("O0"))) +void fun2(TYPE *x, int n) +{ + for (int i = 0; i < n; i++) + x[i] = (-x[i]) >> 31; +} + +int main () +{ + TYPE a[N]; + TYPE b[N]; + + a[0] = INT_MIN; + b[0] = INT_MIN; + + for (int i = 1; i < N; ++i) + { + a[i] = BASE + i * 13; + b[i] = BASE + i * 13; + if (DEBUG) + printf ("%d: 0x%x\n", i, a[i]); + } + + fun1 (a, N); + fun2 (b, N); + + if (DEBUG) + printf ("%d = 0x%x == 0x%x\n", 0, a[0], b[0]); + + if (a[0] != 0x0 || b[0] != -1) + __builtin_abort (); + + + for (int i = 1; i < N; ++i) + { + if (DEBUG) + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); + + if (a[i] != b[i]) + __builtin_abort (); + } + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c b/gcc/testsuite/gcc.target/aarch64/signbit-1.c new file mode 100644 index 00000000000..a5654d139cb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c @@ -0,0 +1,20 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 --save-temps" } */ + +#include <stdint.h> + +#pragma GCC target "+nosve" + +void fun1(int32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (-x[i]) >> 31; +} + +void fun2(int32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (-x[i]) >> 30; +} + +/* { dg-final { scan-assembler-times {\tcmgt\t} 1 } } */