middle-end: convert negate + right shift into compare greater.

This turns a negation followed by an arithmetic right shift by the type's
precision minus one (i.e. a shift that keeps only the sign bit) into a
comparison with 0.

i.e.

void fun1(int32_t *x, int n)
{
    for (int i = 0; i < (n & -16); i++)
      x[i] = (-x[i]) >> 31;
}

now generates:

.L3:
        ldr     q0, [x0]
        cmgt    v0.4s, v0.4s, #0
        str     q0, [x0], 16
        cmp     x0, x1
        bne     .L3

instead of:

.L3:
        ldr     q0, [x0]
        neg     v0.4s, v0.4s
        sshr    v0.4s, v0.4s, 31
        str     q0, [x0], 16
        cmp     x0, x1
        bne     .L3

gcc/ChangeLog:

	* match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

	* gcc.dg/signbit-2.c: New test.
	* gcc.dg/signbit-3.c: New test.
	* gcc.dg/signbit-4.c: New test.
	* gcc.dg/signbit-5.c: New test.
	* gcc.dg/signbit-6.c: New test.
	* gcc.target/aarch64/signbit-1.c: New test.
This commit is contained in:
Tamar Christina 2021-11-04 17:32:09 +00:00
parent 004afb984b
commit d70720c238
7 changed files with 289 additions and 1 deletions

View File

@ -37,7 +37,8 @@ along with GCC; see the file COPYING3. If not see
integer_pow2p
uniform_integer_cst_p
HONOR_NANS
uniform_vector_p)
uniform_vector_p
expand_vec_cmp_expr_p)
/* Operator lists. */
(define_operator_list tcc_comparison
@ -831,6 +832,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
{ tree utype = unsigned_type_for (type); }
(convert (rshift (lshift (convert:utype @0) @2) @3))))))
/* Fold (-x >> C) into -(x > 0) where C = precision(type) - 1.
   The arithmetic shift by precision - 1 keeps only the sign bit of -x,
   which is set exactly when x > 0.  This only holds when signed overflow
   is undefined: with wrapping arithmetic -MIN is MIN again, so the shift
   would yield -1 while x > 0 is false.  */
(for cst (INTEGER_CST VECTOR_CST)
 (simplify
  (rshift (negate:s @0) cst@1)
  (if (!TYPE_UNSIGNED (type)
       && TYPE_OVERFLOW_UNDEFINED (type))
   (with { tree bt = truth_type_for (type);
           tree zeros = build_zero_cst (type);
           tree cst = NULL_TREE; }
    (switch
     /* Handle scalar case.  */
     (if (INTEGRAL_TYPE_P (type)
          /* If we apply the rule to the scalar type before vectorization
             we will enforce the result of the comparison being a bool
             which will require an extra AND on the result that will be
             indistinguishable from when the user did actually want 0
             or 1 as the result so it can't be removed.  */
          && canonicalize_math_after_vectorization_p ()
          && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (type) - 1))
      (negate (convert (gt @0 { zeros; }))))
     /* Handle vector case.  The comparison result is reinterpreted
        directly as the value, so no negate is emitted here.  */
     (if (VECTOR_INTEGER_TYPE_P (type)
          /* First check whether the target has the same mode for vector
             comparison results as its operands do.  */
          && TYPE_MODE (bt) == TYPE_MODE (type)
          /* Then check to see if the target is able to expand the comparison
             with the given type later on, otherwise we may ICE.  */
          && expand_vec_cmp_expr_p (type, bt, GT_EXPR)
          /* The shift amount must be the same constant in every lane.  */
          && (cst = uniform_integer_cst_p (@1)) != NULL
          && wi::eq_p (wi::to_wide (cst), element_precision (type) - 1))
      (view_convert (gt:bt @0 { zeros; }))))))))
/* Fold (C1/X)*C2 into (C1*C2)/X. */
(simplify
(mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)

View File

@ -0,0 +1,19 @@
/* { dg-do assemble } */
/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
#include <stdint.h>
/* Replaces each of the first (n & -16) elements of x with (-x[i]) >> 31;
   the mask clears the low four bits of n.  The dg-final directives in this
   file require the optimized dump to contain exactly one
   `> { 0, 0, 0, 0 }` vector comparison and no `>> 31`, which is the form
   this loop is checked against.  */
void fun1(int32_t *x, int n)
{
for (int i = 0; i < (n & -16); i++)
x[i] = (-x[i]) >> 31;
}
/* Same loop shape as fun1 but with a shift count of 30 instead of 31.
   The dg-final directives allow only a single `> { 0, 0, 0, 0 }` comparison
   in the whole dump, so this variant presumably serves as the negative
   control that must not be turned into a comparison.  */
void fun2(int32_t *x, int n)
{
for (int i = 0; i < (n & -16); i++)
x[i] = (-x[i]) >> 30;
}
/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */
/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */

View File

@ -0,0 +1,13 @@
/* { dg-do assemble } */
/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
#include <stdint.h>
/* Replaces each of the first (n & -16) elements of x with (-x[i]) >> 31.
   This file is built with -O1, and its dg-final directives look for exactly
   one `> 0;` comparison and no `>> 31` in the optimized dump.  */
void fun1(int32_t *x, int n)
{
for (int i = 0; i < (n & -16); i++)
x[i] = (-x[i]) >> 31;
}
/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */

View File

@ -0,0 +1,65 @@
/* { dg-do run } */
/* { dg-options "-O1 -fwrapv" } */
#include <stdint.h>
#include <limits.h>
#include <stdio.h>
#ifndef N
#define N 65
#endif
#ifndef TYPE
#define TYPE int32_t
#endif
#ifndef DEBUG
#define DEBUG 1
#endif
#define BASE ((TYPE) -1 < 0 ? -126 : 4)
/* Function under test: replaces each of the first n elements of x with
   (-x[i]) >> 31.  It is compiled with the file-level options
   (-O1 -fwrapv) and marked noinline/noipa, presumably so the call from
   main is not optimized together with its caller.  */
__attribute__ ((noinline, noipa))
void fun1(TYPE *x, int n)
{
for (int i = 0; i < n; i++)
x[i] = (-x[i]) >> 31;
}
/* Reference implementation: textually identical to fun1 but forced to
   optimize("O0").  main compares its output element by element against
   fun1's.  */
__attribute__ ((noinline, noipa, optimize("O0")))
void fun2(TYPE *x, int n)
{
for (int i = 0; i < n; i++)
x[i] = (-x[i]) >> 31;
}
/* Driver: builds two identical input arrays, runs fun1 on one and the
   optimize("O0") reference fun2 on the other, and aborts at the first index
   where the outputs differ.  Element 0 is INT_MIN; element i (i >= 1) is
   BASE + i * 13.  Returns 0 when every element matches.  */
int main ()
{
  TYPE a[N];
  TYPE b[N];

  /* Seed the boundary value in both arrays.  */
  a[0] = b[0] = INT_MIN;

  for (int idx = 1; idx < N; ++idx)
    {
      const TYPE seed = BASE + idx * 13;
      a[idx] = seed;
      b[idx] = seed;
      if (DEBUG)
        printf ("%d: 0x%x\n", idx, a[idx]);
    }

  fun1 (a, N);
  fun2 (b, N);

  for (int idx = 0; idx < N; ++idx)
    {
      if (DEBUG)
        printf ("%d = 0x%x == 0x%x\n", idx, a[idx], b[idx]);
      if (a[idx] == b[idx])
        continue;
      __builtin_abort ();
    }

  return 0;
}

View File

@ -0,0 +1,65 @@
/* { dg-do run } */
/* { dg-options "-O3" } */
#include <stdint.h>
#include <limits.h>
#include <stdio.h>
#ifndef N
#define N 65
#endif
#ifndef TYPE
#define TYPE int32_t
#endif
#ifndef DEBUG
#define DEBUG 1
#endif
#define BASE ((TYPE) -1 < 0 ? -126 : 4)
/* Function under test: replaces each of the first n elements of x with
   (-x[i]) >> 31.  It is compiled with the file-level -O3 option and marked
   noinline/noipa, presumably so the call from main is not optimized
   together with its caller.  */
__attribute__ ((noinline, noipa))
void fun1(TYPE *x, int n)
{
for (int i = 0; i < n; i++)
x[i] = (-x[i]) >> 31;
}
/* Comparison implementation: textually identical to fun1 but built with
   optimize("O1") instead of the file-level -O3.  main compares its output
   element by element against fun1's.  */
__attribute__ ((noinline, noipa, optimize("O1")))
void fun2(TYPE *x, int n)
{
for (int i = 0; i < n; i++)
x[i] = (-x[i]) >> 31;
}
/* Driver: builds two identical input arrays, runs the -O3 fun1 on one and
   the optimize("O1") fun2 on the other, and aborts at the first index where
   the outputs differ.  Element 0 is INT_MIN; element i (i >= 1) is
   BASE + i * 13.  Returns 0 when every element matches.

   NOTE(review): this file is built without -fwrapv, so negating the INT_MIN
   element overflows; the index-0 comparison therefore relies on both
   optimization levels producing the same value for it -- confirm this is
   intended on every target.  */
int main ()
{
  TYPE a[N];
  TYPE b[N];

  /* Seed the boundary value in both arrays.  */
  a[0] = b[0] = INT_MIN;

  for (int idx = 1; idx < N; ++idx)
    {
      const TYPE seed = BASE + idx * 13;
      a[idx] = seed;
      b[idx] = seed;
      if (DEBUG)
        printf ("%d: 0x%x\n", idx, a[idx]);
    }

  fun1 (a, N);
  fun2 (b, N);

  for (int idx = 0; idx < N; ++idx)
    {
      if (DEBUG)
        printf ("%d = 0x%x == 0x%x\n", idx, a[idx], b[idx]);
      if (a[idx] == b[idx])
        continue;
      __builtin_abort ();
    }

  return 0;
}

View File

@ -0,0 +1,72 @@
/* { dg-do run } */
/* { dg-options "-O1" } */
#include <stdint.h>
#include <limits.h>
#include <stdio.h>
#ifndef N
#define N 65
#endif
#ifndef TYPE
#define TYPE int32_t
#endif
#ifndef DEBUG
#define DEBUG 1
#endif
#define BASE ((TYPE) -1 < 0 ? -126 : 4)
/* Function under test: replaces each of the first n elements of x with
   (-x[i]) >> 31.  It is compiled with the file-level -O1 option (no
   -fwrapv) and marked noinline/noipa, presumably so the call from main is
   not optimized together with its caller.  main expects this version to
   produce 0 for the INT_MIN element.  */
__attribute__ ((noinline, noipa))
void fun1(TYPE *x, int n)
{
for (int i = 0; i < n; i++)
x[i] = (-x[i]) >> 31;
}
/* Reference implementation: textually identical to fun1 but forced to
   optimize("O0").  main expects this version to produce -1 for the INT_MIN
   element and to agree with fun1 on every other element.  */
__attribute__ ((noinline, noipa, optimize("O0")))
void fun2(TYPE *x, int n)
{
for (int i = 0; i < n; i++)
x[i] = (-x[i]) >> 31;
}
/* Driver: builds two identical input arrays, runs fun1 on one and the
   optimize("O0") reference fun2 on the other, then checks the results.
   Element 0 (seeded with INT_MIN) is checked separately: fun1 must have
   produced 0 and fun2 must have produced -1.  Every other element
   (seeded with BASE + i * 13) must be equal in both arrays.  Any mismatch
   aborts; otherwise returns 0.  */
int main ()
{
  TYPE a[N];
  TYPE b[N];

  /* Seed the boundary value in both arrays.  */
  a[0] = b[0] = INT_MIN;

  for (int idx = 1; idx < N; ++idx)
    {
      const TYPE seed = BASE + idx * 13;
      a[idx] = seed;
      b[idx] = seed;
      if (DEBUG)
        printf ("%d: 0x%x\n", idx, a[idx]);
    }

  fun1 (a, N);
  fun2 (b, N);

  /* The INT_MIN element is the one place the two versions are expected
     to disagree.  */
  if (DEBUG)
    printf ("%d = 0x%x == 0x%x\n", 0, a[0], b[0]);

  if (!(a[0] == 0x0 && b[0] == -1))
    __builtin_abort ();

  for (int idx = 1; idx < N; ++idx)
    {
      if (DEBUG)
        printf ("%d = 0x%x == 0x%x\n", idx, a[idx], b[idx]);
      if (a[idx] == b[idx])
        continue;
      __builtin_abort ();
    }

  return 0;
}

View File

@ -0,0 +1,20 @@
/* { dg-do assemble } */
/* { dg-options "-O3 --save-temps" } */
#include <stdint.h>
#pragma GCC target "+nosve"
/* Replaces each of the first (n & -16) elements of x with (-x[i]) >> 31;
   the mask clears the low four bits of n.  The file's scan-assembler-times
   directive expects exactly one `cmgt` instruction in the output, which is
   the form this loop is checked against.  */
void fun1(int32_t *x, int n)
{
for (int i = 0; i < (n & -16); i++)
x[i] = (-x[i]) >> 31;
}
/* Same loop shape as fun1 but with a shift count of 30 instead of 31.
   Only one `cmgt` is allowed in the whole assembly output, so this variant
   presumably serves as the negative control that must not become a
   compare.  */
void fun2(int32_t *x, int n)
{
for (int i = 0; i < (n & -16); i++)
x[i] = (-x[i]) >> 30;
}
/* { dg-final { scan-assembler-times {\tcmgt\t} 1 } } */