re PR tree-optimization/85466 (Performance is slow when doing 'branchless' conditional style math operations)

PR libstdc++/85466 * real.h (real_nextafter): Declare. * real.c (real_nextafter): New function. * fold-const-call.c (fold_const_nextafter): New function. (fold_const_call_sss): Call it for CASE_CFN_NEXTAFTER and CASE_CFN_NEXTTOWARD. (fold_const_call_1): For CASE_CFN_NEXTTOWARD call fold_const_call_sss even when arg1_mode is different from arg0_mode. * gcc.dg/nextafter-1.c: New test. * gcc.dg/nextafter-2.c: New test. * gcc.dg/nextafter-3.c: New test. * gcc.dg/nextafter-4.c: New test. From-SVN: r259921
2018-05-04 09:19:45 +02:00 · 2018-05-04 09:19:45 +02:00 · 047823853d
commit 047823853d
parent 105073e1cc
9 changed files with 365 additions and 3 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,14 @@
+2018-05-04  Jakub Jelinek  <jakub@redhat.com>
+
+	PR libstdc++/85466
+	* real.h (real_nextafter): Declare.
+	* real.c (real_nextafter): New function.
+	* fold-const-call.c (fold_const_nextafter): New function.
+	(fold_const_call_sss): Call it for CASE_CFN_NEXTAFTER and
+	CASE_CFN_NEXTTOWARD.
+	(fold_const_call_1): For CASE_CFN_NEXTTOWARD call fold_const_call_sss
+	even when arg1_mode is different from arg0_mode.
+
 2018-05-03  Nathan Sidwell  <nathan@acm.org>

 	* doc/extend.texi (Deprecated Features): Remove
--- a/gcc/fold-const-call.c
+++ b/gcc/fold-const-call.c
@ -527,6 +527,48 @@ fold_const_pow (real_value *result, const real_value *arg0,
  return false;
 }

+/* Try to evaluate:
+
+      *RESULT = nextafter (*ARG0, *ARG1)
+
+   or
+
+      *RESULT = nexttoward (*ARG0, *ARG1)
+
+   in format FORMAT.  Return true on success.
+
+   Return false, leaving the call unfolded, when either argument is a
+   signaling NaN, when FORMAT is one of the formats rejected below, or
+   when the evaluation would raise underflow/overflow (or set errno)
+   and flag_trapping_math / flag_errno_math say that this side-effect
+   has to be preserved.  Note that *RESULT may already have been
+   written by real_nextafter when false is returned.  */
+
+static bool
+fold_const_nextafter (real_value *result, const real_value *arg0,
+		      const real_value *arg1, const real_format *format)
+{
+  if (REAL_VALUE_ISSIGNALING_NAN (*arg0)
+      || REAL_VALUE_ISSIGNALING_NAN (*arg1))
+    return false;
+
+  /* Don't handle composite modes, nor decimal, nor modes without
+     inf or denorm at least for now.  The four tests below appear to
+     correspond to those four cases, in the same order.  */
+  if (format->pnan < format->p
+      || format->b == 10
+      || !format->has_inf
+      || !format->has_denorm)
+    return false;
+
+  if (real_nextafter (result, format, arg0, arg1)
+      /* If raising underflow or overflow and setting errno to ERANGE,
+	 fail if we care about those side-effects.  real_nextafter
+	 returns true when underflow or overflow needs to be raised.  */
+      && (flag_trapping_math || flag_errno_math))
+    return false;
+  /* Similarly for nextafter (0, 1) raising underflow.  real_nextafter
+     returns false when stepping off zero, so that case is recognized
+     here from a zero ARG0 together with a nonzero result; only
+     flag_trapping_math is consulted for it.  */
+  else if (flag_trapping_math
+	   && arg0->cl == rvc_zero
+	   && result->cl != rvc_zero)
+    return false;
+
+  real_convert (result, format, result);
+
+  return true;
+}
+
 /* Try to evaluate:

      *RESULT = ldexp (*ARG0, ARG1)
@ -1260,6 +1302,10 @@ fold_const_call_sss (real_value *result, combined_fn fn,
    CASE_CFN_POW:
      return fold_const_pow (result, arg0, arg1, format);

+    CASE_CFN_NEXTAFTER:
+    CASE_CFN_NEXTTOWARD:
+      return fold_const_nextafter (result, arg0, arg1, format);
+
    default:
      return false;
    }
@ -1365,20 +1411,33 @@ fold_const_call_1 (combined_fn fn, tree type, tree arg0, tree arg1)
  machine_mode arg0_mode = TYPE_MODE (TREE_TYPE (arg0));
  machine_mode arg1_mode = TYPE_MODE (TREE_TYPE (arg1));

-  if (arg0_mode == arg1_mode
+  if (mode == arg0_mode
      && real_cst_p (arg0)
      && real_cst_p (arg1))
    {
      gcc_checking_assert (SCALAR_FLOAT_MODE_P (arg0_mode));
-      if (mode == arg0_mode)
+      REAL_VALUE_TYPE result;
+      if (arg0_mode == arg1_mode)
 	{
 	  /* real, real -> real.  */
-	  REAL_VALUE_TYPE result;
 	  if (fold_const_call_sss (&result, fn, TREE_REAL_CST_PTR (arg0),
 				   TREE_REAL_CST_PTR (arg1),
 				   REAL_MODE_FORMAT (mode)))
 	    return build_real (type, result);
 	}
+      else if (arg1_mode == TYPE_MODE (long_double_type_node))
+	switch (fn)
+	  {
+	  CASE_CFN_NEXTTOWARD:
+	    /* real, long double -> real.  */
+	    if (fold_const_call_sss (&result, fn, TREE_REAL_CST_PTR (arg0),
+				     TREE_REAL_CST_PTR (arg1),
+				     REAL_MODE_FORMAT (mode)))
+	      return build_real (type, result);
+	    break;
+	  default:
+	    break;
+	  }
      return NULL_TREE;
    }

--- a/gcc/real.c
+++ b/gcc/real.c
@ -5048,6 +5048,102 @@ real_isinteger (const REAL_VALUE_TYPE *c, HOST_WIDE_INT *int_out)
  return false;
 }

+/* Calculate nextafter (X, Y) or nexttoward (X, Y).  Return true if
+   underflow or overflow needs to be raised.
+
+   The result is stored in *R and is computed for format FMT.  The
+   cases are handled in this order:
+
+   - either operand is a NaN: *R is the canonical quiet NaN;
+   - X == Y: *R is Y converted to FMT;
+   - X is zero: *R gets Y's sign, exponent FMT->emin - FMT->p + 1 and
+     only the top significand bit set.  False is returned for this
+     case, so a caller that cares about the underflow of stepping off
+     zero has to detect it itself;
+   - X is infinite: *R gets X's sign and exponent FMT->emax, with a
+     significand obtained by borrowing one unit U from zero;
+   - otherwise one unit U (a single significand bit at position NP2)
+     is added to X's significand when the comparison result is 1 for
+     a negative X or -1 for a positive X (presumably: when Y lies
+     beyond X in the direction away from zero), and subtracted from
+     it otherwise.
+
+   True is returned when the addition carries past FMT->emax (*R is
+   then an infinity with X's sign), when the final exponent is at or
+   below FMT->emin - FMT->p (*R is then a zero with X's sign), or
+   when the normalized result is zero.  */
+
+bool
+real_nextafter (REAL_VALUE_TYPE *r, format_helper fmt,
+		const REAL_VALUE_TYPE *x, const REAL_VALUE_TYPE *y)
+{
+  int cmp = do_compare (x, y, 2);
+  /* If either operand is NaN, return qNaN.  */
+  if (cmp == 2)
+    {
+      get_canonical_qnan (r, 0);
+      return false;
+    }
+  /* If x == y, return y cast to target type.  */
+  if (cmp == 0)
+    {
+      real_convert (r, fmt, y);
+      return false;
+    }
+
+  if (x->cl == rvc_zero)
+    {
+      get_zero (r, y->sign);
+      r->cl = rvc_normal;
+      SET_REAL_EXP (r, fmt->emin - fmt->p + 1);
+      r->sig[SIGSZ - 1] = SIG_MSB;
+      return false;
+    }
+
+  int np2 = SIGNIFICAND_BITS - fmt->p;
+  /* For denormals adjust np2 correspondingly.  np2 is the index of the
+     single significand bit set in U below, i.e. the size of one step;
+     for an X whose exponent is below fmt->emin it is raised by the
+     difference between fmt->emin and that exponent.  */
+  if (x->cl == rvc_normal && REAL_EXP (x) < fmt->emin)
+    np2 += fmt->emin - REAL_EXP (x);
+
+  REAL_VALUE_TYPE u;
+  get_zero (r, x->sign);
+  get_zero (&u, 0);
+  set_significand_bit (&u, np2);
+  r->cl = rvc_normal;
+  SET_REAL_EXP (r, REAL_EXP (x));
+
+  if (x->cl == rvc_inf)
+    {
+      bool borrow = sub_significands (r, r, &u, 0);
+      gcc_assert (borrow);
+      SET_REAL_EXP (r, fmt->emax);
+    }
+  else if (cmp == (x->sign ? 1 : -1))
+    {
+      if (add_significands (r, x, &u))
+	{
+	  /* Overflow.  Means the significand had been all ones, and
+	     is now all zeros.  Need to increase the exponent, and
+	     possibly re-normalize it.  */
+	  SET_REAL_EXP (r, REAL_EXP (r) + 1);
+	  if (REAL_EXP (r) > fmt->emax)
+	    {
+	      get_inf (r, x->sign);
+	      return true;
+	    }
+	  r->sig[SIGSZ - 1] = SIG_MSB;
+	}
+    }
+  else
+    {
+      if (REAL_EXP (x) > fmt->emin && x->sig[SIGSZ - 1] == SIG_MSB)
+	{
+	  int i;
+	  for (i = SIGSZ - 2; i >= 0; i--)
+	    if (x->sig[i])
+	      break;
+	  if (i < 0)
+	    {
+	      /* When mantissa is 1.0, we need to subtract only
+		 half of u: nextafter (1.0, 0.0) is 1.0 - __DBL_EPSILON__ / 2
+		 rather than 1.0 - __DBL_EPSILON__.  */
+	      clear_significand_bit (&u, np2);
+	      np2--;
+	      set_significand_bit (&u, np2);
+	    }
+	}
+      sub_significands (r, x, &u, 0);
+    }
+
+  /* Clear out trailing garbage (presumably the significand bits below
+     position np2), then renormalize.  If the exponent then is at or
+     below fmt->emin - fmt->p, i.e. below the exponent used above for
+     the step off zero, the result is replaced by a zero with X's sign
+     and reported to the caller.  */
+  clear_significand_below (r, np2);
+  normalize (r);
+  if (REAL_EXP (r) <= fmt->emin - fmt->p)
+    {
+      get_zero (r, x->sign);
+      return true;
+    }
+  return r->cl == rvc_zero;
+}
+
 /* Write into BUF the maximum representable finite floating-point
   number, (1 - b**-p) * b**emax for a given FP format FMT as a hex
   float string.  LEN is the size of BUF, and the buffer must be large
--- a/gcc/real.h
+++ b/gcc/real.h
@ -507,6 +507,10 @@ extern void real_copysign (REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *);
 extern bool real_isinteger (const REAL_VALUE_TYPE *, format_helper);
 extern bool real_isinteger (const REAL_VALUE_TYPE *, HOST_WIDE_INT *);

+/* Calculate nextafter (X, Y) or nexttoward (X, Y) in format FMT and
+   store it through the first argument.  Return true if underflow or
+   overflow needs to be raised.  */
+extern bool real_nextafter (REAL_VALUE_TYPE *, format_helper,
+			    const REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *);
+
 /* Write into BUF the maximum representable finite floating-point
   number, (1 - b**-p) * b**emax for a given FP format FMT as a hex
   float string.  BUF must be large enough to contain the result.  */
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,11 @@
+2018-05-04  Jakub Jelinek  <jakub@redhat.com>
+
+	PR libstdc++/85466
+	* gcc.dg/nextafter-1.c: New test.
+	* gcc.dg/nextafter-2.c: New test.
+	* gcc.dg/nextafter-3.c: New test.
+	* gcc.dg/nextafter-4.c: New test.
+
 2018-05-03  Nathan Sidwell  <nathan@acm.org>

 	Remove -ffriend-injection.
--- a/gcc/testsuite/gcc.dg/nextafter-1.c
+++ b/gcc/testsuite/gcc.dg/nextafter-1.c
@ -0,0 +1,159 @@
+/* PR libstdc++/85466 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-math-errno -fno-trapping-math -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
+
+float nextafterf (float, float);
+double nextafter (double, double);
+long double nextafterl (long double, long double);
+float nexttowardf (float, long double);
+double nexttoward (double, long double);
+long double nexttowardl (long double, long double);
+
+#define CHECK(x) if (!(x)) __builtin_abort ()
+
+#ifndef NEED_ERRNO
+#define NEED_ERRNO 0
+#endif
+#ifndef NEED_EXC
+#define NEED_EXC 0
+#endif
+
+#define TEST(name, fn, type, L1, L2, l1, l2, MIN1,			     \
+	     MAX1, DENORM_MIN1, EPSILON1, MIN2, MAX2, DENORM_MIN2)	     \
+void									     \
+name (void)								     \
+{									     \
+  const type a = fn (0.0##L1, 0.0##L2);					     \
+  CHECK (a == 0.0##L1 && !__builtin_signbit (a));			     \
+  const type b = fn (0.0##L1, -0.0##L2);				     \
+  CHECK (b == 0.0##L1 && __builtin_signbit (b));			     \
+  const type c = fn (__builtin_nan##l1 (""), 0.0##L2);			     \
+  CHECK (__builtin_isnan##l1 (c));					     \
+  const type d = fn (2.0##L1, __builtin_nan##l2 (""));			     \
+  CHECK (__builtin_isnan##l1 (d));					     \
+  const type e = NEED_EXC ? DENORM_MIN1 : fn (0.0##L1, 8.0##L2);	     \
+  CHECK (e == DENORM_MIN1);						     \
+  const type f = fn (1.0##L1, 8.0##L2);					     \
+  CHECK (f == 1.0##L1 + EPSILON1);					     \
+  const type g = fn (1.0##L1, -8.0##L2);				     \
+  CHECK (g == 1.0##L1 - EPSILON1 / 2.0##L1);				     \
+  const type h = fn (__builtin_inf (), 0.0##L2);			     \
+  CHECK (h == MAX1);							     \
+  const type i = fn (-1.0##L1, -__builtin_inf ());			     \
+  CHECK (i == -1.0##L1 - EPSILON1);					     \
+  const type j = fn (1.5##L1, __builtin_inf ());			     \
+  CHECK (j == 1.5##L1 + EPSILON1);					     \
+  const type k = fn (1.5##L1 - EPSILON1, 100.0##L2);			     \
+  CHECK (k == 1.5##L1);							     \
+  const type l								     \
+    = (NEED_EXC || NEED_ERRNO) ? 0.0##L1 : fn (DENORM_MIN1, 0.0##L2);	     \
+  CHECK (l == 0.0##L1 && !__builtin_signbit (l));			     \
+  const type m								     \
+    = (NEED_EXC || NEED_ERRNO) ? __builtin_inf##l1 ()			     \
+      : fn (MAX1, __builtin_inf ());					     \
+  CHECK (__builtin_isinf##l1 (m) && !__builtin_signbit (m));		     \
+  const type n = fn (DENORM_MIN1, 12.0##L2);				     \
+  CHECK (n == 2.0##L1 * DENORM_MIN1);					     \
+  const type o = fn (n, 24.0##L2);					     \
+  CHECK (o == 3.0##L1 * DENORM_MIN1);					     \
+  const type p = fn (o, 132.0##L2);					     \
+  CHECK (p == 4.0##L1 * DENORM_MIN1);					     \
+  const type q = fn (2.0##L1 * DENORM_MIN1, -__builtin_inf ());		     \
+  CHECK (q == DENORM_MIN1);						     \
+  const type r = fn (3.0##L1 * DENORM_MIN1, DENORM_MIN2);		     \
+  CHECK (r == 2.0##L1 * DENORM_MIN1);					     \
+  const type s = fn (4.0##L1 * DENORM_MIN1, 2.0##L2 * DENORM_MIN2);	     \
+  CHECK (s == 3.0##L1 * DENORM_MIN1);					     \
+  const type t = fn (MIN1, 0.0##L2);					     \
+  CHECK (t == MIN1 - DENORM_MIN1);					     \
+  const type u = fn (MIN1 - DENORM_MIN1, -MIN2);			     \
+  CHECK (u == MIN1 - 2.0##L1 * DENORM_MIN1);				     \
+  const type v = fn (MIN1 - 2.0##L1 * DENORM_MIN1, 100.0##L2);		     \
+  CHECK (v == MIN1 - DENORM_MIN1);					     \
+  const type w = fn (MIN1 - DENORM_MIN1, MAX2);				     \
+  CHECK (w == MIN1);							     \
+  const type x = fn (MIN1, 17.0##L2);					     \
+  CHECK (x == MIN1 + DENORM_MIN1);					     \
+  const type y = fn (MIN1 + DENORM_MIN1, __builtin_inf##l2 ());		     \
+  CHECK (y == MIN1 + 2.0##L1 * DENORM_MIN1);				     \
+  const type z = fn (MIN1 / 2.0##L1, -MIN2);				     \
+  CHECK (z == MIN1 / 2.0##L1 - DENORM_MIN1);				     \
+  const type aa = fn (-MIN1 / 4.0##L1, MIN2);				     \
+  CHECK (aa == -MIN1 / 4.0##L1 + DENORM_MIN1);				     \
+  const type ab = fn (MIN1 * 2.0##L1, -MIN2);				     \
+  CHECK (ab == MIN1 * 2.0##L1 - DENORM_MIN1);				     \
+  const type ac = fn (MIN1 * 4.0##L1, MIN2);				     \
+  CHECK (ac == MIN1 * 4.0##L1 - DENORM_MIN1 * 2.0##L1);			     \
+  const type ad = fn (MIN1 * 64.0##L1, MIN2);				     \
+  CHECK (ad == MIN1 * 64.0##L1 - DENORM_MIN1 * 32.0##L1);		     \
+  const type ae = fn (MIN1 / 2.0##L1 - DENORM_MIN1, 100.0##L2);		     \
+  CHECK (ae == MIN1 / 2.0##L1);						     \
+  const type af = fn (-MIN1 / 4 + DENORM_MIN1, -100.0##L2);		     \
+  CHECK (af == -MIN1 / 4.0##L1);					     \
+  const type ag = fn (MIN1 * 2.0##L1 - DENORM_MIN1, 100.0##L2);		     \
+  CHECK (ag == MIN1 * 2.0##L1);						     \
+  const type ah = fn (MIN1 * 4.0##L1 - 2.0##L1 * DENORM_MIN1, 100.0##L2);    \
+  CHECK (ah == MIN1 * 4.0##L1);						     \
+  const type ai = fn (MIN1 * 64.0##L1 - 32.0##L1 * DENORM_MIN1, 100.0##L2);  \
+  CHECK (ai == MIN1 * 64.0##L1);					     \
+  const type aj = fn (MIN1 * 64.0##L1, 100.0##L2);			     \
+  CHECK (aj == MIN1 * 64.0##L1 + 64.0##L1 * DENORM_MIN1);		     \
+  const type ak = fn (MIN1 * 64.0##L1 + DENORM_MIN1 * 64.0##L1, 1024.0##L2); \
+  CHECK (ak == MIN1 * 64.0##L1 + 128.0##L1 * DENORM_MIN1);		     \
+  const type al = fn (128.0##L1, 128.0##L2);				     \
+  CHECK (al == 128.0##L1);						     \
+  const type am = fn (128.0##L1, 129.0##L2);				     \
+  CHECK (am == 128.0##L1 + 128.0##L1 * EPSILON1);			     \
+  const type an = fn (-128.0##L1 + -128.0##L1 * EPSILON1, -130.0##L2);	     \
+  CHECK (an == -128.0##L1 - 256.0##L1 * EPSILON1);			     \
+  const type ao = fn (128.0##L1 + 256.0##L1 * EPSILON1, 256.0##L2);	     \
+  CHECK (ao == 128.0##L1 + 384.0##L1 * EPSILON1);			     \
+  const type ap = fn (128.0##L1 + 384.0##L1 * EPSILON1, -0.0##L2);	     \
+  CHECK (ap == 128.0##L1 + 256.0##L1 * EPSILON1);			     \
+  const type aq = fn (128.0##L1 + 256.0##L1 * EPSILON1, 1.0##L2);	     \
+  CHECK (aq == 128.0##L1 + 128.0##L1 * EPSILON1);			     \
+  const type ar = fn (128.0##L1 + 128.0##L1 * EPSILON1, 0.0##L2);	     \
+  CHECK (ar == 128.0##L1);						     \
+  const type as = fn (128.0##L1, 0.0##L2);				     \
+  CHECK (as == 128.0##L1 - 64.0##L1 * EPSILON1);			     \
+  const type at = fn (128.0##L1 - 64.0##L1 * EPSILON1, 5.0##L2);	     \
+  CHECK (at == 128.0##L1 - 128.0##L1 * EPSILON1);			     \
+}
+
+TEST (test1, nextafterf, float, F, F, f, f, __FLT_MIN__, __FLT_MAX__,
+      __FLT_DENORM_MIN__, __FLT_EPSILON__, __FLT_MIN__, __FLT_MAX__,
+      __FLT_DENORM_MIN__)
+TEST (test2, nextafter, double, , , , , __DBL_MIN__, __DBL_MAX__,
+      __DBL_DENORM_MIN__, __DBL_EPSILON__, __DBL_MIN__, __DBL_MAX__,
+      __DBL_DENORM_MIN__)
+#if __LDBL_MANT_DIG__ != 106
+TEST (test3, nextafterl, long double, L, L, l, l, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__, __LDBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__)
+TEST (test4, nexttowardf, float, F, L, f, l, __FLT_MIN__, __FLT_MAX__,
+      __FLT_DENORM_MIN__, __FLT_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__)
+TEST (test5, nexttoward, double, , L, , l, __DBL_MIN__, __DBL_MAX__,
+      __DBL_DENORM_MIN__, __DBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__)
+TEST (test6, nexttowardl, long double, L, L, l, l, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__, __LDBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
+      __LDBL_DENORM_MIN__)
+#endif
+
+int
+main ()
+{
+  test1 ();
+  test2 ();
+#if __LDBL_MANT_DIG__ != 106 /* test3-test6 are defined only in this case.  */
+  test3 ();
+  test4 ();
+  test5 ();
+  test6 ();
+#endif /* __LDBL_MANT_DIG__ != 106 */
+  return 0;
+}
--- a/gcc/testsuite/gcc.dg/nextafter-2.c
+++ b/gcc/testsuite/gcc.dg/nextafter-2.c
@ -0,0 +1,6 @@
+/* PR libstdc++/85466 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-builtin" } */
+/* { dg-add-options ieee } */
+
+#include "nextafter-1.c"
--- a/gcc/testsuite/gcc.dg/nextafter-3.c
+++ b/gcc/testsuite/gcc.dg/nextafter-3.c
@ -0,0 +1,9 @@
+/* PR libstdc++/85466 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fmath-errno -fno-trapping-math -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
+
+#define NEED_ERRNO 1
+#include "nextafter-1.c"
--- a/gcc/testsuite/gcc.dg/nextafter-4.c
+++ b/gcc/testsuite/gcc.dg/nextafter-4.c
@ -0,0 +1,10 @@
+/* PR libstdc++/85466 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fmath-errno -ftrapping-math -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
+
+#define NEED_ERRNO 1
+#define NEED_EXC 1
+#include "nextafter-1.c"