re PR tree-optimization/85466 (Performance is slow when doing 'branchless' conditional style math operations)

PR libstdc++/85466
	* real.h (real_nextafter): Declare.
	* real.c (real_nextafter): New function.
	* fold-const-call.c (fold_const_nextafter): New function.
	(fold_const_call_sss): Call it for CASE_CFN_NEXTAFTER and
	CASE_CFN_NEXTTOWARD.
	(fold_const_call_1): For CASE_CFN_NEXTTOWARD call fold_const_call_sss
	even when arg1_mode is different from arg0_mode.

	* gcc.dg/nextafter-1.c: New test.
	* gcc.dg/nextafter-2.c: New test.
	* gcc.dg/nextafter-3.c: New test.
	* gcc.dg/nextafter-4.c: New test.

From-SVN: r259921
This commit is contained in:
Jakub Jelinek 2018-05-04 09:19:45 +02:00 committed by Jakub Jelinek
parent 105073e1cc
commit 047823853d
9 changed files with 365 additions and 3 deletions

View File

@ -1,3 +1,14 @@
2018-05-04 Jakub Jelinek <jakub@redhat.com>
PR libstdc++/85466
* real.h (real_nextafter): Declare.
* real.c (real_nextafter): New function.
* fold-const-call.c (fold_const_nextafter): New function.
(fold_const_call_sss): Call it for CASE_CFN_NEXTAFTER and
CASE_CFN_NEXTTOWARD.
(fold_const_call_1): For CASE_CFN_NEXTTOWARD call fold_const_call_sss
even when arg1_mode is different from arg0_mode.
2018-05-03 Nathan Sidwell <nathan@acm.org>
* doc/extend.texi (Deprecated Features): Remove

View File

@ -527,6 +527,48 @@ fold_const_pow (real_value *result, const real_value *arg0,
return false;
}
/* Constant-fold

     *RESULT = nextafter (*ARG0, *ARG1)

   or

     *RESULT = nexttoward (*ARG0, *ARG1)

   for floating-point format FORMAT.  Return true if *RESULT holds the
   folded value, false if the call has to be left alone.  */

static bool
fold_const_nextafter (real_value *result, const real_value *arg0,
                      const real_value *arg1, const real_format *format)
{
  /* Never fold when either operand is a signaling NaN.  */
  if (REAL_VALUE_ISSIGNALING_NAN (*arg0))
    return false;
  if (REAL_VALUE_ISSIGNALING_NAN (*arg1))
    return false;

  /* For now only formats that are not composite, are not decimal and
     have both infinities and denormals are handled.  */
  const bool supported_format = (format->pnan >= format->p
                                 && format->b != 10
                                 && format->has_inf
                                 && format->has_denorm);
  if (!supported_format)
    return false;

  /* real_nextafter always computes *RESULT; its return value says
     whether underflow or overflow would be raised (and errno set to
     ERANGE).  Give up if those side-effects matter.  */
  const bool range_error = real_nextafter (result, format, arg0, arg1);
  if (range_error && (flag_trapping_math || flag_errno_math))
    return false;

  /* nextafter (0, 1) raises underflow as well, which is not reported
     through the return value above.  */
  const bool stepped_off_zero = (arg0->cl == rvc_zero
                                 && result->cl != rvc_zero);
  if (flag_trapping_math && stepped_off_zero)
    return false;

  real_convert (result, format, result);
  return true;
}
/* Try to evaluate:
*RESULT = ldexp (*ARG0, ARG1)
@ -1260,6 +1302,10 @@ fold_const_call_sss (real_value *result, combined_fn fn,
CASE_CFN_POW:
return fold_const_pow (result, arg0, arg1, format);
CASE_CFN_NEXTAFTER:
CASE_CFN_NEXTTOWARD:
return fold_const_nextafter (result, arg0, arg1, format);
default:
return false;
}
@ -1365,20 +1411,33 @@ fold_const_call_1 (combined_fn fn, tree type, tree arg0, tree arg1)
machine_mode arg0_mode = TYPE_MODE (TREE_TYPE (arg0));
machine_mode arg1_mode = TYPE_MODE (TREE_TYPE (arg1));
if (arg0_mode == arg1_mode
if (mode == arg0_mode
&& real_cst_p (arg0)
&& real_cst_p (arg1))
{
gcc_checking_assert (SCALAR_FLOAT_MODE_P (arg0_mode));
if (mode == arg0_mode)
REAL_VALUE_TYPE result;
if (arg0_mode == arg1_mode)
{
/* real, real -> real. */
REAL_VALUE_TYPE result;
if (fold_const_call_sss (&result, fn, TREE_REAL_CST_PTR (arg0),
TREE_REAL_CST_PTR (arg1),
REAL_MODE_FORMAT (mode)))
return build_real (type, result);
}
else if (arg1_mode == TYPE_MODE (long_double_type_node))
switch (fn)
{
CASE_CFN_NEXTTOWARD:
/* real, long double -> real. */
if (fold_const_call_sss (&result, fn, TREE_REAL_CST_PTR (arg0),
TREE_REAL_CST_PTR (arg1),
REAL_MODE_FORMAT (mode)))
return build_real (type, result);
break;
default:
break;
}
return NULL_TREE;
}

View File

@ -5048,6 +5048,102 @@ real_isinteger (const REAL_VALUE_TYPE *c, HOST_WIDE_INT *int_out)
return false;
}
/* Calculate nextafter (X, Y) or nexttoward (X, Y) in format FMT and
   store the result in *R.  Return true if underflow or overflow needs
   to be raised, i.e. when the result overflowed to infinity,
   underflowed to zero, or is a nonzero subnormal of FMT.

   The X == 0 case returns false although its result is the smallest
   subnormal; callers that care about the underflow raised there have
   to check for a zero X themselves.  */
bool
real_nextafter (REAL_VALUE_TYPE *r, format_helper fmt,
                const REAL_VALUE_TYPE *x, const REAL_VALUE_TYPE *y)
{
  int cmp = do_compare (x, y, 2);
  /* If either operand is NaN, return qNaN.  */
  if (cmp == 2)
    {
      get_canonical_qnan (r, 0);
      return false;
    }
  /* If x == y, return y cast to target type.  */
  if (cmp == 0)
    {
      real_convert (r, fmt, y);
      return false;
    }

  /* Stepping off zero: build the value with only the top significand
     bit set and exponent emin - p + 1, carrying the sign of Y.  */
  if (x->cl == rvc_zero)
    {
      get_zero (r, y->sign);
      r->cl = rvc_normal;
      SET_REAL_EXP (r, fmt->emin - fmt->p + 1);
      r->sig[SIGSZ - 1] = SIG_MSB;
      return false;
    }

  /* NP2 is the significand bit that gets added or subtracted below;
     everything under it is cleared at the end.  */
  int np2 = SIGNIFICAND_BITS - fmt->p;
  /* For denormals adjust np2 correspondingly.  */
  if (x->cl == rvc_normal && REAL_EXP (x) < fmt->emin)
    np2 += fmt->emin - REAL_EXP (x);

  /* U holds just bit NP2; R starts as a zero significand with the sign
     and exponent of X.  */
  REAL_VALUE_TYPE u;
  get_zero (r, x->sign);
  get_zero (&u, 0);
  set_significand_bit (&u, np2);
  r->cl = rvc_normal;
  SET_REAL_EXP (r, REAL_EXP (x));

  if (x->cl == rvc_inf)
    {
      /* Stepping down from infinity: subtracting U from the zero
         significand has to borrow; the exponent is then forced to
         FMT's maximum.  */
      bool borrow = sub_significands (r, r, &u, 0);
      gcc_assert (borrow);
      SET_REAL_EXP (r, fmt->emax);
    }
  else if (cmp == (x->sign ? 1 : -1))
    {
      /* The magnitude grows: add U to the significand of X.  */
      if (add_significands (r, x, &u))
        {
          /* Overflow.  Means the significand had been all ones, and
             is now all zeros.  Need to increase the exponent, and
             possibly re-normalize it.  */
          SET_REAL_EXP (r, REAL_EXP (r) + 1);
          if (REAL_EXP (r) > fmt->emax)
            {
              get_inf (r, x->sign);
              return true;
            }
          r->sig[SIGSZ - 1] = SIG_MSB;
        }
    }
  else
    {
      /* The magnitude shrinks: subtract U from the significand of X.  */
      if (REAL_EXP (x) > fmt->emin && x->sig[SIGSZ - 1] == SIG_MSB)
        {
          /* Check whether every significand word below the top one is
             zero, i.e. the mantissa is exactly 1.0.  */
          int i;
          for (i = SIGSZ - 2; i >= 0; i--)
            if (x->sig[i])
              break;
          if (i < 0)
            {
              /* When mantissa is 1.0, we need to subtract only
                 half of u: nextafter (1.0, 0.0) is 1.0 - __DBL_EPSILON__ / 2
                 rather than 1.0 - __DBL_EPSILON__.  */
              clear_significand_bit (&u, np2);
              np2--;
              set_significand_bit (&u, np2);
            }
        }
      sub_significands (r, x, &u, 0);
    }

  /* Clear out trailing garbage.  */
  clear_significand_below (r, np2);
  normalize (r);
  /* Too small even for a subnormal: flush to zero and report
     underflow.  */
  if (REAL_EXP (r) <= fmt->emin - fmt->p)
    {
      get_zero (r, x->sign);
      return true;
    }
  /* A zero result underflowed.  So did a nonzero result whose exponent
     is below FMT's minimum normal exponent (the same subnormal test as
     applied to X above): the library functions raise underflow and set
     errno to ERANGE for subnormal results too.  */
  return r->cl == rvc_zero || REAL_EXP (r) < fmt->emin;
}
/* Write into BUF the maximum representable finite floating-point
number, (1 - b**-p) * b**emax for a given FP format FMT as a hex
float string. LEN is the size of BUF, and the buffer must be large

View File

@ -507,6 +507,10 @@ extern void real_copysign (REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *);
extern bool real_isinteger (const REAL_VALUE_TYPE *, format_helper);
extern bool real_isinteger (const REAL_VALUE_TYPE *, HOST_WIDE_INT *);
/* Calculate nextafter (X, Y) or nexttoward (X, Y) in format FMT.  The
   arguments are, in order: where to store the result, FMT, X and Y.
   Return true if underflow or overflow needs to be raised.  */
extern bool real_nextafter (REAL_VALUE_TYPE *, format_helper,
const REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *);
/* Write into BUF the maximum representable finite floating-point
number, (1 - b**-p) * b**emax for a given FP format FMT as a hex
float string. BUF must be large enough to contain the result. */

View File

@ -1,3 +1,11 @@
2018-05-04 Jakub Jelinek <jakub@redhat.com>
PR libstdc++/85466
* gcc.dg/nextafter-1.c: New test.
* gcc.dg/nextafter-2.c: New test.
* gcc.dg/nextafter-3.c: New test.
* gcc.dg/nextafter-4.c: New test.
2018-05-03 Nathan Sidwell <nathan@acm.org>
Remove -ffriend-injection.

View File

@ -0,0 +1,159 @@
/* PR libstdc++/85466 */
/* { dg-do run } */
/* { dg-options "-O2 -fno-math-errno -fno-trapping-math -fdump-tree-optimized" } */
/* { dg-add-options ieee } */
/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
/* Prototypes of the functions under test.  */
float nextafterf (float, float);
double nextafter (double, double);
long double nextafterl (long double, long double);
float nexttowardf (float, long double);
double nexttoward (double, long double);
long double nexttowardl (long double, long double);
/* Abort the test when condition X does not hold.  */
#define CHECK(x) if (!(x)) __builtin_abort ()
/* NEED_ERRNO and NEED_EXC may be defined by a file that includes this
   one; both default to 0.  TEST uses them to replace selected calls by
   their expected value instead of evaluating the call.  */
#ifndef NEED_ERRNO
#define NEED_ERRNO 0
#endif
#ifndef NEED_EXC
#define NEED_EXC 0
#endif
/* Define function NAME that checks FN, a nextafter or nexttoward
   variant returning TYPE, on constant arguments.  L1 and L2 are the
   literal suffixes and l1 and l2 the builtin-name suffixes for the
   types of the first and second argument.  MIN1, MAX1, DENORM_MIN1 and
   EPSILON1 describe the first argument's type; MIN2, MAX2 and
   DENORM_MIN2 describe the second argument's type.

   The library functions raise underflow or overflow and set errno to
   ERANGE when the result is subnormal, zero or infinite.  Calls with
   such results are therefore only evaluated when neither NEED_EXC nor
   NEED_ERRNO is set; otherwise the expected value is used directly, so
   that the including test does not demand that such a call be folded
   away.  Stepping off zero (case e) is guarded by NEED_EXC only.  */
#define TEST(name, fn, type, L1, L2, l1, l2, MIN1, \
             MAX1, DENORM_MIN1, EPSILON1, MIN2, MAX2, DENORM_MIN2) \
void \
name (void) \
{ \
  const type a = fn (0.0##L1, 0.0##L2); \
  CHECK (a == 0.0##L1 && !__builtin_signbit (a)); \
  const type b = fn (0.0##L1, -0.0##L2); \
  CHECK (b == 0.0##L1 && __builtin_signbit (b)); \
  const type c = fn (__builtin_nan##l1 (""), 0.0##L2); \
  CHECK (__builtin_isnan##l1 (c)); \
  const type d = fn (2.0##L1, __builtin_nan##l2 ("")); \
  CHECK (__builtin_isnan##l1 (d)); \
  const type e = NEED_EXC ? DENORM_MIN1 : fn (0.0##L1, 8.0##L2); \
  CHECK (e == DENORM_MIN1); \
  const type f = fn (1.0##L1, 8.0##L2); \
  CHECK (f == 1.0##L1 + EPSILON1); \
  const type g = fn (1.0##L1, -8.0##L2); \
  CHECK (g == 1.0##L1 - EPSILON1 / 2.0##L1); \
  const type h = fn (__builtin_inf (), 0.0##L2); \
  CHECK (h == MAX1); \
  const type i = fn (-1.0##L1, -__builtin_inf ()); \
  CHECK (i == -1.0##L1 - EPSILON1); \
  const type j = fn (1.5##L1, __builtin_inf ()); \
  CHECK (j == 1.5##L1 + EPSILON1); \
  const type k = fn (1.5##L1 - EPSILON1, 100.0##L2); \
  CHECK (k == 1.5##L1); \
  const type l \
    = (NEED_EXC || NEED_ERRNO) ? 0.0##L1 : fn (DENORM_MIN1, 0.0##L2); \
  CHECK (l == 0.0##L1 && !__builtin_signbit (l)); \
  const type m \
    = (NEED_EXC || NEED_ERRNO) ? __builtin_inf##l1 () \
      : fn (MAX1, __builtin_inf ()); \
  CHECK (__builtin_isinf##l1 (m) && !__builtin_signbit (m)); \
  const type n \
    = (NEED_EXC || NEED_ERRNO) ? 2.0##L1 * DENORM_MIN1 \
      : fn (DENORM_MIN1, 12.0##L2); \
  CHECK (n == 2.0##L1 * DENORM_MIN1); \
  const type o \
    = (NEED_EXC || NEED_ERRNO) ? 3.0##L1 * DENORM_MIN1 \
      : fn (n, 24.0##L2); \
  CHECK (o == 3.0##L1 * DENORM_MIN1); \
  const type p \
    = (NEED_EXC || NEED_ERRNO) ? 4.0##L1 * DENORM_MIN1 \
      : fn (o, 132.0##L2); \
  CHECK (p == 4.0##L1 * DENORM_MIN1); \
  const type q \
    = (NEED_EXC || NEED_ERRNO) ? DENORM_MIN1 \
      : fn (2.0##L1 * DENORM_MIN1, -__builtin_inf ()); \
  CHECK (q == DENORM_MIN1); \
  const type r \
    = (NEED_EXC || NEED_ERRNO) ? 2.0##L1 * DENORM_MIN1 \
      : fn (3.0##L1 * DENORM_MIN1, DENORM_MIN2); \
  CHECK (r == 2.0##L1 * DENORM_MIN1); \
  const type s \
    = (NEED_EXC || NEED_ERRNO) ? 3.0##L1 * DENORM_MIN1 \
      : fn (4.0##L1 * DENORM_MIN1, 2.0##L2 * DENORM_MIN2); \
  CHECK (s == 3.0##L1 * DENORM_MIN1); \
  const type t \
    = (NEED_EXC || NEED_ERRNO) ? MIN1 - DENORM_MIN1 \
      : fn (MIN1, 0.0##L2); \
  CHECK (t == MIN1 - DENORM_MIN1); \
  const type u \
    = (NEED_EXC || NEED_ERRNO) ? MIN1 - 2.0##L1 * DENORM_MIN1 \
      : fn (MIN1 - DENORM_MIN1, -MIN2); \
  CHECK (u == MIN1 - 2.0##L1 * DENORM_MIN1); \
  const type v \
    = (NEED_EXC || NEED_ERRNO) ? MIN1 - DENORM_MIN1 \
      : fn (MIN1 - 2.0##L1 * DENORM_MIN1, 100.0##L2); \
  CHECK (v == MIN1 - DENORM_MIN1); \
  const type w = fn (MIN1 - DENORM_MIN1, MAX2); \
  CHECK (w == MIN1); \
  const type x = fn (MIN1, 17.0##L2); \
  CHECK (x == MIN1 + DENORM_MIN1); \
  const type y = fn (MIN1 + DENORM_MIN1, __builtin_inf##l2 ()); \
  CHECK (y == MIN1 + 2.0##L1 * DENORM_MIN1); \
  const type z \
    = (NEED_EXC || NEED_ERRNO) ? MIN1 / 2.0##L1 - DENORM_MIN1 \
      : fn (MIN1 / 2.0##L1, -MIN2); \
  CHECK (z == MIN1 / 2.0##L1 - DENORM_MIN1); \
  const type aa \
    = (NEED_EXC || NEED_ERRNO) ? -MIN1 / 4.0##L1 + DENORM_MIN1 \
      : fn (-MIN1 / 4.0##L1, MIN2); \
  CHECK (aa == -MIN1 / 4.0##L1 + DENORM_MIN1); \
  const type ab = fn (MIN1 * 2.0##L1, -MIN2); \
  CHECK (ab == MIN1 * 2.0##L1 - DENORM_MIN1); \
  const type ac = fn (MIN1 * 4.0##L1, MIN2); \
  CHECK (ac == MIN1 * 4.0##L1 - DENORM_MIN1 * 2.0##L1); \
  const type ad = fn (MIN1 * 64.0##L1, MIN2); \
  CHECK (ad == MIN1 * 64.0##L1 - DENORM_MIN1 * 32.0##L1); \
  const type ae \
    = (NEED_EXC || NEED_ERRNO) ? MIN1 / 2.0##L1 \
      : fn (MIN1 / 2.0##L1 - DENORM_MIN1, 100.0##L2); \
  CHECK (ae == MIN1 / 2.0##L1); \
  const type af \
    = (NEED_EXC || NEED_ERRNO) ? -MIN1 / 4.0##L1 \
      : fn (-MIN1 / 4.0##L1 + DENORM_MIN1, -100.0##L2); \
  CHECK (af == -MIN1 / 4.0##L1); \
  const type ag = fn (MIN1 * 2.0##L1 - DENORM_MIN1, 100.0##L2); \
  CHECK (ag == MIN1 * 2.0##L1); \
  const type ah = fn (MIN1 * 4.0##L1 - 2.0##L1 * DENORM_MIN1, 100.0##L2); \
  CHECK (ah == MIN1 * 4.0##L1); \
  const type ai = fn (MIN1 * 64.0##L1 - 32.0##L1 * DENORM_MIN1, 100.0##L2); \
  CHECK (ai == MIN1 * 64.0##L1); \
  const type aj = fn (MIN1 * 64.0##L1, 100.0##L2); \
  CHECK (aj == MIN1 * 64.0##L1 + 64.0##L1 * DENORM_MIN1); \
  const type ak = fn (MIN1 * 64.0##L1 + DENORM_MIN1 * 64.0##L1, 1024.0##L2); \
  CHECK (ak == MIN1 * 64.0##L1 + 128.0##L1 * DENORM_MIN1); \
  const type al = fn (128.0##L1, 128.0##L2); \
  CHECK (al == 128.0##L1); \
  const type am = fn (128.0##L1, 129.0##L2); \
  CHECK (am == 128.0##L1 + 128.0##L1 * EPSILON1); \
  const type an = fn (-128.0##L1 + -128.0##L1 * EPSILON1, -130.0##L2); \
  CHECK (an == -128.0##L1 - 256.0##L1 * EPSILON1); \
  const type ao = fn (128.0##L1 + 256.0##L1 * EPSILON1, 256.0##L2); \
  CHECK (ao == 128.0##L1 + 384.0##L1 * EPSILON1); \
  const type ap = fn (128.0##L1 + 384.0##L1 * EPSILON1, -0.0##L2); \
  CHECK (ap == 128.0##L1 + 256.0##L1 * EPSILON1); \
  const type aq = fn (128.0##L1 + 256.0##L1 * EPSILON1, 1.0##L2); \
  CHECK (aq == 128.0##L1 + 128.0##L1 * EPSILON1); \
  const type ar = fn (128.0##L1 + 128.0##L1 * EPSILON1, 0.0##L2); \
  CHECK (ar == 128.0##L1); \
  const type as = fn (128.0##L1, 0.0##L2); \
  CHECK (as == 128.0##L1 - 64.0##L1 * EPSILON1); \
  const type at = fn (128.0##L1 - 64.0##L1 * EPSILON1, 5.0##L2); \
  CHECK (at == 128.0##L1 - 128.0##L1 * EPSILON1); \
}
/* test1: nextafterf, both arguments float.  */
TEST (test1, nextafterf, float, F, F, f, f, __FLT_MIN__, __FLT_MAX__,
__FLT_DENORM_MIN__, __FLT_EPSILON__, __FLT_MIN__, __FLT_MAX__,
__FLT_DENORM_MIN__)
/* test2: nextafter, both arguments double (empty suffixes).  */
TEST (test2, nextafter, double, , , , , __DBL_MIN__, __DBL_MAX__,
__DBL_DENORM_MIN__, __DBL_EPSILON__, __DBL_MIN__, __DBL_MAX__,
__DBL_DENORM_MIN__)
/* Everything involving long double is left out when long double has a
   106-bit mantissa (presumably the IBM double-double format -- TODO
   confirm).  */
#if __LDBL_MANT_DIG__ != 106
/* test3: nextafterl, both arguments long double.  */
TEST (test3, nextafterl, long double, L, L, l, l, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__, __LDBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__)
/* test4: nexttowardf, float first argument, long double second.  */
TEST (test4, nexttowardf, float, F, L, f, l, __FLT_MIN__, __FLT_MAX__,
__FLT_DENORM_MIN__, __FLT_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__)
/* test5: nexttoward, double first argument, long double second.  */
TEST (test5, nexttoward, double, , L, , l, __DBL_MIN__, __DBL_MAX__,
__DBL_DENORM_MIN__, __DBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__)
/* test6: nexttowardl, both arguments long double.  */
TEST (test6, nexttowardl, long double, L, L, l, l, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__, __LDBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__)
#endif
int
main ()
{
test1 ();
test2 ();
#if __LDBL_MANT_DIG__ != 106
test3 ();
test4 ();
test5 ();
test6 ();
#endif
return 0;
}

View File

@ -0,0 +1,6 @@
/* PR libstdc++/85466 */
/* { dg-do run } */
/* { dg-options "-O2 -fno-builtin" } */
/* { dg-add-options ieee } */
#include "nextafter-1.c"

View File

@ -0,0 +1,9 @@
/* PR libstdc++/85466 */
/* { dg-do run } */
/* { dg-options "-O2 -fmath-errno -fno-trapping-math -fdump-tree-optimized" } */
/* { dg-add-options ieee } */
/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
/* This variant is built with -fmath-errno, so make the included TEST
   macro use the expected value for the calls it guards with
   NEED_ERRNO.  */
#define NEED_ERRNO 1
#include "nextafter-1.c"

View File

@ -0,0 +1,10 @@
/* PR libstdc++/85466 */
/* { dg-do run } */
/* { dg-options "-O2 -fmath-errno -ftrapping-math -fdump-tree-optimized" } */
/* { dg-add-options ieee } */
/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
/* This variant is built with -fmath-errno and -ftrapping-math, so make
   the included TEST macro use the expected value for the calls it
   guards with NEED_ERRNO or NEED_EXC.  */
#define NEED_ERRNO 1
#define NEED_EXC 1
#include "nextafter-1.c"