glibc/sysdeps/ieee754/ldbl-96/s_fma.c

/* Compute x * y + z as ternary operation.
   Copyright (C) 2010-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <float.h>
#include <math.h>
#include <fenv.h>
#include <ieee754.h>
#include <math-barriers.h>
#include <math_private.h>
#include <libm-alias-double.h>

/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */

/* Compute x * y + z for double arguments.

   The work is done in long double arithmetic: the product is split
   into m1 + m2 and the sum z + m1 into a1 + a2, the low-order terms
   are folded back in with rounding to odd, and the long double result
   is converted to double on return.  Rounding to odd is what keeps the
   final conversion free of double-rounding problems.

   Non-finite arguments and the exact 0 + 0 case are handled up front
   with plain arithmetic.  On the remaining paths the caller's
   floating-point environment is saved with feholdexcept and handed
   back with feupdateenv before returning.  */
double
__fma (double x, double y, double z)
{
  /* A non-finite x or y: evaluate the expression directly.  */
  if (__glibc_unlikely (!isfinite (x) || !isfinite (y)))
    return x * y + z;
  else if (__glibc_unlikely (!isfinite (z)))
    /* If z is Inf, but x and y are finite, the result should be z
       rather than NaN.  The product x * y is deliberately not formed
       here; x and y are only added to z (presumably so that no
       intermediate infinity of the opposite sign can appear).  */
    return (z + x) + y;

  /* Ensure correct sign of exact 0 + 0.  */
  if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
    {
      /* The barrier sits inside the branch so that the floating-point
	 operation below, which may raise exceptions, is not moved out
	 of it by the compiler.  */
      x = math_opt_barrier (x);
      return x * y + z;
    }

  /* Save the caller's floating-point environment (handed back by
     feupdateenv below) and run the Dekker and Knuth steps in
     round-to-nearest mode.  */
  fenv_t env;
  feholdexcept (&env);
  fesetround (FE_TONEAREST);

  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.
     C is the splitting constant 2^((LDBL_MANT_DIG + 1) / 2) + 1; note
     that the division binds tighter than the shift.  x1 and y1 first
     hold the operands scaled by C and are then replaced by the
     split-off leading parts; x2 and y2 are the remainders x - x1 and
     y - y1.  m1 is the rounded product and m2 its correction term.  */
#define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1)
  long double x1 = (long double) x * C;
  long double y1 = (long double) y * C;
  long double m1 = (long double) x * y;
  x1 = (x - x1) + x1;
  y1 = (y - y1) + y1;
  long double x2 = x - x1;
  long double y2 = y - y1;
  long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;

  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.
     a1 is the rounded sum; a2 collects what was lost from m1 and from
     z when forming a1.  */
  long double a1 = z + m1;
  long double t1 = a1 - z;
  long double t2 = a1 - t1;
  t1 = m1 - t1;
  t2 = z - t2;
  long double a2 = t1 + t2;
  /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
  math_force_eval (m2);
  math_force_eval (a2);
  /* Drop any inexact flag raised so far, so that the fetestexcept call
     below only reflects the round-toward-zero additions that follow.  */
  feclearexcept (FE_INEXACT);

  /* If the result is an exact zero, ensure it has the correct sign.  */
  if (a1 == 0 && m2 == 0)
    {
      /* Hand the saved environment back first, so that z + m1 is
	 recomputed under it rather than under round-to-nearest.  */
      feupdateenv (&env);
      /* Ensure that round-to-nearest value of z + m1 is not reused.  */
      z = math_opt_barrier (z);
      return z + m1;
    }

  /* The two additions below are done rounding toward zero; together
     with the inexact test this implements rounding to odd.  */
  fesetround (FE_TOWARDZERO);
  /* Perform m2 + a2 addition with round to odd.  */
  a2 = a2 + m2;

  /* Add that to a1 again using rounding to odd.
     When the low mantissa bit of the sum is clear and the exponent
     field is not 0x7fff (presumably the Inf/NaN encoding), the low bit
     is set if either toward-zero addition raised FE_INEXACT.  The flag
     is read before feupdateenv hands the saved environment back.  */
  union ieee854_long_double u;
  u.d = a1 + a2;
  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
    u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
  feupdateenv (&env);

  /* And finally round to double precision: the long double value is
     converted to the double return type after feupdateenv has run.  */
  return u.d;
}
/* Unless __fma has been defined as a macro, define the public fma
   name for __fma using libm_alias_double.  NOTE(review): the macro
   comes from <libm-alias-double.h>; the exact set of aliases it
   creates is not visible here.  */
#ifndef __fma
libm_alias_double (__fma, fma)
#endif
Implement accurate fma. 2010-10-14 04:27:03 +02:00			`/* Compute x * y + z as ternary operation.`
Update copyright dates with scripts/update-copyrights. * All files with FSF copyright notices: Update copyright dates using scripts/update-copyrights. * locale/programs/charmap-kw.h: Regenerated. * locale/programs/locfile-kw.h: Likewise. 2018-01-01 01:32:25 +01:00			`Copyright (C) 2010-2018 Free Software Foundation, Inc.`
Implement accurate fma. 2010-10-14 04:27:03 +02:00			`This file is part of the GNU C Library.`
			`Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.`

			`The GNU C Library is free software; you can redistribute it and/or`
			`modify it under the terms of the GNU Lesser General Public`
			`License as published by the Free Software Foundation; either`
			`version 2.1 of the License, or (at your option) any later version.`

			`The GNU C Library is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`Lesser General Public License for more details.`

			`You should have received a copy of the GNU Lesser General Public`
Replace FSF snail mail address with URLs. 2012-02-10 00:18:22 +01:00			`License along with the GNU C Library; if not, see`
			`<http://www.gnu.org/licenses/>. */`
Implement accurate fma. 2010-10-14 04:27:03 +02:00
			`#include <float.h>`
			`#include <math.h>`
			`#include <fenv.h>`
			`#include <ieee754.h>`
Do not include math-barriers.h in math_private.h. This patch continues the math_private.h cleanup by stopping math_private.h from including math-barriers.h and making the users of the barrier macros include the latter header directly. No attempt is made to remove any math_private.h includes that are now unused, except in strtod_l.c where that is done to avoid line number changes in assertions, so that installed stripped shared libraries can be compared before and after the patch. (I think the floating-point environment support in math_private.h should also move out - some architectures already have fenv_private.h as an architecture-internal header included from their math_private.h - and after moving that out might be a better time to identify unused math_private.h includes.) Tested for x86_64 and x86, and tested with build-many-glibcs.py that installed stripped shared libraries are unchanged by the patch. * sysdeps/generic/math_private.h: Do not include <math-barriers.h>. * stdlib/strtod_l.c: Include <math-barriers.h> instead of <math_private.h>. * math/fromfp.h: Include <math-barriers.h>. * math/math-narrow.h: Likewise. * math/s_nextafter.c: Likewise. * math/s_nexttowardf.c: Likewise. * sysdeps/aarch64/fpu/s_llrint.c: Likewise. * sysdeps/aarch64/fpu/s_llrintf.c: Likewise. * sysdeps/aarch64/fpu/s_lrint.c: Likewise. * sysdeps/aarch64/fpu/s_lrintf.c: Likewise. * sysdeps/i386/fpu/s_nextafterl.c: Likewise. * sysdeps/i386/fpu/s_nexttoward.c: Likewise. * sysdeps/i386/fpu/s_nexttowardf.c: Likewise. * sysdeps/ieee754/dbl-64/e_atan2.c: Likewise. * sysdeps/ieee754/dbl-64/e_atanh.c: Likewise. * sysdeps/ieee754/dbl-64/e_exp.c: Likewise. * sysdeps/ieee754/dbl-64/e_exp2.c: Likewise. * sysdeps/ieee754/dbl-64/e_j0.c: Likewise. * sysdeps/ieee754/dbl-64/e_sqrt.c: Likewise. * sysdeps/ieee754/dbl-64/s_expm1.c: Likewise. * sysdeps/ieee754/dbl-64/s_fma.c: Likewise. * sysdeps/ieee754/dbl-64/s_fmaf.c: Likewise. * sysdeps/ieee754/dbl-64/s_log1p.c: Likewise. 
* sysdeps/ieee754/dbl-64/s_nearbyint.c: Likewise. * sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c: Likewise. * sysdeps/ieee754/flt-32/e_atanhf.c: Likewise. * sysdeps/ieee754/flt-32/e_j0f.c: Likewise. * sysdeps/ieee754/flt-32/s_expm1f.c: Likewise. * sysdeps/ieee754/flt-32/s_log1pf.c: Likewise. * sysdeps/ieee754/flt-32/s_nearbyintf.c: Likewise. * sysdeps/ieee754/flt-32/s_nextafterf.c: Likewise. * sysdeps/ieee754/k_standardl.c: Likewise. * sysdeps/ieee754/ldbl-128/e_asinl.c: Likewise. * sysdeps/ieee754/ldbl-128/e_expl.c: Likewise. * sysdeps/ieee754/ldbl-128/e_powl.c: Likewise. * sysdeps/ieee754/ldbl-128/s_fmal.c: Likewise. * sysdeps/ieee754/ldbl-128/s_nearbyintl.c: Likewise. * sysdeps/ieee754/ldbl-128/s_nextafterl.c: Likewise. * sysdeps/ieee754/ldbl-128/s_nexttoward.c: Likewise. * sysdeps/ieee754/ldbl-128/s_nexttowardf.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/e_asinl.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/s_fmal.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/s_rintl.c: Likewise. * sysdeps/ieee754/ldbl-96/e_atanhl.c: Likewise. * sysdeps/ieee754/ldbl-96/e_j0l.c: Likewise. * sysdeps/ieee754/ldbl-96/s_fma.c: Likewise. * sysdeps/ieee754/ldbl-96/s_fmal.c: Likewise. * sysdeps/ieee754/ldbl-96/s_nexttoward.c: Likewise. * sysdeps/ieee754/ldbl-96/s_nexttowardf.c: Likewise. * sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c: Likewise. * sysdeps/m68k/m680x0/fpu/s_nextafterl.c: Likewise. 2018-05-11 17:11:38 +02:00			`#include <math-barriers.h>`
Fix missing <math_private.h> in ldbl-96 fma 2014-08-04 10:20:03 +02:00			`#include <math_private.h>`
Use libm_alias_double in ldbl-128, ldbl-96 fma. This patch makes the ldbl-128 and ldbl-96 implementations of fma use libm_alias_double. Tested for x86_64, and tested with build-many-glibcs.py that installed stripped shared libraries are unchanged by the patch. * sysdeps/ieee754/ldbl-128/s_fma.c: Include <libm-alias-double.h>. [!__fma] (fma): Define using libm_alias_double. * sysdeps/ieee754/ldbl-96/s_fma.c: Include <libm-alias-double.h>. [!__fma] (fma): Define using libm_alias_double. 2017-10-06 22:23:58 +02:00			`#include <libm-alias-double.h>`
Implement accurate fma. 2010-10-14 04:27:03 +02:00
			`/* This implementation uses rounding to odd to avoid problems with`
			`double rounding. See a paper by Boldo and Melquiond:`
			`http://www.lri.fr/~melquion/doc/08-tc.pdf */`

			`double`
			`__fma (double x, double y, double z)`
			`{`
Fix ldbl-96 fma (Inf, Inf, finite) (bug 23272). As reported in bug 23272, the ldbl-96 implementation of fma (fma for double, in terms of ldbl-96 as the internal arithmetic type, as used on 32-bit x86) is missing some of the special-case handling for non-finite arguments, resulting in incorrect NaN results when the first two arguments are infinities, the third is finite and so the infinities go through the logic for finite arguments. This patch fixes it by handling all cases of non-finite arguments up front, with additional fma tests for the problem cases being added to the testsuite. Tested for x86_64 and x86. [BZ #23272] * sysdeps/ieee754/ldbl-96/s_fma.c (__fma): Start by handling all cases of non-finite arguments. * math/libm-test-fma.inc (fma_test_data): Add more tests. 2018-06-11 18:33:42 +02:00			`if (__glibc_unlikely (!isfinite (x) \|\| !isfinite (y)))`
			`return x * y + z;`
			`else if (__glibc_unlikely (!isfinite (z)))`
			`/* If z is Inf, but x and y are finite, the result should be z`
			`rather than NaN. */`
			`return (z + x) + y;`
Implement fmal, some fma bugfixes 2010-10-15 21:26:06 +02:00
Fix sign of exact zero return from fma (bug 14638). 2012-09-29 20:31:54 +02:00			`/* Ensure correct sign of exact 0 + 0. */`
Use glibc_likely instead __builtin_expect. 2014-02-10 14:45:42 +01:00			`if (__glibc_unlikely ((x == 0 \|\| y == 0) && z == 0))`
Call math_opt_barrier inside if Since floating-point operation may trigger floating-point exceptions, we call math_opt_barrier inside if to prevent code motion. [BZ #19465] * sysdeps/ieee754/dbl-64/s_fma.c (__fma): Call math_opt_barrier inside if. * sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise. * sysdeps/ieee754/ldbl-96/s_fma.c (__fma): Likewise. * sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise. 2016-01-15 14:22:59 +01:00			`{`
			`x = math_opt_barrier (x);`
			`return x * y + z;`
			`}`
Fix sign of exact zero return from fma (bug 14638). 2012-09-29 20:31:54 +02:00
Make fma use of Dekker and Knuth algorithms use round-to-nearest (bug 14796). 2012-11-03 20:48:53 +01:00			`fenv_t env;`
			`feholdexcept (&env);`
			`fesetround (FE_TONEAREST);`

Implement accurate fma. 2010-10-14 04:27:03 +02:00			`/* Multiplication m1 + m2 = x * y using Dekker's algorithm. */`
			`#define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1)`
Implement fmal, some fma bugfixes 2010-10-15 21:26:06 +02:00			`long double x1 = (long double) x * C;`
			`long double y1 = (long double) y * C;`
			`long double m1 = (long double) x * y;`
Implement accurate fma. 2010-10-14 04:27:03 +02:00			`x1 = (x - x1) + x1;`
			`y1 = (y - y1) + y1;`
			`long double x2 = x - x1;`
			`long double y2 = y - y1;`
			`long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;`

			`/* Addition a1 + a2 = z + m1 using Knuth's algorithm. */`
			`long double a1 = z + m1;`
			`long double t1 = a1 - z;`
			`long double t2 = a1 - t1;`
			`t1 = m1 - t1;`
			`t2 = z - t2;`
			`long double a2 = t1 + t2;`
Force eval for fma implementations 2014-08-02 00:13:50 +02:00			`/* Ensure the arithmetic is not scheduled after feclearexcept call. */`
			`math_force_eval (m2);`
			`math_force_eval (a2);`
Make fma use of Dekker and Knuth algorithms use round-to-nearest (bug 14796). 2012-11-03 20:48:53 +01:00			`feclearexcept (FE_INEXACT);`

Force eval for fma implementations 2014-08-02 00:13:50 +02:00			`/* If the result is an exact zero, ensure it has the correct sign. */`
Make fma use of Dekker and Knuth algorithms use round-to-nearest (bug 14796). 2012-11-03 20:48:53 +01:00			`if (a1 == 0 && m2 == 0)`
			`{`
			`feupdateenv (&env);`
Force eval for fma implementations 2014-08-02 00:13:50 +02:00			`/* Ensure that round-to-nearest value of z + m1 is not reused. */`
			`z = math_opt_barrier (z);`
Make fma use of Dekker and Knuth algorithms use round-to-nearest (bug 14796). 2012-11-03 20:48:53 +01:00			`return z + m1;`
			`}`
Implement accurate fma. 2010-10-14 04:27:03 +02:00
			`fesetround (FE_TOWARDZERO);`
			`/* Perform m2 + a2 addition with round to odd. */`
			`a2 = a2 + m2;`

			`/* Add that to a1 again using rounding to odd. */`
			`union ieee854_long_double u;`
			`u.d = a1 + a2;`
			`if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)`
			`u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != 0;`
			`feupdateenv (&env);`

			`/* Add finally round to double precision. */`
			`return u.d;`
			`}`
			`#ifndef __fma`
Use libm_alias_double in ldbl-128, ldbl-96 fma. This patch makes the ldbl-128 and ldbl-96 implementations of fma use libm_alias_double. Tested for x86_64, and tested with build-many-glibcs.py that installed stripped shared libraries are unchanged by the patch. * sysdeps/ieee754/ldbl-128/s_fma.c: Include <libm-alias-double.h>. [!__fma] (fma): Define using libm_alias_double. * sysdeps/ieee754/ldbl-96/s_fma.c: Include <libm-alias-double.h>. [!__fma] (fma): Define using libm_alias_double. 2017-10-06 22:23:58 +02:00			`libm_alias_double (__fma, fma)`
Implement accurate fma. 2010-10-14 04:27:03 +02:00			`#endif`