glibc/sysdeps/x86_64/fpu/math_private.h

#ifndef _MATH_PRIVATE_H

/* Optimization barrier.  Yields the value of X after routing it through
   an empty asm whose output is tied to its input, so the compiler has to
   materialize X in a register and cannot see through the asm.  Values no
   wider than double use the "x" (SSE register) constraint, wider ones
   the "t" (top of x87 stack) constraint.  X is evaluated once.  */
#define math_opt_barrier(x) \
({									\
  __typeof (x) __barrier_val;						\
  if (sizeof (x) > sizeof (double))					\
    __asm ("" : "=t" (__barrier_val) : "0" (x));			\
  else									\
    __asm ("" : "=x" (__barrier_val) : "0" (x));			\
  __barrier_val;							\
})
/* Force X to be computed.  X is fed as an input to an empty volatile
   asm, so the compiler cannot drop the computation as unused.  Values no
   wider than double use the "x" (SSE register) constraint, wider ones
   the "f" (x87 register) constraint.  Produces no value.  */
#define math_force_eval(x) \
do									\
  {									\
    if (sizeof (x) > sizeof (double))					\
      __asm __volatile ("" : : "f" (x));				\
    else								\
      __asm __volatile ("" : : "x" (x));				\
  }									\
while (0)

#include <math/math_private.h>

/* We can do a few things better on x86-64.  */

/* Copy the 64-bit representation of the double D into the integer
   lvalue I with one direct SSE-to-integer move.

   D is converted to double first, so a float (or integer) argument
   cannot leak a narrower representation into the result.  The move must
   be movq: when the compiler selects the memory alternative of "=rm",
   movd would store only the low 32 bits.  */
#undef EXTRACT_WORDS64
#define EXTRACT_WORDS64(i,d)					\
do {								\
  long int i_;							\
  __asm ("movq %1, %0" : "=rm" (i_) : "x" ((double) (d)));	\
  (i) = i_;							\
} while (0)

/* The reverse: build a double from the 64-bit integer pattern I and
   assign it to the lvalue D.

   The move must be movq: when the compiler selects the memory
   alternative of "rm", movd would load only the low 32 bits.  The asm
   writes a double temporary, which is then assigned to D, so D does not
   itself have to be usable as an SSE register output.  */
#undef INSERT_WORDS64
#define INSERT_WORDS64(d,i) \
do {								\
  long int i_ = (i);						\
  double d__;							\
  __asm ("movq %1, %0" : "=x" (d__) : "rm" (i_));		\
  (d) = d__;							\
} while (0)

/* Copy the 32-bit representation of the float D into the integer
   lvalue I with one direct SSE-to-integer move.

   D is converted to float inside the macro; without that, a double
   argument would hand back the low half of the double's bit pattern
   instead of the float representation of its value.  */
#undef GET_FLOAT_WORD
#define GET_FLOAT_WORD(i,d) \
do {								\
  int i_;							\
  __asm ("movd %1, %0" : "=rm" (i_) : "x" ((float) (d)));	\
  (i) = i_;							\
} while (0)

/* The reverse: build a float from the 32-bit integer pattern I and
   assign it to the lvalue D.

   The asm writes a float temporary, which is then assigned to D.  That
   way a destination that is not exactly a float object receives the
   converted value rather than raw register bits.  */
#undef SET_FLOAT_WORD
#define SET_FLOAT_WORD(d,i) \
do {								\
  int i_ = (i);							\
  float f__;							\
  __asm ("movd %1, %0" : "=x" (f__) : "rm" (i_));		\
  (d) = f__;							\
} while (0)

#endif

/* Nonzero iff D, converted to double, is a NaN: with the sign bit masked
   off, its bit pattern is strictly above 0x7ff0000000000000 (the pattern
   __isinf_ns tests for equality with).  */
#define __isnan(d) \
  ({									      \
     long int __bits;							      \
     EXTRACT_WORDS64 (__bits, (double) (d));				      \
     0x7ff0000000000000L < (__bits & 0x7fffffffffffffffL);		      \
  })
/* Nonzero iff D, converted to float, is a NaN: with the sign bit masked
   off, its bit pattern is strictly above 0x7f800000.  The argument is
   parenthesized before the cast so that the whole expression passed as
   D is converted, not just its first operand.  */
#define __isnanf(d) \
  ({ int __di; GET_FLOAT_WORD (__di, (float) (d));			      \
     (__di & 0x7fffffff) > 0x7f800000; })

/* Nonzero iff D, converted to double, is an infinity of either sign:
   with the sign bit masked off, its bit pattern equals
   0x7ff0000000000000.  The sign is not reported.  */
#define __isinf_ns(d) \
  ({									      \
     long int __bits;							      \
     EXTRACT_WORDS64 (__bits, (double) (d));				      \
     0x7ff0000000000000L == (__bits & 0x7fffffffffffffffL);		      \
  })
/* Nonzero iff D, converted to float, is an infinity of either sign:
   with the sign bit masked off, its bit pattern equals 0x7f800000.  The
   argument is parenthesized before the cast so that the whole expression
   passed as D is converted, not just its first operand.  */
#define __isinf_nsf(d) \
  ({ int __di; GET_FLOAT_WORD (__di, (float) (d));			      \
     (__di & 0x7fffffff) == 0x7f800000; })

/* Nonzero iff D, converted to double, is neither infinite nor NaN: with
   the sign bit masked off, its bit pattern is strictly below
   0x7ff0000000000000.  */
#define __finite(d) \
  ({									      \
     long int __bits;							      \
     EXTRACT_WORDS64 (__bits, (double) (d));				      \
     0x7ff0000000000000L > (__bits & 0x7fffffffffffffffL);		      \
  })
/* Nonzero iff D, converted to float, is neither infinite nor NaN: with
   the sign bit masked off, its bit pattern is strictly below 0x7f800000.
   The argument is parenthesized before the cast so that the whole
   expression passed as D is converted, not just its first operand.  */
#define __finitef(d) \
  ({ int __di; GET_FLOAT_WORD (__di, (float) (d));			      \
     (__di & 0x7fffffff) < 0x7f800000; })

/* Square root of D as a double, computed inline with the SSE2 sqrtsd
   instruction.  D is converted to double first; the operand may live in
   an SSE register or in memory.  */
#define __ieee754_sqrt(d) \
  ({									      \
     double __root;							      \
     __asm ("sqrtsd %1, %0" : "=x" (__root) : "xm" ((double) (d)));	      \
     __root;								      \
  })
/* Square root of D as a float, computed inline with the SSE sqrtss
   instruction.  D is converted to float first; the operand may live in
   an SSE register or in memory.  */
#define __ieee754_sqrtf(d) \
  ({									      \
     float __root;							      \
     __asm ("sqrtss %1, %0" : "=x" (__root) : "xm" ((float) (d)));	      \
     __root;								      \
  })
/* Square root of D as a long double, computed inline with the x87 fsqrt
   instruction.  D is converted to long double and placed on the top of
   the x87 stack, where fsqrt replaces it with the result.  */
#define __ieee754_sqrtl(d) \
  ({									      \
     long double __root;						      \
     __asm ("fsqrt" : "=t" (__root) : "0" ((long double) (d)));	      \
     __root;								      \
  })

#ifdef __SSE4_1__
# ifndef __rint
/* Round D (converted to double) to an integral value with SSE4.1
   roundsd.  Immediate 4 sets only bit 2, which selects the rounding
   mode currently held in MXCSR.  */
#  define __rint(d) \
  ({									      \
     double __rounded;							      \
     __asm ("roundsd $4, %1, %0" : "=x" (__rounded) : "xm" ((double) (d))); \
     __rounded;								      \
  })
# endif
# ifndef __rintf
/* Round D (converted to float) to an integral value with SSE4.1
   roundss.  Immediate 4 sets only bit 2, which selects the rounding
   mode currently held in MXCSR.  */
#  define __rintf(d) \
  ({									      \
     float __rounded;							      \
     __asm ("roundss $4, %1, %0" : "=x" (__rounded) : "xm" ((float) (d)));  \
     __rounded;								      \
  })
# endif

# ifndef __floor
/* Round D (converted to double) toward negative infinity with SSE4.1
   roundsd.  Immediate 9: bits 1:0 = 01 select round-down, bit 2 clear
   takes the mode from the immediate instead of MXCSR, and bit 3 set
   suppresses the precision exception, so floor does not raise "inexact"
   for non-integral arguments.  */
#  define __floor(d) \
  ({ double __res; \
    __asm ("roundsd $9, %1, %0" : "=x" (__res) : "xm" ((double) (d)));	      \
     __res; })
# endif
# ifndef __floorf
/* Round D (converted to float) toward negative infinity with SSE4.1
   roundss.  Immediate 9: bits 1:0 = 01 select round-down, bit 2 clear
   takes the mode from the immediate instead of MXCSR, and bit 3 set
   suppresses the precision exception, so floorf does not raise "inexact"
   for non-integral arguments.  */
#  define __floorf(d) \
  ({ float __res; \
    __asm ("roundss $9, %1, %0" : "=x" (__res) : "xm" ((float) (d)));	      \
     __res; })
# endif
#endif


/* Specialized variants of the <fenv.h> interfaces which only handle
   either the FPU or the SSE unit.  */
/* Read the rounding mode of the SSE unit only.  MXCSR is stored to a
   local, and its rounding-control field (mask 0x6000) is returned moved
   down by three bits, i.e. as a value under mask 0xc00 -- presumably so
   it matches the FE_* rounding macros of <fenv.h>.  */
#undef libc_fegetround
#define libc_fegetround() \
  ({									      \
     unsigned int __csr;						      \
     __asm __volatile ("stmxcsr %0" : "=m" (__csr));			      \
     (__csr >> 3) & 0xc00;						      \
  })
// #define libc_fegetroundf() fegetround ()
// #define libc_fegetroundl() fegetround ()

/* Set the rounding mode of the SSE unit only.  R is a rounding mode in
   the 0xc00 field returned by libc_fegetround; it is shifted left by
   three into the MXCSR rounding-control field (mask 0x6000).  All other
   MXCSR bits are preserved.

   The stmxcsr read is volatile like the one in libc_fegetround: it has
   no inputs, so a non-volatile asm could be merged with an earlier read
   or hoisted, handing ldmxcsr a stale copy of the flags.  The local has
   a reserved-namespace name so that an argument R mentioning a caller
   variable called `mxcsr' is not captured.  */
#undef libc_fesetround
#define libc_fesetround(r) \
  do {									      \
     unsigned int __csr;						      \
     __asm __volatile ("stmxcsr %0" : "=m" (__csr));			      \
     __csr = (__csr & ~0x6000) | ((r) << 3);				      \
     __asm __volatile ("ldmxcsr %0" : : "m" (__csr));			      \
  } while (0)
// #define libc_fesetroundf(r) (void) fesetround (r)
// #define libc_fesetroundl(r) (void) fesetround (r)

/* SSE-only counterpart of feholdexcept.  Saves the current MXCSR in
   E->__mxcsr, then installs a copy with all exception mask bits set
   (0x1f80) and all exception flags cleared (0x3f).  The rounding mode is
   left unchanged.

   The stmxcsr read is volatile: it has no inputs, so a non-volatile asm
   could be merged with an earlier read or hoisted, saving a stale
   environment.  The local has a reserved-namespace name so that an
   argument E mentioning a caller variable called `mxcsr' is not
   captured.  */
#undef libc_feholdexcept
#define libc_feholdexcept(e) \
  do {									      \
     unsigned int __csr;						      \
     __asm __volatile ("stmxcsr %0" : "=m" (__csr));			      \
     (e)->__mxcsr = __csr;						      \
     __csr = (__csr | 0x1f80) & ~0x3f;					      \
     __asm __volatile ("ldmxcsr %0" : : "m" (__csr));			      \
  } while (0)
// #define libc_feholdexceptf(e) (void) feholdexcept (e)
// #define libc_feholdexceptl(e) (void) feholdexcept (e)

/* SSE-only combination of feholdexcept and fesetround.  Saves the
   current MXCSR in E->__mxcsr, then installs a copy with all exception
   mask bits set (0x1f80), all exception flags cleared (0x3f), and the
   rounding-control field (0x6000) replaced by R shifted left by three.

   The stmxcsr read is volatile: it has no inputs, so a non-volatile asm
   could be merged with an earlier read or hoisted, saving a stale
   environment.  The local has a reserved-namespace name so that
   arguments mentioning a caller variable called `mxcsr' are not
   captured.  */
#undef libc_feholdexcept_setround
#define libc_feholdexcept_setround(e, r) \
  do {									      \
     unsigned int __csr;						      \
     __asm __volatile ("stmxcsr %0" : "=m" (__csr));			      \
     (e)->__mxcsr = __csr;						      \
     __csr = ((__csr | 0x1f80) & ~0x603f) | ((r) << 3);		      \
     __asm __volatile ("ldmxcsr %0" : : "m" (__csr));			      \
  } while (0)
// #define libc_feholdexcept_setroundf(e, r) ...
// #define libc_feholdexcept_setroundl(e, r) ...

/* SSE-only counterpart of fesetenv: load MXCSR directly from the value
   stored in E->__mxcsr.  Nothing else of the environment is touched by
   this macro.  */
#undef libc_fesetenv
#define libc_fesetenv(e) \
  do {									      \
     __asm __volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr));		      \
  } while (0)
// #define libc_fesetenvf(e) (void) fesetenv (e)
// #define libc_fesetenvl(e) (void) fesetenv (e)
[BZ #3306] 2007-03-27 Jakub Jelinek <jakub@redhat.com> [BZ #3306] * math/math_private.h (math_opt_barrier, math_force_eval): Define. * sysdeps/i386/fpu/math_private.h: New file. * sysdeps/x86_64/fpu/math_private.h: New file. * math/s_nexttowardf.c (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". * math/s_nextafter.c (__nextafter): Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/flt-32/s_nextafterf.c (__nextafterf): Likewise. * sysdeps/ieee754/ldbl-128/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/ldbl-96/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/i386/fpu/s_nextafterl.c (__nextafterl): Use math_opt_barrier and math_force_eval macros. * sysdeps/ieee754/ldbl-128/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/ieee754/ldbl-96/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/i386/fpu/s_nexttoward.c: Include float.h. (__nexttoward): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is 2. * sysdeps/i386/fpu/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is not 0. * sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c: Include float.h. (__nldbl_nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-96/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * math/bug-nextafter.c (zero, inf): New variables. 
(main): Add new tests. * math/bug-nexttoward.c (zero, inf): New variables. (main): Add new tests. 2007-04-16 22:41:42 +02:00			`#ifndef _MATH_PRIVATE_H`

			`#define math_opt_barrier(x) \`
			`({ __typeof(x) __x; \`
			`if (sizeof (x) <= sizeof (double)) \`
			`__asm ("" : "=x" (__x) : "0" (x)); \`
			`else \`
			`__asm ("" : "=t" (__x) : "0" (x)); \`
			`__x; })`
			`#define math_force_eval(x) \`
			`do \`
			`{ \`
			`if (sizeof (x) <= sizeof (double)) \`
			`__asm __volatile ("" : : "x" (x)); \`
			`else \`
			`__asm __volatile ("" : : "f" (x)); \`
			`} \`
			`while (0)`

			`#include <math/math_private.h>`
Optimize float construction/extraction on x86-64. 2009-08-24 23:52:49 +02:00
			`/* We can do a few things better on x86-64. */`

Add ceil implementation for 64-bit machines. On 64-bit machines we should not split doubles into two 32 bit integer and handle the words separately. We have wide registers. This patch implements a 64-bit ceil version. Ideally all other functions will be converted over time. 2009-08-25 03:05:48 +02:00			`/* Direct movement of float into integer register. */`
			`#undef EXTRACT_WORDS64`
			`#define EXTRACT_WORDS64(i,d) \`
			`do { \`
			`long int i_; \`
			`asm ("movd %1, %0" : "=rm" (i_) : "x" (d)); \`
			`(i) = i_; \`
			`} while (0)`

			`/* And the reverse. */`
			`#undef INSERT_WORDS64`
			`#define INSERT_WORDS64(d,i) \`
			`do { \`
			`long int i_ = i; \`
			`asm ("movd %1, %0" : "=x" (d) : "rm" (i_)); \`
			`} while (0)`

Optimize float construction/extraction on x86-64. 2009-08-24 23:52:49 +02:00			`/* Direct movement of float into integer register. */`
			`#undef GET_FLOAT_WORD`
			`#define GET_FLOAT_WORD(i,d) \`
			`do { \`
			`int i_; \`
			`asm ("movd %1, %0" : "=rm" (i_) : "x" (d)); \`
			`(i) = i_; \`
			`} while (0)`

			`/* And the reverse. */`
			`#undef SET_FLOAT_WORD`
			`#define SET_FLOAT_WORD(d,i) \`
			`do { \`
			`int i_ = i; \`
			`asm ("movd %1, %0" : "=x" (d) : "rm" (i_)); \`
			`} while (0)`

[BZ #3306] 2007-03-27 Jakub Jelinek <jakub@redhat.com> [BZ #3306] * math/math_private.h (math_opt_barrier, math_force_eval): Define. * sysdeps/i386/fpu/math_private.h: New file. * sysdeps/x86_64/fpu/math_private.h: New file. * math/s_nexttowardf.c (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". * math/s_nextafter.c (__nextafter): Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/flt-32/s_nextafterf.c (__nextafterf): Likewise. * sysdeps/ieee754/ldbl-128/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/ldbl-96/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/i386/fpu/s_nextafterl.c (__nextafterl): Use math_opt_barrier and math_force_eval macros. * sysdeps/ieee754/ldbl-128/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/ieee754/ldbl-96/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/i386/fpu/s_nexttoward.c: Include float.h. (__nexttoward): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is 2. * sysdeps/i386/fpu/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is not 0. * sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c: Include float.h. (__nldbl_nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-96/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * math/bug-nextafter.c (zero, inf): New variables. 
(main): Add new tests. * math/bug-nexttoward.c (zero, inf): New variables. (main): Add new tests. 2007-04-16 22:41:42 +02:00			`#endif`
Optimize use of isnan, isinf, finite 2011-10-08 16:18:26 +02:00
			`#define __isnan(d) \`
Makr x86-64 math_private.h more robust 2011-10-17 22:00:39 +02:00			`({ long int __di; EXTRACT_WORDS64 (__di, (double) (d)); \`
Optimize use of isnan, isinf, finite 2011-10-08 16:18:26 +02:00			`(__di & 0x7fffffffffffffffl) > 0x7ff0000000000000l; })`
			`#define __isnanf(d) \`
Optimize libm libm is now somewhat integrated with gcc's -ffinite-math-only option and lots of the wrapper functions have been optimized. 2011-10-12 17:27:51 +02:00			`({ int __di; GET_FLOAT_WORD (__di, (float) d); \`
Optimize use of isnan, isinf, finite 2011-10-08 16:18:26 +02:00			`(__di & 0x7fffffff) > 0x7f800000; })`

			`#define __isinf_ns(d) \`
Makr x86-64 math_private.h more robust 2011-10-17 22:00:39 +02:00			`({ long int __di; EXTRACT_WORDS64 (__di, (double) (d)); \`
Optimize use of isnan, isinf, finite 2011-10-08 16:18:26 +02:00			`(__di & 0x7fffffffffffffffl) == 0x7ff0000000000000l; })`
			`#define __isinf_nsf(d) \`
Optimize libm libm is now somewhat integrated with gcc's -ffinite-math-only option and lots of the wrapper functions have been optimized. 2011-10-12 17:27:51 +02:00			`({ int __di; GET_FLOAT_WORD (__di, (float) d); \`
Optimize use of isnan, isinf, finite 2011-10-08 16:18:26 +02:00			`(__di & 0x7fffffff) == 0x7f800000; })`

			`#define __finite(d) \`
Makr x86-64 math_private.h more robust 2011-10-17 22:00:39 +02:00			`({ long int __di; EXTRACT_WORDS64 (__di, (double) (d)); \`
Optimize use of isnan, isinf, finite 2011-10-08 16:18:26 +02:00			`(__di & 0x7fffffffffffffffl) < 0x7ff0000000000000l; })`
			`#define __finitef(d) \`
Optimize libm libm is now somewhat integrated with gcc's -ffinite-math-only option and lots of the wrapper functions have been optimized. 2011-10-12 17:27:51 +02:00			`({ int __di; GET_FLOAT_WORD (__di, (float) d); \`
Optimize use of isnan, isinf, finite 2011-10-08 16:18:26 +02:00			`(__di & 0x7fffffff) < 0x7f800000; })`
Optimize libm libm is now somewhat integrated with gcc's -ffinite-math-only option and lots of the wrapper functions have been optimized. 2011-10-12 17:27:51 +02:00
			`#define __ieee754_sqrt(d) \`
			`({ double __res; \`
Makr x86-64 math_private.h more robust 2011-10-17 22:00:39 +02:00			`asm ("sqrtsd %1, %0" : "=x" (__res) : "xm" ((double) (d))); \`
Optimize libm libm is now somewhat integrated with gcc's -ffinite-math-only option and lots of the wrapper functions have been optimized. 2011-10-12 17:27:51 +02:00			`__res; })`
			`#define __ieee754_sqrtf(d) \`
			`({ float __res; \`
Makr x86-64 math_private.h more robust 2011-10-17 22:00:39 +02:00			`asm ("sqrtss %1, %0" : "=x" (__res) : "xm" ((float) (d))); \`
Optimize libm libm is now somewhat integrated with gcc's -ffinite-math-only option and lots of the wrapper functions have been optimized. 2011-10-12 17:27:51 +02:00			`__res; })`
			`#define __ieee754_sqrtl(d) \`
			`({ long double __res; \`
Makr x86-64 math_private.h more robust 2011-10-17 22:00:39 +02:00			`asm ("fsqrt" : "=t" (__res) : "0" ((long double) (d))); \`
Optimize libm libm is now somewhat integrated with gcc's -ffinite-math-only option and lots of the wrapper functions have been optimized. 2011-10-12 17:27:51 +02:00			`__res; })`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 17:23:40 +02:00
			`#ifdef __SSE4_1__`
			`# ifndef __rint`
			`# define __rint(d) \`
			`({ double __res; \`
Relax asm requirements for recently added x86-64 math interfaces 2011-10-18 02:30:52 +02:00			`asm ("roundsd $4, %1, %0" : "=x" (__res) : "xm" ((double) (d))); \`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 17:23:40 +02:00			`__res; })`
			`# endif`
			`# ifndef __rintf`
			`# define __rintf(d) \`
			`({ float __res; \`
Relax asm requirements for recently added x86-64 math interfaces 2011-10-18 02:30:52 +02:00			`asm ("roundss $4, %1, %0" : "=x" (__res) : "xm" ((float) (d))); \`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 17:23:40 +02:00			`__res; })`
			`# endif`

			`# ifndef __floor`
			`# define __floor(d) \`
			`({ double __res; \`
Relax asm requirements for recently added x86-64 math interfaces 2011-10-18 02:30:52 +02:00			`asm ("roundsd $1, %1, %0" : "=x" (__res) : "xm" ((double) (d))); \`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 17:23:40 +02:00			`__res; })`
			`# endif`
			`# ifndef __floorf`
			`# define __floorf(d) \`
			`({ float __res; \`
Relax asm requirements for recently added x86-64 math interfaces 2011-10-18 02:30:52 +02:00			`asm ("roundss $1, %1, %0" : "=x" (__res) : "xm" ((float) (d))); \`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 17:23:40 +02:00			`__res; })`
			`# endif`
			`#endif`
Start optimizing the use of the fenv interfaces in libm itself 2011-10-18 15:00:46 +02:00

			`/* Specialized variants of the <fenv.h> interfaces which only handle`
			`either the FPU or the SSE unit. */`
			`#undef libc_fegetround`
			`#define libc_fegetround() \`
			`({ \`
			`unsigned int mxcsr; \`
			`asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \`
			`(mxcsr & 0x6000) >> 3; \`
			`})`
			`// #define libc_fegetroundf() fegetround ()`
			`// #define libc_fegetroundl() fegetround ()`

			`#undef libc_fesetround`
			`#define libc_fesetround(r) \`
			`do { \`
			`unsigned int mxcsr; \`
			`asm ("stmxcsr %0" : "=m" (*&mxcsr)); \`
			`mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \`
			`asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \`
			`} while (0)`
			`// #define libc_fesetroundf(r) (void) fesetround (r)`
			`// #define libc_fesetroundl(r) (void) fesetround (r)`

			`#undef libc_feholdexcept`
			`#define libc_feholdexcept(e) \`
Provide combined internal feholdexcept/fesetround interface 2011-10-18 15:59:04 +02:00			`do { \`
Start optimizing the use of the fenv interfaces in libm itself 2011-10-18 15:00:46 +02:00			`unsigned int mxcsr; \`
			`asm ("stmxcsr %0" : "=m" (*&mxcsr)); \`
			`(e)->__mxcsr = mxcsr; \`
			`mxcsr = (mxcsr | 0x1f80) & ~0x3f; \`
			`asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \`
			`} while (0)`
			`// #define libc_feholdexceptf(e) (void) feholdexcept (e)`
			`// #define libc_feholdexceptl(e) (void) feholdexcept (e)`

Provide combined internal feholdexcept/fesetround interface 2011-10-18 15:59:04 +02:00			`#undef libc_feholdexcept_setround`
			`#define libc_feholdexcept_setround(e, r) \`
			`do { \`
			`unsigned int mxcsr; \`
			`asm ("stmxcsr %0" : "=m" (*&mxcsr)); \`
			`(e)->__mxcsr = mxcsr; \`
			`mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \`
			`asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \`
			`} while (0)`
			`// #define libc_feholdexcept_setroundf(e, r) ...`
			`// #define libc_feholdexcept_setroundl(e, r) ...`

Start optimizing the use of the fenv interfaces in libm itself 2011-10-18 15:00:46 +02:00			`#undef libc_fesetenv`
			`#define libc_fesetenv(e) \`
			`asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr))`
			`// #define libc_fesetenvf(e) (void) fesetenv (e)`
			`// #define libc_fesetenvl(e) (void) fesetenv (e)`