glibc/sysdeps/x86_64/fpu/math_private.h

#ifndef X86_64_MATH_PRIVATE_H
#define X86_64_MATH_PRIVATE_H 1

/* We can do a few things better on x86-64.  */

/* Instruction mnemonics pasted into the inline asm of the word-access
   macros in this header.  When __AVX__ or SSE2AVX is defined the
   "v"-prefixed (VEX-encoded) spellings are selected, otherwise the
   plain ones.  MOVD is used for the 32-bit float macros and MOVQ for
   the 64-bit double macros.  */
#if defined __AVX__ || defined SSE2AVX
# define MOVD "vmovd"
# define MOVQ "vmovq"
#else
# define MOVD "movd"
# define MOVQ "movq"
#endif

/* Direct movement of a double into a 64-bit integer.
   D is converted to double and supplied to the asm in an SSE register
   ("x"); MOVQ copies it into a general register or memory operand
   ("=rm") held in the temporary i_, which is then assigned to I.
   I must be an lvalue.  The temporary is literally named i_, so
   neither argument may refer to a caller variable of that name.  */
#define EXTRACT_WORDS64(i, d)						      \
  do {									      \
    int64_t i_;								      \
    asm (MOVQ " %1, %0" : "=rm" (i_) : "x" ((double) (d)));		      \
    (i) = i_;								      \
  } while (0)

/* And the reverse: build the double D from the 64-bit integer I.
   I is evaluated once into the int64_t temporary i_, which the asm
   takes from a general register or memory ("rm"); MOVQ moves it into
   an SSE register ("=x") and the result is assigned to D.
   D must be an lvalue.  Both arguments are parenthesized in the
   expansion, matching EXTRACT_WORDS64.  The temporaries are literally
   named i_ and d__, so the arguments must not refer to caller
   variables with those names.  */
#define INSERT_WORDS64(d, i) \
  do {									      \
    int64_t i_ = (i);							      \
    double d__;								      \
    asm (MOVQ " %1, %0" : "=x" (d__) : "rm" (i_));			      \
    (d) = d__;								      \
  } while (0)

/* Direct movement of a float into a 32-bit integer.
   D is converted to float and supplied to the asm in an SSE register
   ("x"); MOVD copies it into a general register or memory operand
   ("=rm") held in the int temporary i_, which is then assigned to I.
   I must be an lvalue.  The temporary is literally named i_, so
   neither argument may refer to a caller variable of that name.  */
#define GET_FLOAT_WORD(i, d) \
  do {									      \
    int i_;								      \
    asm (MOVD " %1, %0" : "=rm" (i_) : "x" ((float) (d)));		      \
    (i) = i_;								      \
  } while (0)

/* And the reverse: build the float F from the 32-bit integer I.
   I is evaluated once into the int temporary i_, which the asm takes
   from a general register or memory ("rm"); MOVD moves it into an SSE
   register ("=x") and the result is assigned to F.
   F must be an lvalue.  Both arguments are parenthesized in the
   expansion, matching GET_FLOAT_WORD.  The temporaries are literally
   named i_ and f__, so the arguments must not refer to caller
   variables with those names.  */
#define SET_FLOAT_WORD(f, i) \
  do {									      \
    int i_ = (i);							      \
    float f__;								      \
    asm (MOVD " %1, %0" : "=x" (f__) : "rm" (i_));			      \
    (f) = f__;								      \
  } while (0)

#include <sysdeps/i386/fpu/fenv_private.h>
#include_next <math_private.h>

/* Inline double-precision square root of D using one scalar SSE
   instruction.  The input may live in an SSE register or in memory;
   the result is produced in an SSE register.  The three-operand
   "v" form is emitted when __AVX__ or SSE2AVX is defined.  */
extern __always_inline double
__ieee754_sqrt (double d)
{
  double root;
#if defined __AVX__ || defined SSE2AVX
  asm ("vsqrtsd %[src], %[dst], %[dst]"
       : [dst] "=x" (root)
       : [src] "xm" (d));
#else
  asm ("sqrtsd %[src], %[dst]"
       : [dst] "=x" (root)
       : [src] "xm" (d));
#endif
  return root;
}

/* Inline single-precision square root of D using one scalar SSE
   instruction.  The input may live in an SSE register or in memory;
   the result is produced in an SSE register.  The three-operand
   "v" form is emitted when __AVX__ or SSE2AVX is defined.  */
extern __always_inline float
__ieee754_sqrtf (float d)
{
  float root;
#if defined __AVX__ || defined SSE2AVX
  asm ("vsqrtss %[src], %[dst], %[dst]"
       : [dst] "=x" (root)
       : [src] "xm" (d));
#else
  asm ("sqrtss %[src], %[dst]"
       : [dst] "=x" (root)
       : [src] "xm" (d));
#endif
  return root;
}

/* Inline long double square root of D using the x87 fsqrt
   instruction.  */
extern __always_inline long double
__ieee754_sqrtl (long double d)
{
  long double res;
  /* "=t" places the result in the top of the x87 register stack and
     the matching constraint "0" loads D into that same location, so
     fsqrt operates on it in place.  */
  asm ("fsqrt" : "=t" (res) : "0" (d));
  return res;
}

#ifdef __SSE4_1__
/* Inline double-precision rint of D via the SSE4.1 scalar rounding
   instruction with immediate 4 (per the Intel instruction reference,
   this selects the rounding mode currently set in MXCSR).  The input
   may be in an SSE register or memory; the result is produced in an
   SSE register.  */
extern __always_inline double
__rint (double d)
{
  double rounded;
# if defined __AVX__ || defined SSE2AVX
  asm ("vroundsd $4, %[src], %[dst], %[dst]"
       : [dst] "=x" (rounded)
       : [src] "xm" (d));
# else
  asm ("roundsd $4, %[src], %[dst]"
       : [dst] "=x" (rounded)
       : [src] "xm" (d));
# endif
  return rounded;
}

/* Inline single-precision rint of D via the SSE4.1 scalar rounding
   instruction with immediate 4 (per the Intel instruction reference,
   this selects the rounding mode currently set in MXCSR).  The input
   may be in an SSE register or memory; the result is produced in an
   SSE register.  */
extern __always_inline float
__rintf (float d)
{
  float rounded;
# if defined __AVX__ || defined SSE2AVX
  asm ("vroundss $4, %[src], %[dst], %[dst]"
       : [dst] "=x" (rounded)
       : [src] "xm" (d));
# else
  asm ("roundss $4, %[src], %[dst]"
       : [dst] "=x" (rounded)
       : [src] "xm" (d));
# endif
  return rounded;
}

/* Inline double-precision floor of D via the SSE4.1 scalar rounding
   instruction with immediate 1 (per the Intel instruction reference,
   round toward negative infinity).  The input may be in an SSE
   register or memory; the result is produced in an SSE register.  */
extern __always_inline double
__floor (double d)
{
  double lowered;
# if defined __AVX__ || defined SSE2AVX
  asm ("vroundsd $1, %[src], %[dst], %[dst]"
       : [dst] "=x" (lowered)
       : [src] "xm" (d));
# else
  asm ("roundsd $1, %[src], %[dst]"
       : [dst] "=x" (lowered)
       : [src] "xm" (d));
# endif
  return lowered;
}

/* Inline single-precision floor of D via the SSE4.1 scalar rounding
   instruction with immediate 1 (per the Intel instruction reference,
   round toward negative infinity).  The input may be in an SSE
   register or memory; the result is produced in an SSE register.  */
extern __always_inline float
__floorf (float d)
{
  float lowered;
# if defined __AVX__ || defined SSE2AVX
  asm ("vroundss $1, %[src], %[dst], %[dst]"
       : [dst] "=x" (lowered)
       : [src] "xm" (d));
# else
  asm ("roundss $1, %[src], %[dst]"
       : [dst] "=x" (lowered)
       : [src] "xm" (d));
# endif
  return lowered;
}
#endif /* __SSE4_1__ */

#endif /* X86_64_MATH_PRIVATE_H */
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-07 00:41:14 +01:00			`#ifndef X86_64_MATH_PRIVATE_H`
			`#define X86_64_MATH_PRIVATE_H 1`
[BZ #3306] 2007-03-27 Jakub Jelinek <jakub@redhat.com> [BZ #3306] * math/math_private.h (math_opt_barrier, math_force_eval): Define. * sysdeps/i386/fpu/math_private.h: New file. * sysdeps/x86_64/fpu/math_private.h: New file. * math/s_nexttowardf.c (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". * math/s_nextafter.c (__nextafter): Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/flt-32/s_nextafterf.c (__nextafterf): Likewise. * sysdeps/ieee754/ldbl-128/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/ldbl-96/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/i386/fpu/s_nextafterl.c (__nextafterl): Use math_opt_barrier and math_force_eval macros. * sysdeps/ieee754/ldbl-128/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/ieee754/ldbl-96/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/i386/fpu/s_nexttoward.c: Include float.h. (__nexttoward): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is 2. * sysdeps/i386/fpu/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is not 0. * sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c: Include float.h. (__nldbl_nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-96/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * math/bug-nextafter.c (zero, inf): New variables. 
(main): Add new tests. * math/bug-nexttoward.c (zero, inf): New variables. (main): Add new tests. 2007-04-16 22:41:42 +02:00
Optimize float construction/extraction on x86-64. 2009-08-24 23:52:49 +02:00			`/* We can do a few things better on x86-64. */`

Use -msse2avx option for x86-64 libm functions 2012-01-28 20:48:46 +01:00			`#if defined __AVX__ || defined SSE2AVX`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`# define MOVD "vmovd"`
Use movq for 64-bit operations The EXTRACT_WORDS64 and INSERT_WORDS64 macros use movd for a 64-bit operation. Somehow gcc manages to turn this into movq, but LLVM won't. 2013-05-15 Peter Collingbourne <pcc@google.com> * sysdeps/x86_64/fpu/math_private.h (MOVQ): New macro. (EXTRACT_WORDS64) Use where appropriate. (INSERT_WORDS64) Likewise. 2013-05-15 20:33:45 +02:00			`# define MOVQ "vmovq"`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`#else`
			`# define MOVD "movd"`
Use movq for 64-bit operations The EXTRACT_WORDS64 and INSERT_WORDS64 macros use movd for a 64-bit operation. Somehow gcc manages to turn this into movq, but LLVM won't. 2013-05-15 Peter Collingbourne <pcc@google.com> * sysdeps/x86_64/fpu/math_private.h (MOVQ): New macro. (EXTRACT_WORDS64) Use where appropriate. (INSERT_WORDS64) Likewise. 2013-05-15 20:33:45 +02:00			`# define MOVQ "movq"`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`#endif`

Add ceil implementation for 64-bit machines. On 64-bit machines we should not split doubles into two 32 bit integer and handle the words separately. We have wide registers. This patch implements a 64-bit ceil version. Ideally all other functions will be converted over time. 2009-08-25 03:05:48 +02:00			`/* Direct movement of float into integer register. */`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`#define EXTRACT_WORDS64(i, d) \`
			`do { \`
Use int64_t in x86_64/fpu/math_private.h 2012-03-19 23:09:58 +01:00			`int64_t i_; \`
Use movq for 64-bit operations The EXTRACT_WORDS64 and INSERT_WORDS64 macros use movd for a 64-bit operation. Somehow gcc manages to turn this into movq, but LLVM won't. 2013-05-15 Peter Collingbourne <pcc@google.com> * sysdeps/x86_64/fpu/math_private.h (MOVQ): New macro. (EXTRACT_WORDS64) Use where appropriate. (INSERT_WORDS64) Likewise. 2013-05-15 20:33:45 +02:00			`asm (MOVQ " %1, %0" : "=rm" (i_) : "x" ((double) (d))); \`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`(i) = i_; \`
			`} while (0)`
Add ceil implementation for 64-bit machines. On 64-bit machines we should not split doubles into two 32 bit integer and handle the words separately. We have wide registers. This patch implements a 64-bit ceil version. Ideally all other functions will be converted over time. 2009-08-25 03:05:48 +02:00
			`/* And the reverse. */`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`#define INSERT_WORDS64(d, i) \`
			`do { \`
Use int64_t in x86_64/fpu/math_private.h 2012-03-19 23:09:58 +01:00			`int64_t i_ = i; \`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`double d__; \`
Use movq for 64-bit operations The EXTRACT_WORDS64 and INSERT_WORDS64 macros use movd for a 64-bit operation. Somehow gcc manages to turn this into movq, but LLVM won't. 2013-05-15 Peter Collingbourne <pcc@google.com> * sysdeps/x86_64/fpu/math_private.h (MOVQ): New macro. (EXTRACT_WORDS64) Use where appropriate. (INSERT_WORDS64) Likewise. 2013-05-15 20:33:45 +02:00			`asm (MOVQ " %1, %0" : "=x" (d__) : "rm" (i_)); \`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`d = d__; \`
			`} while (0)`
Add ceil implementation for 64-bit machines. On 64-bit machines we should not split doubles into two 32 bit integer and handle the words separately. We have wide registers. This patch implements a 64-bit ceil version. Ideally all other functions will be converted over time. 2009-08-25 03:05:48 +02:00
Optimize float construction/extraction on x86-64. 2009-08-24 23:52:49 +02:00			`/* Direct movement of float into integer register. */`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`#define GET_FLOAT_WORD(i, d) \`
			`do { \`
			`int i_; \`
			`asm (MOVD " %1, %0" : "=rm" (i_) : "x" ((float) (d))); \`
			`(i) = i_; \`
			`} while (0)`
Optimize float construction/extraction on x86-64. 2009-08-24 23:52:49 +02:00
			`/* And the reverse. */`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`#define SET_FLOAT_WORD(f, i) \`
			`do { \`
			`int i_ = i; \`
			`float f__; \`
			`asm (MOVD " %1, %0" : "=x" (f__) : "rm" (i_)); \`
			`f = f__; \`
			`} while (0)`
Optimize float construction/extraction on x86-64. 2009-08-24 23:52:49 +02:00
Optimize private 387 fenv access; share code between i386 and x86_64. 2012-03-18 23:58:00 +01:00			`#include <sysdeps/i386/fpu/fenv_private.h>`
Make inline __isnan, __isinf_ns, __finite generic. For code generation to stay identical on x86_64, this requires that we define the fp word manipulation macros before including the generic header. 2012-03-09 21:38:23 +01:00			`#include_next <math_private.h>`
Optimize libm libm is now somewhat integrated with gcc's -ffinite-math-only option and lots of the wrapper functions have been optimized. 2011-10-12 17:27:51 +02:00
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-07 00:41:14 +01:00			`extern __always_inline double`
			`__ieee754_sqrt (double d)`
			`{`
			`double res;`
Use -msse2avx option for x86-64 libm functions 2012-01-28 20:48:46 +01:00			`#if defined __AVX__ || defined SSE2AVX`
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-07 00:41:14 +01:00			`asm ("vsqrtsd %1, %0, %0" : "=x" (res) : "xm" (d));`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`#else`
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-07 00:41:14 +01:00			`asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (d));`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`#endif`
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-07 00:41:14 +01:00			`return res;`
			`}`

			`extern __always_inline float`
			`__ieee754_sqrtf (float d)`
			`{`
			`float res;`
			`#if defined __AVX__ || defined SSE2AVX`
			`asm ("vsqrtss %1, %0, %0" : "=x" (res) : "xm" (d));`
			`#else`
			`asm ("sqrtss %1, %0" : "=x" (res) : "xm" (d));`
			`#endif`
			`return res;`
			`}`

			`extern __always_inline long double`
			`__ieee754_sqrtl (long double d)`
			`{`
			`long double res;`
			`asm ("fsqrt" : "=t" (res) : "0" (d));`
			`return res;`
			`}`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 17:23:40 +02:00
			`#ifdef __SSE4_1__`
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-07 00:58:51 +01:00			`extern __always_inline double`
			`__rint (double d)`
			`{`
			`double res;`
			`# if defined __AVX__ || defined SSE2AVX`
			`asm ("vroundsd $4, %1, %0, %0" : "=x" (res) : "xm" (d));`
			`# else`
			`asm ("roundsd $4, %1, %0" : "=x" (res) : "xm" (d));`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 17:23:40 +02:00			`# endif`
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-07 00:58:51 +01:00			`return res;`
			`}`

			`extern __always_inline float`
			`__rintf (float d)`
			`{`
			`float res;`
			`# if defined __AVX__ || defined SSE2AVX`
			`asm ("vroundss $4, %1, %0, %0" : "=x" (res) : "xm" (d));`
			`# else`
			`asm ("roundss $4, %1, %0" : "=x" (res) : "xm" (d));`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 17:23:40 +02:00			`# endif`
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-07 00:58:51 +01:00			`return res;`
			`}`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 17:23:40 +02:00
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-07 00:58:51 +01:00			`extern __always_inline double`
			`__floor (double d)`
			`{`
			`double res;`
			`# if defined __AVX__ || defined SSE2AVX`
			`asm ("vroundsd $1, %1, %0, %0" : "=x" (res) : "xm" (d));`
			`# else`
			`asm ("roundsd $1, %1, %0" : "=x" (res) : "xm" (d));`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 17:23:40 +02:00			`# endif`
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-07 00:58:51 +01:00			`return res;`
			`}`

			`extern __always_inline float`
			`__floorf (float d)`
			`{`
			`float res;`
			`# if defined __AVX__ || defined SSE2AVX`
			`asm ("vroundss $1, %1, %0, %0" : "=x" (res) : "xm" (d));`
			`# else`
			`asm ("roundss $1, %1, %0" : "=x" (res) : "xm" (d));`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 14:17:57 +02:00			`# endif`
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-07 00:58:51 +01:00			`return res;`
			`}`
			`#endif /* __SSE4_1__ */`
Start optimizing the use of the fenv interfaces in libm itself 2011-10-18 15:00:46 +02:00
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-07 00:41:14 +01:00			`#endif /* X86_64_MATH_PRIVATE_H */`