Use -msse2avx option for x86-64 libm functions

This commit is contained in:
Ulrich Drepper 2012-01-28 14:48:46 -05:00
parent 73139a7628
commit 56f6f6a240
13 changed files with 83 additions and 60 deletions

View File

@ -1,5 +1,22 @@
2012-01-28 Ulrich Drepper <drepper@gmail.com>
* config.h.in: Define HAVE_SSE2AVX_SUPPORT.
* math/math_private.h: Remove libc_fegetround* and
libc_fesetround*.
* sysdeps/i386/configure.in: Check for -msse2avx.
* sysdeps/x86_64/fpu/math_private.h: Use VEX-encoded instructions
also if SSE2AVX is defined.
Remove libc_fegetround* and libc_fesetround*.
* sysdeps/x86_64/fpu/multiarch/Makefile: Compile *-avx functions
if config-cflags-sse2avx is yes. Also add -DSSE2AVX to defines.
* sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_AVX again instead
of HAS_YMM_USABLE.
* sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise.
* sysdeps/x86_64/fpu/math_private.h: Simplify use of AVX instructions.
2012-01-19 Adhemerval Zanella <azanella@linux.vnet.ibm.com>

View File

@ -90,7 +90,7 @@
certain registers (CR0, MQ, CTR, LR) in asm statements. */
#undef BROKEN_PPC_ASM_CR0
/* Defined on SPARC if ld doesn't handle R_SPARC_WDISP22 against .hidden
/* Defined on SPARC if ld does not handle R_SPARC_WDISP22 against .hidden
symbol. sysdeps/sparc/sparc32/elf/configure. */
#undef BROKEN_SPARC_WDISP22
@ -106,17 +106,20 @@
/* Define if gcc supports AVX. */
#undef HAVE_AVX_SUPPORT
/* Define if gcc supports VEX encoding. */
#undef HAVE_SSE2AVX_SUPPORT
/* Define if gcc supports FMA4. */
#undef HAVE_FMA4_SUPPORT
/* Define if the compiler's exception support is based on libunwind. */
/* Define if the compiler\'s exception support is based on libunwind. */
#undef HAVE_CC_WITH_LIBUNWIND
/* Define if the access to static and hidden variables is position independent
and does not need relocations. */
#undef PI_STATIC_AND_HIDDEN
/* Define this to disable the `hidden_proto' et al macros in
/* Define this to disable the 'hidden_proto' et al macros in
include/libc-symbols.h that avoid PLT slots in the shared objects. */
#undef NO_HIDDEN

View File

@ -365,14 +365,6 @@ extern void __docos (double __x, double __dx, double __v[]);
know what operations are going to be performed. Therefore we
define additional interfaces. By default they refer to the normal
interfaces. */
#define libc_fegetround() fegetround ()
#define libc_fegetroundf() fegetround ()
#define libc_fegetroundl() fegetround ()
#define libc_fesetround(r) (void) fesetround (r)
#define libc_fesetroundf(r) (void) fesetround (r)
#define libc_fesetroundl(r) (void) fesetround (r)
#define libc_feholdexcept(e) (void) feholdexcept (e)
#define libc_feholdexceptf(e) (void) feholdexcept (e)
#define libc_feholdexceptl(e) (void) feholdexcept (e)

View File

@ -756,6 +756,29 @@ if test $libc_cv_cc_avx = yes; then
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
if ${libc_cv_cc_sse2avx+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null'
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
(eval $ac_try) 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then
libc_cv_cc_sse2avx=yes
else
libc_cv_cc_sse2avx=no
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5
$as_echo "$libc_cv_cc_sse2avx" >&6; }
if test $libc_cv_cc_sse2avx = yes; then
$as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
$as_echo_n "checking for FMA4 support... " >&6; }
if ${libc_cv_cc_fma4+:} false; then :

View File

@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then
AC_DEFINE(HAVE_AVX_SUPPORT)
fi
dnl Check if -msse2avx works.
dnl Probe: run the compiler on an empty input with -msse2avx and see
dnl whether it exits successfully.  The answer is cached in
dnl libc_cv_cc_sse2avx; when it is "yes", HAVE_SSE2AVX_SUPPORT is defined
dnl (described in config.h.in as "gcc supports VEX encoding").
AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
if AC_TRY_COMMAND([${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null]); then
libc_cv_cc_sse2avx=yes
else
libc_cv_cc_sse2avx=no
fi])
if test $libc_cv_cc_sse2avx = yes; then
AC_DEFINE(HAVE_SSE2AVX_SUPPORT)
fi
dnl Check if -mfma4 works.
AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then

View File

@ -19,7 +19,7 @@
/* We can do a few things better on x86-64. */
#ifdef __AVX__
#if defined __AVX__ || defined SSE2AVX
# define MOVD "vmovd"
# define STMXCSR "vstmxcsr"
# define LDMXCSR "vldmxcsr"
@ -90,7 +90,7 @@
({ int __di; GET_FLOAT_WORD (__di, (float) d); \
(__di & 0x7fffffff) < 0x7f800000; })
#ifdef __AVX__
#if defined __AVX__ || defined SSE2AVX
# define __ieee754_sqrt(d) \
({ double __res; \
asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@ -116,7 +116,7 @@
#ifdef __SSE4_1__
# ifndef __rint
# ifdef __AVX__
# if defined __AVX__ || defined SSE2AVX
# define __rint(d) \
({ double __res; \
asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@ -129,7 +129,7 @@
# endif
# endif
# ifndef __rintf
# ifdef __AVX__
# if defined __AVX__ || defined SSE2AVX
# define __rintf(d) \
({ float __res; \
asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
@ -143,7 +143,7 @@
# endif
# ifndef __floor
# ifdef __AVX__
# if defined __AVX__ || defined SSE2AVX
# define __floor(d) \
({ double __res; \
asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@ -156,7 +156,7 @@
# endif
# endif
# ifndef __floorf
# ifdef __AVX__
# if defined __AVX__ || defined SSE2AVX
# define __floorf(d) \
({ float __res; \
asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
@ -173,29 +173,6 @@
/* Specialized variants of the <fenv.h> interfaces which only handle
either the FPU or the SSE unit. */
#undef libc_fegetround
#define libc_fegetround() \
({ \
unsigned int mxcsr; \
asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
(mxcsr & 0x6000) >> 3; \
})
#undef libc_fegetroundf
#define libc_fegetroundf() libc_fegetround ()
// #define libc_fegetroundl() fegetround ()
#undef libc_fesetround
#define libc_fesetround(r) \
do { \
unsigned int mxcsr; \
asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \
asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
} while (0)
#undef libc_fesetroundf
#define libc_fesetroundf(r) libc_fesetround (r)
// #define libc_fesetroundl(r) (void) fesetround (r)
#undef libc_feholdexcept
#define libc_feholdexcept(e) \
do { \
@ -224,7 +201,8 @@
#undef libc_fetestexcept
#define libc_fetestexcept(e) \
({ unsigned int mxcsr; asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
({ unsigned int mxcsr; \
asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
mxcsr & (e) & FE_ALL_EXCEPT; })
#undef libc_fetestexceptf
#define libc_fetestexceptf(e) libc_fetestexcept (e)

View File

@ -34,21 +34,21 @@ CFLAGS-s_sin-fma4.c = -mfma4
CFLAGS-s_tan-fma4.c = -mfma4
endif
ifeq ($(config-cflags-avx),yes)
ifeq ($(config-cflags-sse2avx),yes)
libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
e_atan2-avx s_sin-avx s_tan-avx \
mplog-avx mpa-avx slowexp-avx \
mpexp-avx
CFLAGS-e_atan2-avx.c = -mavx
CFLAGS-e_exp-avx.c = -mavx
CFLAGS-e_log-avx.c = -mavx
CFLAGS-mpa-avx.c = -mavx
CFLAGS-mpexp-avx.c = -mavx
CFLAGS-mplog-avx.c = -mavx
CFLAGS-s_atan-avx.c = -mavx
CFLAGS-s_sin-avx.c = -mavx
CFLAGS-slowexp-avx.c = -mavx
CFLAGS-s_tan-avx.c = -mavx
# Every *-avx object is built with -msse2avx (the option probed by the
# "AVX encoding of SSE instructions" configure check) and with -DSSE2AVX,
# which makes sysdeps/x86_64/fpu/math_private.h select its VEX-encoded
# inline asm.  All ten files must use the same flags.
CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX
CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
endif
endif

View File

@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);
libm_ifunc (__ieee754_atan2,
HAS_FMA4 ? __ieee754_atan2_fma4
: (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
: (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
strong_alias (__ieee754_atan2, __atan2_finite)
# define __ieee754_atan2 __ieee754_atan2_sse2

View File

@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);
libm_ifunc (__ieee754_exp,
HAS_FMA4 ? __ieee754_exp_fma4
: (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2));
: (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
strong_alias (__ieee754_exp, __exp_finite)
# define __ieee754_exp __ieee754_exp_sse2

View File

@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double);
libm_ifunc (__ieee754_log,
HAS_FMA4 ? __ieee754_log_fma4
: (HAS_YMM_USABLE ? __ieee754_log_avx
: __ieee754_log_sse2));
: (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2));
strong_alias (__ieee754_log, __log_finite)
# define __ieee754_log __ieee754_log_sse2

View File

@ -13,7 +13,7 @@ extern double __atan_fma4 (double);
# endif
libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
HAS_YMM_USABLE ? __atan_avx : __atan_sse2));
HAS_AVX ? __atan_avx : __atan_sse2));
# define atan __atan_sse2
#endif

View File

@ -18,11 +18,11 @@ extern double __sin_fma4 (double);
# endif
libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
HAS_YMM_USABLE ? __cos_avx : __cos_sse2));
HAS_AVX ? __cos_avx : __cos_sse2));
weak_alias (__cos, cos)
libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
HAS_YMM_USABLE ? __sin_avx : __sin_sse2));
HAS_AVX ? __sin_avx : __sin_sse2));
weak_alias (__sin, sin)
# define __cos __cos_sse2

View File

@ -13,7 +13,7 @@ extern double __tan_fma4 (double);
# endif
libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
HAS_YMM_USABLE ? __tan_avx : __tan_sse2));
HAS_AVX ? __tan_avx : __tan_sse2));
# define tan __tan_sse2
#endif