Check for FMA4 support and generate appropriate fma functions

This commit is contained in:
Ulrich Drepper 2011-10-20 22:43:15 -04:00
parent 8d4f46c613
commit ed72b6545f
8 changed files with 115 additions and 27 deletions

View File

@ -1,5 +1,14 @@
2011-10-20 Ulrich Drepper <drepper@gmail.com>
* sysdeps/i386/configure.in: Test for -mfma4 option.
* config.h.in: Add HAVE_FMA4_SUPPORT entry.
* sysdeps/x86_64/multiarch/init-arch.h: Define HAS_FMA4 and
COMMON_CPUID_INDEX_80000001.
* sysdeps/x86_64/multiarch/init-arch.c: Read 80000001 leaf for AMD.
* sysdeps/x86_64/fpu/multiarch/s_fma.c: Test for FMA4 support and
use it if FMA3 is not supported.
* sysdeps/x86_64/fpu/multiarch/s_fmaf.c: Likewise.
* sysdeps/x86_64/multiarch/s_fma.c: Moved to ../fpu/multiarch.
* sysdeps/x86_64/multiarch/s_fmaf.c: Likewise.

View File

@ -118,6 +118,9 @@
/* Define if gcc supports AVX. */
#undef HAVE_AVX_SUPPORT
/* Define if gcc supports FMA4. */
#undef HAVE_FMA4_SUPPORT
/* Define if the compiler's exception support is based on libunwind. */
#undef HAVE_CC_WITH_LIBUNWIND

View File

@ -167,7 +167,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_retval=1
fi
eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_c_try_compile
@ -193,7 +193,7 @@ $as_echo "$ac_try_echo"; } >&5
mv -f conftest.er1 conftest.err
fi
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; } >/dev/null && {
test $ac_status = 0; } > conftest.i && {
test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
test ! -s conftest.err
}; then :
@ -204,7 +204,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_retval=1
fi
eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_c_try_cpp
@ -217,10 +217,10 @@ fi
ac_fn_c_check_header_mongrel ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
if eval "test \"\${$3+set}\"" = set; then :
if eval \${$3+:} false; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
if eval "test \"\${$3+set}\"" = set; then :
if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
fi
eval ac_res=\$$3
@ -256,7 +256,7 @@ if ac_fn_c_try_cpp "$LINENO"; then :
else
ac_header_preproc=no
fi
rm -f conftest.err conftest.$ac_ext
rm -f conftest.err conftest.i conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
$as_echo "$ac_header_preproc" >&6; }
@ -283,7 +283,7 @@ $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
esac
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
if eval "test \"\${$3+set}\"" = set; then :
if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
else
eval "$3=\$ac_header_compiler"
@ -292,7 +292,7 @@ eval ac_res=\$$3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
fi
eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_c_check_header_mongrel
@ -333,7 +333,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_retval=$ac_status
fi
rm -rf conftest.dSYM conftest_ipa8_conftest.oo
eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_c_try_run
@ -347,7 +347,7 @@ ac_fn_c_check_header_compile ()
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
if eval "test \"\${$3+set}\"" = set; then :
if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@ -365,7 +365,7 @@ fi
eval ac_res=\$$3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_c_check_header_compile
# This file is generated from configure.in by Autoconf. DO NOT EDIT!
@ -375,7 +375,7 @@ $as_echo "$ac_res" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
if test "${ac_cv_path_GREP+set}" = set; then :
if ${ac_cv_path_GREP+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -z "$GREP"; then
@ -438,7 +438,7 @@ $as_echo "$ac_cv_path_GREP" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
$as_echo_n "checking for egrep... " >&6; }
if test "${ac_cv_path_EGREP+set}" = set; then :
if ${ac_cv_path_EGREP+:} false; then :
$as_echo_n "(cached) " >&6
else
if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
@ -505,7 +505,7 @@ $as_echo "$ac_cv_path_EGREP" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
$as_echo_n "checking for ANSI C header files... " >&6; }
if test "${ac_cv_header_stdc+set}" = set; then :
if ${ac_cv_header_stdc+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@ -633,7 +633,7 @@ done
ac_fn_c_check_header_mongrel "$LINENO" "cpuid.h" "ac_cv_header_cpuid_h" "$ac_includes_default"
if test "x$ac_cv_header_cpuid_h" = x""yes; then :
if test "x$ac_cv_header_cpuid_h" = xyes; then :
else
as_fn_error $? "gcc must provide the <cpuid.h> header" "$LINENO" 5
@ -643,7 +643,7 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if -g produces usable source locations for assembler-with-cpp" >&5
$as_echo_n "checking if -g produces usable source locations for assembler-with-cpp... " >&6; }
if test "${libc_cv_cpp_asm_debuginfo+set}" = set; then :
if ${libc_cv_cpp_asm_debuginfo+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.S <<EOF
@ -693,7 +693,7 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SSE4 support" >&5
$as_echo_n "checking for SSE4 support... " >&6; }
if test "${libc_cv_cc_sse4+set}" = set; then :
if ${libc_cv_cc_sse4+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -msse4 -xc /dev/null -S -o /dev/null'
@ -716,7 +716,7 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler -mtune=i686 support" >&5
$as_echo_n "checking for assembler -mtune=i686 support... " >&6; }
if test "${libc_cv_as_i686+set}" = set; then :
if ${libc_cv_as_i686+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -Wa,-mtune=i686 -xc /dev/null -S -o /dev/null'
@ -735,7 +735,7 @@ $as_echo "$libc_cv_as_i686" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX support" >&5
$as_echo_n "checking for AVX support... " >&6; }
if test "${libc_cv_cc_avx+set}" = set; then :
if ${libc_cv_cc_avx+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -mavx -xc /dev/null -S -o /dev/null'
@ -756,9 +756,32 @@ if test $libc_cv_cc_avx = yes; then
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
$as_echo_n "checking for FMA4 support... " >&6; }
if ${libc_cv_cc_fma4+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null'
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
(eval $ac_try) 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then
libc_cv_cc_fma4=yes
else
libc_cv_cc_fma4=no
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_fma4" >&5
$as_echo "$libc_cv_cc_fma4" >&6; }
if test $libc_cv_cc_fma4 = yes; then
$as_echo "#define HAVE_FMA4_SUPPORT 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -mno-vzeroupper support" >&5
$as_echo_n "checking for -mno-vzeroupper support... " >&6; }
if test "${libc_cv_cc_novzeroupper+set}" = set; then :
if ${libc_cv_cc_novzeroupper+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -mno-vzeroupper -xc /dev/null -S -o /dev/null'

View File

@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then
AC_DEFINE(HAVE_AVX_SUPPORT)
fi
dnl Check if -mfma4 works.
AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then
libc_cv_cc_fma4=yes
else
libc_cv_cc_fma4=no
fi])
if test $libc_cv_cc_fma4 = yes; then
AC_DEFINE(HAVE_FMA4_SUPPORT)
fi
dnl Check if -mno-vzeroupper works.
AC_CACHE_CHECK(for -mno-vzeroupper support, libc_cv_cc_novzeroupper, [dnl
if AC_TRY_COMMAND([${CC-cc} -mno-vzeroupper -xc /dev/null -S -o /dev/null]); then

View File

@ -1,5 +1,5 @@
/* FMA version of fma.
Copyright (C) 2009, 2010 Free Software Foundation, Inc.
Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@ -28,13 +28,29 @@ extern double __fma_sse2 (double x, double y, double z) attribute_hidden;
static double
__fma_fma (double x, double y, double z)
__fma_fma3 (double x, double y, double z)
{
asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
return x;
}
libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_sse2);
# ifdef HAVE_FMA4_SUPPORT
/* FMA4 implementation of fma: returns x * y + z computed by a single
   vfmaddsd instruction.

   The FMA4 encoding permits at most one of the two trailing source
   operands to be a memory reference.  Only Z is therefore given the
   "xm" constraint; X and Y must be in SSE registers.  Allowing "xm"
   for both Y and Z would let the compiler pick memory for both and
   produce an instruction the assembler rejects.  */
static double
__fma_fma4 (double x, double y, double z)
{
  asm ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "xm" (z));
  return x;
}
# else
# undef HAS_FMA4
# define HAS_FMA4 0
# define __fma_fma4 NULL
# endif
/* Choose the implementation of __fma: prefer FMA3 when HAS_FMA is set,
   otherwise FMA4 when HAS_FMA4 is set, otherwise the SSE2 version.
   When HAVE_FMA4_SUPPORT is not defined, HAS_FMA4 is forced to 0 and
   __fma_fma4 to NULL above, so the FMA4 arm is never selected.  */
libm_ifunc (__fma, HAS_FMA
? __fma_fma3 : (HAS_FMA4 ? __fma_fma4 : __fma_sse2));
weak_alias (__fma, fma)
# define __fma __fma_sse2

View File

@ -1,5 +1,5 @@
/* FMA version of fmaf.
Copyright (C) 2009, 2010 Free Software Foundation, Inc.
Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -27,13 +27,29 @@ extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden;
static float
__fmaf_fma (float x, float y, float z)
__fmaf_fma3 (float x, float y, float z)
{
asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
return x;
}
libm_ifunc (__fmaf, HAS_FMA ? __fmaf_fma : __fmaf_sse2);
# ifdef HAVE_FMA4_SUPPORT
/* FMA4 implementation of fmaf: returns x * y + z computed by a single
   vfmaddss instruction.

   The FMA4 encoding permits at most one of the two trailing source
   operands to be a memory reference.  Only Z is therefore given the
   "xm" constraint; X and Y must be in SSE registers.  Allowing "xm"
   for both Y and Z would let the compiler pick memory for both and
   produce an instruction the assembler rejects.  */
static float
__fmaf_fma4 (float x, float y, float z)
{
  asm ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "xm" (z));
  return x;
}
# else
# undef HAS_FMA4
# define HAS_FMA4 0
# define __fmaf_fma4 NULL
# endif
/* Choose the implementation of __fmaf: prefer FMA3 when HAS_FMA is set,
   otherwise FMA4 when HAS_FMA4 is set, otherwise the SSE2 version.
   When HAVE_FMA4_SUPPORT is not defined, HAS_FMA4 is forced to 0 and
   __fmaf_fma4 to NULL above, so the FMA4 arm is never selected.  */
libm_ifunc (__fmaf, HAS_FMA
? __fmaf_fma3 : (HAS_FMA4 ? __fmaf_fma4 : __fmaf_sse2));
weak_alias (__fmaf, fmaf)
# define __fmaf __fmaf_sse2

View File

@ -86,7 +86,7 @@ __init_cpu_features (void)
default:
/* Unknown family 0x06 processors. Assuming this is one
of Core i3/i5/i7 processors if AVX is available. */
of Core i3/i5/i7 processors if AVX is available. */
if ((ecx & bit_AVX) == 0)
break;
@ -131,6 +131,14 @@ __init_cpu_features (void)
if ((ecx & 0x200))
__cpu_features.feature[index_Prefer_SSE_for_memop]
|= bit_Prefer_SSE_for_memop;
__cpuid (0x80000000, eax, ebx, ecx, edx);
if (eax >= 0x80000001)
__cpuid (0x80000001,
__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax,
__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx,
__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx,
__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx);
}
else
kind = arch_kind_other;

View File

@ -53,6 +53,7 @@
/* Indices into the cpuid[] array of __cpu_features.  Each enumerator
   names one CPUID leaf whose eax/ebx/ecx/edx values are stored there.  */
enum
{
COMMON_CPUID_INDEX_1 = 0,
COMMON_CPUID_INDEX_80000001, /* Extended leaf 0x80000001; for AMD.  */
/* Keep the following line at the end. */
COMMON_CPUID_INDEX_MAX
};
@ -113,6 +114,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 19)
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20)
# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
/* FMA4 is looked up in the ecx value saved from CPUID leaf 0x80000001
   (presumably bit 16, going by the pattern of the macros above).
   NOTE(review): only the CPUID feature bit is consulted here; confirm
   whether OS support for the AVX register state must also be verified
   before the VEX-encoded FMA4 instructions are used.  */
# define HAS_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, 16)
# define index_Fast_Rep_String FEATURE_INDEX_1
# define index_Fast_Copy_Backward FEATURE_INDEX_1