Save and restore AVX-512 zmm registers to x86-64 ld.so

AVX-512 ISA adds 512-bit zmm registers.  This patch updates
_dl_runtime_profile to pass zmm registers to run-time audit. It also
changes _dl_x86_64_save_sse and _dl_x86_64_restore_sse to upport zmm
registers, which are called when only when RTLD_PREPARE_FOREIGN_CALL
is used.  Its performance impact is minimum.

	* config.h.in (HAVE_AVX512_SUPPORT): New #undef.
	(HAVE_AVX512_ASM_SUPPORT): Likewise.
	* sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New.
	(La_x86_64_vector): Add zmm.
	* sysdeps/x86_64/Makefile (tests): Add tst-audit10.
	(modules-names): Add tst-auditmod10a and tst-auditmod10b.
	($(objpfx)tst-audit10): New target.
	($(objpfx)tst-audit10.out): Likewise.
	(tst-audit10-ENV): New.
	(AVX512-CFLAGS): Likewise.
	(CFLAGS-tst-audit10.c): Likewise.
	(CFLAGS-tst-auditmod10a.c): Likewise.
	(CFLAGS-tst-auditmod10b.c): Likewise.
	* sysdeps/x86_64/configure.ac: Set config-cflags-avx512,
	HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT.
	* sysdeps/x86_64/configure: Regenerated.
	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add
	AVX-512 zmm register support.
	(_dl_x86_64_save_sse): Likewise.
	(_dl_x86_64_restore_sse): Likewise.
	* sysdeps/x86_64/dl-trampoline.h: Updated to support different
	size vector registers.
	* sysdeps/x86_64/link-defines.sym (YMM_SIZE): New.
	(ZMM_SIZE): Likewise.
	* sysdeps/x86_64/tst-audit10.c: New file.
	* sysdeps/x86_64/tst-auditmod10a.c: Likewise.
	* sysdeps/x86_64/tst-auditmod10b.c: Likewise.
This commit is contained in:
Igor Zamyatin 2014-03-13 11:10:22 -07:00 committed by H.J. Lu
parent 44c4e5d598
commit 2d63a517e4
12 changed files with 609 additions and 40 deletions

View File

@ -1,3 +1,33 @@
2014-03-13 Igor Zamyatin <igor.zamyatin@intel.com>
* config.h.in (HAVE_AVX512_SUPPORT): New #undef.
(HAVE_AVX512_ASM_SUPPORT): Likewise.
* sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New.
(La_x86_64_vector): Add zmm.
* sysdeps/x86_64/Makefile (tests): Add tst-audit10.
(modules-names): Add tst-auditmod10a and tst-auditmod10b.
($(objpfx)tst-audit10): New target.
($(objpfx)tst-audit10.out): Likewise.
(tst-audit10-ENV): New.
(AVX512-CFLAGS): Likewise.
(CFLAGS-tst-audit10.c): Likewise.
(CFLAGS-tst-auditmod10a.c): Likewise.
(CFLAGS-tst-auditmod10b.c): Likewise.
* sysdeps/x86_64/configure.ac: Set config-cflags-avx512,
HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT.
* sysdeps/x86_64/configure: Regenerated.
* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add
AVX-512 zmm register support.
(_dl_x86_64_save_sse): Likewise.
(_dl_x86_64_restore_sse): Likewise.
* sysdeps/x86_64/dl-trampoline.h: Updated to support different
size vector registers.
* sysdeps/x86_64/link-defines.sym (YMM_SIZE): New.
(ZMM_SIZE): Likewise.
* sysdeps/x86_64/tst-audit10.c: New file.
* sysdeps/x86_64/tst-auditmod10a.c: Likewise.
* sysdeps/x86_64/tst-auditmod10b.c: Likewise.
2014-03-13 Roland McGrath <roland@hack.frob.com>
* configure.ac (HAVE_EHDR_START): New check.

View File

@ -98,6 +98,12 @@
/* Define if gcc supports VEX encoding. */
#undef HAVE_SSE2AVX_SUPPORT
/* Define if compiler supports AVX512. */
#undef HAVE_AVX512_SUPPORT
/* Define if assembler supports AVX512. */
#undef HAVE_AVX512_ASM_SUPPORT
/* Define if gcc supports FMA4. */
#undef HAVE_FMA4_SUPPORT

View File

@ -66,6 +66,8 @@ __END_DECLS
typedef float La_x86_64_xmm __attribute__ ((__vector_size__ (16)));
typedef float La_x86_64_ymm
__attribute__ ((__vector_size__ (32), __aligned__ (16)));
typedef double La_x86_64_zmm
__attribute__ ((__vector_size__ (64), __aligned__ (16)));
# else
typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__)));
# endif
@ -74,6 +76,7 @@ typedef union
{
# if __GNUC_PREREQ (4,0)
La_x86_64_ymm ymm[2];
La_x86_64_zmm zmm[1];
# endif
La_x86_64_xmm xmm[4];
} La_x86_64_vector __attribute__ ((__aligned__ (16)));

View File

@ -38,7 +38,7 @@ tests-pie += $(quad-pie-test)
$(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o
$(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o
tests += tst-audit3 tst-audit4 tst-audit5
tests += tst-audit3 tst-audit4 tst-audit5 tst-audit10
ifeq (yes,$(config-cflags-avx))
tests += tst-audit6 tst-audit7
endif
@ -46,7 +46,8 @@ modules-names += tst-auditmod3a tst-auditmod3b \
tst-auditmod4a tst-auditmod4b \
tst-auditmod5a tst-auditmod5b \
tst-auditmod6a tst-auditmod6b tst-auditmod6c \
tst-auditmod7a tst-auditmod7b
tst-auditmod7a tst-auditmod7b \
tst-auditmod10a tst-auditmod10b
$(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so
$(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so
@ -69,6 +70,10 @@ $(objpfx)tst-audit7: $(objpfx)tst-auditmod7a.so
$(objpfx)tst-audit7.out: $(objpfx)tst-auditmod7b.so
tst-audit7-ENV = LD_AUDIT=$(objpfx)tst-auditmod7b.so
$(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so
$(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so
tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so
ifeq (yes,$(config-cflags-avx))
AVX-CFLAGS=-mavx
ifeq (yes,$(config-cflags-novzeroupper))
@ -81,6 +86,12 @@ CFLAGS-tst-auditmod6b.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod6c.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod7b.c += $(AVX-CFLAGS)
endif
ifeq (yes,$(config-cflags-avx512))
AVX512-CFLAGS = -mavx512f
CFLAGS-tst-audit10.c += $(AVX512-CFLAGS)
CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS)
CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS)
endif
endif
ifeq ($(subdir),csu)

View File

@ -95,6 +95,59 @@ fi
config_vars="$config_vars
config-cflags-avx = $libc_cv_cc_avx"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support" >&5
$as_echo_n "checking for AVX512 support... " >&6; }
if ${libc_cv_cc_avx512+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -mavx512f -xc /dev/null -S -o /dev/null'
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
(eval $ac_try) 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
libc_cv_cc_avx512=yes
else
libc_cv_cc_avx512=no
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx512" >&5
$as_echo "$libc_cv_cc_avx512" >&6; }
if test $libc_cv_cc_avx512 = yes; then
$as_echo "#define HAVE_AVX512_SUPPORT 1" >>confdefs.h
fi
config_vars="$config_vars
config-cflags-avx512 = $libc_cv_cc_avx512"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support in assembler" >&5
$as_echo_n "checking for AVX512 support in assembler... " >&6; }
if ${libc_cv_asm_avx512+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.s <<\EOF
vmovdqu64 %zmm0, (%rsp)
EOF
if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
(eval $ac_try) 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then
libc_cv_asm_avx512=yes
else
libc_cv_asm_avx512=no
fi
rm -f conftest*
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512" >&5
$as_echo "$libc_cv_asm_avx512" >&6; }
if test $libc_cv_asm_avx512 == yes; then
$as_echo "#define HAVE_AVX512_ASM_SUPPORT 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
if ${libc_cv_cc_sse2avx+:} false; then :

View File

@ -23,6 +23,30 @@ if test $libc_cv_cc_avx = yes; then
fi
LIBC_CONFIG_VAR([config-cflags-avx], [$libc_cv_cc_avx])
dnl Check if -mavx512f works.
AC_CACHE_CHECK(for AVX512 support, libc_cv_cc_avx512, [dnl
LIBC_TRY_CC_OPTION([-mavx512f], [libc_cv_cc_avx512=yes], [libc_cv_cc_avx512=no])
])
if test $libc_cv_cc_avx512 = yes; then
AC_DEFINE(HAVE_AVX512_SUPPORT)
fi
LIBC_CONFIG_VAR([config-cflags-avx512], [$libc_cv_cc_avx512])
dnl Check if asm supports AVX512.
AC_CACHE_CHECK(for AVX512 support in assembler, libc_cv_asm_avx512, [dnl
cat > conftest.s <<\EOF
vmovdqu64 %zmm0, (%rsp)
EOF
if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
libc_cv_asm_avx512=yes
else
libc_cv_asm_avx512=no
fi
rm -f conftest*])
if test $libc_cv_asm_avx512 == yes; then
AC_DEFINE(HAVE_AVX512_ASM_SUPPORT)
fi
dnl Check if -msse2avx works.
AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
LIBC_TRY_CC_OPTION([-msse2avx],

View File

@ -96,7 +96,7 @@ _dl_runtime_profile:
/* Actively align the La_x86_64_regs structure. */
andq $0xfffffffffffffff0, %rsp
# ifdef HAVE_AVX_SUPPORT
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
/* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers
to detect if any xmm0-xmm7 registers are changed by audit
module. */
@ -130,7 +130,7 @@ _dl_runtime_profile:
movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
# ifdef HAVE_AVX_SUPPORT
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
.data
L(have_avx):
.zero 4
@ -138,7 +138,7 @@ L(have_avx):
.previous
cmpl $0, L(have_avx)(%rip)
jne 1f
jne L(defined)
movq %rbx, %r11 # Save rbx
movl $1, %eax
cpuid
@ -147,18 +147,54 @@ L(have_avx):
// AVX and XSAVE supported?
andl $((1 << 28) | (1 << 27)), %ecx
cmpl $((1 << 28) | (1 << 27)), %ecx
jne 2f
jne 10f
# ifdef HAVE_AVX512_ASM_SUPPORT
// AVX512 supported in processor?
movq %rbx, %r11 # Save rbx
xorl %ecx, %ecx
mov $0x7, %eax
cpuid
andl $(1 << 16), %ebx
# endif
xorl %ecx, %ecx
// Get XFEATURE_ENABLED_MASK
xgetbv
andl $0x6, %eax
2: subl $0x5, %eax
# ifdef HAVE_AVX512_ASM_SUPPORT
test %ebx, %ebx
movq %r11, %rbx # Restore rbx
je 20f
// Verify that XCR0[7:5] = '111b' and
// XCR0[2:1] = '11b' which means
// that zmm state is enabled
andl $0xe6, %eax
cmpl $0xe6, %eax
jne 20f
movl %eax, L(have_avx)(%rip)
L(avx512):
# define RESTORE_AVX
# define VMOV vmovdqu64
# define VEC(i) zmm##i
# define MORE_CODE
# include "dl-trampoline.h"
# undef VMOV
# undef VEC
# undef RESTORE_AVX
# endif
20: andl $0x6, %eax
10: subl $0x5, %eax
movl %eax, L(have_avx)(%rip)
cmpl $0, %eax
1: js L(no_avx)
L(defined):
js L(no_avx)
# ifdef HAVE_AVX512_ASM_SUPPORT
cmpl $0xe6, L(have_avx)(%rip)
je L(avx512)
# endif
# define RESTORE_AVX
# define VMOV vmovdqu
# define VEC(i) ymm##i
# define MORE_CODE
# include "dl-trampoline.h"
@ -180,9 +216,9 @@ L(no_avx):
.align 16
cfi_startproc
_dl_x86_64_save_sse:
# ifdef HAVE_AVX_SUPPORT
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
cmpl $0, L(have_avx)(%rip)
jne 1f
jne L(defined_5)
movq %rbx, %r11 # Save rbx
movl $1, %eax
cpuid
@ -191,21 +227,43 @@ _dl_x86_64_save_sse:
// AVX and XSAVE supported?
andl $((1 << 28) | (1 << 27)), %ecx
cmpl $((1 << 28) | (1 << 27)), %ecx
jne 2f
jne 1f
# ifdef HAVE_AVX512_ASM_SUPPORT
// AVX512 supported in a processor?
movq %rbx, %r11 # Save rbx
xorl %ecx,%ecx
mov $0x7,%eax
cpuid
andl $(1 << 16), %ebx
# endif
xorl %ecx, %ecx
// Get XFEATURE_ENABLED_MASK
xgetbv
andl $0x6, %eax
cmpl $0x6, %eax
// Nonzero if SSE and AVX state saving is enabled.
sete %al
2: leal -1(%eax,%eax), %eax
# ifdef HAVE_AVX512_ASM_SUPPORT
test %ebx, %ebx
movq %r11, %rbx # Restore rbx
je 2f
// Verify that XCR0[7:5] = '111b' and
// XCR0[2:1] = '11b' which means
// that zmm state is enabled
andl $0xe6, %eax
movl %eax, L(have_avx)(%rip)
cmpl $0xe6, %eax
je L(avx512_5)
# endif
2: andl $0x6, %eax
1: subl $0x5, %eax
movl %eax, L(have_avx)(%rip)
cmpl $0, %eax
1: js L(no_avx5)
L(defined_5):
js L(no_avx5)
# ifdef HAVE_AVX512_ASM_SUPPORT
cmpl $0xe6, L(have_avx)(%rip)
je L(avx512_5)
# endif
# define YMM_SIZE 32
vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
@ -215,6 +273,18 @@ _dl_x86_64_save_sse:
vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
ret
# ifdef HAVE_AVX512_ASM_SUPPORT
L(avx512_5):
vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE
vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE
vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE
vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE
vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE
vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE
vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE
vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE
ret
# endif
L(no_avx5):
# endif
movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
@ -235,9 +305,13 @@ L(no_avx5):
.align 16
cfi_startproc
_dl_x86_64_restore_sse:
# ifdef HAVE_AVX_SUPPORT
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
cmpl $0, L(have_avx)(%rip)
js L(no_avx6)
# ifdef HAVE_AVX512_ASM_SUPPORT
cmpl $0xe6, L(have_avx)(%rip)
je L(avx512_6)
# endif
vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
@ -248,6 +322,18 @@ _dl_x86_64_restore_sse:
vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
ret
# ifdef HAVE_AVX512_ASM_SUPPORT
L(avx512_6):
vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0
vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1
vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2
vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3
vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4
vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5
vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6
vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7
ret
# endif
L(no_avx6):
# endif
movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0

View File

@ -19,14 +19,14 @@
#ifdef RESTORE_AVX
/* This is to support AVX audit modules. */
vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp)
vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
/* Save xmm0-xmm7 registers to detect if any of them are
changed by audit module. */
@ -72,7 +72,7 @@
je 2f
vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
jmp 1f
2: vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
2: VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
@ -81,7 +81,7 @@
je 2f
vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
jmp 1f
2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
@ -90,7 +90,7 @@
je 2f
vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
jmp 1f
2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
@ -99,7 +99,7 @@
je 2f
vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
jmp 1f
2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
@ -108,7 +108,7 @@
je 2f
vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
jmp 1f
2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
@ -117,7 +117,7 @@
je 2f
vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
jmp 1f
2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
@ -126,7 +126,7 @@
je 2f
vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
jmp 1f
2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
@ -135,7 +135,7 @@
je 2f
vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
jmp 1f
2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
1:
@ -213,8 +213,8 @@
#ifdef RESTORE_AVX
/* This is to support AVX audit modules. */
vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx)
/* Save xmm0/xmm1 registers to detect if they are changed
by audit module. */
@ -243,13 +243,13 @@
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
jne 1f
vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0)
1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
jne 1f
vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1)
1:
#endif

View File

@ -4,6 +4,8 @@
--
VECTOR_SIZE sizeof (La_x86_64_vector)
XMM_SIZE sizeof (La_x86_64_xmm)
YMM_SIZE sizeof (La_x86_64_ymm)
ZMM_SIZE sizeof (La_x86_64_zmm)
LR_SIZE sizeof (struct La_x86_64_regs)
LR_RDX_OFFSET offsetof (struct La_x86_64_regs, lr_rdx)

View File

@ -0,0 +1,70 @@
/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Test case for x86-64 preserved registers in dynamic linker. */
#ifdef __AVX512F__
#include <stdlib.h>
#include <string.h>
#include <cpuid.h>
#include <immintrin.h>
static int
avx512_enabled (void)
{
unsigned int eax, ebx, ecx, edx;
if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
|| (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
return 0;
__cpuid_count (7, 0, eax, ebx, ecx, edx);
if (!(ebx & bit_AVX512F))
return 0;
asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
/* Verify that ZMM, YMM and XMM states are enabled. */
return (eax & 0xe6) == 0xe6;
}
extern __m512i audit_test (__m512i, __m512i, __m512i, __m512i,
__m512i, __m512i, __m512i, __m512i);
int
main (void)
{
/* Run AVX512 test only if AVX512 is supported. */
if (avx512_enabled ())
{
__m512i zmm = _mm512_setzero_si512 ();
__m512i ret = audit_test (zmm, zmm, zmm, zmm, zmm, zmm, zmm, zmm);
zmm = _mm512_set1_epi64 (0x12349876);
if (memcmp (&zmm, &ret, sizeof (ret)))
abort ();
}
return 0;
}
#else
int
main (void)
{
return 0;
}
#endif

View File

@ -0,0 +1,65 @@
/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Test case for x86-64 preserved registers in dynamic linker. */
#ifdef __AVX512F__
#include <stdlib.h>
#include <string.h>
#include <immintrin.h>
__m512i
audit_test (__m512i x0, __m512i x1, __m512i x2, __m512i x3,
__m512i x4, __m512i x5, __m512i x6, __m512i x7)
{
__m512i zmm;
zmm = _mm512_set1_epi64 (1);
if (memcmp (&zmm, &x0, sizeof (zmm)))
abort ();
zmm = _mm512_set1_epi64 (2);
if (memcmp (&zmm, &x1, sizeof (zmm)))
abort ();
zmm = _mm512_set1_epi64 (3);
if (memcmp (&zmm, &x2, sizeof (zmm)))
abort ();
zmm = _mm512_set1_epi64 (4);
if (memcmp (&zmm, &x3, sizeof (zmm)))
abort ();
zmm = _mm512_set1_epi64 (5);
if (memcmp (&zmm, &x4, sizeof (zmm)))
abort ();
zmm = _mm512_set1_epi64 (6);
if (memcmp (&zmm, &x5, sizeof (zmm)))
abort ();
zmm = _mm512_set1_epi64 (7);
if (memcmp (&zmm, &x6, sizeof (zmm)))
abort ();
zmm = _mm512_set1_epi64 (8);
if (memcmp (&zmm, &x7, sizeof (zmm)))
abort ();
return _mm512_setzero_si512 ();
}
#endif

View File

@ -0,0 +1,219 @@
/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Verify that changing AVX512 registers in audit library won't affect
function parameter passing/return. */
#include <dlfcn.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <bits/wordsize.h>
#include <gnu/lib-names.h>
unsigned int
la_version (unsigned int v)
{
setlinebuf (stdout);
printf ("version: %u\n", v);
char buf[20];
sprintf (buf, "%u", v);
return v;
}
void
la_activity (uintptr_t *cookie, unsigned int flag)
{
if (flag == LA_ACT_CONSISTENT)
printf ("activity: consistent\n");
else if (flag == LA_ACT_ADD)
printf ("activity: add\n");
else if (flag == LA_ACT_DELETE)
printf ("activity: delete\n");
else
printf ("activity: unknown activity %u\n", flag);
}
char *
la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
{
char buf[100];
const char *flagstr;
if (flag == LA_SER_ORIG)
flagstr = "LA_SET_ORIG";
else if (flag == LA_SER_LIBPATH)
flagstr = "LA_SER_LIBPATH";
else if (flag == LA_SER_RUNPATH)
flagstr = "LA_SER_RUNPATH";
else if (flag == LA_SER_CONFIG)
flagstr = "LA_SER_CONFIG";
else if (flag == LA_SER_DEFAULT)
flagstr = "LA_SER_DEFAULT";
else if (flag == LA_SER_SECURE)
flagstr = "LA_SER_SECURE";
else
{
sprintf (buf, "unknown flag %d", flag);
flagstr = buf;
}
printf ("objsearch: %s, %s\n", name, flagstr);
return (char *) name;
}
unsigned int
la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
{
printf ("objopen: %ld, %s\n", lmid, l->l_name);
return 3;
}
void
la_preinit (uintptr_t *cookie)
{
printf ("preinit\n");
}
unsigned int
la_objclose (uintptr_t *cookie)
{
printf ("objclose\n");
return 0;
}
uintptr_t
la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
uintptr_t *defcook, unsigned int *flags, const char *symname)
{
printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
symname, (long int) sym->st_value, ndx, *flags);
return sym->st_value;
}
#include <tst-audit.h>
#ifdef __AVX512F__
#include <immintrin.h>
#include <cpuid.h>
static int
check_avx512 (void)
{
unsigned int eax, ebx, ecx, edx;
if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
|| (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
return 0;
__cpuid_count (7, 0, eax, ebx, ecx, edx);
if (!(ebx & bit_AVX512F))
return 0;
asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
/* Verify that ZMM, YMM and XMM states are enabled. */
return (eax & 0xe6) == 0xe6;
}
#else
#include <emmintrin.h>
#endif
ElfW(Addr)
pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
uintptr_t *defcook, La_regs *regs, unsigned int *flags,
const char *symname, long int *framesizep)
{
printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
symname, (long int) sym->st_value, ndx, *flags);
#ifdef __AVX512F__
if (check_avx512 () && strcmp (symname, "audit_test") == 0)
{
__m512i zero = _mm512_setzero_si512 ();
if (memcmp (&regs->lr_vector[0], &zero, sizeof (zero))
|| memcmp (&regs->lr_vector[1], &zero, sizeof (zero))
|| memcmp (&regs->lr_vector[2], &zero, sizeof (zero))
|| memcmp (&regs->lr_vector[3], &zero, sizeof (zero))
|| memcmp (&regs->lr_vector[4], &zero, sizeof (zero))
|| memcmp (&regs->lr_vector[5], &zero, sizeof (zero))
|| memcmp (&regs->lr_vector[6], &zero, sizeof (zero))
|| memcmp (&regs->lr_vector[7], &zero, sizeof (zero)))
abort ();
for (int i = 0; i < 8; i++)
regs->lr_vector[i].zmm[0]
= (La_x86_64_zmm) _mm512_set1_epi64 (i + 1);
__m512i zmm = _mm512_set1_epi64 (-1);
asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" );
asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" );
asm volatile ("vmovdqa64 %0, %%zmm2" : : "x" (zmm) : "xmm2" );
asm volatile ("vmovdqa64 %0, %%zmm3" : : "x" (zmm) : "xmm3" );
asm volatile ("vmovdqa64 %0, %%zmm4" : : "x" (zmm) : "xmm4" );
asm volatile ("vmovdqa64 %0, %%zmm5" : : "x" (zmm) : "xmm5" );
asm volatile ("vmovdqa64 %0, %%zmm6" : : "x" (zmm) : "xmm6" );
asm volatile ("vmovdqa64 %0, %%zmm7" : : "x" (zmm) : "xmm7" );
*framesizep = 1024;
}
#endif
return sym->st_value;
}
unsigned int
pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
const char *symname)
{
printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
symname, (long int) sym->st_value, ndx,
(ptrdiff_t) outregs->int_retval);
#ifdef __AVX512F__
if (check_avx512 () && strcmp (symname, "audit_test") == 0)
{
__m512i zero = _mm512_setzero_si512 ();
if (memcmp (&outregs->lrv_vector0, &zero, sizeof (zero)))
abort ();
for (int i = 0; i < 8; i++)
{
__m512i zmm = _mm512_set1_epi64 (i + 1);
if (memcmp (&inregs->lr_vector[i], &zmm, sizeof (zmm)) != 0)
abort ();
}
outregs->lrv_vector0.zmm[0]
= (La_x86_64_zmm) _mm512_set1_epi64 (0x12349876);
__m512i zmm = _mm512_set1_epi64 (-1);
asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" );
asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" );
}
#endif
return 0;
}