x86-64: Fix memcpy IFUNC selection

Check Fast_Unaligned_Load instead of Slow_BSF, and also check for
Fast_Copy_Backward to enable __memcpy_ssse3_back.  The existing selection
order is updated to the following (a C sketch of the resulting logic
appears after the ChangeLog entry below):

1. __memcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set.
2. __memcpy_sse2_unaligned if Fast_Unaligned_Load bit is set.
3. __memcpy_sse2 if SSSE3 isn't available.
4. __memcpy_ssse3_back if Fast_Copy_Backward bit is set.
5. __memcpy_ssse3 otherwise.

	[BZ #18880]
	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
	instead of Slow_BSF, and also check for Fast_Copy_Backward to
	enable __memcpy_ssse3_back.
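
For clarity, the sketch below is a hypothetical, self-contained C rendering of
this selection order; it is not glibc code.  The real resolver is the assembly
in sysdeps/x86_64/multiarch/memcpy.S (diffed further down), the CPU feature
bits are modelled here as plain ints, and select_memcpy is an illustrative
helper that merely returns the name of the variant that would be chosen.

/* Hypothetical C rendering of the new memcpy IFUNC selection order.
   Not glibc code; feature bits are plain ints for illustration.  */
#include <stdio.h>

static const char *
select_memcpy (int avx_fast_unaligned_load, int fast_unaligned_load,
               int has_ssse3, int fast_copy_backward)
{
  if (avx_fast_unaligned_load)
    return "__memcpy_avx_unaligned";   /* 1. AVX_Fast_Unaligned_Load set.  */
  if (fast_unaligned_load)
    return "__memcpy_sse2_unaligned";  /* 2. Fast_Unaligned_Load set.  */
  if (!has_ssse3)
    return "__memcpy_sse2";            /* 3. SSSE3 not available.  */
  if (fast_copy_backward)
    return "__memcpy_ssse3_back";      /* 4. Fast_Copy_Backward set.  */
  return "__memcpy_ssse3";             /* 5. Default SSSE3 variant.  */
}

int
main (void)
{
  /* A CPU with SSSE3 and Fast_Copy_Backward but slow unaligned loads
     would now get __memcpy_ssse3_back.  */
  puts (select_memcpy (0, 0, 1, 1));
  return 0;
}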
Author: H.J. Lu
Date:   2016-03-04 08:37:40 -08:00
Commit: 14a1d7cc4c
Parent: 4b230f6a60
2 changed files with 22 additions and 13 deletions

ChangeLog

@@ -1,3 +1,11 @@
+2016-03-04  Amit Pawar  <Amit.Pawar@amd.com>
+	    H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #18880]
+	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
+	instead of Slow_BSF, and also check for Fast_Copy_Backward to
+	enable __memcpy_ssse3_back.
+
 2016-03-03  H.J. Lu  <hongjiu.lu@intel.com>
 
 	[BZ #19758]

sysdeps/x86_64/multiarch/memcpy.S

@@ -35,22 +35,23 @@ ENTRY(__new_memcpy)
 	jz	1f
 	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
 	jz	1f
-	leaq	__memcpy_avx512_no_vzeroupper(%rip), %rax
+	lea	__memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
 	ret
 #endif
-1:	leaq	__memcpy_avx_unaligned(%rip), %rax
+1:	lea	__memcpy_avx_unaligned(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	2f
-	ret
-2:	leaq	__memcpy_sse2(%rip), %rax
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jnz	3f
-	leaq	__memcpy_sse2_unaligned(%rip), %rax
-	ret
-3:	HAS_CPU_FEATURE (SSSE3)
-	jz	4f
-	leaq	__memcpy_ssse3(%rip), %rax
-4:	ret
+	jnz	2f
+	lea	__memcpy_sse2_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_sse2(%rip), %RAX_LP
+	HAS_CPU_FEATURE (SSSE3)
+	jz	2f
+	lea	__memcpy_ssse3_back(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Copy_Backward)
+	jnz	2f
+	lea	__memcpy_ssse3(%rip), %RAX_LP
+2:	ret
 END(__new_memcpy)
 
 # undef ENTRY
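
For readers unfamiliar with the mechanism being tuned here, the sketch below
shows a GNU IFUNC written in C rather than assembly.  It is a minimal
illustration under stated assumptions: my_memcpy, my_memcpy_bytewise and
resolve_my_memcpy are hypothetical names rather than glibc symbols, and it
assumes GCC on a GNU/Linux (ELF) target where the ifunc attribute is
supported.  The resolver runs once when the symbol is bound and returns the
implementation to use, which is what __new_memcpy above does when it picks
among the __memcpy_* variants.

/* Minimal, hypothetical sketch of a GNU IFUNC in C.  Not glibc code.  */
#include <stddef.h>
#include <stdio.h>

static void *
my_memcpy_bytewise (void *dst, const void *src, size_t n)
{
  char *d = dst;
  const char *s = src;
  while (n--)
    *d++ = *s++;
  return dst;
}

/* The resolver is called once, when the symbol is bound, and returns the
   implementation to use; glibc's resolver instead inspects CPU feature
   bits before choosing a __memcpy_* variant.  */
static void *(*resolve_my_memcpy (void)) (void *, const void *, size_t)
{
  return my_memcpy_bytewise;
}

void *my_memcpy (void *, const void *, size_t)
  __attribute__ ((ifunc ("resolve_my_memcpy")));

int
main (void)
{
  char buf[6];
  my_memcpy (buf, "hello", 6);
  puts (buf);                  /* prints "hello" */
  return 0;
}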