Implement x86-64 multiarch mempcpy in memcpy

Implement the x86-64 multiarch mempcpy variants inside the corresponding
memcpy source files so that mempcpy and memcpy share most of their code.
This reduces the code size of libc.so.

	[BZ #18858]
	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Remove
	mempcpy-ssse3, mempcpy-ssse3-back, mempcpy-avx-unaligned
	and mempcpy-avx512-no-vzeroupper.
	* sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S (MEMPCPY_CHK):
	New.
	(MEMPCPY): Likewise.
	* sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
	(MEMPCPY_CHK): New.
	(MEMPCPY): Likewise.
	* sysdeps/x86_64/multiarch/memcpy-ssse3-back.S (MEMPCPY_CHK): New.
	(MEMPCPY): Likewise.
	* sysdeps/x86_64/multiarch/memcpy-ssse3.S (MEMPCPY_CHK): New.
	(MEMPCPY): Likewise.
	* sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S: Removed.
	* sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S:
	Likewise.
	* sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S: Likewise.
	* sysdeps/x86_64/multiarch/mempcpy-ssse3.S: Likewise.
This commit is contained in:
H.J. Lu 2016-03-28 13:13:36 -07:00
parent e41b395523
commit c365e615f7
10 changed files with 91 additions and 57 deletions

View File

@ -1,3 +1,25 @@
2016-03-28 H.J. Lu <hongjiu.lu@intel.com>
[BZ #18858]
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Remove
mempcpy-ssse3, mempcpy-ssse3-back, mempcpy-avx-unaligned
and mempcpy-avx512-no-vzeroupper.
* sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S (MEMPCPY_CHK):
New.
(MEMPCPY): Likewise.
* sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
(MEMPCPY_CHK): New.
(MEMPCPY): Likewise.
* sysdeps/x86_64/multiarch/memcpy-ssse3-back.S (MEMPCPY_CHK): New.
(MEMPCPY): Likewise.
* sysdeps/x86_64/multiarch/memcpy-ssse3.S (MEMPCPY_CHK): New.
(MEMPCPY): Likewise.
* sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S: Removed.
* sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S:
Likewise.
* sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S: Likewise.
* sysdeps/x86_64/multiarch/mempcpy-ssse3.S: Likewise.
2016-03-28 H.J. Lu <hongjiu.lu@intel.com>
Amit Pawar <Amit.Pawar@amd.com>

View File

@ -8,10 +8,10 @@ ifeq ($(subdir),string)
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
strcmp-sse2-unaligned strncmp-ssse3 \
memcmp-sse4 memcpy-ssse3 memcpy-sse2-unaligned \
memcpy-avx512-no-vzeroupper mempcpy-ssse3 memmove-ssse3 \
memcpy-ssse3-back mempcpy-ssse3-back memmove-avx-unaligned \
memcpy-avx-unaligned mempcpy-avx-unaligned \
mempcpy-avx512-no-vzeroupper memmove-ssse3-back \
memcpy-avx512-no-vzeroupper memmove-ssse3 \
memcpy-ssse3-back memmove-avx-unaligned \
memcpy-avx-unaligned \
memmove-ssse3-back \
memmove-avx512-no-vzeroupper strcasecmp_l-ssse3 \
strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \

View File

@ -25,11 +25,26 @@
#include "asm-syntax.h"
#ifndef MEMCPY
# define MEMCPY __memcpy_avx_unaligned
# define MEMCPY __memcpy_avx_unaligned
# define MEMCPY_CHK __memcpy_chk_avx_unaligned
# define MEMPCPY __mempcpy_avx_unaligned
# define MEMPCPY_CHK __mempcpy_chk_avx_unaligned
#endif
.section .text.avx,"ax",@progbits
/* The mempcpy entry points are emitted only when this file is built
   with neither USE_AS_MEMPCPY nor USE_AS_MEMMOVE defined.  */
#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
/* Checked mempcpy entry.  Branches to __chk_fail when %rcx is below
   %rdx (unsigned compare).  Under the SysV AMD64 ABI these are the
   fourth and third integer arguments -- presumably the destination
   buffer size and the copy length; confirm against the C prototype.
   There is no other branch here, so when the check passes execution
   continues into MEMPCPY, which follows immediately.  */
ENTRY (MEMPCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMPCPY_CHK)
/* mempcpy entry that reuses the memcpy body.  Sets the return
   register %rax to %rdi + %rdx (first argument plus third argument,
   i.e. the address just past the copied bytes), then jumps to
   L(start), the label placed after MEMCPY's own return-value setup,
   so the copy code itself is shared.  */
ENTRY (MEMPCPY)
movq %rdi, %rax
addq %rdx, %rax
jmp L(start)
END (MEMPCPY)
#endif
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
@ -42,6 +57,7 @@ ENTRY (MEMCPY)
#ifdef USE_AS_MEMPCPY
add %rdx, %rax
#endif
L(start):
cmp $256, %rdx
jae L(256bytesormore)
cmp $16, %dl

View File

@ -27,9 +27,24 @@
#ifndef MEMCPY
# define MEMCPY __memcpy_avx512_no_vzeroupper
# define MEMCPY_CHK __memcpy_chk_avx512_no_vzeroupper
# define MEMPCPY __mempcpy_avx512_no_vzeroupper
# define MEMPCPY_CHK __mempcpy_chk_avx512_no_vzeroupper
#endif
.section .text.avx512,"ax",@progbits
/* The mempcpy entry points are emitted only when this file is built
   with neither USE_AS_MEMPCPY nor USE_AS_MEMMOVE defined.  */
#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
/* Checked mempcpy entry.  Branches to __chk_fail when %rcx is below
   %rdx (unsigned compare).  Under the SysV AMD64 ABI these are the
   fourth and third integer arguments -- presumably the destination
   buffer size and the copy length; confirm against the C prototype.
   There is no other branch here, so when the check passes execution
   continues into MEMPCPY, which follows immediately.  */
ENTRY (MEMPCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMPCPY_CHK)
/* mempcpy entry that reuses the memcpy body.  Sets the return
   register %rax to %rdi + %rdx (first argument plus third argument,
   i.e. the address just past the copied bytes), then jumps to
   L(start), the label placed after MEMCPY's own return-value setup,
   so the copy code itself is shared.  */
ENTRY (MEMPCPY)
movq %rdi, %rax
addq %rdx, %rax
jmp L(start)
END (MEMPCPY)
#endif
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
@ -42,6 +57,7 @@ ENTRY (MEMCPY)
#ifdef USE_AS_MEMPCPY
add %rdx, %rax
#endif
L(start):
lea (%rsi, %rdx), %rcx
lea (%rdi, %rdx), %r9
cmp $512, %rdx

View File

@ -29,6 +29,8 @@
#ifndef MEMCPY
# define MEMCPY __memcpy_ssse3_back
# define MEMCPY_CHK __memcpy_chk_ssse3_back
# define MEMPCPY __mempcpy_ssse3_back
# define MEMPCPY_CHK __mempcpy_chk_ssse3_back
#endif
#define JMPTBL(I, B) I - B
@ -44,6 +46,19 @@
ud2
.section .text.ssse3,"ax",@progbits
/* The mempcpy entry points are emitted only when this file is built
   with neither USE_AS_MEMPCPY nor USE_AS_MEMMOVE defined.  */
#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
/* Checked mempcpy entry.  Branches to __chk_fail when %rcx is below
   %rdx (unsigned compare).  Under the SysV AMD64 ABI these are the
   fourth and third integer arguments -- presumably the destination
   buffer size and the copy length; confirm against the C prototype.
   There is no other branch here, so when the check passes execution
   continues into MEMPCPY, which follows immediately.  */
ENTRY (MEMPCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMPCPY_CHK)
/* mempcpy entry that reuses the memcpy body.  Sets the return
   register %rax to %rdi + %rdx (first argument plus third argument,
   i.e. the address just past the copied bytes), then jumps to
   L(start), the label placed just before the forward-copy size
   dispatch in MEMCPY, so the copy code itself is shared.  */
ENTRY (MEMPCPY)
movq %rdi, %rax
addq %rdx, %rax
jmp L(start)
END (MEMPCPY)
#endif
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
@ -66,6 +81,7 @@ ENTRY (MEMCPY)
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
L(copy_forward):
#endif
L(start):
cmp $144, %rdx
jae L(144bytesormore)

View File

@ -29,6 +29,8 @@
#ifndef MEMCPY
# define MEMCPY __memcpy_ssse3
# define MEMCPY_CHK __memcpy_chk_ssse3
# define MEMPCPY __mempcpy_ssse3
# define MEMPCPY_CHK __mempcpy_chk_ssse3
#endif
#define JMPTBL(I, B) I - B
@ -44,6 +46,19 @@
ud2
.section .text.ssse3,"ax",@progbits
/* The mempcpy entry points are emitted only when this file is built
   with neither USE_AS_MEMPCPY nor USE_AS_MEMMOVE defined.  */
#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
/* Checked mempcpy entry.  Branches to __chk_fail when %rcx is below
   %rdx (unsigned compare).  Under the SysV AMD64 ABI these are the
   fourth and third integer arguments -- presumably the destination
   buffer size and the copy length; confirm against the C prototype.
   There is no other branch here, so when the check passes execution
   continues into MEMPCPY, which follows immediately.  */
ENTRY (MEMPCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMPCPY_CHK)
/* mempcpy entry that reuses the memcpy body.  Sets the return
   register %rax to %rdi + %rdx (first argument plus third argument,
   i.e. the address just past the copied bytes), then jumps to
   L(start), the label placed just before the forward-copy size
   dispatch in MEMCPY, so the copy code itself is shared.  */
ENTRY (MEMPCPY)
movq %rdi, %rax
addq %rdx, %rax
jmp L(start)
END (MEMPCPY)
#endif
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
@ -66,6 +81,7 @@ ENTRY (MEMCPY)
jmp L(copy_backward)
L(copy_forward):
#endif
L(start):
cmp $79, %rdx
lea L(table_less_80bytes)(%rip), %r11
ja L(80bytesormore)

View File

@ -1,22 +0,0 @@
/* mempcpy with AVX
Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Build the mempcpy variant by re-including the memcpy source with
   USE_AS_MEMPCPY defined and the entry-point names overridden; the
   included file only supplies default names when MEMCPY is not
   already defined.  */
#define USE_AS_MEMPCPY
#define MEMCPY __mempcpy_avx_unaligned
#define MEMCPY_CHK __mempcpy_chk_avx_unaligned
#include "memcpy-avx-unaligned.S"

View File

@ -1,22 +0,0 @@
/* mempcpy optimized with AVX512 for KNL hardware.
Copyright (C) 2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Build the mempcpy variant by re-including the memcpy source with
   USE_AS_MEMPCPY defined and the entry-point names overridden; the
   included file only supplies default names when MEMCPY is not
   already defined.  */
#define USE_AS_MEMPCPY
#define MEMCPY __mempcpy_avx512_no_vzeroupper
#define MEMCPY_CHK __mempcpy_chk_avx512_no_vzeroupper
#include "memcpy-avx512-no-vzeroupper.S"

View File

@ -1,4 +0,0 @@
/* Build the mempcpy variant by re-including the memcpy source with
   USE_AS_MEMPCPY defined and the entry-point names overridden; the
   included file only supplies default names when MEMCPY is not
   already defined.  */
#define USE_AS_MEMPCPY
#define MEMCPY __mempcpy_ssse3_back
#define MEMCPY_CHK __mempcpy_chk_ssse3_back
#include "memcpy-ssse3-back.S"

View File

@ -1,4 +0,0 @@
/* Build the mempcpy variant by re-including the memcpy source with
   USE_AS_MEMPCPY defined and the entry-point names overridden; the
   included file only supplies default names when MEMCPY is not
   already defined.  */
#define USE_AS_MEMPCPY
#define MEMCPY __mempcpy_ssse3
#define MEMCPY_CHK __mempcpy_chk_ssse3
#include "memcpy-ssse3.S"