32bit memcmp/strcmp/strncmp optimized for SSSE3/SSE4.2
This commit is contained in:
parent
0ab85ce429
commit
904057bc17
16
ChangeLog
16
ChangeLog
@ -1,3 +1,19 @@
|
||||
2010-02-12 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
|
||||
strcmp-ssse3, strcmp-sse4, strncmp-c, strncmp-ssse3, strncmp-sse4,
|
||||
memcmp-c, memcmp-ssse3, and memcmp-sse4.
|
||||
* sysdeps/i386/i686/multiarch/memcmp-sse4.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/memcmp-ssse3.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/memcmp.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/strcmp-sse4.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/strcmp-ssse3.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/strcmp.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/strncmp-c.c: New file.
|
||||
* sysdeps/i386/i686/multiarch/strncmp-sse4.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/strncmp-ssse3.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/strncmp.S: New file.
|
||||
|
||||
2010-02-12 Luis Machado <luisgpm@br.ibm.com>
|
||||
|
||||
* sysdeps/powerpc/powerpc32/dl-machine.h: Removed old PPC_REL16 check.
|
||||
|
@ -7,7 +7,9 @@ ifeq ($(subdir),string)
|
||||
sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
|
||||
memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
|
||||
memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
|
||||
memset-sse2-rep bzero-sse2-rep
|
||||
memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
|
||||
strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
|
||||
memcmp-ssse3 memcmp-sse4
|
||||
ifeq (yes,$(config-cflags-sse4))
|
||||
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
|
||||
CFLAGS-strcspn-c.c += -msse4
|
||||
|
988
sysdeps/i386/i686/multiarch/memcmp-sse4.S
Normal file
988
sysdeps/i386/i686/multiarch/memcmp-sse4.S
Normal file
@ -0,0 +1,988 @@
|
||||
/* memcmp with SSE4.2
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#ifndef NOT_IN_libc
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
#ifndef MEMCMP
|
||||
# define MEMCMP __memcmp_sse4_2
|
||||
#endif
|
||||
|
||||
/* Unwind-info bookkeeping for a 4-byte push of REG: the CFA offset grows
   by 4 and REG is recorded as saved at offset 0.  */
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
/* Inverse of CFI_PUSH: the CFA offset shrinks by 4 and REG is marked as
   restored.  */
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
/* Push/pop REG and keep the unwind info in sync with the stack.  */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
/* Offsets of the three arguments from %esp.  They are read at function
   entry before anything is pushed (PARMS presumably skips the 4-byte
   return address -- confirm).  */
#define PARMS 4
|
||||
#define BLK1 PARMS
|
||||
#define BLK2 BLK1+4
|
||||
#define LEN BLK2+4
|
||||
/* Pop the saved %ebx and return.  The CFI_PUSH after the ret only resets
   the unwind state to "%ebx pushed" for the code that follows the ret in
   the file.  */
#define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
|
||||
|
||||
|
||||
#ifdef SHARED
|
||||
/* SHARED build: each table entry holds the offset of label I from the
   table base B.  */
# define JMPTBL(I, B) I - B
|
||||
|
||||
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
|
||||
jump table with relative offsets. INDEX is a register that contains the
|
||||
index into the jump table. SCALE is the scale of INDEX. EBX is clobbered. */
|
||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||
/* We first load PC into EBX. */ \
|
||||
call __i686.get_pc_thunk.bx; \
|
||||
/* Get the address of the jump table. */ \
|
||||
addl $(TABLE - .), %ebx; \
|
||||
/* Get the entry and convert the relative offset to the \
|
||||
absolute address. */ \
|
||||
addl (%ebx,INDEX,SCALE), %ebx; \
|
||||
/* We loaded the jump table entry; callers have already adjusted EAX/EDX. Go. */ \
|
||||
jmp *%ebx
|
||||
|
||||
.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
|
||||
.globl __i686.get_pc_thunk.bx
|
||||
.hidden __i686.get_pc_thunk.bx
|
||||
ALIGN (4)
|
||||
.type __i686.get_pc_thunk.bx,@function
|
||||
__i686.get_pc_thunk.bx:
|
||||
movl (%esp), %ebx
|
||||
ret
|
||||
#else
|
||||
/* Non-SHARED build: each table entry is the address of label I itself;
   B is unused.  */
# define JMPTBL(I, B) I
|
||||
|
||||
/* Branch to an entry in a jump table. TABLE is a jump table with
|
||||
absolute addresses (see JMPTBL above). INDEX is a register that contains the
|
||||
index into the jump table. SCALE is the scale of INDEX. EBX is not used. */
|
||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||
jmp *TABLE(,INDEX,SCALE)
|
||||
#endif
|
||||
|
||||
.section .text.sse4.2,"ax",@progbits
|
||||
ENTRY (MEMCMP)
|
||||
/* Load the arguments: EAX = BLK1, EDX = BLK2, ECX = LEN (bytes).  */
movl BLK1(%esp), %eax
|
||||
movl BLK2(%esp), %edx
|
||||
movl LEN(%esp), %ecx
|
||||
/* Lengths 0 and 1 are handled separately, without saving EBX.  */
cmp $1, %ecx
|
||||
jbe L(less1bytes)
|
||||
/* XMM0 = 0; it stays zero and is the second operand of every ptest.  */
pxor %xmm0, %xmm0
|
||||
/* More than 64 bytes goes to the 64-bytes-per-iteration loop.  */
cmp $64, %ecx
|
||||
ja L(64bytesormore)
|
||||
/* pushl does not modify the flags, so the jb below still acts on the
   result of this compare against 8.  */
cmp $8, %ecx
|
||||
PUSH (%ebx)
|
||||
jb L(less8bytes)
|
||||
/* 8..64 bytes: point EDX/EAX just past the end of each buffer and jump to
   the handler for exactly ECX bytes.  The handlers address the data with
   negative offsets from these end pointers.  */
add %ecx, %edx
|
||||
add %ecx, %eax
|
||||
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
|
||||
/* Lengths 2..7 (0, 1 and >= 8 were dispatched before reaching here), with
   EBX already pushed.  Bytes are compared one at a time through BL.
   Bytes 0 and 1 are compared unconditionally; after that the length is
   checked before each further byte is read.  */
L(less8bytes):
|
||||

||||
/* cmpb computes BL - (%edx): first-buffer byte minus second-buffer byte.  */
mov (%eax), %bl
|
||||
cmpb (%edx), %bl
|
||||
jne L(nonzero)
|
||||

||||
mov 1(%eax), %bl
|
||||
cmpb 1(%edx), %bl
|
||||
jne L(nonzero)
|
||||

||||
cmp $2, %ecx
|
||||
jz L(0bytes)
|
||||

||||
mov 2(%eax), %bl
|
||||
cmpb 2(%edx), %bl
|
||||
jne L(nonzero)
|
||||

||||
cmp $3, %ecx
|
||||
jz L(0bytes)
|
||||

||||
mov 3(%eax), %bl
|
||||
cmpb 3(%edx), %bl
|
||||
jne L(nonzero)
|
||||

||||
cmp $4, %ecx
|
||||
jz L(0bytes)
|
||||

||||
mov 4(%eax), %bl
|
||||
cmpb 4(%edx), %bl
|
||||
jne L(nonzero)
|
||||

||||
cmp $5, %ecx
|
||||
jz L(0bytes)
|
||||

||||
mov 5(%eax), %bl
|
||||
cmpb 5(%edx), %bl
|
||||
jne L(nonzero)
|
||||

||||
cmp $6, %ecx
|
||||
jz L(0bytes)
|
||||

||||
/* Only reached when the length is 7: if the last byte matches the buffers
   are equal, otherwise fall through with the flags of this compare.  */
mov 6(%eax), %bl
|
||||
cmpb 6(%edx), %bl
|
||||
je L(0bytes)
|
||||
/* A byte differed.  popl and mov leave the flags untouched, so ja still
   tests the failing cmpb: return 1 if the first-buffer byte is above the
   second-buffer byte (unsigned), otherwise -1.  */
L(nonzero):
|
||||
POP (%ebx)
|
||||
mov $1, %eax
|
||||
ja L(above)
|
||||
neg %eax
|
||||
L(above):
|
||||
ret
|
||||
/* The code following this ret runs with EBX still pushed; put the unwind
   state back accordingly.  */
CFI_PUSH (%ebx)
|
||||
|
||||
ALIGN (4)
|
||||
L(0bytes):
|
||||
POP (%ebx)
|
||||
xor %eax, %eax
|
||||
ret
|
||||
CFI_PUSH (%ebx)
|
||||
|
||||
ALIGN (4)
|
||||
L(less1bytes):
|
||||
jb L(0bytesend)
|
||||
movzbl (%eax), %eax
|
||||
movzbl (%edx), %edx
|
||||
sub %edx, %eax
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(0bytesend):
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
/* Length > 64.  Compare 64 bytes per iteration as four unaligned 16-byte
   chunks.  On entry to each iteration:
     EAX/EDX = current position in the first/second buffer,
     ECX     = 64 (chunk size),
     EBX     = bytes remaining, including the current 64, minus 64.
   XMM0 was zeroed at function entry.  */
L(64bytesormore):
|
||||
PUSH (%ebx)
|
||||
mov %ecx, %ebx
|
||||
mov $64, %ecx
|
||||
sub $64, %ebx
|
||||
L(64bytesormore_loop):
|
||||
movdqu (%eax), %xmm1
|
||||
movdqu (%edx), %xmm2
|
||||
/* XMM2 = XOR of the two chunks, non-zero iff they differ.  With XMM0 = 0,
   ptest sets CF only when XMM2 is all zero, so jnc means "difference
   found".  */
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(find_16diff)
|
||||

||||
movdqu 16(%eax), %xmm1
|
||||
movdqu 16(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(find_32diff)
|
||||

||||
movdqu 32(%eax), %xmm1
|
||||
movdqu 32(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(find_48diff)
|
||||

||||
movdqu 48(%eax), %xmm1
|
||||
movdqu 48(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(find_64diff)
|
||||
/* All 64 bytes matched: advance, and loop while at least another full
   64 bytes remain (no borrow from the subtraction).  */
add %ecx, %eax
|
||||
add %ecx, %edx
|
||||
sub %ecx, %ebx
|
||||
jae L(64bytesormore_loop)
|
||||
/* EBX is now negative: ECX = 64 + EBX = bytes left (0..63).  Move both
   pointers past the end of the buffers and dispatch on the remaining
   length, as the entry code does for short inputs.  */
add %ebx, %ecx
|
||||
add %ecx, %edx
|
||||
add %ecx, %eax
|
||||
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
|
||||
/* Fall-through chain: ECX (64 on entry) ends up as 16/32/48/64, i.e. the
   offset just past the 16-byte chunk that differs.  */
L(find_16diff):
|
||||
sub $16, %ecx
|
||||
L(find_32diff):
|
||||
sub $16, %ecx
|
||||
L(find_48diff):
|
||||
sub $16, %ecx
|
||||
L(find_64diff):
|
||||
/* Point EDX/EAX just past the differing chunk; L(16bytes) then re-compares
   the 16 bytes before them dword by dword to locate the difference.  */
add %ecx, %edx
|
||||
add %ecx, %eax
|
||||
jmp L(16bytes)
|
||||
ALIGN (4)
|
||||
|
||||
L(16bytes):
|
||||
mov -16(%eax), %ecx
|
||||
mov -16(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(12bytes):
|
||||
mov -12(%eax), %ecx
|
||||
mov -12(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(8bytes):
|
||||
mov -8(%eax), %ecx
|
||||
mov -8(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(4bytes):
|
||||
mov -4(%eax), %ecx
|
||||
mov -4(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
L(49bytes):
|
||||
movdqu -49(%eax), %xmm1
|
||||
movdqu -49(%edx), %xmm2
|
||||
mov $-49, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(33bytes):
|
||||
movdqu -33(%eax), %xmm1
|
||||
movdqu -33(%edx), %xmm2
|
||||
mov $-33, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(17bytes):
|
||||
mov -17(%eax), %ecx
|
||||
mov -17(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(13bytes):
|
||||
mov -13(%eax), %ecx
|
||||
mov -13(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(9bytes):
|
||||
mov -9(%eax), %ecx
|
||||
mov -9(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(5bytes):
|
||||
mov -5(%eax), %ecx
|
||||
mov -5(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movzbl -1(%eax), %ecx
|
||||
cmp -1(%edx), %cl
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
L(50bytes):
|
||||
mov $-50, %ebx
|
||||
movdqu -50(%eax), %xmm1
|
||||
movdqu -50(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(34bytes):
|
||||
mov $-34, %ebx
|
||||
movdqu -34(%eax), %xmm1
|
||||
movdqu -34(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(18bytes):
|
||||
mov -18(%eax), %ecx
|
||||
mov -18(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(14bytes):
|
||||
mov -14(%eax), %ecx
|
||||
mov -14(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(10bytes):
|
||||
mov -10(%eax), %ecx
|
||||
mov -10(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(6bytes):
|
||||
mov -6(%eax), %ecx
|
||||
mov -6(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(2bytes):
|
||||
movzwl -2(%eax), %ecx
|
||||
movzwl -2(%edx), %ebx
|
||||
cmp %bl, %cl
|
||||
jne L(end)
|
||||
cmp %bh, %ch
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
L(51bytes):
|
||||
mov $-51, %ebx
|
||||
movdqu -51(%eax), %xmm1
|
||||
movdqu -51(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(35bytes):
|
||||
mov $-35, %ebx
|
||||
movdqu -35(%eax), %xmm1
|
||||
movdqu -35(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(19bytes):
|
||||
movl -19(%eax), %ecx
|
||||
movl -19(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(15bytes):
|
||||
movl -15(%eax), %ecx
|
||||
movl -15(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(11bytes):
|
||||
movl -11(%eax), %ecx
|
||||
movl -11(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(7bytes):
|
||||
movl -7(%eax), %ecx
|
||||
movl -7(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
L(3bytes):
|
||||
movzwl -3(%eax), %ecx
|
||||
movzwl -3(%edx), %ebx
|
||||
cmpb %bl, %cl
|
||||
jne L(end)
|
||||
cmp %bx, %cx
|
||||
jne L(end)
|
||||
L(1bytes):
|
||||
movzbl -1(%eax), %eax
|
||||
cmpb -1(%edx), %al
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
|
||||
L(52bytes):
|
||||
movdqu -52(%eax), %xmm1
|
||||
movdqu -52(%edx), %xmm2
|
||||
mov $-52, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(36bytes):
|
||||
movdqu -36(%eax), %xmm1
|
||||
movdqu -36(%edx), %xmm2
|
||||
mov $-36, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(20bytes):
|
||||
movdqu -20(%eax), %xmm1
|
||||
movdqu -20(%edx), %xmm2
|
||||
mov $-20, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
mov -4(%eax), %ecx
|
||||
mov -4(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
|
||||
L(53bytes):
|
||||
movdqu -53(%eax), %xmm1
|
||||
movdqu -53(%edx), %xmm2
|
||||
mov $-53, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(37bytes):
|
||||
mov $-37, %ebx
|
||||
movdqu -37(%eax), %xmm1
|
||||
movdqu -37(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(21bytes):
|
||||
mov $-21, %ebx
|
||||
movdqu -21(%eax), %xmm1
|
||||
movdqu -21(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
mov -5(%eax), %ecx
|
||||
mov -5(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movzbl -1(%eax), %ecx
|
||||
cmp -1(%edx), %cl
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
L(54bytes):
|
||||
movdqu -54(%eax), %xmm1
|
||||
movdqu -54(%edx), %xmm2
|
||||
mov $-54, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(38bytes):
|
||||
mov $-38, %ebx
|
||||
movdqu -38(%eax), %xmm1
|
||||
movdqu -38(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(22bytes):
|
||||
mov $-22, %ebx
|
||||
movdqu -22(%eax), %xmm1
|
||||
movdqu -22(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
|
||||
mov -6(%eax), %ecx
|
||||
mov -6(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movzwl -2(%eax), %ecx
|
||||
movzwl -2(%edx), %ebx
|
||||
cmp %bl, %cl
|
||||
jne L(end)
|
||||
cmp %bh, %ch
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
L(55bytes):
|
||||
movdqu -55(%eax), %xmm1
|
||||
movdqu -55(%edx), %xmm2
|
||||
mov $-55, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(39bytes):
|
||||
mov $-39, %ebx
|
||||
movdqu -39(%eax), %xmm1
|
||||
movdqu -39(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(23bytes):
|
||||
mov $-23, %ebx
|
||||
movdqu -23(%eax), %xmm1
|
||||
movdqu -23(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
movl -7(%eax), %ecx
|
||||
movl -7(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movzwl -3(%eax), %ecx
|
||||
movzwl -3(%edx), %ebx
|
||||
cmpb %bl, %cl
|
||||
jne L(end)
|
||||
cmp %bx, %cx
|
||||
jne L(end)
|
||||
movzbl -1(%eax), %eax
|
||||
cmpb -1(%edx), %al
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
L(56bytes):
|
||||
movdqu -56(%eax), %xmm1
|
||||
movdqu -56(%edx), %xmm2
|
||||
mov $-56, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(40bytes):
|
||||
mov $-40, %ebx
|
||||
movdqu -40(%eax), %xmm1
|
||||
movdqu -40(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(24bytes):
|
||||
mov $-24, %ebx
|
||||
movdqu -24(%eax), %xmm1
|
||||
movdqu -24(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
|
||||
mov -8(%eax), %ecx
|
||||
mov -8(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
|
||||
mov -4(%eax), %ecx
|
||||
mov -4(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
|
||||
L(57bytes):
|
||||
movdqu -57(%eax), %xmm1
|
||||
movdqu -57(%edx), %xmm2
|
||||
mov $-57, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(41bytes):
|
||||
mov $-41, %ebx
|
||||
movdqu -41(%eax), %xmm1
|
||||
movdqu -41(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(25bytes):
|
||||
mov $-25, %ebx
|
||||
movdqu -25(%eax), %xmm1
|
||||
movdqu -25(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
mov -9(%eax), %ecx
|
||||
mov -9(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
mov -5(%eax), %ecx
|
||||
mov -5(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movzbl -1(%eax), %ecx
|
||||
cmp -1(%edx), %cl
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
L(58bytes):
|
||||
movdqu -58(%eax), %xmm1
|
||||
movdqu -58(%edx), %xmm2
|
||||
mov $-58, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(42bytes):
|
||||
mov $-42, %ebx
|
||||
movdqu -42(%eax), %xmm1
|
||||
movdqu -42(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(26bytes):
|
||||
mov $-26, %ebx
|
||||
movdqu -26(%eax), %xmm1
|
||||
movdqu -26(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
|
||||
mov -10(%eax), %ecx
|
||||
mov -10(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
|
||||
mov -6(%eax), %ecx
|
||||
mov -6(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
|
||||
movzwl -2(%eax), %ecx
|
||||
movzwl -2(%edx), %ebx
|
||||
cmp %bl, %cl
|
||||
jne L(end)
|
||||
cmp %bh, %ch
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
L(59bytes):
|
||||
movdqu -59(%eax), %xmm1
|
||||
movdqu -59(%edx), %xmm2
|
||||
mov $-59, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(43bytes):
|
||||
mov $-43, %ebx
|
||||
movdqu -43(%eax), %xmm1
|
||||
movdqu -43(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(27bytes):
|
||||
mov $-27, %ebx
|
||||
movdqu -27(%eax), %xmm1
|
||||
movdqu -27(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
movl -11(%eax), %ecx
|
||||
movl -11(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movl -7(%eax), %ecx
|
||||
movl -7(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movzwl -3(%eax), %ecx
|
||||
movzwl -3(%edx), %ebx
|
||||
cmpb %bl, %cl
|
||||
jne L(end)
|
||||
cmp %bx, %cx
|
||||
jne L(end)
|
||||
movzbl -1(%eax), %eax
|
||||
cmpb -1(%edx), %al
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
L(60bytes):
|
||||
movdqu -60(%eax), %xmm1
|
||||
movdqu -60(%edx), %xmm2
|
||||
mov $-60, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(44bytes):
|
||||
mov $-44, %ebx
|
||||
movdqu -44(%eax), %xmm1
|
||||
movdqu -44(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(28bytes):
|
||||
mov $-28, %ebx
|
||||
movdqu -28(%eax), %xmm1
|
||||
movdqu -28(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
mov -12(%eax), %ecx
|
||||
mov -12(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
mov -8(%eax), %ecx
|
||||
mov -8(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
mov -4(%eax), %ecx
|
||||
mov -4(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
|
||||
L(61bytes):
|
||||
movdqu -61(%eax), %xmm1
|
||||
movdqu -61(%edx), %xmm2
|
||||
mov $-61, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(45bytes):
|
||||
mov $-45, %ebx
|
||||
movdqu -45(%eax), %xmm1
|
||||
movdqu -45(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(29bytes):
|
||||
mov $-29, %ebx
|
||||
movdqu -29(%eax), %xmm1
|
||||
movdqu -29(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
|
||||
mov -13(%eax), %ecx
|
||||
mov -13(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
|
||||
mov -9(%eax), %ecx
|
||||
mov -9(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
|
||||
mov -5(%eax), %ecx
|
||||
mov -5(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movzbl -1(%eax), %ecx
|
||||
cmp -1(%edx), %cl
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
L(62bytes):
|
||||
movdqu -62(%eax), %xmm1
|
||||
movdqu -62(%edx), %xmm2
|
||||
mov $-62, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(46bytes):
|
||||
mov $-46, %ebx
|
||||
movdqu -46(%eax), %xmm1
|
||||
movdqu -46(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(30bytes):
|
||||
mov $-30, %ebx
|
||||
movdqu -30(%eax), %xmm1
|
||||
movdqu -30(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
mov -14(%eax), %ecx
|
||||
mov -14(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
mov -10(%eax), %ecx
|
||||
mov -10(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
mov -6(%eax), %ecx
|
||||
mov -6(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movzwl -2(%eax), %ecx
|
||||
movzwl -2(%edx), %ebx
|
||||
cmp %bl, %cl
|
||||
jne L(end)
|
||||
cmp %bh, %ch
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
L(63bytes):
|
||||
movdqu -63(%eax), %xmm1
|
||||
movdqu -63(%edx), %xmm2
|
||||
mov $-63, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(47bytes):
|
||||
mov $-47, %ebx
|
||||
movdqu -47(%eax), %xmm1
|
||||
movdqu -47(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(31bytes):
|
||||
mov $-31, %ebx
|
||||
movdqu -31(%eax), %xmm1
|
||||
movdqu -31(%edx), %xmm2
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
|
||||
movl -15(%eax), %ecx
|
||||
movl -15(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movl -11(%eax), %ecx
|
||||
movl -11(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movl -7(%eax), %ecx
|
||||
movl -7(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
movzwl -3(%eax), %ecx
|
||||
movzwl -3(%edx), %ebx
|
||||
cmpb %bl, %cl
|
||||
jne L(end)
|
||||
cmp %bx, %cx
|
||||
jne L(end)
|
||||
movzbl -1(%eax), %eax
|
||||
cmpb -1(%edx), %al
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
L(64bytes):
|
||||
movdqu -64(%eax), %xmm1
|
||||
movdqu -64(%edx), %xmm2
|
||||
mov $-64, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(48bytes):
|
||||
movdqu -48(%eax), %xmm1
|
||||
movdqu -48(%edx), %xmm2
|
||||
mov $-48, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
L(32bytes):
|
||||
movdqu -32(%eax), %xmm1
|
||||
movdqu -32(%edx), %xmm2
|
||||
mov $-32, %ebx
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
|
||||
mov -16(%eax), %ecx
|
||||
mov -16(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
|
||||
mov -12(%eax), %ecx
|
||||
mov -12(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
|
||||
mov -8(%eax), %ecx
|
||||
mov -8(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||
|
||||
mov -4(%eax), %ecx
|
||||
mov -4(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
|
||||
/* Entered from the tail handlers when a 16-byte ptest found a difference.
   EBX holds the (negative) offset of that chunk from the end pointers in
   EAX/EDX.  Re-compare the chunk as four dwords, ECX from the first
   buffer and EBX from the second, and let L(find_diff) locate the byte.
   NOTE(review): every visible jump here follows a ptest that reported a
   difference, so the final RETURN looks unreachable in practice --
   confirm.  */
L(less16bytes):
|
||||
/* Rewind both pointers to the start of the differing chunk.  */
add %ebx, %eax
|
||||
add %ebx, %edx
|
||||

||||
mov (%eax), %ecx
|
||||
mov (%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||

||||
mov 4(%eax), %ecx
|
||||
mov 4(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||

||||
mov 8(%eax), %ecx
|
||||
mov 8(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
jne L(find_diff)
|
||||

||||
mov 12(%eax), %ecx
|
||||
mov 12(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
/* Pre-load the "equal" return value.  mov is used because it leaves the
   flags from the cmp intact for the jne below (xor would clobber them).  */
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
/* ECX and EBX hold dwords from the first and second buffer that are known
   to differ, and EBX is still saved on the stack.  Find the differing
   byte at the lowest address (the low-order byte first, since x86 is
   little-endian) and leave the flags of "first byte - second byte" for
   L(end).  */
L(find_diff):
|
||||
/* Byte 0.  */
cmpb %bl, %cl
|
||||
jne L(end)
|
||||
/* Byte 1: the low bytes are equal, so the 16-bit compare is decided by
   the high byte.  */
cmp %bx, %cx
|
||||
jne L(end)
|
||||
/* Bring bytes 2 and 3 down and repeat.  */
shr $16,%ecx
|
||||
shr $16,%ebx
|
||||
cmp %bl, %cl
|
||||
jne L(end)
|
||||
cmp %bx, %cx
|
||||
/* Common "buffers differ" exit.  popl and mov do not modify the flags, so
   ja tests the last compare done before reaching here: return 1 if the
   first-buffer value is above the second (unsigned), otherwise -1.  */
L(end):
|
||||
POP (%ebx)
|
||||
mov $1, %eax
|
||||
ja L(bigger)
|
||||
neg %eax
|
||||
L(bigger):
|
||||
ret
|
||||
|
||||
ALIGN (2)
|
||||
L(table_64bytes):
|
||||
.int JMPTBL (L(0bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(1bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(2bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(3bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(4bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(5bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(6bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(7bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(8bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(9bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(10bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(11bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(12bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(13bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(14bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(15bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(16bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(17bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(18bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(19bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(20bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(21bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(22bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(23bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(24bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(25bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(26bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(27bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(28bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(29bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(30bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(31bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(32bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(33bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(34bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(35bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(36bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(37bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(38bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(39bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(40bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(41bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(42bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(43bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(44bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(45bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(46bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(47bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(48bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(49bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(50bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(51bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(52bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(53bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(54bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(55bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(56bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(57bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(58bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(59bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(60bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(61bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(62bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(63bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(64bytes), L(table_64bytes))
|
||||
|
||||
END (MEMCMP)
|
||||
|
||||
#endif
|
1905
sysdeps/i386/i686/multiarch/memcmp-ssse3.S
Normal file
1905
sysdeps/i386/i686/multiarch/memcmp-ssse3.S
Normal file
File diff suppressed because it is too large
Load Diff
88
sysdeps/i386/i686/multiarch/memcmp.S
Normal file
88
sysdeps/i386/i686/multiarch/memcmp.S
Normal file
@ -0,0 +1,88 @@
|
||||
/* Multiple versions of memcmp
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <init-arch.h>
|
||||
|
||||
/* Define multiple versions only for the definition in libc. */
|
||||
#ifndef NOT_IN_libc
|
||||
# ifdef SHARED
|
||||
.text
|
||||
/* IFUNC selector for memcmp (SHARED build).  Returns in EAX the address
   of the implementation to use: __memcmp_ia32 by default, __memcmp_ssse3
   if the SSSE3 bit is set, and __memcmp_sse4_2 if the SSE4.2 bit is set
   as well.  */
ENTRY(memcmp)
|
||||
.type memcmp, @gnu_indirect_function
|
||||
/* EBX is used as the GOT base for the @GOTOFF accesses below; save it.  */
pushl %ebx
|
||||
cfi_adjust_cfa_offset (4)
|
||||
cfi_rel_offset (ebx, 0)
|
||||
call __i686.get_pc_thunk.bx
|
||||
addl $_GLOBAL_OFFSET_TABLE_, %ebx
|
||||
/* Call __init_cpu_features only while the KIND field of __cpu_features is
   still zero (presumably meaning "not initialized yet" -- confirm).  */
cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
|
||||
jne 1f
|
||||
call __init_cpu_features
|
||||
1: leal __memcmp_ia32@GOTOFF(%ebx), %eax
|
||||
testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
leal __memcmp_ssse3@GOTOFF(%ebx), %eax
|
||||
/* The SSE4.2 version is only considered when SSSE3 is present too.  */
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
leal __memcmp_sse4_2@GOTOFF(%ebx), %eax
|
||||
2: popl %ebx
|
||||
cfi_adjust_cfa_offset (-4)
|
||||
cfi_restore (ebx)
|
||||
ret
|
||||
END(memcmp)
|
||||
# else
|
||||
.text
|
||||
/* IFUNC selector for memcmp (non-SHARED build).  Returns in EAX the
   address of the implementation to use: __memcmp_ia32 by default,
   __memcmp_ssse3 if the SSSE3 bit is set, and __memcmp_sse4_2 if the
   SSE4.2 bit is set as well.  */
ENTRY(memcmp)
|
||||
.type memcmp, @gnu_indirect_function
|
||||
/* Call __init_cpu_features only while the KIND field of __cpu_features is
   still zero (presumably meaning "not initialized yet" -- confirm).  */
cmpl $0, KIND_OFFSET+__cpu_features
|
||||
jne 1f
|
||||
call __init_cpu_features
|
||||
1: leal __memcmp_ia32, %eax
|
||||
testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
|
||||
jz 2f
|
||||
leal __memcmp_ssse3, %eax
|
||||
/* bit_SSE4_2/index_SSE4_2 select a bit in the CPUID words, exactly like
   the SSSE3 test above and the SHARED variant of this selector, so the
   base must be CPUID_OFFSET.  FEATURE_OFFSET would test an unrelated
   word of __cpu_features.  */
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
|
||||
jz 2f
|
||||
leal __memcmp_sse4_2, %eax
|
||||
2: ret
|
||||
END(memcmp)
|
||||
# endif
|
||||
|
||||
/* Redefine ENTRY/END so that the function defined by the file included at
   the bottom (../memcmp.S) is emitted under the name __memcmp_ia32, the
   default implementation returned by the selector above.  The NAME
   argument is ignored on purpose.  */
# undef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type __memcmp_ia32, @function; \
|
||||
.p2align 4; \
|
||||
__memcmp_ia32: cfi_startproc; \
|
||||
CALL_MCOUNT
|
||||
# undef END
|
||||
# define END(name) \
|
||||
cfi_endproc; .size __memcmp_ia32, .-__memcmp_ia32
|
||||
|
||||
# ifdef SHARED
|
||||
# undef libc_hidden_builtin_def
|
||||
/* IFUNC doesn't work with the hidden functions in shared library since
|
||||
they will be called without setting up EBX needed for PLT which is
|
||||
used by IFUNC. */
|
||||
# define libc_hidden_builtin_def(name) \
|
||||
.globl __GI_memcmp; __GI_memcmp = __memcmp_ia32
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "../memcmp.S"
|
378
sysdeps/i386/i686/multiarch/strcmp-sse4.S
Normal file
378
sysdeps/i386/i686/multiarch/strcmp-sse4.S
Normal file
@ -0,0 +1,378 @@
|
||||
/* strcmp with SSE4.2
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#ifndef NOT_IN_libc
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/* Unwind bookkeeping for one 4-byte push: the CFA moves 4 bytes further
   from %esp and REG is recorded as saved at offset 0 from the stack
   pointer.  Emits CFI directives only, no instructions.  */
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
/* Unwind bookkeeping for one 4-byte pop: undo the CFA adjustment and
   mark REG as holding its caller value again.  */
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
/* pushl/popl paired with the matching unwind annotation above.  */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
#ifndef USE_AS_STRNCMP
|
||||
# ifndef STRCMP
|
||||
# define STRCMP __strcmp_sse4_2
|
||||
# endif
|
||||
# define STR1 4
|
||||
# define STR2 STR1+4
|
||||
#else
|
||||
# ifndef STRCMP
|
||||
# define STRCMP __strncmp_sse4_2
|
||||
# endif
|
||||
# define STR1 8
|
||||
# define STR2 STR1+4
|
||||
# define CNT STR2+4
|
||||
#endif
|
||||
|
||||
.section .text.sse4.2,"ax",@progbits
|
||||
/* STRCMP: strcmp using SSE4.2, or strncmp when USE_AS_STRNCMP is defined.

   Arguments are taken from the stack: STR1(%esp) and STR2(%esp) are the
   two strings; for strncmp CNT(%esp) is the maximum number of bytes to
   compare.

   Result in %eax: 0 when the strings compare equal (or the count runs
   out first).  Otherwise the sign follows the unsigned comparison of the
   first differing bytes: the byte-wise paths return +1/-1 via L(neq),
   the 16-byte loop returns the byte difference.

   Register use:
     %edx / %eax   string 1 / string 2 in the prologue and byte-wise code
     %edi / %esi   string 1 / string 2 in the 16-byte loop (saved/restored)
     %ebx          scratch in the 16-byte loop (saved/restored)
     %ebp          remaining count, strncmp only (saved/restored)
     %ecx, %xmm0-%xmm2  scratch

   Unwind note: several labels below sit after a `ret' but are entered
   only by jumps from code that still has registers pushed.  The CFI
   state is re-established with CFI_PUSH in front of each such label so
   the unwind tables describe the real stack layout.  */
ENTRY (STRCMP)
#ifdef USE_AS_STRNCMP
	PUSH	(%ebp)
#endif
	mov	STR1(%esp), %edx
	mov	STR2(%esp), %eax
#ifdef USE_AS_STRNCMP
	movl	CNT(%esp), %ebp
	test	%ebp, %ebp
	je	L(eq)			/* Count of zero: equal.  */
#endif
	/* A 16-byte load starting more than 0xff0 bytes into a 4096-byte
	   window would run past its end; use byte loads in that case.  */
	mov	%dx, %cx
	and	$0xfff, %cx
	cmp	$0xff0, %cx
	ja	L(first4bytes)
	movdqu	(%edx), %xmm2
	mov	%eax, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(first4bytes)
	/* Compare the first four bytes as one dword.  */
	movd	%xmm2, %ecx
	cmp	(%eax), %ecx
	jne	L(less4bytes)
	/* xmm1 = s1 ^ s2; with xmm0 == 0, ptest sets CF only when xmm1
	   is all zero, i.e. the 16 bytes are identical.  */
	movdqu	(%eax), %xmm1
	pxor	%xmm2, %xmm1
	pxor	%xmm0, %xmm0
	ptest	%xmm1, %xmm0
	jnc	L(less16bytes)
	/* xmm2 = 0xff in every position where s1 has a NUL byte; CF is
	   clear when at least one NUL is present.  */
	pcmpeqb	%xmm0, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

#ifdef USE_AS_STRNCMP
	sub	$16, %ebp
	jbe	L(eq)
#endif
	add	$16, %edx
	add	$16, %eax

	/* Compare the next eight bytes one at a time.  After each equal,
	   non-NUL byte the strncmp variant stops once the count is used
	   up.  */
L(first4bytes):
	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$1, %ebp
	je	L(eq)
#endif

	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$2, %ebp
	je	L(eq)
#endif
	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$3, %ebp
	je	L(eq)
#endif
	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	je	L(eq)
#endif
	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$5, %ebp
	je	L(eq)
#endif
	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$6, %ebp
	je	L(eq)
#endif
	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	je	L(eq)
#endif
	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	sub	$8, %ebp
	je	L(eq)
#endif
	add	$8, %eax
	add	$8, %edx

	/* Main loop set-up: from here on %edi = string 1, %esi = string 2
	   and %eax = 0 (the result returned when the strings are equal).  */
	PUSH	(%ebx)
	PUSH	(%edi)
	PUSH	(%esi)
	mov	%edx, %edi
	mov	%eax, %esi
	xorl	%eax, %eax
L(check_offset):
	/* %ecx = the larger of the two pointers' offsets within their
	   4096-byte windows (signed compare; both are in 0..0xfff).
	   %edx = that offset - 0xff0, and both base pointers are moved
	   back by %edx so that (%edi,%edx)/(%esi,%edx) still address the
	   current position.  While %edx <= 0 a 16-byte load from either
	   string stays inside its window.  */
	movl	%edi, %ebx
	movl	%esi, %ecx
	andl	$0xfff, %ebx
	andl	$0xfff, %ecx
	cmpl	%ebx, %ecx
	cmovl	%ebx, %ecx
	lea	-0xff0(%ecx), %edx
	sub	%edx, %edi
	sub	%edx, %esi
	testl	%edx, %edx
	jg	L(crosspage)
L(loop):
	movdqu	(%esi,%edx), %xmm2
	movdqu	(%edi,%edx), %xmm1
	/* imm8 0x1a: signed bytes, "equal each", negative polarity, least
	   significant index.  CF is set when a mismatch is found (index
	   in %ecx); ZF is set when the xmm2 chunk contains a NUL.  */
	pcmpistri	$0x1a, %xmm2, %xmm1
	jbe	L(end)

#ifdef USE_AS_STRNCMP
	sub	$16, %ebp
	jbe	L(more16byteseq)
#endif

	add	$16, %edx
	jle	L(loop)
	/* Within 16 bytes of the end of a window: compare byte by byte
	   until %edx passes 15, then recompute the offsets.  */
L(crosspage):
	movzbl	(%edi,%edx), %eax
	movzbl	(%esi,%edx), %ebx
	subl	%ebx, %eax
	jne	L(ret)			/* Bytes differ: %eax = difference.  */
	testl	%ebx, %ebx
	je	L(ret)			/* Both NUL: %eax is 0.  */
#ifdef USE_AS_STRNCMP
	sub	$1, %ebp
	jbe	L(more16byteseq)
#endif
	inc	%edx
	cmp	$15, %edx
	jle	L(crosspage)
	add	$16, %edi
	add	$16, %esi
	jmp	L(check_offset)

L(end):
	/* CF clear here means ZF was set: terminator reached with no
	   mismatch, and %eax is still 0.  */
	jnc	L(ret)
#ifdef USE_AS_STRNCMP
	/* The mismatch at index %ecx only counts if it lies inside the
	   remaining count.  */
	sub	%ecx, %ebp
	jbe	L(more16byteseq)
#endif
	lea	(%ecx,%edx), %ebx
	movzbl	(%edi,%ebx), %eax
	movzbl	(%esi,%ebx), %ecx
	subl	%ecx, %eax
L(ret):
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
#ifdef USE_AS_STRNCMP
	POP	(%ebp)
#endif
	ret

#ifdef USE_AS_STRNCMP
	/* Entered only by jumps from the loop code above, where %ebp,
	   %ebx, %edi and %esi are all still on the stack.  */
	CFI_PUSH (%ebp)
	CFI_PUSH (%ebx)
	CFI_PUSH (%edi)
	CFI_PUSH (%esi)
L(more16byteseq):
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
#endif
L(eq):
	xorl	%eax, %eax
#ifdef USE_AS_STRNCMP
	POP	(%ebp)
#endif
	ret

#ifdef USE_AS_STRNCMP
	/* Entered by jumps from the byte-wise code, where only %ebp is
	   saved.  */
	CFI_PUSH (%ebp)
#endif
L(neq):
	/* Flags are still those of `cmpb %cl, N(%edx)' (mov leaves them
	   untouched): above means the string-1 byte is the larger one.  */
	mov	$1, %eax
	ja	L(neq_bigger)
	neg	%eax
L(neq_bigger):
#ifdef USE_AS_STRNCMP
	POP	(%ebp)
#endif
	ret

	.p2align 4
#ifdef USE_AS_STRNCMP
	/* Entered by jumps from the prologue, where only %ebp is saved.  */
	CFI_PUSH (%ebp)
#endif
	/* The first 16 bytes contain a difference or a NUL, and the first
	   dword (in %ecx) is the same in both strings.  The add/xor/or/add
	   sequence is a word-at-a-time zero-byte test on that dword.  */
L(less16bytes):
	add	$0xfefefeff, %ecx
	jnc	L(less4bytes)
	xor	(%edx), %ecx
	or	$0xfefefeff, %ecx
	add	$1, %ecx
	jnz	L(less4bytes)

#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	jbe	L(eq)
#endif
	/* Same treatment for the second dword.  */
	mov	4(%edx), %ecx
	cmp	4(%eax), %ecx
	jne	L(more4bytes)
	add	$0xfefefeff, %ecx
	jnc	L(more4bytes)
	xor	4(%edx), %ecx
	or	$0xfefefeff, %ecx
	add	$1, %ecx
	jnz	L(more4bytes)

#ifdef USE_AS_STRNCMP
	sub	$8, %ebp
	jbe	L(eq)
#endif

	/* The first eight bytes are equal and NUL-free; the deciding byte
	   is among the next eight.  */
	add	$8, %edx
	add	$8, %eax
L(less4bytes):

	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$1, %ebp
	je	L(eq)
#endif
	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$2, %ebp
	je	L(eq)
#endif

	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$3, %ebp
	je	L(eq)
#endif
	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

L(more4bytes):
#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	je	L(eq)
#endif
	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)


#ifdef USE_AS_STRNCMP
	cmp	$5, %ebp
	je	L(eq)
#endif
	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$6, %ebp
	je	L(eq)
#endif
	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	je	L(eq)
#endif
	/* Last candidate byte: every path into this tail has a difference
	   or a NUL within these eight bytes, so one of the two branches
	   below is expected to be taken.  */
	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

END (STRCMP)
|
||||
|
||||
#endif
|
2245
sysdeps/i386/i686/multiarch/strcmp-ssse3.S
Normal file
2245
sysdeps/i386/i686/multiarch/strcmp-ssse3.S
Normal file
File diff suppressed because it is too large
Load Diff
115
sysdeps/i386/i686/multiarch/strcmp.S
Normal file
115
sysdeps/i386/i686/multiarch/strcmp.S
Normal file
@ -0,0 +1,115 @@
|
||||
/* Multiple versions of strcmp
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <init-arch.h>
|
||||
|
||||
#ifndef USE_AS_STRNCMP
|
||||
# define STRCMP strcmp
|
||||
# define __GI_STRCMP __GI_strcmp
|
||||
# define __STRCMP_IA32 __strcmp_ia32
|
||||
# define __STRCMP_SSSE3 __strcmp_ssse3
|
||||
# define __STRCMP_SSE4_2 __strcmp_sse4_2
|
||||
#else
|
||||
# define STRCMP strncmp
|
||||
# define __GI_STRCMP __GI_strncmp
|
||||
# define __STRCMP_IA32 __strncmp_ia32
|
||||
# define __STRCMP_SSSE3 __strncmp_ssse3
|
||||
# define __STRCMP_SSE4_2 __strncmp_sse4_2
|
||||
#endif
|
||||
|
||||
/* Define multiple versions only for the definition in libc. Don't
|
||||
define multiple versions for strncmp in static library since we
|
||||
need strncmp before the initialization happened. */
|
||||
#if (defined SHARED || !defined USE_AS_STRNCMP) && !defined NOT_IN_libc
|
||||
# ifdef SHARED
|
||||
/* PC thunk: copies the word at the top of the stack -- the return
   address pushed by the `call' that entered here -- into %ebx and
   returns.  Placed in its own link-once section and marked hidden.  */
.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
|
||||
.globl __i686.get_pc_thunk.bx
|
||||
.hidden __i686.get_pc_thunk.bx
|
||||
.p2align 4
|
||||
.type __i686.get_pc_thunk.bx,@function
|
||||
__i686.get_pc_thunk.bx:
|
||||
movl (%esp), %ebx
|
||||
ret
|
||||
|
||||
.text
|
||||
/* IFUNC resolver for STRCMP (strcmp or strncmp), position-independent
   variant.  Returns in %eax the address of the implementation to use:
   __STRCMP_IA32 by default, __STRCMP_SSSE3 when the SSSE3 bit is set,
   and __STRCMP_SSE4_2 when the SSE4.2 bit is set as well.  %ebx is used
   as the GOT base and is saved/restored around the body.  */
ENTRY(STRCMP)
|
||||
.type STRCMP, @gnu_indirect_function
|
||||
pushl %ebx
|
||||
cfi_adjust_cfa_offset (4)
|
||||
cfi_rel_offset (ebx, 0)
|
||||
/* %ebx = address of the GOT, used for all @GOTOFF references below.  */
call __i686.get_pc_thunk.bx
|
||||
addl $_GLOBAL_OFFSET_TABLE_, %ebx
|
||||
/* Call __init_cpu_features only while the word at KIND_OFFSET is still
   zero (presumably meaning "not initialized yet" -- confirm against
   init-arch.h).  */
cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
|
||||
jne 1f
|
||||
call __init_cpu_features
|
||||
/* Start with the baseline version and upgrade step by step; SSE4.2 is
   only considered when SSSE3 is present.  */
1: leal __STRCMP_IA32@GOTOFF(%ebx), %eax
|
||||
testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
leal __STRCMP_SSSE3@GOTOFF(%ebx), %eax
|
||||
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
leal __STRCMP_SSE4_2@GOTOFF(%ebx), %eax
|
||||
2: popl %ebx
|
||||
cfi_adjust_cfa_offset (-4)
|
||||
cfi_restore (ebx)
|
||||
ret
|
||||
END(STRCMP)
|
||||
# else
|
||||
.text
|
||||
/* IFUNC resolver for STRCMP, non-PIC (static) variant.  Returns in %eax
   the address of the implementation to use: __STRCMP_IA32 by default,
   __STRCMP_SSSE3 when the SSSE3 bit is set, and __STRCMP_SSE4_2 when the
   SSE4.2 bit is set as well.  Both feature bits are read through
   CPUID_OFFSET, exactly as in the SHARED variant of this resolver.  */
ENTRY(STRCMP)
	.type	STRCMP, @gnu_indirect_function
	/* Call __init_cpu_features only while the word at KIND_OFFSET is
	   still zero (presumably "not initialized yet" -- confirm against
	   init-arch.h).  */
	cmpl	$0, KIND_OFFSET+__cpu_features
	jne	1f
	call	__init_cpu_features
	/* Start with the baseline version and upgrade step by step;
	   SSE4.2 is only considered when SSSE3 is present.  */
1:	leal	__STRCMP_IA32, %eax
	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
	jz	2f
	leal	__STRCMP_SSSE3, %eax
	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
	jz	2f
	leal	__STRCMP_SSE4_2, %eax
2:	ret
END(STRCMP)
|
||||
# endif
|
||||
|
||||
/* From here on ENTRY(name) and END(name) ignore their argument and always
   open/close the symbol __STRCMP_IA32, so a generic implementation
   assembled after this point is emitted under that name instead of its
   own.  */
# undef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type __STRCMP_IA32, @function; \
|
||||
.p2align 4; \
|
||||
__STRCMP_IA32: cfi_startproc; \
|
||||
CALL_MCOUNT
|
||||
# undef END
|
||||
# define END(name) \
|
||||
cfi_endproc; .size __STRCMP_IA32, .-__STRCMP_IA32
|
||||
|
||||
# ifdef SHARED
|
||||
# undef libc_hidden_builtin_def
|
||||
/* IFUNC doesn't work with the hidden functions in shared library since
|
||||
they will be called without setting up EBX needed for PLT which is
|
||||
used by IFUNC. */
|
||||
/* Hence the internal alias __GI_STRCMP is bound directly to
   __STRCMP_IA32, whatever name is passed to the macro.  */
# define libc_hidden_builtin_def(name) \
|
||||
.globl __GI_STRCMP; __GI_STRCMP = __STRCMP_IA32
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef USE_AS_STRNCMP
|
||||
# include "../strcmp.S"
|
||||
#endif
|
8
sysdeps/i386/i686/multiarch/strncmp-c.c
Normal file
8
sysdeps/i386/i686/multiarch/strncmp-c.c
Normal file
@ -0,0 +1,8 @@
|
||||
#ifdef SHARED
|
||||
# define STRNCMP __strncmp_ia32
|
||||
# undef libc_hidden_builtin_def
|
||||
# define libc_hidden_builtin_def(name) \
|
||||
__hidden_ver1 (__strncmp_ia32, __GI_strncmp, __strncmp_ia32);
|
||||
#endif
|
||||
|
||||
#include "string/strncmp.c"
|
5
sysdeps/i386/i686/multiarch/strncmp-sse4.S
Normal file
5
sysdeps/i386/i686/multiarch/strncmp-sse4.S
Normal file
@ -0,0 +1,5 @@
|
||||
#ifdef SHARED
|
||||
# define USE_AS_STRNCMP
|
||||
# define STRCMP __strncmp_sse4_2
|
||||
# include "strcmp-sse4.S"
|
||||
#endif
|
5
sysdeps/i386/i686/multiarch/strncmp-ssse3.S
Normal file
5
sysdeps/i386/i686/multiarch/strncmp-ssse3.S
Normal file
@ -0,0 +1,5 @@
|
||||
#ifdef SHARED
|
||||
# define USE_AS_STRNCMP
|
||||
# define STRCMP __strncmp_ssse3
|
||||
# include "strcmp-ssse3.S"
|
||||
#endif
|
3
sysdeps/i386/i686/multiarch/strncmp.S
Normal file
3
sysdeps/i386/i686/multiarch/strncmp.S
Normal file
@ -0,0 +1,3 @@
|
||||
#define USE_AS_STRNCMP
|
||||
#define STRCMP strncmp
|
||||
#include "strcmp.S"
|
Loading…
x
Reference in New Issue
Block a user