Add SSE4.2 support for strcasecmp and strncasecmp on x86-32
This commit is contained in:
parent
76e3966e9e
commit
6abf346582
14
ChangeLog
14
ChangeLog
@ -1,3 +1,17 @@
|
||||
2011-11-14 Ulrich Drepper <drepper@gmail.com>
|
||||
|
||||
* sysdeps/i386/i686/multiarch/Makefile [subdir=string]
|
||||
(sysdep_routines): Add strcasecmp_l-sse4 and strncase_l-sse4.
|
||||
* sysdeps/i386/i686/multiarch/strcasecmp.S: Re-enable SSE4.2 code.
|
||||
* sysdeps/i386/i686/multiarch/strcmp.S: Likewise.
|
||||
* sysdeps/i386/i686/multiarch/strncase.S: Likewise.
|
||||
* sysdeps/i386/i686/multiarch/strcmp-sse4.S: Change to allow reuse
|
||||
to compile strcasecmp and strncasecmp.
|
||||
* sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/strncase_l-sse4.S: New file.
|
||||
|
||||
* sysdeps/i386/i686/multiarch/strcmp-ssse3.S: Use L macro consistently.
|
||||
|
||||
2011-11-13 Ulrich Drepper <drepper@gmail.com>
|
||||
|
||||
* sysdeps/i386/i686/multiarch/Makefile [subdir=string]: Add
|
||||
|
@ -21,7 +21,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
|
||||
rawmemchr-sse2 rawmemchr-sse2-bsf \
|
||||
strnlen-sse2 strnlen-c \
|
||||
strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \
|
||||
strncase_l-c strncase-c strncase_l-ssse3
|
||||
strncase_l-c strncase-c strncase_l-ssse3 \
|
||||
strcasecmp_l-sse4 strncase_l-sse4
|
||||
ifeq (yes,$(config-cflags-sse4))
|
||||
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
|
||||
CFLAGS-varshift.c += -msse4
|
||||
|
@ -36,12 +36,9 @@ ENTRY(__strcasecmp)
|
||||
testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
leal __strcasecmp_ssse3@GOTOFF(%ebx), %eax
|
||||
#if 0
|
||||
// XXX Temporarily
|
||||
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
leal __strcasecmp_sse4_2@GOTOFF(%ebx), %eax
|
||||
#endif
|
||||
2: popl %ebx
|
||||
cfi_adjust_cfa_offset (-4)
|
||||
cfi_restore (ebx)
|
||||
|
2
sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S
Normal file
2
sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S
Normal file
@ -0,0 +1,2 @@
|
||||
/* Build strcmp-sse4.S in its USE_AS_STRCASECMP_L configuration; with that
   macro set the shared source names its main entry __strcasecmp_l_sse4_2
   and also emits the __strcasecmp_sse4_2 entry.  */
#define USE_AS_STRCASECMP_L 1
|
||||
#include "strcmp-sse4.S"
|
@ -1,5 +1,5 @@
|
||||
/* strcmp with SSE4.2
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
@ -34,33 +34,156 @@
|
||||
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
#ifndef USE_AS_STRNCMP
|
||||
# ifndef STRCMP
|
||||
# define STRCMP __strcmp_sse4_2
|
||||
# endif
|
||||
# define STR1 4
|
||||
# define STR2 STR1+4
|
||||
# define RETURN ret; .p2align 4
|
||||
#else
|
||||
#ifdef USE_AS_STRNCMP
|
||||
# ifndef STRCMP
|
||||
# define STRCMP __strncmp_sse4_2
|
||||
# endif
|
||||
# define STR1 8
|
||||
# define STR2 STR1+4
|
||||
# define CNT STR2+4
|
||||
# define RETURN POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp)
|
||||
# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM)
|
||||
# define REM %ebp
|
||||
#elif defined USE_AS_STRCASECMP_L
# include "locale-defines.h"
# ifndef STRCMP
#  define STRCMP	__strcasecmp_l_sse4_2
# endif
/* STR1/STR2 are used after the prologue pushes, so they must account
   for them.  PIC builds push %ebx (GOT pointer) and %edi, which puts
   the first string at 12(%esp); non-PIC builds push only %edi, which
   puts it at 8(%esp).  Using 12 unconditionally made the non-PIC build
   read the second string and the locale as its two string arguments.  */
# ifdef PIC
#  define STR1		12
# else
#  define STR1		8
# endif
# define STR2		STR1+4
/* The locale argument is read at function entry, before any register
   is pushed, so its offset does not depend on PIC.  */
# define LOCALE		12
/* RETURN must undo exactly the pushes described above.  */
# ifdef PIC
#  define RETURN	POP (%edi); POP (%ebx); ret; \
			.p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi)
# else
#  define RETURN	POP (%edi); ret; .p2align 4; CFI_PUSH (%edi)
# endif
# define NONASCII	__strcasecmp_nonascii
|
||||
#elif defined USE_AS_STRNCASECMP_L
# include "locale-defines.h"
# ifndef STRCMP
#  define STRCMP	__strncasecmp_l_sse4_2
# endif
/* STR1/STR2/CNT are used after the prologue pushes, so they must
   account for them.  PIC builds push %ebx (GOT pointer), REM and %edi,
   which puts the first string at 16(%esp); non-PIC builds push only
   REM and %edi, which puts it at 12(%esp).  Using 16 unconditionally
   shifted every argument by one slot in the non-PIC build.  */
# ifdef PIC
#  define STR1		16
# else
#  define STR1		12
# endif
# define STR2		STR1+4
# define CNT		STR2+4
/* The locale argument is read at function entry, before any register
   is pushed, so its offset does not depend on PIC.  */
# define LOCALE		16
/* RETURN must undo exactly the pushes described above.  */
# ifdef PIC
#  define RETURN	POP (%edi); POP (REM); POP (%ebx); ret; \
			.p2align 4; \
			CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi)
# else
#  define RETURN	POP (%edi); POP (REM); ret; \
			.p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi)
# endif
/* Register holding the remaining byte count.  */
# define REM		%ebp
# define NONASCII	__strncasecmp_nonascii
|
||||
#else
|
||||
# ifndef STRCMP
|
||||
# define STRCMP __strcmp_sse4_2
|
||||
# endif
|
||||
# define STR1 4
|
||||
# define STR2 STR1+4
|
||||
# define RETURN ret; .p2align 4
|
||||
#endif
|
||||
|
||||
.section .text.sse4.2,"ax",@progbits
|
||||
ENTRY (STRCMP)
|
||||
#ifdef USE_AS_STRNCMP
|
||||
PUSH (%ebp)
|
||||
|
||||
#ifdef USE_AS_STRCASECMP_L
/* Entry point that takes no explicit locale: it fetches the calling
   thread's locale from the TLS variable __libc_tsd_LOCALE, then either
   hands over to __strcasecmp_nonascii (when the locale's
   _NL_CTYPE_NONASCII_CASE flag is set) or joins the shared ASCII code
   at L(ascii).

   Clobbers %eax.  In PIC builds %ebx is pushed and loaded with the GOT
   address, which is the state L(ascii) expects on arrival.  */
ENTRY (__strcasecmp_sse4_2)
# ifdef PIC
	PUSH	(%ebx)
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
	/* Initial-exec TLS: the GOT slot holds the thread-pointer offset.  */
	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
	movl	%gs:(%eax), %eax
# else
	/* Local-exec TLS: the offset is a link-time constant, so address
	   the variable directly through %gs.  (Without the segment
	   override this would be a plain load from an absolute address.)  */
	movl	%gs:__libc_tsd_LOCALE@NTPOFF, %eax
# endif
# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
# else
	movl	(%eax), %eax
# endif
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
# ifdef PIC
	je	L(ascii)
	/* Leaving for the fallback: restore the caller's %ebx and stack
	   layout first, otherwise the callee would find the saved %ebx
	   where it expects its return address.  */
	POP	(%ebx)
	jmp	__strcasecmp_nonascii
# else
	jne	__strcasecmp_nonascii
	jmp	L(ascii)
# endif
END (__strcasecmp_sse4_2)
#endif
|
||||
|
||||
#ifdef USE_AS_STRNCASECMP_L
/* Entry point that takes no explicit locale: it fetches the calling
   thread's locale from the TLS variable __libc_tsd_LOCALE, then either
   hands over to __strncasecmp_nonascii (when the locale's
   _NL_CTYPE_NONASCII_CASE flag is set) or joins the shared ASCII code
   at L(ascii).

   Clobbers %eax.  In PIC builds %ebx is pushed and loaded with the GOT
   address, which is the state L(ascii) expects on arrival.  */
ENTRY (__strncasecmp_sse4_2)
# ifdef PIC
	PUSH	(%ebx)
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
	/* Initial-exec TLS: the GOT slot holds the thread-pointer offset.  */
	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
	movl	%gs:(%eax), %eax
# else
	/* Local-exec TLS: the offset is a link-time constant, so address
	   the variable directly through %gs.  (Without the segment
	   override this would be a plain load from an absolute address.)  */
	movl	%gs:__libc_tsd_LOCALE@NTPOFF, %eax
# endif
# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
# else
	movl	(%eax), %eax
# endif
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
# ifdef PIC
	je	L(ascii)
	/* Leaving for the fallback: restore the caller's %ebx and stack
	   layout first, otherwise the callee would find the saved %ebx
	   where it expects its return address.  */
	POP	(%ebx)
	jmp	__strncasecmp_nonascii
# else
	jne	__strncasecmp_nonascii
	jmp	L(ascii)
# endif
END (__strncasecmp_sse4_2)
#endif
|
||||
|
||||
ENTRY (STRCMP)
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movl LOCALE(%esp), %eax
|
||||
# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
|
||||
movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
|
||||
# else
|
||||
movl (%eax), %eax
|
||||
# endif
|
||||
testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
|
||||
jne NONASCII
|
||||
|
||||
# ifdef PIC
|
||||
PUSH (%ebx)
|
||||
call __i686.get_pc_thunk.bx
|
||||
addl $_GLOBAL_OFFSET_TABLE_, %ebx
|
||||
# endif
|
||||
L(ascii):
|
||||
.section .rodata.cst16,"aM",@progbits,16
|
||||
.align 16
|
||||
.Lbelowupper:
|
||||
.quad 0x4040404040404040
|
||||
.quad 0x4040404040404040
|
||||
.Ltopupper:
|
||||
.quad 0x5b5b5b5b5b5b5b5b
|
||||
.quad 0x5b5b5b5b5b5b5b5b
|
||||
.Ltouppermask:
|
||||
.quad 0x2020202020202020
|
||||
.quad 0x2020202020202020
|
||||
.previous
|
||||
|
||||
# ifdef PIC
|
||||
# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx)
|
||||
# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx)
|
||||
# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx)
|
||||
# else
|
||||
# define UCLOW_reg .Lbelowupper
|
||||
# define UCHIGH_reg .Ltopupper
|
||||
# define LCQWORD_reg .Ltouppermask
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
PUSH (REM)
|
||||
#endif
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
PUSH (%edi)
|
||||
#endif
|
||||
mov STR1(%esp), %edx
|
||||
mov STR2(%esp), %eax
|
||||
#ifdef USE_AS_STRNCMP
|
||||
movl CNT(%esp), %ebp
|
||||
test %ebp, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
movl CNT(%esp), REM
|
||||
test REM, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
mov %dx, %cx
|
||||
@ -72,10 +195,40 @@ ENTRY (STRCMP)
|
||||
and $0xfff, %ecx
|
||||
cmp $0xff0, %ecx
|
||||
ja L(first4bytes)
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
# define TOLOWER(reg1, reg2) \
|
||||
movdqa reg1, %xmm3; \
|
||||
movdqa UCHIGH_reg, %xmm4; \
|
||||
movdqa reg2, %xmm5; \
|
||||
movdqa UCHIGH_reg, %xmm6; \
|
||||
pcmpgtb UCLOW_reg, %xmm3; \
|
||||
pcmpgtb reg1, %xmm4; \
|
||||
pcmpgtb UCLOW_reg, %xmm5; \
|
||||
pcmpgtb reg2, %xmm6; \
|
||||
pand %xmm4, %xmm3; \
|
||||
pand %xmm6, %xmm5; \
|
||||
pand LCQWORD_reg, %xmm3; \
|
||||
pand LCQWORD_reg, %xmm5; \
|
||||
por %xmm3, reg1; \
|
||||
por %xmm5, reg2
|
||||
|
||||
movdqu (%eax), %xmm1
|
||||
TOLOWER (%xmm2, %xmm1)
|
||||
movd %xmm2, %ecx
|
||||
movd %xmm1, %edi
|
||||
movdqa %xmm2, %xmm3
|
||||
movdqa %xmm1, %xmm4
|
||||
cmpl %edi, %ecx
|
||||
#else
|
||||
# define TOLOWER(reg1, reg)
|
||||
|
||||
movd %xmm2, %ecx
|
||||
cmp (%eax), %ecx
|
||||
#endif
|
||||
jne L(less4bytes)
|
||||
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
||||
movdqu (%eax), %xmm1
|
||||
#endif
|
||||
pxor %xmm2, %xmm1
|
||||
pxor %xmm0, %xmm0
|
||||
ptest %xmm1, %xmm0
|
||||
@ -84,113 +237,210 @@ ENTRY (STRCMP)
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
sub $16, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
sub $16, REM
|
||||
jbe L(eq)
|
||||
#endif
|
||||
add $16, %edx
|
||||
add $16, %eax
|
||||
L(first4bytes):
|
||||
movzbl (%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl (%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, (%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $1, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $1, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
|
||||
movzbl 1(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 1(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 1(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $2, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $2, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 2(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 2(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 2(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $3, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $3, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 3(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 3(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 3(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $4, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $4, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 4(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 4(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 4(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $5, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $5, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 5(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 5(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 5(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $6, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $6, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 6(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 6(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 6(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $7, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $7, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 7(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 7(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 7(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
sub $8, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
sub $8, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
add $8, %eax
|
||||
add $8, %edx
|
||||
|
||||
PUSH (%ebx)
|
||||
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
||||
PUSH (%edi)
|
||||
#endif
|
||||
PUSH (%esi)
|
||||
#ifdef USE_AS_STRNCMP
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cfi_remember_state
|
||||
#endif
|
||||
mov %edx, %edi
|
||||
mov %eax, %esi
|
||||
xorl %eax, %eax
|
||||
L(check_offset):
|
||||
movl %edi, %ebx
|
||||
movl %edi, %edx
|
||||
movl %esi, %ecx
|
||||
andl $0xfff, %ebx
|
||||
andl $0xfff, %edx
|
||||
andl $0xfff, %ecx
|
||||
cmpl %ebx, %ecx
|
||||
cmovl %ebx, %ecx
|
||||
cmpl %edx, %ecx
|
||||
cmovl %edx, %ecx
|
||||
lea -0xff0(%ecx), %edx
|
||||
sub %edx, %edi
|
||||
sub %edx, %esi
|
||||
@ -199,11 +449,12 @@ L(check_offset):
|
||||
L(loop):
|
||||
movdqu (%esi,%edx), %xmm2
|
||||
movdqu (%edi,%edx), %xmm1
|
||||
TOLOWER (%xmm2, %xmm1)
|
||||
pcmpistri $0x1a, %xmm2, %xmm1
|
||||
jbe L(end)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
sub $16, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
sub $16, REM
|
||||
jbe L(more16byteseq)
|
||||
#endif
|
||||
|
||||
@ -211,13 +462,22 @@ L(loop):
|
||||
jle L(loop)
|
||||
L(crosspage):
|
||||
movzbl (%edi,%edx), %eax
|
||||
movzbl (%esi,%edx), %ebx
|
||||
subl %ebx, %eax
|
||||
movzbl (%esi,%edx), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
# endif
|
||||
#endif
|
||||
subl %ecx, %eax
|
||||
jne L(ret)
|
||||
testl %ebx, %ebx
|
||||
testl %ecx, %ecx
|
||||
je L(ret)
|
||||
#ifdef USE_AS_STRNCMP
|
||||
sub $1, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
sub $1, REM
|
||||
jbe L(more16byteseq)
|
||||
#endif
|
||||
inc %edx
|
||||
@ -230,30 +490,44 @@ L(crosspage):
|
||||
.p2align 4
|
||||
L(end):
|
||||
jnc L(ret)
|
||||
#ifdef USE_AS_STRNCMP
|
||||
sub %ecx, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
sub %ecx, REM
|
||||
jbe L(more16byteseq)
|
||||
#endif
|
||||
lea (%ecx,%edx), %ebx
|
||||
movzbl (%edi,%ebx), %eax
|
||||
movzbl (%esi,%ebx), %ecx
|
||||
lea (%ecx,%edx), %ecx
|
||||
movzbl (%edi,%ecx), %eax
|
||||
movzbl (%esi,%ecx), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
# endif
|
||||
#endif
|
||||
subl %ecx, %eax
|
||||
L(ret):
|
||||
POP (%esi)
|
||||
POP (%edi)
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
POP (REM)
|
||||
#endif
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
# ifdef PIC
|
||||
POP (%ebx)
|
||||
#ifdef USE_AS_STRNCMP
|
||||
POP (%ebp)
|
||||
# endif
|
||||
#endif
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
#ifdef USE_AS_STRNCMP
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cfi_restore_state
|
||||
L(more16byteseq):
|
||||
POP (%esi)
|
||||
# ifdef USE_AS_STRNCMP
|
||||
POP (%edi)
|
||||
POP (%ebx)
|
||||
# endif
|
||||
#endif
|
||||
L(eq):
|
||||
xorl %eax, %eax
|
||||
@ -269,27 +543,45 @@ L(neq_bigger):
|
||||
L(less16bytes):
|
||||
add $0xfefefeff, %ecx
|
||||
jnc L(less4bytes)
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movd %xmm3, %edi
|
||||
xor %edi, %ecx
|
||||
#else
|
||||
xor (%edx), %ecx
|
||||
#endif
|
||||
or $0xfefefeff, %ecx
|
||||
add $1, %ecx
|
||||
jnz L(less4bytes)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $4, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $4, REM
|
||||
jbe L(eq)
|
||||
#endif
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
psrldq $4, %xmm3
|
||||
psrldq $4, %xmm4
|
||||
movd %xmm3, %ecx
|
||||
movd %xmm4, %edi
|
||||
cmp %edi, %ecx
|
||||
mov %ecx, %edi
|
||||
#else
|
||||
mov 4(%edx), %ecx
|
||||
cmp 4(%eax), %ecx
|
||||
#endif
|
||||
jne L(more4bytes)
|
||||
add $0xfefefeff, %ecx
|
||||
jnc L(more4bytes)
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
xor %edi, %ecx
|
||||
#else
|
||||
xor 4(%edx), %ecx
|
||||
#endif
|
||||
or $0xfefefeff, %ecx
|
||||
add $1, %ecx
|
||||
jnz L(more4bytes)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
sub $8, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
sub $8, REM
|
||||
jbe L(eq)
|
||||
#endif
|
||||
|
||||
@ -298,80 +590,176 @@ L(less16bytes):
|
||||
L(less4bytes):
|
||||
|
||||
movzbl (%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl (%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, (%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $1, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $1, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 1(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 1(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 1(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $2, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $2, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
|
||||
movzbl 2(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 2(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 2(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $3, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $3, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 3(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 3(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 3(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
L(more4bytes):
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $4, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $4, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 4(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 4(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 4(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $5, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $5, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 5(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 5(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 5(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $6, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $6, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 6(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 6(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 6(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
cmpl $0, %ecx
|
||||
je L(eq)
|
||||
|
||||
#ifdef USE_AS_STRNCMP
|
||||
cmp $7, %ebp
|
||||
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
||||
cmp $7, REM
|
||||
je L(eq)
|
||||
#endif
|
||||
movzbl 7(%eax), %ecx
|
||||
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
||||
movzbl 7(%edx), %edi
|
||||
# ifdef PIC
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
|
||||
# else
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
|
||||
movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
|
||||
# endif
|
||||
cmpl %ecx, %edi
|
||||
#else
|
||||
cmpb %cl, 7(%edx)
|
||||
#endif
|
||||
jne L(neq)
|
||||
jmp L(eq)
|
||||
|
||||
|
@ -123,7 +123,7 @@ ENTRY (__strcasecmp_ssse3)
|
||||
# endif
|
||||
testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
|
||||
jne __strcasecmp_nonascii
|
||||
jmp .Lascii
|
||||
jmp L(ascii)
|
||||
END (__strcasecmp_ssse3)
|
||||
#endif
|
||||
|
||||
@ -145,7 +145,7 @@ ENTRY (__strncasecmp_ssse3)
|
||||
# endif
|
||||
testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
|
||||
jne __strncasecmp_nonascii
|
||||
jmp .Lascii
|
||||
jmp L(ascii)
|
||||
END (__strncasecmp_ssse3)
|
||||
#endif
|
||||
|
||||
@ -165,7 +165,7 @@ ENTRY (STRCMP)
|
||||
call __i686.get_pc_thunk.bx
|
||||
addl $_GLOBAL_OFFSET_TABLE_, %ebx
|
||||
# endif
|
||||
.Lascii:
|
||||
L(ascii):
|
||||
.section .rodata.cst16,"aM",@progbits,16
|
||||
.align 16
|
||||
.Lbelowupper:
|
||||
|
@ -76,12 +76,9 @@ ENTRY(STRCMP)
|
||||
testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
leal __STRCMP_SSSE3@GOTOFF(%ebx), %eax
|
||||
#if 0
|
||||
// XXX Temporarily
|
||||
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
leal __STRCMP_SSE4_2@GOTOFF(%ebx), %eax
|
||||
#endif
|
||||
2: popl %ebx
|
||||
cfi_adjust_cfa_offset (-4)
|
||||
cfi_restore (ebx)
|
||||
@ -98,12 +95,9 @@ ENTRY(STRCMP)
|
||||
testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
|
||||
jz 2f
|
||||
leal __STRCMP_SSSE3, %eax
|
||||
#if 0
|
||||
// XXX Temporarily
|
||||
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
|
||||
jz 2f
|
||||
leal __STRCMP_SSE4_2, %eax
|
||||
#endif
|
||||
2: ret
|
||||
END(STRCMP)
|
||||
# endif
|
||||
|
@ -36,12 +36,9 @@ ENTRY(__strncasecmp)
|
||||
testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
leal __strncasecmp_ssse3@GOTOFF(%ebx), %eax
|
||||
#if 0
|
||||
// XXX Temporarily
|
||||
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
leal __strncasecmp_sse4_2@GOTOFF(%ebx), %eax
|
||||
#endif
|
||||
2: popl %ebx
|
||||
cfi_adjust_cfa_offset (-4)
|
||||
cfi_restore (ebx)
|
||||
|
2
sysdeps/i386/i686/multiarch/strncase_l-sse4.S
Normal file
2
sysdeps/i386/i686/multiarch/strncase_l-sse4.S
Normal file
@ -0,0 +1,2 @@
|
||||
/* Build strcmp-sse4.S in its USE_AS_STRNCASECMP_L configuration; with that
   macro set the shared source names its main entry __strncasecmp_l_sse4_2
   and also emits the __strncasecmp_sse4_2 entry.  */
#define USE_AS_STRNCASECMP_L 1
|
||||
#include "strcmp-sse4.S"
|
Loading…
Reference in New Issue
Block a user