Add optimized wcslen and strnlen for x86-32

This commit is contained in:
Liubov Dmitrieva 2011-10-23 15:17:23 -04:00 committed by Ulrich Drepper
parent 09229f3e1b
commit fc2ee42abe
12 changed files with 783 additions and 73 deletions

View File

@ -1,3 +1,18 @@
2011-10-23 Ulrich Drepper <drepper@gmail.com>
* string/strnlen.c: Define and use STRNLEN macro.
* sysdeps/i386/i686/multiarch/Makefile [string] (sysdep_routines):
Add strnlen-sse2, strnlen-c, wcslen-sse2, and wcslen-c.
* sysdeps/i386/i686/multiarch/strlen-sse2.S: Add support for strnlen.
* wcsmbs/wcslen.c: Define and use WCSLEN.
* sysdeps/i386/i686/multiarch/strnlen-c.c: New file.
* sysdeps/i386/i686/multiarch/strnlen-sse2.S: New file.
* sysdeps/i386/i686/multiarch/strnlen.S: New file.
* sysdeps/i386/i686/multiarch/wcslen-c.c: New file.
* sysdeps/i386/i686/multiarch/wcslen-sse2.S: New file.
* sysdeps/i386/i686/multiarch/wcslen.S: New file.
Patch by Liubov Dmitrieva <liubov.dmitrieva@gmail.com>.
2011-10-20 Liubov Dmitrieva <liubov.dmitrieva@gmail.com> 2011-10-20 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add

4
NEWS
View File

@ -26,8 +26,8 @@ Version 2.15
* Improved strcpy, strncpy, stpcpy, stpncpy for SSE2 and SSSE3 on x86-64. * Improved strcpy, strncpy, stpcpy, stpncpy for SSE2 and SSSE3 on x86-64.
Contributed by HJ Lu. Contributed by HJ Lu.
* Optimized strcat, strncat, wcslen, strnlen on x86-64 and optimized * Optimized strcat, strncat on x86-64 and optimized wcscmp, wcslen, strnlen
wcscmp on x86-32 and x86-64. on x86-32 and x86-64.
Contributed by Liubov Dmitrieva. Contributed by Liubov Dmitrieva.
* Optimized strchr and strrchr for SSE on x86-32. * Optimized strchr and strrchr for SSE on x86-32.

View File

@ -1,5 +1,5 @@
/* Find the length of STRING, but scan at most MAXLEN characters. /* Find the length of STRING, but scan at most MAXLEN characters.
Copyright (C) 1991,1993,1997,2000,2001,2005 Free Software Foundation, Inc. Copyright (C) 1991, 1993, 1997, 2000, 2001, 2005, 2011 Free Software Foundation, Inc.
Contributed by Jakub Jelinek <jakub@redhat.com>. Contributed by Jakub Jelinek <jakub@redhat.com>.
Based on strlen written by Torbjorn Granlund (tege@sics.se), Based on strlen written by Torbjorn Granlund (tege@sics.se),
@ -26,8 +26,13 @@
/* Find the length of S, but scan at most MAXLEN characters. If no /* Find the length of S, but scan at most MAXLEN characters. If no
'\0' terminator is found in that many characters, return MAXLEN. */ '\0' terminator is found in that many characters, return MAXLEN. */
#ifndef STRNLEN
# define STRNLEN __strnlen
#endif
size_t size_t
__strnlen (const char *str, size_t maxlen) STRNLEN (const char *str, size_t maxlen)
{ {
const char *char_ptr, *end_ptr = str + maxlen; const char *char_ptr, *end_ptr = str + maxlen;
const unsigned long int *longword_ptr; const unsigned long int *longword_ptr;
@ -157,5 +162,7 @@ __strnlen (const char *str, size_t maxlen)
char_ptr = end_ptr; char_ptr = end_ptr;
return char_ptr - str; return char_ptr - str;
} }
#ifndef STRNLEN
weak_alias (__strnlen, strnlen) weak_alias (__strnlen, strnlen)
#endif
libc_hidden_def (strnlen) libc_hidden_def (strnlen)

View File

@ -18,6 +18,7 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
wcscmp-sse2 wcscmp-c memchr-sse2 memchr-sse2-bsf \ wcscmp-sse2 wcscmp-c memchr-sse2 memchr-sse2-bsf \
memrchr-sse2 memrchr-sse2-bsf memrchr-c \ memrchr-sse2 memrchr-sse2-bsf memrchr-c \
rawmemchr-sse2 rawmemchr-sse2-bsf \ rawmemchr-sse2 rawmemchr-sse2-bsf \
strnlen-sse2 strnlen-c wcslen-sse2 wcslen-c \
wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c
ifeq (yes,$(config-cflags-sse4)) ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c

View File

@ -18,10 +18,19 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */ 02111-1307 USA. */
#if (defined USE_AS_STRCAT || defined SHARED) && !defined NOT_IN_libc /* for strlen only SHARED version is optimized, for strcat, strncat, strnlen both STATIC and SHARED are optimized */
#if (defined USE_AS_STRNLEN || defined USE_AS_STRCAT || defined SHARED) && !defined NOT_IN_libc
# ifndef USE_AS_STRCAT # ifndef USE_AS_STRCAT
# include <sysdep.h> # include <sysdep.h>
# define PARMS 4
# define STR PARMS
# define RETURN ret
# ifdef USE_AS_STRNLEN
# define LEN PARMS + 8
# define CFI_PUSH(REG) \ # define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \ cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0) cfi_rel_offset (REG, 0)
@ -32,15 +41,23 @@
# define PUSH(REG) pushl REG; CFI_PUSH (REG) # define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG) # define POP(REG) popl REG; CFI_POP (REG)
# define PARMS 4 # undef RETURN
# define STR PARMS # define RETURN POP (%edi); CFI_PUSH(%edi); ret
# define ENTRANCE # endif
# define RETURN ret
# ifndef STRLEN
# define STRLEN __strlen_sse2
# endif
atom_text_section atom_text_section
ENTRY (__strlen_sse2) ENTRY (STRLEN)
ENTRANCE
mov STR(%esp), %edx mov STR(%esp), %edx
# ifdef USE_AS_STRNLEN
PUSH (%edi)
movl LEN(%esp), %edi
sub $4, %edi
jbe L(len_less4_prolog)
# endif
# endif # endif
xor %eax, %eax xor %eax, %eax
cmpb $0, (%edx) cmpb $0, (%edx)
@ -51,6 +68,12 @@ ENTRY (__strlen_sse2)
jz L(exit_tail2) jz L(exit_tail2)
cmpb $0, 3(%edx) cmpb $0, 3(%edx)
jz L(exit_tail3) jz L(exit_tail3)
# ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less8_prolog)
# endif
cmpb $0, 4(%edx) cmpb $0, 4(%edx)
jz L(exit_tail4) jz L(exit_tail4)
cmpb $0, 5(%edx) cmpb $0, 5(%edx)
@ -59,6 +82,12 @@ ENTRY (__strlen_sse2)
jz L(exit_tail6) jz L(exit_tail6)
cmpb $0, 7(%edx) cmpb $0, 7(%edx)
jz L(exit_tail7) jz L(exit_tail7)
# ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less12_prolog)
# endif
cmpb $0, 8(%edx) cmpb $0, 8(%edx)
jz L(exit_tail8) jz L(exit_tail8)
cmpb $0, 9(%edx) cmpb $0, 9(%edx)
@ -67,6 +96,12 @@ ENTRY (__strlen_sse2)
jz L(exit_tail10) jz L(exit_tail10)
cmpb $0, 11(%edx) cmpb $0, 11(%edx)
jz L(exit_tail11) jz L(exit_tail11)
# ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less16_prolog)
# endif
cmpb $0, 12(%edx) cmpb $0, 12(%edx)
jz L(exit_tail12) jz L(exit_tail12)
cmpb $0, 13(%edx) cmpb $0, 13(%edx)
@ -75,11 +110,18 @@ ENTRY (__strlen_sse2)
jz L(exit_tail14) jz L(exit_tail14)
cmpb $0, 15(%edx) cmpb $0, 15(%edx)
jz L(exit_tail15) jz L(exit_tail15)
pxor %xmm0, %xmm0 pxor %xmm0, %xmm0
mov %edx, %eax lea 16(%edx), %eax
lea 16(%edx), %ecx mov %eax, %ecx
and $-16, %eax and $-16, %eax
add $16, %eax
# ifdef USE_AS_STRNLEN
and $15, %edx
add %edx, %edi
sub $64, %edi
jbe L(len_less64)
# endif
pcmpeqb (%eax), %xmm0 pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx pmovmskb %xmm0, %edx
@ -95,7 +137,6 @@ ENTRY (__strlen_sse2)
lea 16(%eax), %eax lea 16(%eax), %eax
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm2 pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3 pxor %xmm3, %xmm3
@ -109,29 +150,10 @@ ENTRY (__strlen_sse2)
lea 16(%eax), %eax lea 16(%eax), %eax
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm0 # ifdef USE_AS_STRNLEN
pmovmskb %xmm0, %edx sub $64, %edi
test %edx, %edx jbe L(len_less64)
lea 16(%eax), %eax # endif
jnz L(exit)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm0 pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx pmovmskb %xmm0, %edx
@ -157,6 +179,11 @@ ENTRY (__strlen_sse2)
lea 16(%eax), %eax lea 16(%eax), %eax
jnz L(exit) jnz L(exit)
# ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
# endif
pcmpeqb (%eax), %xmm0 pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx pmovmskb %xmm0, %edx
test %edx, %edx test %edx, %edx
@ -181,8 +208,49 @@ ENTRY (__strlen_sse2)
lea 16(%eax), %eax lea 16(%eax), %eax
jnz L(exit) jnz L(exit)
# ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
# endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
# ifdef USE_AS_STRNLEN
mov %eax, %edx
and $63, %edx
add %edx, %edi
# endif
and $-0x40, %eax and $-0x40, %eax
L(aligned_64):
.p2align 4
L(aligned_64_loop):
# ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
# endif
movaps (%eax), %xmm0 movaps (%eax), %xmm0
movaps 16(%eax), %xmm1 movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2 movaps 32(%eax), %xmm2
@ -194,7 +262,7 @@ L(aligned_64):
pmovmskb %xmm2, %edx pmovmskb %xmm2, %edx
test %edx, %edx test %edx, %edx
lea 64(%eax), %eax lea 64(%eax), %eax
jz L(aligned_64) jz L(aligned_64_loop)
pcmpeqb -64(%eax), %xmm3 pcmpeqb -64(%eax), %xmm3
pmovmskb %xmm3, %edx pmovmskb %xmm3, %edx
@ -221,55 +289,347 @@ L(exit):
sub %ecx, %eax sub %ecx, %eax
test %dl, %dl test %dl, %dl
jz L(exit_high) jz L(exit_high)
mov %dl, %cl
and $15, %cl
jz L(exit_8)
test $0x01, %dl test $0x01, %dl
jnz L(exit_tail0) jnz L(exit_tail0)
test $0x02, %dl test $0x02, %dl
jnz L(exit_tail1) jnz L(exit_tail1)
test $0x04, %dl test $0x04, %dl
jnz L(exit_tail2) jnz L(exit_tail2)
add $3, %eax
RETURN
test $0x08, %dl .p2align 4
jnz L(exit_tail3) L(exit_8):
test $0x10, %dl test $0x10, %dl
jnz L(exit_tail4) jnz L(exit_tail4)
test $0x20, %dl test $0x20, %dl
jnz L(exit_tail5) jnz L(exit_tail5)
test $0x40, %dl test $0x40, %dl
jnz L(exit_tail6) jnz L(exit_tail6)
add $7, %eax add $7, %eax
RETURN
.p2align 4
L(exit_high):
mov %dh, %ch
and $15, %ch
jz L(exit_high_8)
test $0x01, %dh
jnz L(exit_tail8)
test $0x02, %dh
jnz L(exit_tail9)
test $0x04, %dh
jnz L(exit_tail10)
add $11, %eax
RETURN
.p2align 4
L(exit_high_8):
test $0x10, %dh
jnz L(exit_tail12)
test $0x20, %dh
jnz L(exit_tail13)
test $0x40, %dh
jnz L(exit_tail14)
add $15, %eax
L(exit_tail0): L(exit_tail0):
RETURN RETURN
L(exit_high): # ifdef USE_AS_STRNLEN
add $8, %eax
test $0x01, %dh
jnz L(exit_tail0)
test $0x02, %dh .p2align 4
jnz L(exit_tail1) L(len_less64):
pxor %xmm0, %xmm0
add $64, %edi
test $0x04, %dh pcmpeqb (%eax), %xmm0
jnz L(exit_tail2) pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
test $0x08, %dh sub $16, %edi
jnz L(exit_tail3) jbe L(return_start_len)
test $0x10, %dh pcmpeqb (%eax), %xmm1
jnz L(exit_tail4) pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
test $0x20, %dh sub $16, %edi
jnz L(exit_tail5) jbe L(return_start_len)
test $0x40, %dh pcmpeqb (%eax), %xmm0
jnz L(exit_tail6) pmovmskb %xmm0, %edx
add $7, %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
sub $16, %edi
jbe L(return_start_len)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
movl LEN(%esp), %eax
RETURN RETURN
.p2align 4
L(strnlen_exit):
sub %ecx, %eax
test %dl, %dl
jz L(strnlen_exit_high)
mov %dl, %cl
and $15, %cl
jz L(strnlen_exit_8)
test $0x01, %dl
jnz L(exit_tail0)
test $0x02, %dl
jnz L(strnlen_exit_tail1)
test $0x04, %dl
jnz L(strnlen_exit_tail2)
sub $4, %edi
jb L(return_start_len)
lea 3(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_8):
test $0x10, %dl
jnz L(strnlen_exit_tail4)
test $0x20, %dl
jnz L(strnlen_exit_tail5)
test $0x40, %dl
jnz L(strnlen_exit_tail6)
sub $8, %edi
jb L(return_start_len)
lea 7(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_high):
mov %dh, %ch
and $15, %ch
jz L(strnlen_exit_high_8)
test $0x01, %dh
jnz L(strnlen_exit_tail8)
test $0x02, %dh
jnz L(strnlen_exit_tail9)
test $0x04, %dh
jnz L(strnlen_exit_tail10)
sub $12, %edi
jb L(return_start_len)
lea 11(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_high_8):
test $0x10, %dh
jnz L(strnlen_exit_tail12)
test $0x20, %dh
jnz L(strnlen_exit_tail13)
test $0x40, %dh
jnz L(strnlen_exit_tail14)
sub $16, %edi
jb L(return_start_len)
lea 15(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail1):
sub $2, %edi
jb L(return_start_len)
lea 1(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail2):
sub $3, %edi
jb L(return_start_len)
lea 2(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail4):
sub $5, %edi
jb L(return_start_len)
lea 4(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail5):
sub $6, %edi
jb L(return_start_len)
lea 5(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail6):
sub $7, %edi
jb L(return_start_len)
lea 6(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail8):
sub $9, %edi
jb L(return_start_len)
lea 8(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail9):
sub $10, %edi
jb L(return_start_len)
lea 9(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail10):
sub $11, %edi
jb L(return_start_len)
lea 10(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail12):
sub $13, %edi
jb L(return_start_len)
lea 12(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail13):
sub $14, %edi
jb L(return_start_len)
lea 13(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail14):
sub $15, %edi
jb L(return_start_len)
lea 14(%eax), %eax
RETURN
.p2align 4
L(return_start_len):
movl LEN(%esp), %eax
RETURN
/* for prolog only */
.p2align 4
L(len_less4_prolog):
xor %eax, %eax
add $4, %edi
jz L(exit_tail0)
cmpb $0, (%edx)
jz L(exit_tail0)
cmp $1, %edi
je L(exit_tail1)
cmpb $0, 1(%edx)
jz L(exit_tail1)
cmp $2, %edi
je L(exit_tail2)
cmpb $0, 2(%edx)
jz L(exit_tail2)
cmp $3, %edi
je L(exit_tail3)
cmpb $0, 3(%edx)
jz L(exit_tail3)
mov $4, %eax
RETURN
.p2align 4
L(len_less8_prolog):
add $4, %edi
cmpb $0, 4(%edx)
jz L(exit_tail4)
cmp $1, %edi
je L(exit_tail5)
cmpb $0, 5(%edx)
jz L(exit_tail5)
cmp $2, %edi
je L(exit_tail6)
cmpb $0, 6(%edx)
jz L(exit_tail6)
cmp $3, %edi
je L(exit_tail7)
cmpb $0, 7(%edx)
jz L(exit_tail7)
mov $8, %eax
RETURN
.p2align 4
L(len_less12_prolog):
add $4, %edi
cmpb $0, 8(%edx)
jz L(exit_tail8)
cmp $1, %edi
je L(exit_tail9)
cmpb $0, 9(%edx)
jz L(exit_tail9)
cmp $2, %edi
je L(exit_tail10)
cmpb $0, 10(%edx)
jz L(exit_tail10)
cmp $3, %edi
je L(exit_tail11)
cmpb $0, 11(%edx)
jz L(exit_tail11)
mov $12, %eax
RETURN
.p2align 4
L(len_less16_prolog):
add $4, %edi
cmpb $0, 12(%edx)
jz L(exit_tail12)
cmp $1, %edi
je L(exit_tail13)
cmpb $0, 13(%edx)
jz L(exit_tail13)
cmp $2, %edi
je L(exit_tail14)
cmpb $0, 14(%edx)
jz L(exit_tail14)
cmp $3, %edi
je L(exit_tail15)
cmpb $0, 15(%edx)
jz L(exit_tail15)
mov $16, %eax
RETURN
# endif
.p2align 4 .p2align 4
L(exit_tail1): L(exit_tail1):
add $1, %eax add $1, %eax
@ -330,7 +690,7 @@ L(exit_tail14):
L(exit_tail15): L(exit_tail15):
add $15, %eax add $15, %eax
# ifndef USE_AS_STRCAT # ifndef USE_AS_STRCAT
ret RETURN
END (__strlen_sse2) END (STRLEN)
# endif # endif
#endif #endif

View File

@ -0,0 +1,8 @@
/* Build the generic C strnlen under the name __strnlen_ia32 so that it
   can coexist with the SSE2 version selected at run time.  */
#ifndef NOT_IN_libc
# define STRNLEN __strnlen_ia32
/* string/strnlen.c ends with libc_hidden_def (strnlen).  Replace that
   macro so the hidden alias __GI_strnlen is bound to __strnlen_ia32.
   The macro being replaced is libc_hidden_def, so that is the one that
   must be undefined first; undefining libc_hidden_builtin_def instead
   left the old definition in place and redefined it on the next line.  */
# undef libc_hidden_def
# define libc_hidden_def(name) \
  __hidden_ver1 (__strnlen_ia32, __GI_strnlen, __strnlen_ia32);
#endif
#include "string/strnlen.c"

View File

@ -0,0 +1,3 @@
/* strnlen with SSE2: reuse the strlen-sse2.S source.  USE_AS_STRNLEN
   turns on the length-limited code paths in that file, and STRLEN
   supplies the name of the function it defines.  */
#define USE_AS_STRNLEN
#define STRLEN __strnlen_sse2
#include "strlen-sse2.S"

View File

@ -0,0 +1,56 @@
/* Multiple versions of strnlen
Copyright (C) 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
#include <init-arch.h>
#ifndef NOT_IN_libc
/* PC thunk: copies the caller's return address (the word at the top of
   the stack) into %ebx.  Placed in a linkonce section and marked
   hidden.  */
.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
.globl __i686.get_pc_thunk.bx
.hidden __i686.get_pc_thunk.bx
.p2align 4
.type __i686.get_pc_thunk.bx,@function
__i686.get_pc_thunk.bx:
movl (%esp), %ebx
ret
.text
/* Selector for strnlen.  The symbol is typed @gnu_indirect_function,
   and the code leaves the address of the chosen implementation in
   %eax: __strnlen_sse2 when the SSE2 bit is set in __cpu_features,
   __strnlen_ia32 otherwise.  %ebx is saved and restored around its use
   as the GOT base.  */
ENTRY(__strnlen)
.type __strnlen, @gnu_indirect_function
pushl %ebx
cfi_adjust_cfa_offset (4)
cfi_rel_offset (ebx, 0)
/* %ebx = address of the GOT, used for the @GOTOFF references below.  */
call __i686.get_pc_thunk.bx
addl $_GLOBAL_OFFSET_TABLE_, %ebx
/* A zero word at KIND_OFFSET means __init_cpu_features still has to be
   called before the feature bits are tested.  */
cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
jne 1f
call __init_cpu_features
/* Start with the generic version; leal does not change the flags.  */
1: leal __strnlen_ia32@GOTOFF(%ebx), %eax
testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
jz 2f
/* SSE2 bit set: return the SSE2 version instead.  */
leal __strnlen_sse2@GOTOFF(%ebx), %eax
2: popl %ebx
cfi_adjust_cfa_offset (-4);
cfi_restore (ebx)
ret
END(__strnlen)
/* strnlen is a weak alias of the selector symbol.  */
weak_alias(__strnlen, strnlen)
#endif

View File

@ -0,0 +1,5 @@
/* Build the generic C wcslen under the name __wcslen_ia32: the included
   file names its function with the WCSLEN macro when that is already
   defined.  */
#ifndef NOT_IN_libc
# define WCSLEN __wcslen_ia32
#endif
#include "wcsmbs/wcslen.c"

View File

@ -0,0 +1,194 @@
/* wcslen with SSE2
Copyright (C) 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#ifndef NOT_IN_libc
# include <sysdep.h>

/* Offset from %esp of the only argument, the wide-string pointer.
   Nothing is pushed in this function, so the offset never changes.  */
# define STR	4

/* size_t __wcslen_sse2 (const wchar_t *s)

   Returns in %eax the number of 32-bit elements that precede the first
   zero element of S.

   Register roles:
     %edx   string pointer in the prologue, afterwards the pmovmskb mask
     %eax   address just past the 16-byte block most recently examined
     %ecx   bias such that, at L(exit), %eax - %ecx is the byte offset
	    from S of the block that produced the mask
     %xmm0-%xmm3, %xmm6   scratch

   NOTE(review): the `shr $2' at L(exit) presumes S is 4-byte aligned,
   so that block offsets from S are multiples of 4 -- confirm.  */
	.text
ENTRY (__wcslen_sse2)
	mov	STR(%esp), %edx

	/* Check the first eight elements (32 bytes) one by one.  The
	   operand size is written out as `cmpl': an immediate compared
	   with memory has no register operand to take the size from,
	   and the comparison has to cover the whole 32-bit element.  */
	cmpl	$0, (%edx)
	jz	L(exit_tail0)
	cmpl	$0, 4(%edx)
	jz	L(exit_tail1)
	cmpl	$0, 8(%edx)
	jz	L(exit_tail2)
	cmpl	$0, 12(%edx)
	jz	L(exit_tail3)
	cmpl	$0, 16(%edx)
	jz	L(exit_tail4)
	cmpl	$0, 20(%edx)
	jz	L(exit_tail5)
	cmpl	$0, 24(%edx)
	jz	L(exit_tail6)
	cmpl	$0, 28(%edx)
	jz	L(exit_tail7)

	/* %eax = (S + 32) rounded down to 16 bytes, the first aligned
	   block that can hold an element not examined above.
	   %ecx = S + 16 compensates for the `lea 16(%eax), %eax' that
	   is executed before each jump to L(exit).  */
	pxor	%xmm0, %xmm0
	lea	32(%edx), %eax
	lea	16(%edx), %ecx
	and	$-16, %eax

	/* Four single-block checks.  pcmpeqd against a zeroed register
	   marks every zero element; pmovmskb collects the result in
	   %edx.  lea leaves the flags set by `test' untouched.  */
	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* Round down to a 64-byte boundary for the main loop; blocks
	   already examined may be examined again.  %xmm3 is all zero
	   here because the compare into it found no match.  */
	and	$-0x40, %eax

	.p2align 4
L(aligned_64_loop):
	/* Fold four blocks with a bytewise minimum and test the fold
	   for a zero element.  A zero element in any block survives
	   the fold.  The fold can also show a zero element made of
	   bytes from different blocks, so a hit is confirmed block by
	   block below and the loop is resumed if none matches.  */
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	64(%eax), %eax
	jz	L(aligned_64_loop)

	/* %eax is now 64 past the start of the group.  %ecx is raised
	   by 48 for the first block and lowered by 16 for each later
	   one, so %eax - %ecx at L(exit) names the matching block and
	   the net change is zero if all four checks fail.  Each failed
	   compare also leaves %xmm3 all zero for the next use.  */
	pcmpeqd	-64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	48(%ecx), %ecx
	jnz	L(exit)

	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	pcmpeqd	-32(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	pcmpeqd	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)
	jmp	L(aligned_64_loop)

	/* %edx holds a non-zero mask with four bits per element:
	   low/high nibble of %dl for elements 0/1 of the block,
	   low/high nibble of %dh for elements 2/3.  */
	.p2align 4
L(exit):
	sub	%ecx, %eax		/* byte offset of the block from S */
	shr	$2, %eax		/* ... converted to elements */
	test	%dl, %dl
	jz	L(exit_high)

	mov	%dl, %cl
	and	$15, %cl
	jz	L(exit_1)
	ret				/* element 0 of the block */

	.p2align 4
L(exit_high):
	mov	%dh, %ch
	and	$15, %ch
	jz	L(exit_3)
	add	$2, %eax		/* element 2 of the block */
	ret

	.p2align 4
L(exit_1):
	add	$1, %eax		/* element 1 of the block */
	ret

	.p2align 4
L(exit_3):
	add	$3, %eax		/* element 3 of the block */
	ret

	/* Prologue exits: the terminator is one of the first eight
	   elements, so the length is a constant.  */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	ret

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	ret

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	ret

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	ret

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	ret

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	ret

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	ret

	.p2align 4
L(exit_tail7):
	mov	$7, %eax
	ret

END (__wcslen_sse2)
#endif

View File

@ -0,0 +1,56 @@
/* Multiple versions of wcslen
Copyright (C) 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
#include <init-arch.h>
#ifndef NOT_IN_libc
/* PC thunk: copies the caller's return address (the word at the top of
   the stack) into %ebx.  Placed in a linkonce section and marked
   hidden.  */
.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
.globl __i686.get_pc_thunk.bx
.hidden __i686.get_pc_thunk.bx
.p2align 4
.type __i686.get_pc_thunk.bx,@function
__i686.get_pc_thunk.bx:
movl (%esp), %ebx
ret
.text
/* Selector for wcslen.  The symbol is typed @gnu_indirect_function,
   and the code leaves the address of the chosen implementation in
   %eax: __wcslen_sse2 when the SSE2 bit is set in __cpu_features,
   __wcslen_ia32 otherwise.  %ebx is saved and restored around its use
   as the GOT base.  */
ENTRY(__wcslen)
.type __wcslen, @gnu_indirect_function
pushl %ebx
cfi_adjust_cfa_offset (4)
cfi_rel_offset (ebx, 0)
/* %ebx = address of the GOT, used for the @GOTOFF references below.  */
call __i686.get_pc_thunk.bx
addl $_GLOBAL_OFFSET_TABLE_, %ebx
/* A zero word at KIND_OFFSET means __init_cpu_features still has to be
   called before the feature bits are tested.  */
cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
jne 1f
call __init_cpu_features
/* Start with the generic version; leal does not change the flags.  */
1: leal __wcslen_ia32@GOTOFF(%ebx), %eax
testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
jz 2f
/* SSE2 bit set: return the SSE2 version instead.  */
leal __wcslen_sse2@GOTOFF(%ebx), %eax
2: popl %ebx
cfi_adjust_cfa_offset (-4);
cfi_restore (ebx)
ret
END(__wcslen)
/* wcslen is a weak alias of the selector symbol.  */
weak_alias(__wcslen, wcslen)
#endif

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. /* Copyright (C) 1995, 1996, 1997, 1998, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
@ -19,10 +19,13 @@
#include <wchar.h> #include <wchar.h>
/* Return length of string S. */ /* Return length of string S. */
#ifndef WCSLEN
# define WCSLEN __wcslen
#endif
size_t size_t
__wcslen (s) WCSLEN (s)
const wchar_t *s; const wchar_t *s;
{ {
size_t len = 0; size_t len = 0;
@ -40,4 +43,6 @@ __wcslen (s)
return len; return len;
} }
#ifndef WCSLEN
weak_alias (__wcslen, wcslen) weak_alias (__wcslen, wcslen)
#endif