Optimized strcasecmp for Power7

This commit is contained in:
Adhemerval Zanella 2011-12-17 20:32:59 -05:00 committed by Ulrich Drepper
parent 36b1a74da5
commit f0b264f174
10 changed files with 298 additions and 1 deletions

View File

@ -1,3 +1,16 @@
2011-11-22 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
* sysdeps/powerpc/Makefile: Added locale-defines.sym generation.
* sysdeps/powerpc/locale-defines.sym: Locale definitions for strcasecmp
optimized code.
* sysdeps/powerpc/powerpc32/power7/Makefile: New file.
* sysdeps/powerpc/powerpc32/power7/strcasecmp.S: New file.
* sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S: New file.
* sysdeps/powerpc/powerpc64/power7/Makefile: Added unroll-loop option
for strncasecmp/strncasecmp_l compilation.
* sysdeps/powerpc/powerpc64/power7/strcasecmp.S: New file.
* sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S: New file.
2011-12-08 Marek Polacek <mpolacek@redhat.com>
[BZ #13484]

3
NEWS
View File

@ -62,7 +62,8 @@ Version 2.15
* Optimized strcasecmp and strncasecmp for SSSE3 and SSE4.2 on x86-32.
Implemented by Ulrich Drepper.
* Optimized nearbyint for PPC. Implemented by Adhemerval Zanella.
* Optimized nearbyint and strcasecmp for PPC.
Implemented by Adhemerval Zanella.
Version 2.14

View File

@ -23,4 +23,6 @@ endif
ifeq ($(subdir),csu)
# get offset to rtld_global._dl_hwcap
gen-as-const-headers += rtld-global-offsets.sym
# get offset to __locale_struct.__ctype_tolower (LOCALE_CTYPE_TOLOWER),
# which the assembly strcasecmp implementations use to index the locale
gen-as-const-headers += locale-defines.sym
endif

View File

@ -0,0 +1,5 @@
#include <locale/localeinfo.h>
--
LOCALE_CTYPE_TOLOWER offsetof (struct __locale_struct, __ctype_tolower)

View File

@ -0,0 +1,4 @@
# In the string subdirectory, compile the C strncasecmp sources
# (strncase.c and strncase_l.c) with loop unrolling enabled.
ifeq ($(subdir),string)
CFLAGS-strncase.c += -funroll-loops
CFLAGS-strncase_l.c += -funroll-loops
endif

View File

@ -0,0 +1,132 @@
/* Optimized strcasecmp implementation for PowerPC32.
Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
#include <bp-sym.h>
#include <bp-asm.h>
#include <locale-defines.h>
/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] )
or if defined USE_IN_EXTENDED_LOCALE_MODEL:
int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4],
__locale_t loc [r5]) */
#ifndef STRCMP
# define __STRCMP __strcasecmp
# define STRCMP strcasecmp
#endif
ENTRY (BP_SYM (__STRCMP))

/* Case-insensitive comparison of the NUL-terminated strings s1 [r3] and
   s2 [r4].  Each byte is mapped through the locale's __ctype_tolower
   table (4-byte entries) and the mapped values are compared.

   Returns in r3: 0 if the strings are equal (or s1 == s2), otherwise
   tolower(*s1) - tolower(*s2) at the first position where they differ.

   The locale comes from __libc_tsd_LOCALE, or from the third argument
   [r5] when USE_IN_EXTENDED_LOCALE_MODEL is defined.

   Registers written: r0, r3, r5-r9, r11, r12, cr0, cr7.  */

/* rSTR1 and rLOCARG intentionally share r5: the locale argument is
   copied into rLOC before s1 is moved from r3 into r5.  */
#define rRTN	r3	/* Return value */
#define rSTR1	r5	/* 1st string */
#define rSTR2	r4	/* 2nd string */
#define rLOCARG	r5	/* 3rd argument: locale_t */
#define rCHAR1	r6	/* Byte read from 1st string */
#define rCHAR2	r7	/* Byte read from 2nd string */
#define rADDR1	r8	/* Table offset (rCHAR1 * 4) of tolower(rCHAR1) */
#define rADDR2	r12	/* Table offset (rCHAR2 * 4) of tolower(rCHAR2) */
#define rLWR1	r8	/* Word tolower(rCHAR1) */
#define rLWR2	r12	/* Word tolower(rCHAR2) */
#define rTMP	r0
#define rGOT	r9	/* Address of the Global Offset Table */
#define rLOC	r11	/* Locale pointer, then its tolower table */

	/* s1 == s2 ?  The result stays in cr7 until the beqlr below.  */
	cmpw	cr7, r3, r4
#ifndef USE_IN_EXTENDED_LOCALE_MODEL
# ifdef SHARED
	/* PIC: obtain the GOT address relative to .L1, preserving LR.  */
	mflr	rTMP
	bcl	20,31,.L1
.L1:	mflr	rGOT
	addis	rGOT, rGOT, _GLOBAL_OFFSET_TABLE_-.L1@ha
	addi	rGOT, rGOT, _GLOBAL_OFFSET_TABLE_-.L1@l
	lwz	rLOC, __libc_tsd_LOCALE@got@tprel(rGOT)
	add	rLOC, rLOC, __libc_tsd_LOCALE@tls
	lwz	rLOC, 0(rLOC)
	mtlr	rTMP
# else
	/* Non-PIC: build the absolute GOT address in rGOT, the register the
	   TLS load below indexes through.  r0 must not be used as the base
	   of 'la', because a base of r0 is read as the constant 0.  */
	lis	rGOT, _GLOBAL_OFFSET_TABLE_@ha
	la	rGOT, _GLOBAL_OFFSET_TABLE_@l(rGOT)
	lwz	rLOC, __libc_tsd_LOCALE@got@tprel(rGOT)
	add	rLOC, rLOC, __libc_tsd_LOCALE@tls
	lwz	rLOC, 0(rLOC)
# endif /* SHARED */
#else
	mr	rLOC, rLOCARG
#endif
	mr	rSTR1, rRTN
	lwz	rLOC, LOCALE_CTYPE_TOLOWER(rLOC)	/* rLOC = locale->__ctype_tolower */
	li	rRTN, 0
	beqlr	cr7			/* Same pointer: return 0 */

	/* Loop unrolled four times: bytes are loaded with 'lbz' plus a
	   displacement and both string pointers are advanced only once,
	   in the last step of each pass.
	   NOTE(review): 'sldi' is the 64-bit shift form; 'slwi' would be the
	   32-bit equivalent -- confirm it is intended for this 32-bit file.  */
L(loop):
	lbz	rCHAR1, 0(rSTR1)	/* Load char from s1 */
	lbz	rCHAR2, 0(rSTR2)	/* Load char from s2 */
	sldi	rADDR1, rCHAR1, 2	/* Table offset for tolower(*s1) */
	sldi	rADDR2, rCHAR2, 2	/* Table offset for tolower(*s2) */
	lwzx	rLWR1, rLOC, rADDR1	/* Load tolower(*s1) */
	lwzx	rLWR2, rLOC, rADDR2	/* Load tolower(*s2) */
	cmpwi	cr7, rCHAR1, 0		/* *s1 == '\0' ? */
	subf.	r3, rLWR2, rLWR1	/* r3 = tolower(*s1) - tolower(*s2) */
	bnelr				/* Differ: return the difference */
	beqlr	cr7			/* Equal and end of s1: return 0 */

	lbz	rCHAR1, 1(rSTR1)
	lbz	rCHAR2, 1(rSTR2)
	sldi	rADDR1, rCHAR1, 2
	sldi	rADDR2, rCHAR2, 2
	lwzx	rLWR1, rLOC, rADDR1
	lwzx	rLWR2, rLOC, rADDR2
	cmpwi	cr7, rCHAR1, 0
	subf.	r3, rLWR2, rLWR1
	bnelr
	beqlr	cr7

	lbz	rCHAR1, 2(rSTR1)
	lbz	rCHAR2, 2(rSTR2)
	sldi	rADDR1, rCHAR1, 2
	sldi	rADDR2, rCHAR2, 2
	lwzx	rLWR1, rLOC, rADDR1
	lwzx	rLWR2, rLOC, rADDR2
	cmpwi	cr7, rCHAR1, 0
	subf.	r3, rLWR2, rLWR1
	bnelr
	beqlr	cr7

	lbz	rCHAR1, 3(rSTR1)
	lbz	rCHAR2, 3(rSTR2)
	/* Advance both string pointers past the four bytes just handled */
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
	sldi	rADDR1, rCHAR1, 2
	sldi	rADDR2, rCHAR2, 2
	lwzx	rLWR1, rLOC, rADDR1
	lwzx	rLWR2, rLOC, rADDR2
	cmpwi	cr7, rCHAR1, 0
	subf.	r3, rLWR2, rLWR1
	bnelr
	bne	cr7,L(loop)		/* Not at end of s1: next four bytes */
	blr				/* End of s1 and equal: r3 is 0 */
END (BP_SYM (__STRCMP))
weak_alias (BP_SYM (__STRCMP), BP_SYM (STRCMP))
libc_hidden_builtin_def (__STRCMP)

View File

@ -0,0 +1,5 @@
/* strcasecmp_l for PowerPC32: reuse the strcasecmp.S implementation.
   Defining USE_IN_EXTENDED_LOCALE_MODEL makes it take the locale from the
   third argument instead of loading __libc_tsd_LOCALE.  */
#define USE_IN_EXTENDED_LOCALE_MODEL
#define STRCMP strcasecmp_l
#define __STRCMP __strcasecmp_l
#include "strcasecmp.S"

View File

@ -3,3 +3,8 @@ ifeq ($(subdir),elf)
# optimization may require a TOC reference before relocations are resolved.
CFLAGS-rtld.c += -mno-vsx
endif
# In the string subdirectory, compile the C strncasecmp sources
# (strncase.c and strncase_l.c) with loop unrolling enabled.
ifeq ($(subdir),string)
CFLAGS-strncase.c += -funroll-loops
CFLAGS-strncase_l.c += -funroll-loops
endif

View File

@ -0,0 +1,125 @@
/* Optimized strcasecmp implementation for PowerPC64.
Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
#include <bp-sym.h>
#include <bp-asm.h>
#include <locale-defines.h>
/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] )
or if defined USE_IN_EXTENDED_LOCALE_MODEL:
int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4],
__locale_t loc [r5]) */
#ifndef STRCMP
# define __STRCMP __strcasecmp
# define STRCMP strcasecmp
#endif
ENTRY (BP_SYM (__STRCMP))
/* Case-insensitive comparison of the NUL-terminated strings s1 [r3] and
   s2 [r4].  Each byte is mapped through the locale's __ctype_tolower
   table (4-byte entries) and the mapped values are compared.  Returns in
   r3 either 0 (strings equal, or s1 == s2) or tolower(*s1) - tolower(*s2)
   at the first position where they differ.  The locale comes from
   __libc_tsd_LOCALE, or from the third argument [r5] when
   USE_IN_EXTENDED_LOCALE_MODEL is defined.  */
/* NOTE(review): the strcasecmp_l variant takes three arguments (locale in
   r5) but the count passed here is always 2 -- confirm that CALL_MCOUNT
   preserves r5 in profiling builds.  */
CALL_MCOUNT 2
/* rSTR1 and rLOCARG intentionally share r5: the locale argument is copied
   into rLOC before s1 is moved from r3 into r5.  */
#define rRTN r3 /* Return value */
#define rSTR1 r5 /* 1st string */
#define rSTR2 r4 /* 2nd string */
#define rLOCARG r5 /* 3rd argument: locale_t */
#define rCHAR1 r6 /* Byte read from 1st string */
#define rCHAR2 r7 /* Byte read from 2nd string */
#define rADDR1 r8 /* Table offset (rCHAR1 * 4) of tolower(rCHAR1) */
#define rADDR2 r12 /* Table offset (rCHAR2 * 4) of tolower(rCHAR2) */
#define rLWR1 r8 /* Word tolower(rCHAR1) */
#define rLWR2 r12 /* Word tolower(rCHAR2) */
#define rTMP r9
#define rLOC r11 /* Locale pointer, then its tolower table */
/* s1 == s2 ?  The result stays in cr7 until the beqlr below.  */
cmpd cr7, r3, r4
#ifndef USE_IN_EXTENDED_LOCALE_MODEL
/* Load the locale pointer stored in the TLS variable __libc_tsd_LOCALE,
   addressed through the TOC pointer in r2.  */
ld rTMP, __libc_tsd_LOCALE@got@tprel(r2)
add rLOC, rTMP, __libc_tsd_LOCALE@tls
ld rLOC, 0(rLOC)
#else
mr rLOC, rLOCARG
#endif
ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) /* rLOC = locale->__ctype_tolower */
mr rSTR1, rRTN
li rRTN, 0
beqlr cr7 /* Same pointer: return 0 */
/* Loop unrolled four times: bytes are loaded with 'lbz' plus a
displacement and both string pointers are advanced only once, in the
last step of each pass.  The first byte pair of each pass is loaded
before entering L(loop).  */
lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
L(loop):
cmpdi rCHAR1, 0 /* cr0.eq = (*s1 == '\0') */
sldi rADDR1, rCHAR1, 2 /* Table offset for tolower(*s1) */
sldi rADDR2, rCHAR2, 2 /* Table offset for tolower(*s2) */
lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */
lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */
cmpw cr1, rLWR1, rLWR2 /* cr1.eq = (tolower(*s1) == tolower(*s2)) */
crorc 4*cr1+eq,eq,4*cr1+eq /* cr1.eq = (*s1 == '\0') || (tolower(*s1) != tolower(*s2)) */
beq cr1, L(done) /* End of s1 or mismatch: compute the result */
lbz rCHAR1, 1(rSTR1)
lbz rCHAR2, 1(rSTR2)
cmpdi rCHAR1, 0
sldi rADDR1, rCHAR1, 2
sldi rADDR2, rCHAR2, 2
lwzx rLWR1, rLOC, rADDR1
lwzx rLWR2, rLOC, rADDR2
cmpw cr1, rLWR1, rLWR2
crorc 4*cr1+eq,eq,4*cr1+eq
beq cr1, L(done)
lbz rCHAR1, 2(rSTR1)
lbz rCHAR2, 2(rSTR2)
cmpdi rCHAR1, 0
sldi rADDR1, rCHAR1, 2
sldi rADDR2, rCHAR2, 2
lwzx rLWR1, rLOC, rADDR1
lwzx rLWR2, rLOC, rADDR2
cmpw cr1, rLWR1, rLWR2
crorc 4*cr1+eq,eq,4*cr1+eq
beq cr1, L(done)
lbz rCHAR1, 3(rSTR1)
lbz rCHAR2, 3(rSTR2)
cmpdi rCHAR1, 0
/* Advance both string pointers past the four bytes just handled */
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
sldi rADDR1, rCHAR1, 2
sldi rADDR2, rCHAR2, 2
lwzx rLWR1, rLOC, rADDR1
lwzx rLWR2, rLOC, rADDR2
cmpw cr1, rLWR1, rLWR2
crorc 4*cr1+eq,eq,4*cr1+eq
beq cr1,L(done)
lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
b L(loop)
L(done):
/* Return tolower(*s1) - tolower(*s2), sign-extended from 32 bits; this is
   0 when the loop stopped at the end of s1 with equal mapped values.  */
subf r0, rLWR2, rLWR1
extsw rRTN, r0
blr
END (BP_SYM (__STRCMP))
weak_alias (BP_SYM (__STRCMP), BP_SYM (STRCMP))
libc_hidden_builtin_def (__STRCMP)

View File

@ -0,0 +1,5 @@
/* strcasecmp_l for PowerPC64: reuse the strcasecmp.S implementation.
   Defining USE_IN_EXTENDED_LOCALE_MODEL makes it take the locale from the
   third argument instead of loading __libc_tsd_LOCALE.  */
#define USE_IN_EXTENDED_LOCALE_MODEL
#define STRCMP strcasecmp_l
#define __STRCMP __strcasecmp_l
#include "strcasecmp.S"