* sysdeps/powerpc/powerpc64/strchr.S: 64-bit optimizations.

* sysdeps/powerpc/powerpc64/strlen.S: 64-bit optimizations.

	* sysdeps/powerpc/fpu/bits/mathdef.h (FLT_EVAL_METHOD): Undef before
	defining.
This commit is contained in:
Roland McGrath 2003-04-04 22:03:25 +00:00
parent 91613ed9d8
commit beb03cee27
3 changed files with 74 additions and 46 deletions

View File

@ -1,3 +1,11 @@
2003-04-04 Steven Munroe <sjmunroe@us.ibm.com>
* sysdeps/powerpc/powerpc64/strchr.S: 64-bit optimizations.
* sysdeps/powerpc/powerpc64/strlen.S: 64-bit optimizations.
* sysdeps/powerpc/fpu/bits/mathdef.h (FLT_EVAL_METHOD): Undef before
defining.
2003-04-04 Alexandre Oliva <aoliva@redhat.com>
* sysdeps/unix/sysv/linux/mips/bits/fcntl.h (struct flock): Adjust

View File

@ -1,5 +1,5 @@
/* Optimized strchr implementation for PowerPC64.
Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -29,6 +29,11 @@ ENTRY (BP_SYM (strchr))
#define rTMP1 r0
#define rRTN r3 /* outgoing result */
/* Note: The Bounded pointer support in this code is broken. This code
was inherited from PPC32 and that support was never completed.
Currently PPC gcc does not support -fbounds-check or -fbounded-pointers.
These artifacts are left in the code as a reminder in case we need
bounded pointer support in the future. */
#if __BOUNDED_POINTERS__
# define rSTR r4
# define rCHR r5 /* byte we're looking for, spread over the whole word */
@ -39,8 +44,8 @@ ENTRY (BP_SYM (strchr))
# define rWORD r5 /* the current word */
#endif
#define rCLZB rCHR /* leading zero byte count */
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
#define r7F7F r7 /* constant 0x7f7f7f7f */
#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
#define rTMP2 r9
#define rIGN r10 /* number of bits we should ignore in the first word */
#define rMASK r11 /* mask with the bits to ignore set to 0 */
@ -49,18 +54,23 @@ ENTRY (BP_SYM (strchr))
CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2)
STORE_RETURN_BOUNDS (rTMP1, rTMP2)
dcbt 0,rRTN
rlwimi rCHR, rCHR, 8, 16, 23
li rMASK, -1
rlwimi rCHR, rCHR, 16, 0, 15
rlwinm rIGN, rRTN, 3, 27, 28
rlwinm rIGN, rRTN, 3, 26, 28
insrdi rCHR, rCHR, 32, 0
lis rFEFE, -0x101
lis r7F7F, 0x7f7f
clrrdi rSTR, rRTN, 2
clrrdi rSTR, rRTN, 3
addi rFEFE, rFEFE, -0x101
addi r7F7F, r7F7F, 0x7f7f
sldi rTMP1, rFEFE, 32
insrdi r7F7F, r7F7F, 32, 0
add rFEFE, rFEFE, rTMP1
/* Test the first (partial?) word. */
lwz rWORD, 0(rSTR)
srw rMASK, rMASK, rIGN
ld rWORD, 0(rSTR)
srd rMASK, rMASK, rIGN
orc rWORD, rWORD, rMASK
add rTMP1, rFEFE, rWORD
nor rTMP2, r7F7F, rWORD
@ -71,7 +81,7 @@ ENTRY (BP_SYM (strchr))
/* The loop. */
L(loop):lwzu rWORD, 4(rSTR)
L(loop):ldu rWORD, 8(rSTR)
and. rTMP1, rTMP1, rTMP2
/* Test for 0. */
add rTMP1, rFEFE, rWORD
@ -104,12 +114,12 @@ L(missed):
add rTMP1, rTMP1, r7F7F
nor rWORD, rMASK, rFEFE
nor rTMP2, rIGN, rTMP1
cmplw rWORD, rTMP2
cmpld rWORD, rTMP2
bgtlr
cntlzw rCLZB, rTMP2
srwi rCLZB, rCLZB, 3
cntlzd rCLZB, rTMP2
srdi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
STORE_RETURN_VALUE (rSTR)
blr
@ -118,11 +128,11 @@ L(foundit):
or rIGN, r7F7F, rTMP3
add rTMP1, rTMP1, r7F7F
nor rTMP2, rIGN, rTMP1
cntlzw rCLZB, rTMP2
subi rSTR, rSTR, 4
srwi rCLZB, rCLZB, 3
cntlzd rCLZB, rTMP2
subi rSTR, rSTR, 8
srdi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
STORE_RETURN_VALUE (rSTR)
blr
END (BP_SYM (strchr))

View File

@ -1,5 +1,5 @@
/* Optimized strlen implementation for PowerPC64.
Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -60,7 +60,12 @@
2) How popular are bytes with the high bit set? If they are very rare,
on some processors it might be useful to use the simpler expression
~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
ALU), but this fails when any character has its high bit set. */
ALU), but this fails when any character has its high bit set.
Answer:
1) Added a Data Cache Block Touch early to prefetch the first 128
byte cache line. Adding dcbt instructions to the loop would not be
effective since most strings will be shorter than the cache line. */
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
0 and 3 through 12 (so long as we don't call any procedures) without
@ -80,63 +85,68 @@ ENTRY (BP_SYM (strlen))
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
string to make it start at a word boundary */
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
#define r7F7F r7 /* constant 0x7f7f7f7f */
#define rWORD1 r8 /* current string word */
#define rWORD2 r9 /* next string word */
#define rMASK r9 /* mask for first string word */
#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
#define rWORD1 r8 /* current string doubleword */
#define rWORD2 r9 /* next string doubleword */
#define rMASK r9 /* mask for first string doubleword */
#define rTMP2 r10
#define rTMP3 r11
#define rTMP4 r12
/* Note: The Bounded pointer support in this code is broken. This code
was inherited from PPC32 and that support was never completed.
Current PPC gcc does not support -fbounds-check or -fbounded-pointers.
These artifacts are left in the code as a reminder in case we need
bounded pointer support in the future. */
CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
clrrdi rSTR, rRTN, 2
dcbt 0,rRTN
clrrdi rSTR, rRTN, 3
lis r7F7F, 0x7f7f
rlwinm rPADN, rRTN, 3, 27, 28
lwz rWORD1, 0(rSTR)
li rMASK, -1
rlwinm rPADN, rRTN, 3, 26, 28
ld rWORD1, 0(rSTR)
addi r7F7F, r7F7F, 0x7f7f
/* That's the setup done, now do the first pair of words.
We make an exception and use method (2) on the first two words, to reduce
overhead. */
srw rMASK, rMASK, rPADN
li rMASK, -1
insrdi r7F7F, r7F7F, 32, 0
/* That's the setup done, now do the first pair of doublewords.
We make an exception and use method (2) on the first two doublewords,
to reduce overhead. */
srd rMASK, rMASK, rPADN
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
lis rFEFE, -0x101
add rTMP1, rTMP1, r7F7F
addi rFEFE, rFEFE, -0x101
nor rTMP1, rTMP2, rTMP1
and. rWORD1, rTMP1, rMASK
mtcrf 0x01, rRTN
bne L(done0)
lis rFEFE, -0x101
addi rFEFE, rFEFE, -0x101
clrldi rFEFE,rFEFE,32 /* clear upper 32 */
sldi rTMP1, rFEFE, 32
add rFEFE, rFEFE, rTMP1
/* Are we now aligned to a doubleword boundary? */
bt 29, L(loop)
bt 28, L(loop)
/* Handle second word of pair. */
lwzu rWORD1, 4(rSTR)
/* Handle second doubleword of pair. */
ldu rWORD1, 8(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
nor. rWORD1, rTMP2, rTMP1
clrldi. rWORD1,rWORD1,32 /* clear upper 32 */
bne L(done0)
/* The loop. */
L(loop):
lwz rWORD1, 4(rSTR)
lwzu rWORD2, 8(rSTR)
ld rWORD1, 8(rSTR)
ldu rWORD2, 16(rSTR)
add rTMP1, rFEFE, rWORD1
nor rTMP2, r7F7F, rWORD1
and. rTMP1, rTMP1, rTMP2
clrldi. rTMP1,rTMP1,32 /* clear upper 32 */
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
and. rTMP1, rTMP3, rTMP4
clrldi. rTMP1,rTMP1,32 /* clear upper 32 */
beq L(loop)
and rTMP1, r7F7F, rWORD2
@ -146,17 +156,17 @@ L(loop):
L(done1):
and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 4
subi rSTR, rSTR, 8
add rTMP1, rTMP1, r7F7F
andc rWORD1, rTMP2, rTMP1
/* When we get to here, rSTR points to the first word in the string that
/* When we get to here, rSTR points to the first doubleword in the string that
contains a zero byte, and the most significant set bit in rWORD1 is in that
byte. */
L(done0):
cntlzw rTMP3, rWORD1
cntlzd rTMP3, rWORD1
subf rTMP1, rRTN, rSTR
srwi rTMP3, rTMP3, 3
srdi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
/* GKM FIXME: check high bound. */
blr