* sysdeps/powerpc/memset.S: Define & use symbolic register names.

Use C comments throughout.  Line up operands column with tabs. 
* sysdeps/powerpc/strchr.S: Likewise. 
* sysdeps/powerpc/strcmp.S: Likewise. 
* sysdeps/powerpc/strcpy.S: Likewise. 
* sysdeps/powerpc/strlen.S: Likewise.
	* sysdeps/powerpc/memset.S: Define & use symbolic register names.
	Use C comments throughout.  Line up operands column with tabs.
	* sysdeps/powerpc/strchr.S: Likewise.
	* sysdeps/powerpc/strcmp.S: Likewise.
	* sysdeps/powerpc/strcpy.S: Likewise.
	* sysdeps/powerpc/strlen.S: Likewise.
This commit is contained in:
Greg McGary 2000-06-06 22:37:40 +00:00
parent 019357d234
commit 1d280d9f1e
6 changed files with 407 additions and 377 deletions

View File

@ -1,5 +1,12 @@
2000-06-06 Greg McGary <greg@mcgary.org> 2000-06-06 Greg McGary <greg@mcgary.org>
* sysdeps/powerpc/memset.S: Define & use symbolic register names.
Use C comments throughout. Line up operands column with tabs.
* sysdeps/powerpc/strchr.S: Likewise.
* sysdeps/powerpc/strcmp.S: Likewise.
* sysdeps/powerpc/strcpy.S: Likewise.
* sysdeps/powerpc/strlen.S: Likewise.
* sysdeps/unix/sysv/linux/powerpc/brk.S [!PIC]: * sysdeps/unix/sysv/linux/powerpc/brk.S [!PIC]:
Get low part of &__curbrk with @l. Get low part of &__curbrk with @l.

View File

@ -19,181 +19,192 @@
#include <sysdep.h> #include <sysdep.h>
EALIGN(memset,5,1)
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]); /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
Returns 's'. Returns 's'.
The memset is done in three sizes: byte (8 bits), word (32 bits), The memset is done in three sizes: byte (8 bits), word (32 bits),
cache line (256 bits). There is a special case for setting cache lines cache line (256 bits). There is a special case for setting cache lines
to 0, to take advantage of the dcbz instruction. to 0, to take advantage of the dcbz instruction. */
r6: current address we are storing at
r7: number of bytes we are setting now (when aligning) */ EALIGN (memset, 5, 1)
#define rTMP r0
#define rRTN r3 /* initial value of 1st argument */
#define rCHR r4 /* char to set in each byte */
#define rLEN r5 /* length of region to set */
#define rMEMP r6 /* address at which we are storing */
#define rALIGN r7 /* number of bytes we are setting now (when aligning) */
#define rMEMP2 r8
#define rPOS32 r7 /* constant +32 for clearing with dcbz */
#define rNEG64 r8 /* constant -64 for clearing with dcbz */
#define rNEG32 r9 /* constant -32 for clearing with dcbz */
/* take care of case for size <= 4 */ /* take care of case for size <= 4 */
cmplwi cr1,r5,4 cmplwi cr1, rLEN, 4
andi. r7,r3,3 andi. rALIGN, rRTN, 3
mr r6,r3 mr rMEMP, rRTN
ble- cr1,L(small) ble- cr1, L(small)
/* align to word boundary */ /* align to word boundary */
cmplwi cr5,r5,31 cmplwi cr5, rLEN, 31
rlwimi r4,r4,8,16,23 rlwimi rCHR, rCHR, 8, 16, 23
beq+ L(aligned) # 8th instruction from .align beq+ L(aligned) /* 8th instruction from .align */
mtcrf 0x01,r3 mtcrf 0x01, rRTN
subfic r7,r7,4 subfic rALIGN, rALIGN, 4
add r6,r6,r7 add rMEMP, rMEMP, rALIGN
sub r5,r5,r7 sub rLEN, rLEN, rALIGN
bf+ 31,L(g0) bf+ 31, L(g0)
stb r4,0(r3) stb rCHR, 0(rRTN)
bt 30,L(aligned) bt 30, L(aligned)
L(g0): sth r4,-2(r6) # 16th instruction from .align L(g0): sth rCHR, -2(rMEMP) /* 16th instruction from .align */
/* take care of case for size <= 31 */ /* take care of case for size <= 31 */
L(aligned): L(aligned):
mtcrf 0x01,r5 mtcrf 0x01, rLEN
rlwimi r4,r4,16,0,15 rlwimi rCHR, rCHR, 16, 0, 15
ble cr5,L(medium) ble cr5, L(medium)
/* align to cache line boundary... */ /* align to cache line boundary... */
andi. r7,r6,0x1C andi. rALIGN, rMEMP, 0x1C
subfic r7,r7,0x20 subfic rALIGN, rALIGN, 0x20
beq L(caligned) beq L(caligned)
mtcrf 0x01,r7 mtcrf 0x01, rALIGN
add r6,r6,r7 add rMEMP, rMEMP, rALIGN
sub r5,r5,r7 sub rLEN, rLEN, rALIGN
cmplwi cr1,r7,0x10 cmplwi cr1, rALIGN, 0x10
mr r8,r6 mr rMEMP2, rMEMP
bf 28,L(a1) bf 28, L(a1)
stw r4,-4(r8) stw rCHR, -4(rMEMP2)
stwu r4,-8(r8) stwu rCHR, -8(rMEMP2)
L(a1): blt cr1,L(a2) L(a1): blt cr1, L(a2)
stw r4,-4(r8) # 32nd instruction from .align stw rCHR, -4(rMEMP2) /* 32nd instruction from .align */
stw r4,-8(r8) stw rCHR, -8(rMEMP2)
stw r4,-12(r8) stw rCHR, -12(rMEMP2)
stwu r4,-16(r8) stwu rCHR, -16(rMEMP2)
L(a2): bf 29,L(caligned) L(a2): bf 29, L(caligned)
stw r4,-4(r8) stw rCHR, -4(rMEMP2)
/* now aligned to a cache line. */ /* now aligned to a cache line. */
L(caligned): L(caligned):
cmplwi cr1,r4,0 cmplwi cr1, rCHR, 0
clrrwi. r7,r5,5 clrrwi. rALIGN, rLEN, 5
mtcrf 0x01,r5 # 40th instruction from .align mtcrf 0x01, rLEN /* 40th instruction from .align */
beq cr1,L(zloopstart) # special case for clearing memory using dcbz beq cr1, L(zloopstart) /* special case for clearing memory using dcbz */
srwi r0,r7,5 srwi rTMP, rALIGN, 5
mtctr r0 mtctr rTMP
beq L(medium) # we may not actually get to do a full line beq L(medium) /* we may not actually get to do a full line */
clrlwi. r5,r5,27 clrlwi. rLEN, rLEN, 27
add r6,r6,r7 add rMEMP, rMEMP, rALIGN
li r8,-0x40 li rNEG64, -0x40
bdz L(cloopdone) # 48th instruction from .align bdz L(cloopdone) /* 48th instruction from .align */
L(c3): dcbz r8,r6 L(c3): dcbz rNEG64, rMEMP
stw r4,-4(r6) stw rCHR, -4(rMEMP)
stw r4,-8(r6) stw rCHR, -8(rMEMP)
stw r4,-12(r6) stw rCHR, -12(rMEMP)
stw r4,-16(r6) stw rCHR, -16(rMEMP)
nop # let 601 fetch last 4 instructions of loop nop /* let 601 fetch last 4 instructions of loop */
stw r4,-20(r6) stw rCHR, -20(rMEMP)
stw r4,-24(r6) # 56th instruction from .align stw rCHR, -24(rMEMP) /* 56th instruction from .align */
nop # let 601 fetch first 8 instructions of loop nop /* let 601 fetch first 8 instructions of loop */
stw r4,-28(r6) stw rCHR, -28(rMEMP)
stwu r4,-32(r6) stwu rCHR, -32(rMEMP)
bdnz L(c3) bdnz L(c3)
L(cloopdone): L(cloopdone):
stw r4,-4(r6) stw rCHR, -4(rMEMP)
stw r4,-8(r6) stw rCHR, -8(rMEMP)
stw r4,-12(r6) stw rCHR, -12(rMEMP)
stw r4,-16(r6) # 64th instruction from .align stw rCHR, -16(rMEMP) /* 64th instruction from .align */
stw r4,-20(r6) stw rCHR, -20(rMEMP)
cmplwi cr1,r5,16 cmplwi cr1, rLEN, 16
stw r4,-24(r6) stw rCHR, -24(rMEMP)
stw r4,-28(r6) stw rCHR, -28(rMEMP)
stwu r4,-32(r6) stwu rCHR, -32(rMEMP)
beqlr beqlr
add r6,r6,r7 add rMEMP, rMEMP, rALIGN
b L(medium_tail2) # 72nd instruction from .align b L(medium_tail2) /* 72nd instruction from .align */
.align 5 .align 5
nop nop
/* Clear lines of memory in 128-byte chunks. */ /* Clear lines of memory in 128-byte chunks. */
L(zloopstart): L(zloopstart):
clrlwi r5,r5,27 clrlwi rLEN, rLEN, 27
mtcrf 0x02,r7 mtcrf 0x02, rALIGN
srwi. r0,r7,7 srwi. rTMP, rALIGN, 7
mtctr r0 mtctr rTMP
li r7,0x20 li rPOS32, 0x20
li r8,-0x40 li rNEG64, -0x40
cmplwi cr1,r5,16 # 8 cmplwi cr1, rLEN, 16 /* 8 */
bf 26,L(z0) bf 26, L(z0)
dcbz 0,r6 dcbz 0, rMEMP
addi r6,r6,0x20 addi rMEMP, rMEMP, 0x20
L(z0): li r9,-0x20 L(z0): li rNEG32, -0x20
bf 25,L(z1) bf 25, L(z1)
dcbz 0,r6 dcbz 0, rMEMP
dcbz r7,r6 dcbz rPOS32, rMEMP
addi r6,r6,0x40 # 16 addi rMEMP, rMEMP, 0x40 /* 16 */
L(z1): cmplwi cr5,r5,0 L(z1): cmplwi cr5, rLEN, 0
beq L(medium) beq L(medium)
L(zloop): L(zloop):
dcbz 0,r6 dcbz 0, rMEMP
dcbz r7,r6 dcbz rPOS32, rMEMP
addi r6,r6,0x80 addi rMEMP, rMEMP, 0x80
dcbz r8,r6 dcbz rNEG64, rMEMP
dcbz r9,r6 dcbz rNEG32, rMEMP
bdnz L(zloop) bdnz L(zloop)
beqlr cr5 beqlr cr5
b L(medium_tail2) b L(medium_tail2)
.align 5 .align 5
L(small): L(small):
/* Memset of 4 bytes or less. */ /* Memset of 4 bytes or less. */
cmplwi cr5,r5,1 cmplwi cr5, rLEN, 1
cmplwi cr1,r5,3 cmplwi cr1, rLEN, 3
bltlr cr5 bltlr cr5
stb r4,0(r6) stb rCHR, 0(rMEMP)
beqlr cr5 beqlr cr5
nop nop
stb r4,1(r6) stb rCHR, 1(rMEMP)
bltlr cr1 bltlr cr1
stb r4,2(r6) stb rCHR, 2(rMEMP)
beqlr cr1 beqlr cr1
nop nop
stb r4,3(r6) stb rCHR, 3(rMEMP)
blr blr
/* Memset of 0-31 bytes. */ /* Memset of 0-31 bytes. */
.align 5 .align 5
L(medium): L(medium):
cmplwi cr1,r5,16 cmplwi cr1, rLEN, 16
L(medium_tail2): L(medium_tail2):
add r6,r6,r5 add rMEMP, rMEMP, rLEN
L(medium_tail): L(medium_tail):
bt- 31,L(medium_31t) bt- 31, L(medium_31t)
bt- 30,L(medium_30t) bt- 30, L(medium_30t)
L(medium_30f): L(medium_30f):
bt- 29,L(medium_29t) bt- 29, L(medium_29t)
L(medium_29f): L(medium_29f):
bge- cr1,L(medium_27t) bge- cr1, L(medium_27t)
bflr- 28 bflr- 28
stw r4,-4(r6) # 8th instruction from .align stw rCHR, -4(rMEMP) /* 8th instruction from .align */
stw r4,-8(r6) stw rCHR, -8(rMEMP)
blr blr
L(medium_31t): L(medium_31t):
stbu r4,-1(r6) stbu rCHR, -1(rMEMP)
bf- 30,L(medium_30f) bf- 30, L(medium_30f)
L(medium_30t): L(medium_30t):
sthu r4,-2(r6) sthu rCHR, -2(rMEMP)
bf- 29,L(medium_29f) bf- 29, L(medium_29f)
L(medium_29t): L(medium_29t):
stwu r4,-4(r6) stwu rCHR, -4(rMEMP)
blt- cr1,L(medium_27f) # 16th instruction from .align blt- cr1, L(medium_27f) /* 16th instruction from .align */
L(medium_27t): L(medium_27t):
stw r4,-4(r6) stw rCHR, -4(rMEMP)
stw r4,-8(r6) stw rCHR, -8(rMEMP)
stw r4,-12(r6) stw rCHR, -12(rMEMP)
stwu r4,-16(r6) stwu rCHR, -16(rMEMP)
L(medium_27f): L(medium_27f):
bflr- 28 bflr- 28
L(medium_28t): L(medium_28t):
stw r4,-4(r6) stw rCHR, -4(rMEMP)
stw r4,-8(r6) stw rCHR, -8(rMEMP)
blr blr
END(memset) END(memset)

View File

@ -1,5 +1,5 @@
/* Optimized strchr implementation for PowerPC. /* Optimized strchr implementation for PowerPC.
Copyright (C) 1997, 1999 Free Software Foundation, Inc. Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or The GNU C Library is free software; you can redistribute it and/or
@ -21,91 +21,95 @@
/* See strlen.S for comments on how this works. */ /* See strlen.S for comments on how this works. */
/* char * [r3] strchr (const char *s [r3] , int c [r4] ) /* char * [r3] strchr (const char *s [r3] , int c [r4] ) */
r0: a temporary ENTRY (strchr)
r3: our return result.
r4: byte we're looking for, spread over the whole word #define rTMP1 r0
r5: the current word #define rRTN r3 /* outgoing result */
r6: the constant 0xfefefeff (-0x01010101) #define rSTRin r3 /* incoming string arg */
r7: the constant 0x7f7f7f7f #define rCHR r4 /* byte we're looking for, spread over the whole word */
r8: pointer to the current word. #define rCLZB rCHR /* leading zero byte count */
r9: a temporary #define rWORD r5 /* the current word */
r10: the number of bits we should ignore in the first word #define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
r11: a mask with the bits to ignore set to 0 #define r7F7F r7 /* constant 0x7f7f7f7f */
r12: a temporary */ #define rSTR r8 /* current word pointer */
ENTRY(strchr) #define rTMP2 r9
rlwimi r4,r4,8,16,23 #define rIGN r10 /* number of bits we should ignore in the first word */
li r11,-1 #define rMASK r11 /* mask with the bits to ignore set to 0 */
rlwimi r4,r4,16,0,15 #define rTMP3 r12
lis r6,0xfeff
lis r7,0x7f7f rlwimi rCHR, rCHR, 8, 16, 23
clrrwi r8,r3,2 li rMASK, -1
addi r7,r7,0x7f7f rlwimi rCHR, rCHR, 16, 0, 15
addi r6,r6,0xfffffeff lis rFEFE, -0x101
rlwinm r10,r3,3,27,28 lis r7F7F, 0x7f7f
clrrwi rSTR, rSTRin, 2
addi r7F7F, r7F7F, 0x7f7f
addi rFEFE, rFEFE, -0x101
rlwinm rIGN, rSTRin, 3, 27, 28
/* Test the first (partial?) word. */ /* Test the first (partial?) word. */
lwz r5,0(r8) lwz rWORD, 0(rSTR)
srw r11,r11,r10 srw rMASK, rMASK, rIGN
orc r5,r5,r11 orc rWORD, rWORD, rMASK
add r0,r6,r5 add rTMP1, rFEFE, rWORD
nor r9,r7,r5 nor rTMP2, r7F7F, rWORD
and. r0,r0,r9 and. rTMP1, rTMP1, rTMP2
xor r12,r4,r5 xor rTMP3, rCHR, rWORD
orc r12,r12,r11 orc rTMP3, rTMP3, rMASK
b L(loopentry) b L(loopentry)
/* The loop. */ /* The loop. */
L(loop):lwzu r5,4(r8) L(loop):lwzu rWORD, 4(rSTR)
and. r0,r0,r9 and. rTMP1, rTMP1, rTMP2
/* Test for 0. */ /* Test for 0. */
add r0,r6,r5 add rTMP1, rFEFE, rWORD
nor r9,r7,r5 nor rTMP2, r7F7F, rWORD
bne L(foundit) bne L(foundit)
and. r0,r0,r9 and. rTMP1, rTMP1, rTMP2
/* Start test for the bytes we're looking for. */ /* Start test for the bytes we're looking for. */
xor r12,r4,r5 xor rTMP3, rCHR, rWORD
L(loopentry): L(loopentry):
add r0,r6,r12 add rTMP1, rFEFE, rTMP3
nor r9,r7,r12 nor rTMP2, r7F7F, rTMP3
beq L(loop) beq L(loop)
/* There is a zero byte in the word, but may also be a matching byte (either /* There is a zero byte in the word, but may also be a matching byte (either
before or after the zero byte). In fact, we may be looking for a before or after the zero byte). In fact, we may be looking for a
zero byte, in which case we return a match. We guess that this hasn't zero byte, in which case we return a match. We guess that this hasn't
happened, though. */ happened, though. */
L(missed): L(missed):
and. r0,r0,r9 and. rTMP1, rTMP1, rTMP2
li r3,0 li rRTN, 0
beqlr beqlr
/* It did happen. Decide which one was first... /* It did happen. Decide which one was first...
I'm not sure if this is actually faster than a sequence of I'm not sure if this is actually faster than a sequence of
rotates, compares, and branches (we use it anyway because it's shorter). */ rotates, compares, and branches (we use it anyway because it's shorter). */
and r6,r7,r5 and rFEFE, r7F7F, rWORD
or r11,r7,r5 or rMASK, r7F7F, rWORD
and r0,r7,r12 and rTMP1, r7F7F, rTMP3
or r10,r7,r12 or rIGN, r7F7F, rTMP3
add r6,r6,r7 add rFEFE, rFEFE, r7F7F
add r0,r0,r7 add rTMP1, rTMP1, r7F7F
nor r5,r11,r6 nor rWORD, rMASK, rFEFE
nor r9,r10,r0 nor rTMP2, rIGN, rTMP1
cmplw r5,r9 cmplw rWORD, rTMP2
bgtlr bgtlr
cntlzw r4,r9 cntlzw rCLZB, rTMP2
srwi r4,r4,3 srwi rCLZB, rCLZB, 3
add r3,r8,r4 add rRTN, rSTR, rCLZB
blr blr
L(foundit): L(foundit):
and r0,r7,r12 and rTMP1, r7F7F, rTMP3
or r10,r7,r12 or rIGN, r7F7F, rTMP3
add r0,r0,r7 add rTMP1, rTMP1, r7F7F
nor r9,r10,r0 nor rTMP2, rIGN, rTMP1
cntlzw r4,r9 cntlzw rCLZB, rTMP2
subi r8,r8,4 subi rSTR, rSTR, 4
srwi r4,r4,3 srwi rCLZB, rCLZB, 3
add r3,r8,r4 add rRTN, rSTR, rCLZB
blr blr
END(strchr) END (strchr)
weak_alias(strchr,index) weak_alias(strchr, index)

View File

@ -21,95 +21,93 @@
/* See strlen.S for comments on how the end-of-string testing works. */ /* See strlen.S for comments on how the end-of-string testing works. */
EALIGN(strcmp,4,0) /* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */
/* int [r3] strcmp (const char *p1 [r3], const char *p2 [r4]) */
/* General register assignments: EALIGN (strcmp, 4, 0)
r0: temporary
r3: pointer to previous word in s1
r4: pointer to previous word in s2
r5: current word from s1
r6: current word from s2
r7: 0xfefefeff
r8: 0x7f7f7f7f
r9: ~(word in s1 | 0x7f7f7f7f) */
/* Register assignments in the prologue: #define rTMP r0
r10: low 2 bits of p2-p1 #define rRTN r3 /* return value */
r11: mask to orc with r5/r6 */ #define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
#define rWORD1 r5 /* current word in s1 */
#define rWORD2 r6 /* current word in s2 */
#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */
#define r7F7F r8 /* constant 0x7f7f7f7f */
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r10 /* bits that differ in s1 & s2 words */
or r0,r4,r3 or rTMP, rSTR2, rSTR1
clrlwi. r0,r0,30 clrlwi. rTMP, rTMP, 30
lis r7,0xfeff lis rFEFE, -0x101
bne L(unaligned) bne L(unaligned)
lwz r5,0(r3) lwz rWORD1, 0(rSTR1)
lwz r6,0(r4) lwz rWORD2, 0(rSTR2)
lis r8,0x7f7f lis r7F7F, 0x7f7f
addi r7,r7,-0x101 addi rFEFE, rFEFE, -0x101
addi r8,r8,0x7f7f addi r7F7F, r7F7F, 0x7f7f
b L(g1) b L(g1)
L(g0): lwzu r5,4(r3) L(g0): lwzu rWORD1, 4(rSTR1)
bne cr1,L(different) bne cr1, L(different)
lwzu r6,4(r4) lwzu rWORD2, 4(rSTR2)
L(g1): add r0,r7,r5 L(g1): add rTMP, rFEFE, rWORD1
nor r9,r8,r5 nor rNEG, r7F7F, rWORD1
and. r0,r0,r9 and. rTMP, rTMP, rNEG
cmpw cr1,r5,r6 cmpw cr1, rWORD1, rWORD2
beq+ L(g0) beq+ L(g0)
L(endstring): L(endstring):
/* OK. We've hit the end of the string. We need to be careful that /* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond we don't compare two strings as different because of gunk beyond
the end of the strings... */ the end of the strings... */
and r0,r8,r5 and rTMP, r7F7F, rWORD1
beq cr1,L(equal) beq cr1, L(equal)
add r0,r0,r8 add rTMP, rTMP, r7F7F
xor. r10,r5,r6 xor. rBITDIF, rWORD1, rWORD2
andc r9,r9,r0 andc rNEG, rNEG, rTMP
blt- L(highbit) blt- L(highbit)
cntlzw r10,r10 cntlzw rBITDIF, rBITDIF
cntlzw r9,r9 cntlzw rNEG, rNEG
addi r9,r9,7 addi rNEG, rNEG, 7
cmpw cr1,r9,r10 cmpw cr1, rNEG, rBITDIF
sub r3,r5,r6 sub rRTN, rWORD1, rWORD2
bgelr+ cr1 bgelr+ cr1
L(equal): L(equal):
li r3,0 li rRTN, 0
blr blr
L(different): L(different):
lwz r5,-4(r3) lwz rWORD1, -4(rSTR1)
xor. r10,r5,r6 xor. rBITDIF, rWORD1, rWORD2
sub r3,r5,r6 sub rRTN, rWORD1, rWORD2
bgelr+ bgelr+
L(highbit): L(highbit):
ori r3,r6,1 ori rRTN, rWORD2, 1
blr blr
/* Oh well. In this case, we just do a byte-by-byte comparison. */ /* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4 .align 4
L(unaligned): L(unaligned):
lbz r5,0(r3) lbz rWORD1, 0(rSTR1)
lbz r6,0(r4) lbz rWORD2, 0(rSTR2)
b L(u1) b L(u1)
L(u0): lbzu r5,1(r3) L(u0): lbzu rWORD1, 1(rSTR1)
bne- L(u4) bne- L(u4)
lbzu r6,1(r4) lbzu rWORD2, 1(rSTR2)
L(u1): cmpwi cr1,r5,0 L(u1): cmpwi cr1, rWORD1, 0
beq- cr1,L(u3) beq- cr1, L(u3)
cmpw r5,r6 cmpw rWORD1, rWORD2
bne- L(u3) bne- L(u3)
lbzu r5,1(r3) lbzu rWORD1, 1(rSTR1)
lbzu r6,1(r4) lbzu rWORD2, 1(rSTR2)
cmpwi cr1,r5,0 cmpwi cr1, rWORD1, 0
cmpw r5,r6 cmpw rWORD1, rWORD2
bne+ cr1,L(u0) bne+ cr1, L(u0)
L(u3): sub r3,r5,r6 L(u3): sub rRTN, rWORD1, rWORD2
blr blr
L(u4): lbz r5,-1(r3) L(u4): lbz rWORD1, -1(rSTR1)
sub r3,r5,r6 sub rRTN, rWORD1, rWORD2
blr blr
END(strcmp) END(strcmp)

View File

@ -21,80 +21,80 @@
/* See strlen.S for comments on how the end-of-string testing works. */ /* See strlen.S for comments on how the end-of-string testing works. */
EALIGN(strcpy,4,0)
/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */ /* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */
/* General register assignments: EALIGN(strcpy, 4, 0)
r0: temporary
r3: saved `dest'
r4: pointer to previous word in src
r5: pointer to previous word in dest
r6: current word from src
r7: 0xfefefeff
r8: 0x7f7f7f7f
r9: ~(word in src | 0x7f7f7f7f)
r10: alternate word from src. */
or r0,r4,r3 #define rTMP r0
clrlwi. r0,r0,30 #define rRTN r3 /* incoming DEST arg preserved as result */
addi r5,r3,-4 #define rSRC r4 /* pointer to previous word in src */
bne L(unaligned) #define rDEST r5 /* pointer to previous word in dest */
#define rWORD r6 /* current word from src */
#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */
#define r7F7F r8 /* constant 0x7f7f7f7f */
#define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */
#define rALT r10 /* alternate word from src */
lis r7,0xfeff or rTMP, rSRC, rRTN
lis r8,0x7f7f clrlwi. rTMP, rTMP, 30
lwz r6,0(r4) addi rDEST, rRTN, -4
addi r7,r7,-0x101 bne L(unaligned)
addi r8,r8,0x7f7f
b L(g2)
L(g0): lwzu r10,4(r4) lis rFEFE, -0x101
stwu r6,4(r5) lis r7F7F, 0x7f7f
add r0,r7,r10 lwz rWORD, 0(rSRC)
nor r9,r8,r10 addi rFEFE, rFEFE, -0x101
and. r0,r0,r9 addi r7F7F, r7F7F, 0x7f7f
bne- L(g1) b L(g2)
lwzu r6,4(r4)
stwu r10,4(r5)
L(g2): add r0,r7,r6
nor r9,r8,r6
and. r0,r0,r9
beq+ L(g0)
mr r10,r6 L(g0): lwzu rALT, 4(rSRC)
stwu rWORD, 4(rDEST)
add rTMP, rFEFE, rALT
nor rNEG, r7F7F, rALT
and. rTMP, rTMP, rNEG
bne- L(g1)
lwzu rWORD, 4(rSRC)
stwu rALT, 4(rDEST)
L(g2): add rTMP, rFEFE, rWORD
nor rNEG, r7F7F, rWORD
and. rTMP, rTMP, rNEG
beq+ L(g0)
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */ /* We've hit the end of the string. Do the rest byte-by-byte. */
L(g1): rlwinm. r0,r10,8,24,31 L(g1): rlwinm. rTMP, rALT, 8, 24, 31
stb r0,4(r5) stb rTMP, 4(rDEST)
beqlr- beqlr-
rlwinm. r0,r10,16,24,31 rlwinm. rTMP, rALT, 16, 24, 31
stb r0,5(r5) stb rTMP, 5(rDEST)
beqlr- beqlr-
rlwinm. r0,r10,24,24,31 rlwinm. rTMP, rALT, 24, 24, 31
stb r0,6(r5) stb rTMP, 6(rDEST)
beqlr- beqlr-
stb r10,7(r5) stb rALT, 7(rDEST)
blr blr
/* Oh well. In this case, we just do a byte-by-byte copy. */ /* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4 .align 4
nop nop
L(unaligned): L(unaligned):
lbz r6,0(r4) lbz rWORD, 0(rSRC)
addi r5,r3,-1 addi rDEST, rRTN, -1
cmpwi r6,0 cmpwi rWORD, 0
beq- L(u2) beq- L(u2)
L(u0): lbzu r10,1(r4) L(u0): lbzu rALT, 1(rSRC)
stbu r6,1(r5) stbu rWORD, 1(rDEST)
cmpwi r10,0 cmpwi rALT, 0
beq- L(u1) beq- L(u1)
nop /* Let 601 load start of loop. */ nop /* Let 601 load start of loop. */
lbzu r6,1(r4) lbzu rWORD, 1(rSRC)
stbu r10,1(r5) stbu rALT, 1(rDEST)
cmpwi r6,0 cmpwi rWORD, 0
bne+ L(u0) bne+ L(u0)
L(u2): stb r6,1(r5) L(u2): stb rWORD, 1(rDEST)
blr blr
L(u1): stb r10,1(r5) L(u1): stb rALT, 1(rDEST)
blr blr
END(strcpy) END(strcpy)

View File

@ -1,5 +1,5 @@
/* Optimized strlen implementation for PowerPC. /* Optimized strlen implementation for PowerPC.
Copyright (C) 1997, 1999 Free Software Foundation, Inc. Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or The GNU C Library is free software; you can redistribute it and/or
@ -69,76 +69,86 @@
We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
them, the others we must save. */ them, the others we must save. */
ENTRY(strlen) /* size_t [r3] strlen (const char *s [r3]) */
/* On entry, r3 points to the string, and it's left that way.
We use r6 to store 0xfefefeff, and r7 to store 0x7f7f7f7f. ENTRY (strlen)
r4 is used to keep the current index into the string; r5 holds
the number of padding bits we prepend to the string to make it #define rTMP1 r0
start at a word boundary. r8 holds the 'current' word. #define rRTN r3 /* incoming STR arg, outgoing result */
r9-12 are temporaries. r0 is used as a temporary and for discarded #define rSTR r4 /* current string position */
results. */ #define rPADN r5 /* number of padding bits we prepend to the
clrrwi r4,r3,2 string to make it start at a word boundary */
lis r7,0x7f7f #define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
rlwinm r5,r3,3,27,28 #define r7F7F r7 /* constant 0x7f7f7f7f */
lwz r8,0(r4) #define rWORD1 r8 /* current string word */
li r9,-1 #define rWORD2 r9 /* next string word */
addi r7,r7,0x7f7f #define rMASK r9 /* mask for first string word */
#define rTMP2 r10
#define rTMP3 r11
#define rTMP4 r12
clrrwi rSTR, rRTN, 2
lis r7F7F, 0x7f7f
rlwinm rPADN, rRTN, 3, 27, 28
lwz rWORD1, 0(rSTR)
li rMASK, -1
addi r7F7F, r7F7F, 0x7f7f
/* That's the setup done, now do the first pair of words. /* That's the setup done, now do the first pair of words.
We make an exception and use method (2) on the first two words, to reduce We make an exception and use method (2) on the first two words, to reduce
overhead. */ overhead. */
srw r9,r9,r5 srw rMASK, rMASK, rPADN
and r0,r7,r8 and rTMP1, r7F7F, rWORD1
or r10,r7,r8 or rTMP2, r7F7F, rWORD1
add r0,r0,r7 add rTMP1, rTMP1, r7F7F
nor r0,r10,r0 nor rTMP1, rTMP2, rTMP1
and. r8,r0,r9 and. rWORD1, rTMP1, rMASK
mtcrf 0x01,r3 mtcrf 0x01, rRTN
bne L(done0) bne L(done0)
lis r6,0xfeff lis rFEFE, -0x101
addi r6,r6,-0x101 addi rFEFE, rFEFE, -0x101
/* Are we now aligned to a doubleword boundary? */ /* Are we now aligned to a doubleword boundary? */
bt 29,L(loop) bt 29, L(loop)
/* Handle second word of pair. */ /* Handle second word of pair. */
lwzu r8,4(r4) lwzu rWORD1, 4(rSTR)
and r0,r7,r8 and rTMP1, r7F7F, rWORD1
or r10,r7,r8 or rTMP2, r7F7F, rWORD1
add r0,r0,r7 add rTMP1, rTMP1, r7F7F
nor. r8,r10,r0 nor. rWORD1, rTMP2, rTMP1
bne L(done0) bne L(done0)
/* The loop. */ /* The loop. */
L(loop): L(loop):
lwz r8,4(r4) lwz rWORD1, 4(rSTR)
lwzu r9,8(r4) lwzu rWORD2, 8(rSTR)
add r0,r6,r8 add rTMP1, rFEFE, rWORD1
nor r10,r7,r8 nor rTMP2, r7F7F, rWORD1
and. r0,r0,r10 and. rTMP1, rTMP1, rTMP2
add r11,r6,r9 add rTMP3, rFEFE, rWORD2
nor r12,r7,r9 nor rTMP4, r7F7F, rWORD2
bne L(done1) bne L(done1)
and. r0,r11,r12 and. rTMP1, rTMP3, rTMP4
beq L(loop) beq L(loop)
and r0,r7,r9 and rTMP1, r7F7F, rWORD2
add r0,r0,r7 add rTMP1, rTMP1, r7F7F
andc r8,r12,r0 andc rWORD1, rTMP4, rTMP1
b L(done0) b L(done0)
L(done1): L(done1):
and r0,r7,r8 and rTMP1, r7F7F, rWORD1
subi r4,r4,4 subi rSTR, rSTR, 4
add r0,r0,r7 add rTMP1, rTMP1, r7F7F
andc r8,r10,r0 andc rWORD1, rTMP2, rTMP1
/* When we get to here, r4 points to the first word in the string that /* When we get to here, rSTR points to the first word in the string that
contains a zero byte, and the most significant set bit in r8 is in that contains a zero byte, and the most significant set bit in rWORD1 is in that
byte. */ byte. */
L(done0): L(done0):
cntlzw r11,r8 cntlzw rTMP3, rWORD1
subf r0,r3,r4 subf rTMP1, rRTN, rSTR
srwi r11,r11,3 srwi rTMP3, rTMP3, 3
add r3,r0,r11 add rRTN, rTMP1, rTMP3
blr blr
END(strlen) END (strlen)