* sysdeps/powerpc/memset.S: Define & use symbolic register names.
Use C comments throughout. Line up operands column with tabs. * sysdeps/powerpc/strchr.S: Likewise. * sysdeps/powerpc/strcmp.S: Likewise. * sysdeps/powerpc/strcpy.S: Likewise. * sysdeps/powerpc/strlen.S: Likewise. * sysdeps/powerpc/memset.S: Define & use symbolic register names. Use C comments throughout. Line up operands column with tabs. * sysdeps/powerpc/strchr.S: Likewise. * sysdeps/powerpc/strcmp.S: Likewise. * sysdeps/powerpc/strcpy.S: Likewise. * sysdeps/powerpc/strlen.S: Likewise.
This commit is contained in:
parent
019357d234
commit
1d280d9f1e
@ -1,5 +1,12 @@
|
|||||||
2000-06-06 Greg McGary <greg@mcgary.org>
|
2000-06-06 Greg McGary <greg@mcgary.org>
|
||||||
|
|
||||||
|
* sysdeps/powerpc/memset.S: Define & use symbolic register names.
|
||||||
|
Use C comments throughout. Line up operands column with tabs.
|
||||||
|
* sysdeps/powerpc/strchr.S: Likewise.
|
||||||
|
* sysdeps/powerpc/strcmp.S: Likewise.
|
||||||
|
* sysdeps/powerpc/strcpy.S: Likewise.
|
||||||
|
* sysdeps/powerpc/strlen.S: Likewise.
|
||||||
|
|
||||||
* sysdeps/unix/sysv/linux/powerpc/brk.S [!PIC]:
|
* sysdeps/unix/sysv/linux/powerpc/brk.S [!PIC]:
|
||||||
Get low part of &__curbrk with @l.
|
Get low part of &__curbrk with @l.
|
||||||
|
|
||||||
|
@ -19,181 +19,192 @@
|
|||||||
|
|
||||||
#include <sysdep.h>
|
#include <sysdep.h>
|
||||||
|
|
||||||
EALIGN(memset,5,1)
|
|
||||||
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
|
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
|
||||||
Returns 's'.
|
Returns 's'.
|
||||||
|
|
||||||
The memset is done in three sizes: byte (8 bits), word (32 bits),
|
The memset is done in three sizes: byte (8 bits), word (32 bits),
|
||||||
cache line (256 bits). There is a special case for setting cache lines
|
cache line (256 bits). There is a special case for setting cache lines
|
||||||
to 0, to take advantage of the dcbz instruction.
|
to 0, to take advantage of the dcbz instruction. */
|
||||||
r6: current address we are storing at
|
|
||||||
r7: number of bytes we are setting now (when aligning) */
|
EALIGN (memset, 5, 1)
|
||||||
|
|
||||||
|
#define rTMP r0
|
||||||
|
#define rRTN r3 /* initial value of 1st argument */
|
||||||
|
#define rCHR r4 /* char to set in each byte */
|
||||||
|
#define rLEN r5 /* length of region to set */
|
||||||
|
#define rMEMP r6 /* address at which we are storing */
|
||||||
|
#define rALIGN r7 /* number of bytes we are setting now (when aligning) */
|
||||||
|
#define rMEMP2 r8
|
||||||
|
|
||||||
|
#define rPOS32 r7 /* constant +32 for clearing with dcbz */
|
||||||
|
#define rNEG64 r8 /* constant -64 for clearing with dcbz */
|
||||||
|
#define rNEG32 r9 /* constant -32 for clearing with dcbz */
|
||||||
|
|
||||||
/* take care of case for size <= 4 */
|
/* take care of case for size <= 4 */
|
||||||
cmplwi cr1,r5,4
|
cmplwi cr1, rLEN, 4
|
||||||
andi. r7,r3,3
|
andi. rALIGN, rRTN, 3
|
||||||
mr r6,r3
|
mr rMEMP, rRTN
|
||||||
ble- cr1,L(small)
|
ble- cr1, L(small)
|
||||||
/* align to word boundary */
|
/* align to word boundary */
|
||||||
cmplwi cr5,r5,31
|
cmplwi cr5, rLEN, 31
|
||||||
rlwimi r4,r4,8,16,23
|
rlwimi rCHR, rCHR, 8, 16, 23
|
||||||
beq+ L(aligned) # 8th instruction from .align
|
beq+ L(aligned) /* 8th instruction from .align */
|
||||||
mtcrf 0x01,r3
|
mtcrf 0x01, rRTN
|
||||||
subfic r7,r7,4
|
subfic rALIGN, rALIGN, 4
|
||||||
add r6,r6,r7
|
add rMEMP, rMEMP, rALIGN
|
||||||
sub r5,r5,r7
|
sub rLEN, rLEN, rALIGN
|
||||||
bf+ 31,L(g0)
|
bf+ 31, L(g0)
|
||||||
stb r4,0(r3)
|
stb rCHR, 0(rRTN)
|
||||||
bt 30,L(aligned)
|
bt 30, L(aligned)
|
||||||
L(g0): sth r4,-2(r6) # 16th instruction from .align
|
L(g0): sth rCHR, -2(rMEMP) /* 16th instruction from .align */
|
||||||
/* take care of case for size <= 31 */
|
/* take care of case for size <= 31 */
|
||||||
L(aligned):
|
L(aligned):
|
||||||
mtcrf 0x01,r5
|
mtcrf 0x01, rLEN
|
||||||
rlwimi r4,r4,16,0,15
|
rlwimi rCHR, rCHR, 16, 0, 15
|
||||||
ble cr5,L(medium)
|
ble cr5, L(medium)
|
||||||
/* align to cache line boundary... */
|
/* align to cache line boundary... */
|
||||||
andi. r7,r6,0x1C
|
andi. rALIGN, rMEMP, 0x1C
|
||||||
subfic r7,r7,0x20
|
subfic rALIGN, rALIGN, 0x20
|
||||||
beq L(caligned)
|
beq L(caligned)
|
||||||
mtcrf 0x01,r7
|
mtcrf 0x01, rALIGN
|
||||||
add r6,r6,r7
|
add rMEMP, rMEMP, rALIGN
|
||||||
sub r5,r5,r7
|
sub rLEN, rLEN, rALIGN
|
||||||
cmplwi cr1,r7,0x10
|
cmplwi cr1, rALIGN, 0x10
|
||||||
mr r8,r6
|
mr rMEMP2, rMEMP
|
||||||
bf 28,L(a1)
|
bf 28, L(a1)
|
||||||
stw r4,-4(r8)
|
stw rCHR, -4(rMEMP2)
|
||||||
stwu r4,-8(r8)
|
stwu rCHR, -8(rMEMP2)
|
||||||
L(a1): blt cr1,L(a2)
|
L(a1): blt cr1, L(a2)
|
||||||
stw r4,-4(r8) # 32nd instruction from .align
|
stw rCHR, -4(rMEMP2) /* 32nd instruction from .align */
|
||||||
stw r4,-8(r8)
|
stw rCHR, -8(rMEMP2)
|
||||||
stw r4,-12(r8)
|
stw rCHR, -12(rMEMP2)
|
||||||
stwu r4,-16(r8)
|
stwu rCHR, -16(rMEMP2)
|
||||||
L(a2): bf 29,L(caligned)
|
L(a2): bf 29, L(caligned)
|
||||||
stw r4,-4(r8)
|
stw rCHR, -4(rMEMP2)
|
||||||
/* now aligned to a cache line. */
|
/* now aligned to a cache line. */
|
||||||
L(caligned):
|
L(caligned):
|
||||||
cmplwi cr1,r4,0
|
cmplwi cr1, rCHR, 0
|
||||||
clrrwi. r7,r5,5
|
clrrwi. rALIGN, rLEN, 5
|
||||||
mtcrf 0x01,r5 # 40th instruction from .align
|
mtcrf 0x01, rLEN /* 40th instruction from .align */
|
||||||
beq cr1,L(zloopstart) # special case for clearing memory using dcbz
|
beq cr1, L(zloopstart) /* special case for clearing memory using dcbz */
|
||||||
srwi r0,r7,5
|
srwi rTMP, rALIGN, 5
|
||||||
mtctr r0
|
mtctr rTMP
|
||||||
beq L(medium) # we may not actually get to do a full line
|
beq L(medium) /* we may not actually get to do a full line */
|
||||||
clrlwi. r5,r5,27
|
clrlwi. rLEN, rLEN, 27
|
||||||
add r6,r6,r7
|
add rMEMP, rMEMP, rALIGN
|
||||||
li r8,-0x40
|
li rNEG64, -0x40
|
||||||
bdz L(cloopdone) # 48th instruction from .align
|
bdz L(cloopdone) /* 48th instruction from .align */
|
||||||
|
|
||||||
L(c3): dcbz r8,r6
|
L(c3): dcbz rNEG64, rMEMP
|
||||||
stw r4,-4(r6)
|
stw rCHR, -4(rMEMP)
|
||||||
stw r4,-8(r6)
|
stw rCHR, -8(rMEMP)
|
||||||
stw r4,-12(r6)
|
stw rCHR, -12(rMEMP)
|
||||||
stw r4,-16(r6)
|
stw rCHR, -16(rMEMP)
|
||||||
nop # let 601 fetch last 4 instructions of loop
|
nop /* let 601 fetch last 4 instructions of loop */
|
||||||
stw r4,-20(r6)
|
stw rCHR, -20(rMEMP)
|
||||||
stw r4,-24(r6) # 56th instruction from .align
|
stw rCHR, -24(rMEMP) /* 56th instruction from .align */
|
||||||
nop # let 601 fetch first 8 instructions of loop
|
nop /* let 601 fetch first 8 instructions of loop */
|
||||||
stw r4,-28(r6)
|
stw rCHR, -28(rMEMP)
|
||||||
stwu r4,-32(r6)
|
stwu rCHR, -32(rMEMP)
|
||||||
bdnz L(c3)
|
bdnz L(c3)
|
||||||
L(cloopdone):
|
L(cloopdone):
|
||||||
stw r4,-4(r6)
|
stw rCHR, -4(rMEMP)
|
||||||
stw r4,-8(r6)
|
stw rCHR, -8(rMEMP)
|
||||||
stw r4,-12(r6)
|
stw rCHR, -12(rMEMP)
|
||||||
stw r4,-16(r6) # 64th instruction from .align
|
stw rCHR, -16(rMEMP) /* 64th instruction from .align */
|
||||||
stw r4,-20(r6)
|
stw rCHR, -20(rMEMP)
|
||||||
cmplwi cr1,r5,16
|
cmplwi cr1, rLEN, 16
|
||||||
stw r4,-24(r6)
|
stw rCHR, -24(rMEMP)
|
||||||
stw r4,-28(r6)
|
stw rCHR, -28(rMEMP)
|
||||||
stwu r4,-32(r6)
|
stwu rCHR, -32(rMEMP)
|
||||||
beqlr
|
beqlr
|
||||||
add r6,r6,r7
|
add rMEMP, rMEMP, rALIGN
|
||||||
b L(medium_tail2) # 72nd instruction from .align
|
b L(medium_tail2) /* 72nd instruction from .align */
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
nop
|
nop
|
||||||
/* Clear lines of memory in 128-byte chunks. */
|
/* Clear lines of memory in 128-byte chunks. */
|
||||||
L(zloopstart):
|
L(zloopstart):
|
||||||
clrlwi r5,r5,27
|
clrlwi rLEN, rLEN, 27
|
||||||
mtcrf 0x02,r7
|
mtcrf 0x02, rALIGN
|
||||||
srwi. r0,r7,7
|
srwi. rTMP, rALIGN, 7
|
||||||
mtctr r0
|
mtctr rTMP
|
||||||
li r7,0x20
|
li rPOS32, 0x20
|
||||||
li r8,-0x40
|
li rNEG64, -0x40
|
||||||
cmplwi cr1,r5,16 # 8
|
cmplwi cr1, rLEN, 16 /* 8 */
|
||||||
bf 26,L(z0)
|
bf 26, L(z0)
|
||||||
dcbz 0,r6
|
dcbz 0, rMEMP
|
||||||
addi r6,r6,0x20
|
addi rMEMP, rMEMP, 0x20
|
||||||
L(z0): li r9,-0x20
|
L(z0): li rNEG32, -0x20
|
||||||
bf 25,L(z1)
|
bf 25, L(z1)
|
||||||
dcbz 0,r6
|
dcbz 0, rMEMP
|
||||||
dcbz r7,r6
|
dcbz rPOS32, rMEMP
|
||||||
addi r6,r6,0x40 # 16
|
addi rMEMP, rMEMP, 0x40 /* 16 */
|
||||||
L(z1): cmplwi cr5,r5,0
|
L(z1): cmplwi cr5, rLEN, 0
|
||||||
beq L(medium)
|
beq L(medium)
|
||||||
L(zloop):
|
L(zloop):
|
||||||
dcbz 0,r6
|
dcbz 0, rMEMP
|
||||||
dcbz r7,r6
|
dcbz rPOS32, rMEMP
|
||||||
addi r6,r6,0x80
|
addi rMEMP, rMEMP, 0x80
|
||||||
dcbz r8,r6
|
dcbz rNEG64, rMEMP
|
||||||
dcbz r9,r6
|
dcbz rNEG32, rMEMP
|
||||||
bdnz L(zloop)
|
bdnz L(zloop)
|
||||||
beqlr cr5
|
beqlr cr5
|
||||||
b L(medium_tail2)
|
b L(medium_tail2)
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
L(small):
|
L(small):
|
||||||
/* Memset of 4 bytes or less. */
|
/* Memset of 4 bytes or less. */
|
||||||
cmplwi cr5,r5,1
|
cmplwi cr5, rLEN, 1
|
||||||
cmplwi cr1,r5,3
|
cmplwi cr1, rLEN, 3
|
||||||
bltlr cr5
|
bltlr cr5
|
||||||
stb r4,0(r6)
|
stb rCHR, 0(rMEMP)
|
||||||
beqlr cr5
|
beqlr cr5
|
||||||
nop
|
nop
|
||||||
stb r4,1(r6)
|
stb rCHR, 1(rMEMP)
|
||||||
bltlr cr1
|
bltlr cr1
|
||||||
stb r4,2(r6)
|
stb rCHR, 2(rMEMP)
|
||||||
beqlr cr1
|
beqlr cr1
|
||||||
nop
|
nop
|
||||||
stb r4,3(r6)
|
stb rCHR, 3(rMEMP)
|
||||||
blr
|
blr
|
||||||
|
|
||||||
/* Memset of 0-31 bytes. */
|
/* Memset of 0-31 bytes. */
|
||||||
.align 5
|
.align 5
|
||||||
L(medium):
|
L(medium):
|
||||||
cmplwi cr1,r5,16
|
cmplwi cr1, rLEN, 16
|
||||||
L(medium_tail2):
|
L(medium_tail2):
|
||||||
add r6,r6,r5
|
add rMEMP, rMEMP, rLEN
|
||||||
L(medium_tail):
|
L(medium_tail):
|
||||||
bt- 31,L(medium_31t)
|
bt- 31, L(medium_31t)
|
||||||
bt- 30,L(medium_30t)
|
bt- 30, L(medium_30t)
|
||||||
L(medium_30f):
|
L(medium_30f):
|
||||||
bt- 29,L(medium_29t)
|
bt- 29, L(medium_29t)
|
||||||
L(medium_29f):
|
L(medium_29f):
|
||||||
bge- cr1,L(medium_27t)
|
bge- cr1, L(medium_27t)
|
||||||
bflr- 28
|
bflr- 28
|
||||||
stw r4,-4(r6) # 8th instruction from .align
|
stw rCHR, -4(rMEMP) /* 8th instruction from .align */
|
||||||
stw r4,-8(r6)
|
stw rCHR, -8(rMEMP)
|
||||||
blr
|
blr
|
||||||
|
|
||||||
L(medium_31t):
|
L(medium_31t):
|
||||||
stbu r4,-1(r6)
|
stbu rCHR, -1(rMEMP)
|
||||||
bf- 30,L(medium_30f)
|
bf- 30, L(medium_30f)
|
||||||
L(medium_30t):
|
L(medium_30t):
|
||||||
sthu r4,-2(r6)
|
sthu rCHR, -2(rMEMP)
|
||||||
bf- 29,L(medium_29f)
|
bf- 29, L(medium_29f)
|
||||||
L(medium_29t):
|
L(medium_29t):
|
||||||
stwu r4,-4(r6)
|
stwu rCHR, -4(rMEMP)
|
||||||
blt- cr1,L(medium_27f) # 16th instruction from .align
|
blt- cr1, L(medium_27f) /* 16th instruction from .align */
|
||||||
L(medium_27t):
|
L(medium_27t):
|
||||||
stw r4,-4(r6)
|
stw rCHR, -4(rMEMP)
|
||||||
stw r4,-8(r6)
|
stw rCHR, -8(rMEMP)
|
||||||
stw r4,-12(r6)
|
stw rCHR, -12(rMEMP)
|
||||||
stwu r4,-16(r6)
|
stwu rCHR, -16(rMEMP)
|
||||||
L(medium_27f):
|
L(medium_27f):
|
||||||
bflr- 28
|
bflr- 28
|
||||||
L(medium_28t):
|
L(medium_28t):
|
||||||
stw r4,-4(r6)
|
stw rCHR, -4(rMEMP)
|
||||||
stw r4,-8(r6)
|
stw rCHR, -8(rMEMP)
|
||||||
blr
|
blr
|
||||||
END(memset)
|
END(memset)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/* Optimized strchr implementation for PowerPC.
|
/* Optimized strchr implementation for PowerPC.
|
||||||
Copyright (C) 1997, 1999 Free Software Foundation, Inc.
|
Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
The GNU C Library is free software; you can redistribute it and/or
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
@ -21,91 +21,95 @@
|
|||||||
|
|
||||||
/* See strlen.S for comments on how this works. */
|
/* See strlen.S for comments on how this works. */
|
||||||
|
|
||||||
/* char * [r3] strchr (const char *s [r3] , int c [r4] )
|
/* char * [r3] strchr (const char *s [r3] , int c [r4] ) */
|
||||||
|
|
||||||
r0: a temporary
|
ENTRY (strchr)
|
||||||
r3: our return result.
|
|
||||||
r4: byte we're looking for, spread over the whole word
|
#define rTMP1 r0
|
||||||
r5: the current word
|
#define rRTN r3 /* outgoing result */
|
||||||
r6: the constant 0xfefefeff (-0x01010101)
|
#define rSTRin r3 /* incoming string arg */
|
||||||
r7: the constant 0x7f7f7f7f
|
#define rCHR r4 /* byte we're looking for, spread over the whole word */
|
||||||
r8: pointer to the current word.
|
#define rCLZB rCHR /* leading zero byte count */
|
||||||
r9: a temporary
|
#define rWORD r5 /* the current word */
|
||||||
r10: the number of bits we should ignore in the first word
|
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
|
||||||
r11: a mask with the bits to ignore set to 0
|
#define r7F7F r7 /* constant 0x7f7f7f7f */
|
||||||
r12: a temporary */
|
#define rSTR r8 /* current word pointer */
|
||||||
ENTRY(strchr)
|
#define rTMP2 r9
|
||||||
rlwimi r4,r4,8,16,23
|
#define rIGN r10 /* number of bits we should ignore in the first word */
|
||||||
li r11,-1
|
#define rMASK r11 /* mask with the bits to ignore set to 0 */
|
||||||
rlwimi r4,r4,16,0,15
|
#define rTMP3 r12
|
||||||
lis r6,0xfeff
|
|
||||||
lis r7,0x7f7f
|
rlwimi rCHR, rCHR, 8, 16, 23
|
||||||
clrrwi r8,r3,2
|
li rMASK, -1
|
||||||
addi r7,r7,0x7f7f
|
rlwimi rCHR, rCHR, 16, 0, 15
|
||||||
addi r6,r6,0xfffffeff
|
lis rFEFE, -0x101
|
||||||
rlwinm r10,r3,3,27,28
|
lis r7F7F, 0x7f7f
|
||||||
|
clrrwi rSTR, rSTRin, 2
|
||||||
|
addi r7F7F, r7F7F, 0x7f7f
|
||||||
|
addi rFEFE, rFEFE, -0x101
|
||||||
|
rlwinm rIGN, rSTRin, 3, 27, 28
|
||||||
/* Test the first (partial?) word. */
|
/* Test the first (partial?) word. */
|
||||||
lwz r5,0(r8)
|
lwz rWORD, 0(rSTR)
|
||||||
srw r11,r11,r10
|
srw rMASK, rMASK, rIGN
|
||||||
orc r5,r5,r11
|
orc rWORD, rWORD, rMASK
|
||||||
add r0,r6,r5
|
add rTMP1, rFEFE, rWORD
|
||||||
nor r9,r7,r5
|
nor rTMP2, r7F7F, rWORD
|
||||||
and. r0,r0,r9
|
and. rTMP1, rTMP1, rTMP2
|
||||||
xor r12,r4,r5
|
xor rTMP3, rCHR, rWORD
|
||||||
orc r12,r12,r11
|
orc rTMP3, rTMP3, rMASK
|
||||||
b L(loopentry)
|
b L(loopentry)
|
||||||
|
|
||||||
/* The loop. */
|
/* The loop. */
|
||||||
|
|
||||||
L(loop):lwzu r5,4(r8)
|
L(loop):lwzu rWORD, 4(rSTR)
|
||||||
and. r0,r0,r9
|
and. rTMP1, rTMP1, rTMP2
|
||||||
/* Test for 0. */
|
/* Test for 0. */
|
||||||
add r0,r6,r5
|
add rTMP1, rFEFE, rWORD
|
||||||
nor r9,r7,r5
|
nor rTMP2, r7F7F, rWORD
|
||||||
bne L(foundit)
|
bne L(foundit)
|
||||||
and. r0,r0,r9
|
and. rTMP1, rTMP1, rTMP2
|
||||||
/* Start test for the bytes we're looking for. */
|
/* Start test for the bytes we're looking for. */
|
||||||
xor r12,r4,r5
|
xor rTMP3, rCHR, rWORD
|
||||||
L(loopentry):
|
L(loopentry):
|
||||||
add r0,r6,r12
|
add rTMP1, rFEFE, rTMP3
|
||||||
nor r9,r7,r12
|
nor rTMP2, r7F7F, rTMP3
|
||||||
beq L(loop)
|
beq L(loop)
|
||||||
/* There is a zero byte in the word, but may also be a matching byte (either
|
/* There is a zero byte in the word, but may also be a matching byte (either
|
||||||
before or after the zero byte). In fact, we may be looking for a
|
before or after the zero byte). In fact, we may be looking for a
|
||||||
zero byte, in which case we return a match. We guess that this hasn't
|
zero byte, in which case we return a match. We guess that this hasn't
|
||||||
happened, though. */
|
happened, though. */
|
||||||
L(missed):
|
L(missed):
|
||||||
and. r0,r0,r9
|
and. rTMP1, rTMP1, rTMP2
|
||||||
li r3,0
|
li rRTN, 0
|
||||||
beqlr
|
beqlr
|
||||||
/* It did happen. Decide which one was first...
|
/* It did happen. Decide which one was first...
|
||||||
I'm not sure if this is actually faster than a sequence of
|
I'm not sure if this is actually faster than a sequence of
|
||||||
rotates, compares, and branches (we use it anyway because it's shorter). */
|
rotates, compares, and branches (we use it anyway because it's shorter). */
|
||||||
and r6,r7,r5
|
and rFEFE, r7F7F, rWORD
|
||||||
or r11,r7,r5
|
or rMASK, r7F7F, rWORD
|
||||||
and r0,r7,r12
|
and rTMP1, r7F7F, rTMP3
|
||||||
or r10,r7,r12
|
or rIGN, r7F7F, rTMP3
|
||||||
add r6,r6,r7
|
add rFEFE, rFEFE, r7F7F
|
||||||
add r0,r0,r7
|
add rTMP1, rTMP1, r7F7F
|
||||||
nor r5,r11,r6
|
nor rWORD, rMASK, rFEFE
|
||||||
nor r9,r10,r0
|
nor rTMP2, rIGN, rTMP1
|
||||||
cmplw r5,r9
|
cmplw rWORD, rTMP2
|
||||||
bgtlr
|
bgtlr
|
||||||
cntlzw r4,r9
|
cntlzw rCLZB, rTMP2
|
||||||
srwi r4,r4,3
|
srwi rCLZB, rCLZB, 3
|
||||||
add r3,r8,r4
|
add rRTN, rSTR, rCLZB
|
||||||
blr
|
blr
|
||||||
|
|
||||||
L(foundit):
|
L(foundit):
|
||||||
and r0,r7,r12
|
and rTMP1, r7F7F, rTMP3
|
||||||
or r10,r7,r12
|
or rIGN, r7F7F, rTMP3
|
||||||
add r0,r0,r7
|
add rTMP1, rTMP1, r7F7F
|
||||||
nor r9,r10,r0
|
nor rTMP2, rIGN, rTMP1
|
||||||
cntlzw r4,r9
|
cntlzw rCLZB, rTMP2
|
||||||
subi r8,r8,4
|
subi rSTR, rSTR, 4
|
||||||
srwi r4,r4,3
|
srwi rCLZB, rCLZB, 3
|
||||||
add r3,r8,r4
|
add rRTN, rSTR, rCLZB
|
||||||
blr
|
blr
|
||||||
END(strchr)
|
END (strchr)
|
||||||
|
|
||||||
weak_alias(strchr,index)
|
weak_alias(strchr, index)
|
||||||
|
@ -21,95 +21,93 @@
|
|||||||
|
|
||||||
/* See strlen.S for comments on how the end-of-string testing works. */
|
/* See strlen.S for comments on how the end-of-string testing works. */
|
||||||
|
|
||||||
EALIGN(strcmp,4,0)
|
/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */
|
||||||
/* int [r3] strcmp (const char *p1 [r3], const char *p2 [r4]) */
|
|
||||||
|
|
||||||
/* General register assignments:
|
EALIGN (strcmp, 4, 0)
|
||||||
r0: temporary
|
|
||||||
r3: pointer to previous word in s1
|
|
||||||
r4: pointer to previous word in s2
|
|
||||||
r5: current word from s1
|
|
||||||
r6: current word from s2
|
|
||||||
r7: 0xfefefeff
|
|
||||||
r8: 0x7f7f7f7f
|
|
||||||
r9: ~(word in s1 | 0x7f7f7f7f) */
|
|
||||||
|
|
||||||
/* Register assignments in the prologue:
|
#define rTMP r0
|
||||||
r10: low 2 bits of p2-p1
|
#define rRTN r3 /* return value */
|
||||||
r11: mask to orc with r5/r6 */
|
#define rSTR1 r3 /* first string arg */
|
||||||
|
#define rSTR2 r4 /* second string arg */
|
||||||
|
#define rWORD1 r5 /* current word in s1 */
|
||||||
|
#define rWORD2 r6 /* current word in s2 */
|
||||||
|
#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */
|
||||||
|
#define r7F7F r8 /* constant 0x7f7f7f7f */
|
||||||
|
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */
|
||||||
|
#define rBITDIF r10 /* bits that differ in s1 & s2 words */
|
||||||
|
|
||||||
or r0,r4,r3
|
or rTMP, rSTR2, rSTR1
|
||||||
clrlwi. r0,r0,30
|
clrlwi. rTMP, rTMP, 30
|
||||||
lis r7,0xfeff
|
lis rFEFE, -0x101
|
||||||
bne L(unaligned)
|
bne L(unaligned)
|
||||||
|
|
||||||
lwz r5,0(r3)
|
lwz rWORD1, 0(rSTR1)
|
||||||
lwz r6,0(r4)
|
lwz rWORD2, 0(rSTR2)
|
||||||
lis r8,0x7f7f
|
lis r7F7F, 0x7f7f
|
||||||
addi r7,r7,-0x101
|
addi rFEFE, rFEFE, -0x101
|
||||||
addi r8,r8,0x7f7f
|
addi r7F7F, r7F7F, 0x7f7f
|
||||||
b L(g1)
|
b L(g1)
|
||||||
|
|
||||||
L(g0): lwzu r5,4(r3)
|
L(g0): lwzu rWORD1, 4(rSTR1)
|
||||||
bne cr1,L(different)
|
bne cr1, L(different)
|
||||||
lwzu r6,4(r4)
|
lwzu rWORD2, 4(rSTR2)
|
||||||
L(g1): add r0,r7,r5
|
L(g1): add rTMP, rFEFE, rWORD1
|
||||||
nor r9,r8,r5
|
nor rNEG, r7F7F, rWORD1
|
||||||
and. r0,r0,r9
|
and. rTMP, rTMP, rNEG
|
||||||
cmpw cr1,r5,r6
|
cmpw cr1, rWORD1, rWORD2
|
||||||
beq+ L(g0)
|
beq+ L(g0)
|
||||||
L(endstring):
|
L(endstring):
|
||||||
/* OK. We've hit the end of the string. We need to be careful that
|
/* OK. We've hit the end of the string. We need to be careful that
|
||||||
we don't compare two strings as different because of gunk beyond
|
we don't compare two strings as different because of gunk beyond
|
||||||
the end of the strings... */
|
the end of the strings... */
|
||||||
and r0,r8,r5
|
and rTMP, r7F7F, rWORD1
|
||||||
beq cr1,L(equal)
|
beq cr1, L(equal)
|
||||||
add r0,r0,r8
|
add rTMP, rTMP, r7F7F
|
||||||
xor. r10,r5,r6
|
xor. rBITDIF, rWORD1, rWORD2
|
||||||
andc r9,r9,r0
|
andc rNEG, rNEG, rTMP
|
||||||
blt- L(highbit)
|
blt- L(highbit)
|
||||||
cntlzw r10,r10
|
cntlzw rBITDIF, rBITDIF
|
||||||
cntlzw r9,r9
|
cntlzw rNEG, rNEG
|
||||||
addi r9,r9,7
|
addi rNEG, rNEG, 7
|
||||||
cmpw cr1,r9,r10
|
cmpw cr1, rNEG, rBITDIF
|
||||||
sub r3,r5,r6
|
sub rRTN, rWORD1, rWORD2
|
||||||
bgelr+ cr1
|
bgelr+ cr1
|
||||||
L(equal):
|
L(equal):
|
||||||
li r3,0
|
li rRTN, 0
|
||||||
blr
|
blr
|
||||||
|
|
||||||
L(different):
|
L(different):
|
||||||
lwz r5,-4(r3)
|
lwz rWORD1, -4(rSTR1)
|
||||||
xor. r10,r5,r6
|
xor. rBITDIF, rWORD1, rWORD2
|
||||||
sub r3,r5,r6
|
sub rRTN, rWORD1, rWORD2
|
||||||
bgelr+
|
bgelr+
|
||||||
L(highbit):
|
L(highbit):
|
||||||
ori r3,r6,1
|
ori rRTN, rWORD2, 1
|
||||||
blr
|
blr
|
||||||
|
|
||||||
|
|
||||||
/* Oh well. In this case, we just do a byte-by-byte comparison. */
|
/* Oh well. In this case, we just do a byte-by-byte comparison. */
|
||||||
.align 4
|
.align 4
|
||||||
L(unaligned):
|
L(unaligned):
|
||||||
lbz r5,0(r3)
|
lbz rWORD1, 0(rSTR1)
|
||||||
lbz r6,0(r4)
|
lbz rWORD2, 0(rSTR2)
|
||||||
b L(u1)
|
b L(u1)
|
||||||
|
|
||||||
L(u0): lbzu r5,1(r3)
|
L(u0): lbzu rWORD1, 1(rSTR1)
|
||||||
bne- L(u4)
|
bne- L(u4)
|
||||||
lbzu r6,1(r4)
|
lbzu rWORD2, 1(rSTR2)
|
||||||
L(u1): cmpwi cr1,r5,0
|
L(u1): cmpwi cr1, rWORD1, 0
|
||||||
beq- cr1,L(u3)
|
beq- cr1, L(u3)
|
||||||
cmpw r5,r6
|
cmpw rWORD1, rWORD2
|
||||||
bne- L(u3)
|
bne- L(u3)
|
||||||
lbzu r5,1(r3)
|
lbzu rWORD1, 1(rSTR1)
|
||||||
lbzu r6,1(r4)
|
lbzu rWORD2, 1(rSTR2)
|
||||||
cmpwi cr1,r5,0
|
cmpwi cr1, rWORD1, 0
|
||||||
cmpw r5,r6
|
cmpw rWORD1, rWORD2
|
||||||
bne+ cr1,L(u0)
|
bne+ cr1, L(u0)
|
||||||
L(u3): sub r3,r5,r6
|
L(u3): sub rRTN, rWORD1, rWORD2
|
||||||
blr
|
blr
|
||||||
L(u4): lbz r5,-1(r3)
|
L(u4): lbz rWORD1, -1(rSTR1)
|
||||||
sub r3,r5,r6
|
sub rRTN, rWORD1, rWORD2
|
||||||
blr
|
blr
|
||||||
END(strcmp)
|
END(strcmp)
|
||||||
|
@ -21,80 +21,80 @@
|
|||||||
|
|
||||||
/* See strlen.S for comments on how the end-of-string testing works. */
|
/* See strlen.S for comments on how the end-of-string testing works. */
|
||||||
|
|
||||||
EALIGN(strcpy,4,0)
|
|
||||||
/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */
|
/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */
|
||||||
|
|
||||||
/* General register assignments:
|
EALIGN(strcpy, 4, 0)
|
||||||
r0: temporary
|
|
||||||
r3: saved `dest'
|
|
||||||
r4: pointer to previous word in src
|
|
||||||
r5: pointer to previous word in dest
|
|
||||||
r6: current word from src
|
|
||||||
r7: 0xfefefeff
|
|
||||||
r8: 0x7f7f7f7f
|
|
||||||
r9: ~(word in src | 0x7f7f7f7f)
|
|
||||||
r10: alternate word from src. */
|
|
||||||
|
|
||||||
or r0,r4,r3
|
#define rTMP r0
|
||||||
clrlwi. r0,r0,30
|
#define rRTN r3 /* incoming DEST arg preserved as result */
|
||||||
addi r5,r3,-4
|
#define rSRC r4 /* pointer to previous word in src */
|
||||||
bne L(unaligned)
|
#define rDEST r5 /* pointer to previous word in dest */
|
||||||
|
#define rWORD r6 /* current word from src */
|
||||||
|
#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */
|
||||||
|
#define r7F7F r8 /* constant 0x7f7f7f7f */
|
||||||
|
#define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */
|
||||||
|
#define rALT r10 /* alternate word from src */
|
||||||
|
|
||||||
lis r7,0xfeff
|
or rTMP, rSRC, rRTN
|
||||||
lis r8,0x7f7f
|
clrlwi. rTMP, rTMP, 30
|
||||||
lwz r6,0(r4)
|
addi rDEST, rRTN, -4
|
||||||
addi r7,r7,-0x101
|
bne L(unaligned)
|
||||||
addi r8,r8,0x7f7f
|
|
||||||
b L(g2)
|
|
||||||
|
|
||||||
L(g0): lwzu r10,4(r4)
|
lis rFEFE, -0x101
|
||||||
stwu r6,4(r5)
|
lis r7F7F, 0x7f7f
|
||||||
add r0,r7,r10
|
lwz rWORD, 0(rSRC)
|
||||||
nor r9,r8,r10
|
addi rFEFE, rFEFE, -0x101
|
||||||
and. r0,r0,r9
|
addi r7F7F, r7F7F, 0x7f7f
|
||||||
bne- L(g1)
|
b L(g2)
|
||||||
lwzu r6,4(r4)
|
|
||||||
stwu r10,4(r5)
|
|
||||||
L(g2): add r0,r7,r6
|
|
||||||
nor r9,r8,r6
|
|
||||||
and. r0,r0,r9
|
|
||||||
beq+ L(g0)
|
|
||||||
|
|
||||||
mr r10,r6
|
L(g0): lwzu rALT, 4(rSRC)
|
||||||
|
stwu rWORD, 4(rDEST)
|
||||||
|
add rTMP, rFEFE, rALT
|
||||||
|
nor rNEG, r7F7F, rALT
|
||||||
|
and. rTMP, rTMP, rNEG
|
||||||
|
bne- L(g1)
|
||||||
|
lwzu rWORD, 4(rSRC)
|
||||||
|
stwu rALT, 4(rDEST)
|
||||||
|
L(g2): add rTMP, rFEFE, rWORD
|
||||||
|
nor rNEG, r7F7F, rWORD
|
||||||
|
and. rTMP, rTMP, rNEG
|
||||||
|
beq+ L(g0)
|
||||||
|
|
||||||
|
mr rALT, rWORD
|
||||||
/* We've hit the end of the string. Do the rest byte-by-byte. */
|
/* We've hit the end of the string. Do the rest byte-by-byte. */
|
||||||
L(g1): rlwinm. r0,r10,8,24,31
|
L(g1): rlwinm. rTMP, rALT, 8, 24, 31
|
||||||
stb r0,4(r5)
|
stb rTMP, 4(rDEST)
|
||||||
beqlr-
|
beqlr-
|
||||||
rlwinm. r0,r10,16,24,31
|
rlwinm. rTMP, rALT, 16, 24, 31
|
||||||
stb r0,5(r5)
|
stb rTMP, 5(rDEST)
|
||||||
beqlr-
|
beqlr-
|
||||||
rlwinm. r0,r10,24,24,31
|
rlwinm. rTMP, rALT, 24, 24, 31
|
||||||
stb r0,6(r5)
|
stb rTMP, 6(rDEST)
|
||||||
beqlr-
|
beqlr-
|
||||||
stb r10,7(r5)
|
stb rALT, 7(rDEST)
|
||||||
blr
|
blr
|
||||||
|
|
||||||
/* Oh well. In this case, we just do a byte-by-byte copy. */
|
/* Oh well. In this case, we just do a byte-by-byte copy. */
|
||||||
.align 4
|
.align 4
|
||||||
nop
|
nop
|
||||||
L(unaligned):
|
L(unaligned):
|
||||||
lbz r6,0(r4)
|
lbz rWORD, 0(rSRC)
|
||||||
addi r5,r3,-1
|
addi rDEST, rRTN, -1
|
||||||
cmpwi r6,0
|
cmpwi rWORD, 0
|
||||||
beq- L(u2)
|
beq- L(u2)
|
||||||
|
|
||||||
L(u0): lbzu r10,1(r4)
|
L(u0): lbzu rALT, 1(rSRC)
|
||||||
stbu r6,1(r5)
|
stbu rWORD, 1(rDEST)
|
||||||
cmpwi r10,0
|
cmpwi rALT, 0
|
||||||
beq- L(u1)
|
beq- L(u1)
|
||||||
nop /* Let 601 load start of loop. */
|
nop /* Let 601 load start of loop. */
|
||||||
lbzu r6,1(r4)
|
lbzu rWORD, 1(rSRC)
|
||||||
stbu r10,1(r5)
|
stbu rALT, 1(rDEST)
|
||||||
cmpwi r6,0
|
cmpwi rWORD, 0
|
||||||
bne+ L(u0)
|
bne+ L(u0)
|
||||||
L(u2): stb r6,1(r5)
|
L(u2): stb rWORD, 1(rDEST)
|
||||||
blr
|
blr
|
||||||
L(u1): stb r10,1(r5)
|
L(u1): stb rALT, 1(rDEST)
|
||||||
blr
|
blr
|
||||||
|
|
||||||
END(strcpy)
|
END(strcpy)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/* Optimized strlen implementation for PowerPC.
|
/* Optimized strlen implementation for PowerPC.
|
||||||
Copyright (C) 1997, 1999 Free Software Foundation, Inc.
|
Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
The GNU C Library is free software; you can redistribute it and/or
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
@ -69,76 +69,86 @@
|
|||||||
We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
|
We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
|
||||||
them, the others we must save. */
|
them, the others we must save. */
|
||||||
|
|
||||||
ENTRY(strlen)
|
/* size_t [r3] strlen (const char *s [r3]) */
|
||||||
/* On entry, r3 points to the string, and it's left that way.
|
|
||||||
We use r6 to store 0xfefefeff, and r7 to store 0x7f7f7f7f.
|
ENTRY (strlen)
|
||||||
r4 is used to keep the current index into the string; r5 holds
|
|
||||||
the number of padding bits we prepend to the string to make it
|
#define rTMP1 r0
|
||||||
start at a word boundary. r8 holds the 'current' word.
|
#define rRTN r3 /* incoming STR arg, outgoing result */
|
||||||
r9-12 are temporaries. r0 is used as a temporary and for discarded
|
#define rSTR r4 /* current string position */
|
||||||
results. */
|
#define rPADN r5 /* number of padding bits we prepend to the
|
||||||
clrrwi r4,r3,2
|
string to make it start at a word boundary */
|
||||||
lis r7,0x7f7f
|
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
|
||||||
rlwinm r5,r3,3,27,28
|
#define r7F7F r7 /* constant 0x7f7f7f7f */
|
||||||
lwz r8,0(r4)
|
#define rWORD1 r8 /* current string word */
|
||||||
li r9,-1
|
#define rWORD2 r9 /* next string word */
|
||||||
addi r7,r7,0x7f7f
|
#define rMASK r9 /* mask for first string word */
|
||||||
|
#define rTMP2 r10
|
||||||
|
#define rTMP3 r11
|
||||||
|
#define rTMP4 r12
|
||||||
|
|
||||||
|
clrrwi rSTR, rRTN, 2
|
||||||
|
lis r7F7F, 0x7f7f
|
||||||
|
rlwinm rPADN, rRTN, 3, 27, 28
|
||||||
|
lwz rWORD1, 0(rSTR)
|
||||||
|
li rMASK, -1
|
||||||
|
addi r7F7F, r7F7F, 0x7f7f
|
||||||
/* That's the setup done, now do the first pair of words.
|
/* That's the setup done, now do the first pair of words.
|
||||||
We make an exception and use method (2) on the first two words, to reduce
|
We make an exception and use method (2) on the first two words, to reduce
|
||||||
overhead. */
|
overhead. */
|
||||||
srw r9,r9,r5
|
srw rMASK, rMASK, rPADN
|
||||||
and r0,r7,r8
|
and rTMP1, r7F7F, rWORD1
|
||||||
or r10,r7,r8
|
or rTMP2, r7F7F, rWORD1
|
||||||
add r0,r0,r7
|
add rTMP1, rTMP1, r7F7F
|
||||||
nor r0,r10,r0
|
nor rTMP1, rTMP2, rTMP1
|
||||||
and. r8,r0,r9
|
and. rWORD1, rTMP1, rMASK
|
||||||
mtcrf 0x01,r3
|
mtcrf 0x01, rRTN
|
||||||
bne L(done0)
|
bne L(done0)
|
||||||
lis r6,0xfeff
|
lis rFEFE, -0x101
|
||||||
addi r6,r6,-0x101
|
addi rFEFE, rFEFE, -0x101
|
||||||
/* Are we now aligned to a doubleword boundary? */
|
/* Are we now aligned to a doubleword boundary? */
|
||||||
bt 29,L(loop)
|
bt 29, L(loop)
|
||||||
|
|
||||||
/* Handle second word of pair. */
|
/* Handle second word of pair. */
|
||||||
lwzu r8,4(r4)
|
lwzu rWORD1, 4(rSTR)
|
||||||
and r0,r7,r8
|
and rTMP1, r7F7F, rWORD1
|
||||||
or r10,r7,r8
|
or rTMP2, r7F7F, rWORD1
|
||||||
add r0,r0,r7
|
add rTMP1, rTMP1, r7F7F
|
||||||
nor. r8,r10,r0
|
nor. rWORD1, rTMP2, rTMP1
|
||||||
bne L(done0)
|
bne L(done0)
|
||||||
|
|
||||||
/* The loop. */
|
/* The loop. */
|
||||||
|
|
||||||
L(loop):
|
L(loop):
|
||||||
lwz r8,4(r4)
|
lwz rWORD1, 4(rSTR)
|
||||||
lwzu r9,8(r4)
|
lwzu rWORD2, 8(rSTR)
|
||||||
add r0,r6,r8
|
add rTMP1, rFEFE, rWORD1
|
||||||
nor r10,r7,r8
|
nor rTMP2, r7F7F, rWORD1
|
||||||
and. r0,r0,r10
|
and. rTMP1, rTMP1, rTMP2
|
||||||
add r11,r6,r9
|
add rTMP3, rFEFE, rWORD2
|
||||||
nor r12,r7,r9
|
nor rTMP4, r7F7F, rWORD2
|
||||||
bne L(done1)
|
bne L(done1)
|
||||||
and. r0,r11,r12
|
and. rTMP1, rTMP3, rTMP4
|
||||||
beq L(loop)
|
beq L(loop)
|
||||||
|
|
||||||
and r0,r7,r9
|
and rTMP1, r7F7F, rWORD2
|
||||||
add r0,r0,r7
|
add rTMP1, rTMP1, r7F7F
|
||||||
andc r8,r12,r0
|
andc rWORD1, rTMP4, rTMP1
|
||||||
b L(done0)
|
b L(done0)
|
||||||
|
|
||||||
L(done1):
|
L(done1):
|
||||||
and r0,r7,r8
|
and rTMP1, r7F7F, rWORD1
|
||||||
subi r4,r4,4
|
subi rSTR, rSTR, 4
|
||||||
add r0,r0,r7
|
add rTMP1, rTMP1, r7F7F
|
||||||
andc r8,r10,r0
|
andc rWORD1, rTMP2, rTMP1
|
||||||
|
|
||||||
/* When we get to here, r4 points to the first word in the string that
|
/* When we get to here, rSTR points to the first word in the string that
|
||||||
contains a zero byte, and the most significant set bit in r8 is in that
|
contains a zero byte, and the most significant set bit in rWORD1 is in that
|
||||||
byte. */
|
byte. */
|
||||||
L(done0):
|
L(done0):
|
||||||
cntlzw r11,r8
|
cntlzw rTMP3, rWORD1
|
||||||
subf r0,r3,r4
|
subf rTMP1, rRTN, rSTR
|
||||||
srwi r11,r11,3
|
srwi rTMP3, rTMP3, 3
|
||||||
add r3,r0,r11
|
add rRTN, rTMP1, rTMP3
|
||||||
blr
|
blr
|
||||||
END(strlen)
|
END (strlen)
|
||||||
|
Loading…
Reference in New Issue
Block a user