update from main archive 961005

This commit is contained in:
Ulrich Drepper 1996-10-06 02:02:37 +00:00
parent b1691e6dce
commit cf182b5888
13 changed files with 1530 additions and 5 deletions

View File

@ -21,17 +21,21 @@ sysdep_routines += _mcount
endif
ifeq ($(subdir),setjmp)
sysdep_routines := $(sysdep_routines) setjmp_aux
sysdep_routines += setjmp_aux
endif
ifeq ($(subdir),gnulib)
routines = $(divrem)
endif # gnulib
sysdep_routines += $(divrem)
endif
ifeq ($(subdir),string)
sysdep_routines += stxcpy stxncpy
endif
ifeq ($(subdir),elf)
# The ld.so code cannot use literals until it self-relocates.
# The ld.so startup code cannot use literals until it self-relocates.
ifeq ($(elf),yes)
CFLAGS-rtld.c = -mbuild-constants
CFLAGS-rtld.c = -mbuild-constants
endif
# The rest of ld.so shouldn't use FP regs for block moves so
# that the lazy link trampoline doesn't have to save them.

113
sysdeps/alpha/bzero.S Normal file
View File

@ -0,0 +1,113 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Fill a block of memory with zeros. Optimized for the Alpha architecture:
- memory accessed as aligned quadwords only
- destination memory not read unless needed for good cache behaviour
- basic blocks arranged to optimize branch prediction for full-quadword
aligned memory blocks.
- partial head and tail quadwords constructed with byte-mask instructions
This is generally scheduled for the EV5 (got to look out for my own
interests :-), but with EV4 needs in mind. There *should* be no more
stalls for the EV4 than there are for the EV5.
*/
#include <sysdep.h>
.set noat
.set noreorder
.text
/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
doesn't like putting the entry point for a procedure somewhere in the
middle of the procedure descriptor. Work around this by putting the main
loop in its own procedure descriptor. */
/* On entry to this basic block:
t3 == loop counter
t4 == bytes in partial final word
a0 == possibly misaligned destination pointer */
.ent bzero_loop
.align 3
bzero_loop:
.frame sp, 0, ra, 0
.prologue 0
/* Store t3 zero quadwords starting at a0, then clear the first t4 bytes
   of the quadword that follows.  bzero branches here without changing
   ra, so the ret instructions below return to the caller of bzero.  */
beq t3, $tail #
/* The main loop stores two quadwords per pass.  When the count is odd,
   store a single quadword first so that the remainder is even.  */
blbc t3, 0f # skip single store if count even
stq_u zero, 0(a0) # e0 : store one word
subq t3, 1, t3 # .. e1 :
addq a0, 8, a0 # e0 :
beq t3, $tail # .. e1 :
/* Main loop: two quadword stores per iteration until t3 reaches zero.
   stq_u ignores the low three address bits, so a0 need not be aligned.  */
0: stq_u zero, 0(a0) # e0 : store two words
subq t3, 2, t3 # .. e1 :
stq_u zero, 8(a0) # e0 :
addq a0, 16, a0 # .. e1 :
bne t3, 0b # e1 :
/* Tail: t4 is the number of bytes to clear at the start of the final
   quadword; zero means the block ended on a quadword boundary.  */
$tail: bne t4, 1f # is there a tail to do?
ret # no
/* mskqh zeroes the bytes below byte offset t4 and keeps the others, so
   the data that follows the end of the block is preserved.  */
1: ldq_u t0, 0(a0) # yes, load original data
mskqh t0, t4, t0 #
stq_u t0, 0(a0) #
ret #
.end bzero_loop
ENTRY(bzero)
.prologue 0
/* In: a0 = destination, a1 = byte count.  Computes the quadword count
   (t3) and tail byte count (t4) expected by bzero_loop and deals with a
   misaligned first quadword here.  v0 receives the original a0.  */
mov a0, v0 # e0 : move return value in place
beq a1, $done # .. e1 : early exit for zero-length store
and a0, 7, t1 # e0 :
/* Measure the count from the start of the quadword that contains a0, so
   that the quadword count and the tail byte count fall out of a shift
   and a mask.  */
addq a1, t1, a1 # e1 : add dest misalignment to count
srl a1, 3, t3 # e0 : loop = count >> 3
and a1, 7, t4 # .. e1 : find number of bytes in tail
unop # :
beq t1, bzero_loop # e1 : aligned head, jump right in
/* Misaligned head: the first quadword also holds bytes that precede the
   destination, and those must be kept.  */
ldq_u t0, 0(a0) # e0 : load original data to mask into
cmpult a1, 8, t2 # .. e1 : is this a sub-word set?
bne t2, $oneq # e1 :
/* mskql keeps the bytes below byte offset (a0 & 7) and zeroes the rest
   of the first quadword.  That quadword is then finished, so it is
   removed from the loop count and a0 is stepped past it.  */
mskql t0, a0, t0 # e0 : we span words. finish this partial
subq t3, 1, t3 # e0 :
addq a0, 8, a0 # .. e1 :
stq_u t0, -8(a0) # e0 :
br bzero_loop # .. e1 :
.align 3
/* The whole block lies inside one quadword: keep the bytes before the
   start (mskql by a0) and the bytes from the end onward (mskqh by the
   adjusted count in a1), which leaves zeros in between.  */
$oneq:
mskql t0, a0, t2 # e0 :
mskqh t0, a1, t3 # e0 :
or t2, t3, t0 # e1 :
stq_u t0, 0(a0) # e0 :
$done: ret
END(bzero)

130
sysdeps/alpha/memset.S Normal file
View File

@ -0,0 +1,130 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Fill a block of memory with a character. Optimized for the Alpha
architecture:
- memory accessed as aligned quadwords only
- destination memory not read unless needed for good cache behaviour
- basic blocks arranged to optimize branch prediction for full-quadword
aligned memory blocks.
- partial head and tail quadwords constructed with byte-mask instructions
This is generally scheduled for the EV5 (got to look out for my own
interests :-), but with EV4 needs in mind. There *should* be no more
stalls for the EV4 than there are for the EV5.
*/
#include <sysdep.h>
.set noat
.set noreorder
.text
/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
doesn't like putting the entry point for a procedure somewhere in the
middle of the procedure descriptor. Work around this by putting the main
loop in its own procedure descriptor. */
/* On entry to this basic block:
t3 == loop counter
t4 == bytes in partial final word
a0 == possibly misaligned destination pointer
a1 == replicated source character */
.ent memset_loop
.align 3
memset_loop:
.frame sp, 0, ra, 0
.prologue 0
/* Store t3 quadwords of the replicated fill byte (a1) starting at a0,
   then fill the first t4 bytes of the quadword that follows.  memset
   branches here without changing ra, so the ret instructions below
   return to the caller of memset.  */
beq t3, $tail
/* The main loop stores two quadwords per pass.  When the count is odd,
   store a single quadword first so that the remainder is even.  */
blbc t3, 0f # skip single store if count even
stq_u a1, 0(a0) # e0 : store one word
subq t3, 1, t3 # .. e1 :
addq a0, 8, a0 # e0 :
beq t3, $tail # .. e1 :
/* Main loop: two quadword stores per iteration until t3 reaches zero.
   stq_u ignores the low three address bits, so a0 need not be aligned.  */
0: stq_u a1, 0(a0) # e0 : store two words
subq t3, 2, t3 # .. e1 :
stq_u a1, 8(a0) # e0 :
addq a0, 16, a0 # .. e1 :
bne t3, 0b # e1 :
/* Tail: t4 is the number of bytes to fill at the start of the final
   quadword; zero means the block ended on a quadword boundary.  */
$tail: bne t4, 1f # is there a tail to do?
ret # no
.align 3
/* Merge: mskql keeps the low t4 bytes of the fill pattern, mskqh keeps
   the original memory bytes from offset t4 upward; OR them together.  */
1: ldq_u t0, 0(a0) # e1 : yes, load original data
mskql a1, t4, t1 # .. e0 :
mskqh t0, t4, t0 # e0 :
or t0, t1, t0 # e1 (stall)
stq_u t0, 0(a0) # e0 :
ret # .. e1 :
.end memset_loop
ENTRY(memset)
.prologue 0
/* In: a0 = destination, a1 = fill character, a2 = byte count.
   Out: v0 = the original a0.
   The fill character is reduced to its low byte and replicated into all
   eight bytes of a1 (8 -> 16 -> 32 -> 64 bits); the length bookkeeping
   is interleaved with that to fill both issue slots.  */
zapnot a1, 1, a1 # e0 : zero extend input character
mov a0, v0 # .. e1 : move return value in place
sll a1, 8, t0 # e0 : begin replicating the char
beq a2, $done # .. e1 : early exit for zero-length store
or t0, a1, a1 # e0 :
and a0, 7, t1 # .. e1 : dest misalignment
sll a1, 16, t0 # e0 :
/* Measure the count from the start of the quadword that contains a0, so
   that the quadword count (t3) and tail byte count (t4) fall out of a
   shift and a mask.  */
addq a2, t1, a2 # .. e1 : add dest misalignment to count
or t0, a1, a1 # e0 :
srl a2, 3, t3 # .. e1 : loop = count >> 3
sll a1, 32, t0 # e0 :
and a2, 7, t4 # .. e1 : find number of bytes in tail
or t0, a1, a1 # e0 : character replication done
beq t1, memset_loop # .. e1 : aligned head, jump right in
/* Misaligned head.  t1 is reused: mskqh keeps the fill bytes from byte
   offset (a0 & 7) upward, i.e. the part of the first quadword that
   belongs to the destination.  */
ldq_u t0, 0(a0) # e1 : load original data to mask into
mskqh a1, a0, t1 # .. e0 :
cmpult a2, 8, t2 # e0 : is this a sub-word set?
bne t2, $oneq # .. e1 (zdb)
/* Keep the original bytes below the start (mskql), merge in the fill
   bytes, and count this quadword as done before entering the loop.  */
mskql t0, a0, t0 # e0 : we span words. finish this partial
subq t3, 1, t3 # .. e1 :
addq a0, 8, a0 # e0 :
or t0, t1, t0 # .. e1 :
stq_u t0, -8(a0) # e0 :
br memset_loop # .. e1 :
.align 3
/* The whole block lies inside one quadword.  The result is the OR of:
   fill bytes limited to [start, end) (t1), original bytes below the
   start (t2), and original bytes from the end upward (t3), where the
   end offset is the adjusted count in a2.  */
$oneq:
mskql t1, a2, t1 # e0 : entire operation within one word
mskql t0, a0, t2 # e0 :
mskqh t0, a2, t3 # e0 :
or t1, t2, t0 # .. e1 :
or t0, t3, t0 # e1 :
stq_u t0, 0(a0) # e0 (stall)
$done: ret
END(memset)

49
sysdeps/alpha/stpcpy.S Normal file
View File

@ -0,0 +1,49 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Copy a null-terminated string from SRC to DST. Return a pointer
to the null-terminator written to the destination. */
#include <sysdep.h>
.text
ENTRY(__stpcpy)
ldgp gp, 0(pv)
.prologue 1
/* a0 = DST and a1 = SRC are handed unchanged to __stxcpy, which uses t9
   as its return-address register.  */
jsr t9, __stxcpy # do the work of the copy
/* Per the __stxcpy interface, t8 now has a single bit set at the index
   of the last byte written within its quadword, and a0 is the unaligned
   address of that quadword.  Convert the bit to an index 0..7: the masks
   0xf0, 0xcc and 0xaa test bits 2, 1 and 0 of the index, and the cmovne
   instructions replace each nonzero test result by its weight.  */
and t8, 0xf0, t2 # binary search for byte offset of the
and t8, 0xcc, t1 # last byte written.
and t8, 0xaa, t0
andnot a0, 7, a0
cmovne t2, 4, t2
cmovne t1, 2, t1
cmovne t0, 1, t0
/* v0 = aligned address of the last quadword written + byte index.  */
addq a0, t2, v0
addq t0, t1, t0
addq v0, t0, v0
ret
END(__stpcpy)
weak_alias (__stpcpy, stpcpy)

103
sysdeps/alpha/stpncpy.S Normal file
View File

@ -0,0 +1,103 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Copy no more than COUNT bytes of the null-terminated string from
SRC to DST. If SRC does not cover all of COUNT, the balance is
zeroed. Return the address of the terminating null in DEST, if
any, else DEST + COUNT. */
#include <sysdep.h>
.set noat
.set noreorder
.text
ENTRY(__stpncpy)
ldgp gp, 0(pv)
.prologue 1
/* In: a0 = DST, a1 = SRC, a2 = COUNT.  __stxncpy requires a nonzero
   COUNT, so the zero case is handled separately at $zerocount.  */
beq a2, $zerocount
jsr t9, __stxncpy # do the work of the copy
/* Per the __stxncpy interface: t0 = last word written, t8 = one-bit mask
   of the last byte written, t10 = one-bit mask of the end-of-COUNT byte
   position, a0 = address of the last word written, a2 = full words left.
   Convert t8 to a byte index 0..7 (masks test bits 2, 1, 0 of the index)
   and add it to the aligned word address to form a tentative v0.  */
and t8, 0xf0, t3 # binary search for byte offset of the
and t8, 0xcc, t2 # last byte written.
and t8, 0xaa, t1
andnot a0, 7, v0
cmovne t3, 4, t3
cmovne t2, 2, t2
cmovne t1, 1, t1
addq v0, t3, v0
addq t1, t2, t1
addq v0, t1, v0
bne a2, $multiword # do we have full words left?
.align 3
/* No full words remain: everything left to clear is in the current word.
   t4 is nonzero iff the last byte written is not a null, in which case
   the result is one past that byte.  t2 / t3 become byte masks covering
   positions up to and including the last written byte / the end-of-COUNT
   byte; the bytes in t3 but not t2 are zapped.  */
zapnot t0, t8, t4 # e0 : was last byte a null?
subq t8, 1, t2 # .. e1 :
addq v0, 1, t5 # e0 :
subq t10, 1, t3 # .. e1 :
or t2, t8, t2 # e0 : clear the bits between the last
or t3, t10, t3 # .. e1 : written byte and the last byte in
andnot t3, t2, t3 # e0 : COUNT
cmovne t4, t5, v0 # .. e1 : if last written wasnt null, inc v0
zap t0, t3, t0 # e0 :
stq_u t0, 0(a0) # e1 :
ret # .. e1 :
.align 3
/* Full words remain.  First zero every byte of the current word above
   the last byte written (zapnot keeps positions <= that byte).  */
$multiword:
subq t8, 1, t7 # e0 : clear the final bits in the prev
or t7, t8, t7 # e1 : word
zapnot t0, t7, t0 # e0 :
subq a2, 1, a2 # .. e1 :
stq_u t0, 0(a0) # e0 :
addq a0, 8, a0 # .. e1 :
/* Zero the remaining a2 whole words: one peeled store when the count is
   odd, then two stores per iteration.  */
beq a2, 1f # e1 :
blbc a2, 0f # e1 :
stq_u zero, 0(a0) # e0 : zero one word
subq a2, 1, a2 # .. e1 :
addq a0, 8, a0 # e0 :
beq a2, 1f # .. e1 :
0: stq_u zero, 0(a0) # e0 : zero two words
subq a2, 2, a2 # .. e1 :
stq_u zero, 8(a0) # e0 :
addq a0, 16, a0 # .. e1 :
bne a2, 0b # e1 :
unop
/* Final word: zap the byte positions up to and including the one marked
   by t10, keeping the memory bytes above it.  */
1: ldq_u t0, 0(a0) # e0 : clear the leading bits in the final
subq t10, 1, t7 # .. e1 : word
or t7, t10, t7 # e0 :
zap t0, t7, t0 # e1 (stall)
stq_u t0, 0(a0) # e0 :
ret # .. e1 :
/* COUNT == 0: nothing is written and DST itself is returned.  */
$zerocount:
mov a0, v0
ret
END(__stpncpy)
weak_alias (__stpncpy, stpncpy)

66
sysdeps/alpha/strcat.S Normal file
View File

@ -0,0 +1,66 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Append a null-terminated string from SRC to DST. */
#include <sysdep.h>
.text
ENTRY(strcat)
ldgp gp, 0(pv)
.prologue 1
/* In: a0 = DST, a1 = SRC.  Out: v0 = DST.  a1 is not touched before the
   call to __stxcpy below, so SRC is passed through as is.  */
mov a0, v0 # set up return value
/* Find the end of the string. */
/* insqh of all-ones by a0 gives 0xff in the bytes below byte offset
   (a0 & 7) and zero elsewhere.  OR-ing that into the first quadword
   makes the bytes that precede DST nonzero so they cannot be mistaken
   for the terminator.  */
ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned)
lda t1, -1(zero)
insqh t1, a0, t1
andnot a0, 7, a0
or t1, t0, t0
cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0
bne t1, $found
/* Scan one aligned quadword at a time until one contains a zero byte.  */
$loop: ldq t0, 8(a0)
addq a0, 8, a0 # addr += 8
cmpbge zero, t0, t1
beq t1, $loop
/* t1 & -t1 isolates the lowest set bit, i.e. the first zero byte.  The
   masks 0xf0, 0xcc and 0xaa then test bits 2, 1 and 0 of its index, and
   the sum of the weights is added to the aligned address in a0.  */
$found: negq t1, t2 # clear all but least set bit
and t1, t2, t1
and t1, 0xf0, t2 # binary search for that set bit
and t1, 0xcc, t3
and t1, 0xaa, t4
cmovne t2, 4, t2
cmovne t3, 2, t3
cmovne t4, 1, t4
addq t2, t3, t2
addq a0, t4, a0
addq a0, t2, a0
/* Now do the append. */
/* a0 now addresses the terminator of DST; __stxcpy copies SRC there and
   is documented to leave v0 untouched.  */
jsr t9, __stxcpy
ret
END(strcat)

88
sysdeps/alpha/strchr.S Normal file
View File

@ -0,0 +1,88 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Return the address of a given character within a null-terminated
string, or null if it is not found.
This is generally scheduled for the EV5 (got to look out for my own
interests :-), but with EV4 needs in mind. There *should* be no more
stalls for the EV4 than there are for the EV5.
*/
#include <sysdep.h>
.set noreorder
.set noat
ENTRY(strchr)
.prologue 0
/* In: a0 = string, a1 = character.  Out: v0 = address of the first byte
   equal to the character, or zero if the terminator comes first.
   The setup interleaves three things: replicating the character into all
   eight bytes of a1, testing the first (possibly misaligned) quadword,
   and building a mask (t4) of the bytes that precede the string.  */
zapnot a1, 1, a1 # e0 : zero extend the search character
ldq_u t0, 0(a0) # .. e1 : load first quadword
sll a1, 8, t5 # e0 : replicate the search character
andnot a0, 7, v0 # .. e1 : align our loop pointer
or t5, a1, a1 # e0 :
lda t4, -1 # .. e1 : build garbage mask
sll a1, 16, t5 # e0 :
cmpbge zero, t0, t2 # .. e1 : bits set iff byte == zero
mskqh t4, a0, t4 # e0 :
or t5, a1, a1 # .. e1 :
sll a1, 32, t5 # e0 :
cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage
or t5, a1, a1 # e0 :
xor t0, a1, t1 # .. e1 : make bytes == c zero
cmpbge zero, t1, t3 # e0 : bits set iff byte == c
or t2, t3, t0 # e1 : bits set iff char match or zero match
andnot t0, t4, t0 # e0 : clear garbage bits
bne t0, $found # .. e1 (zdb)
/* Main loop: one aligned quadword per iteration; leave as soon as any
   byte is either the terminator or the search character.  */
$loop: ldq t0, 8(v0) # e0 :
addq v0, 8, v0 # .. e1 :
nop # e0 :
xor t0, a1, t1 # .. e1 (ev5 data stall)
cmpbge zero, t0, t2 # e0 : bits set iff byte == 0
cmpbge zero, t1, t3 # .. e1 : bits set iff byte == c
or t2, t3, t0 # e0 :
beq t0, $loop # .. e1 (zdb)
/* t0 & -t0 isolates the first hit.  If that position is not also set in
   the character-match mask t3, the terminator came first.  */
$found: negq t0, t1 # e0 : clear all but least set bit
and t0, t1, t0 # e1 (stall)
and t0, t3, t1 # e0 : bit set iff byte was the char
beq t1, $retnull # .. e1 (zdb)
/* Convert the single set bit to a byte index 0..7 (the masks test bits
   2, 1 and 0 of the index) and add it to the aligned address in v0.  */
and t0, 0xf0, t2 # e0 : binary search for that set bit
and t0, 0xcc, t3 # .. e1 :
and t0, 0xaa, t4 # e0 :
cmovne t2, 4, t2 # .. e1 :
cmovne t3, 2, t3 # e0 :
cmovne t4, 1, t4 # .. e1 :
addq t2, t3, t2 # e0 :
addq v0, t4, v0 # .. e1 :
addq v0, t2, v0 # e0 :
ret # .. e1 :
$retnull:
mov zero, v0 # e0 :
ret # .. e1 :
END(strchr)
weak_alias (strchr, index)

36
sysdeps/alpha/strcpy.S Normal file
View File

@ -0,0 +1,36 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Copy a null-terminated string from SRC to DST, including the
terminator. Return DST. */
#include <sysdep.h>
.text
ENTRY(strcpy)
ldgp gp, 0(pv)
.prologue 1
/* In: a0 = DST, a1 = SRC.  Out: v0 = DST.  __stxcpy takes its return
   address in t9 and is documented to leave v0 untouched, so the return
   value can be set up before the call.  */
mov a0, v0 # set up return value
jsr t9, __stxcpy # do the copy
ret
END(strcpy)

90
sysdeps/alpha/strncat.S Normal file
View File

@ -0,0 +1,90 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Append no more than COUNT characters from the null-terminated string SRC
to the null-terminated string DST. Always null-terminate the new DST. */
#include <sysdep.h>
.text
ENTRY(strncat)
ldgp gp, 0(pv)
.prologue 1
/* In: a0 = DST, a1 = SRC, a2 = COUNT.  Out: v0 = DST.  A zero COUNT
   returns at once because __stxncpy requires a nonzero COUNT.  */
mov a0, v0 # set up return value
beq a2, $zerocount
/* Find the end of the string. */
/* insqh of all-ones by a0 gives 0xff in the bytes below byte offset
   (a0 & 7) and zero elsewhere.  OR-ing that into the first quadword
   makes the bytes that precede DST nonzero so they cannot be mistaken
   for the terminator.  */
ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned)
lda t1, -1(zero)
insqh t1, a0, t1
andnot a0, 7, a0
or t1, t0, t0
cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0
bne t1, $found
/* Scan one aligned quadword at a time until one contains a zero byte.  */
$loop: ldq t0, 8(a0)
addq a0, 8, a0 # addr += 8
cmpbge zero, t0, t1
beq t1, $loop
/* t1 & -t1 isolates the first zero byte; the masks 0xf0, 0xcc and 0xaa
   test bits 2, 1 and 0 of its index, and the sum of the weights is
   added to the aligned address in a0.  */
$found: negq t1, t2 # clear all but least set bit
and t1, t2, t1
and t1, 0xf0, t2 # binary search for that set bit
and t1, 0xcc, t3
and t1, 0xaa, t4
cmovne t2, 4, t2
cmovne t3, 2, t3
cmovne t4, 1, t4
addq t2, t3, t2
addq a0, t4, a0
addq a0, t2, a0
/* Now do the append. */
jsr t9, __stxncpy
/* Worry about the null termination. */
/* Per the __stxncpy interface, t0 = last word written, t8 = one-bit mask
   of the last byte written, t10 = one-bit mask of the end-of-COUNT byte
   position, a0 = address of the last word written.  zapnot isolates the
   last byte written; a zero result means the copy already ended with a
   terminator.  */
zapnot t0, t8, t1 # was last byte a null?
bne t1, 0f
ret
/* A terminator must be added.  If the end-of-COUNT position is byte 7
   (0x80) the terminator belongs to the next word.  */
0: and t10, 0x80, t1
bne t1, 1f
/* Here there are bytes left in the current word. Clear one. */
addq t10, t10, t10 # end-of-count bit <<= 1
zap t0, t10, t0
stq_u t0, 0(a0)
ret
1: /* Here we must read the next DST word and clear the first byte. */
ldq_u t0, 8(a0)
zap t0, 1, t0
stq_u t0, 8(a0)
$zerocount:
ret
END(strncat)

85
sysdeps/alpha/strncpy.S Normal file
View File

@ -0,0 +1,85 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Copy no more than COUNT bytes of the null-terminated string from
SRC to DST. If SRC does not cover all of COUNT, the balance is
zeroed. */
#include <sysdep.h>
.set noat
.set noreorder
.text
ENTRY(strncpy)
ldgp gp, 0(pv)
.prologue 1
/* In: a0 = DST, a1 = SRC, a2 = COUNT.  Out: v0 = DST.  A zero COUNT
   returns at once because __stxncpy requires a nonzero COUNT.  */
mov a0, v0 # set return value now
beq a2, $zerocount
jsr t9, __stxncpy # do the work of the copy
/* Per the __stxncpy interface: t0 = last word written, t8 = one-bit mask
   of the last byte written, t10 = one-bit mask of the end-of-COUNT byte
   position, a0 = address of the last word written, a2 = full words left.
   What follows zero-fills the remainder of the COUNT range.  */
bne a2, $multiword # do we have full words left?
.align 3
/* Everything left to clear is in the current word.  t2 / t3 become byte
   masks covering positions up to and including the last written byte /
   the end-of-COUNT byte; the bytes in t3 but not t2 are zapped.  */
subq t8, 1, t2 # e0 : guess not
subq t10, 1, t3 # .. e1 :
or t2, t8, t2 # e0 : clear the bits between the last
or t3, t10, t3 # .. e1 : written byte and the last byte in
andnot t3, t2, t3 # e0 : COUNT
zap t0, t3, t0 # e1 :
stq_u t0, 0(a0) # e0 :
ret # .. e1 :
/* Full words remain.  First zero every byte of the current word above
   the last byte written (zapnot keeps positions <= that byte).  */
$multiword:
subq t8, 1, t7 # e0 : clear the final bits in the prev
or t7, t8, t7 # e1 : word
zapnot t0, t7, t0 # e0 :
subq a2, 1, a2 # .. e1 :
stq_u t0, 0(a0) # e0 :
addq a0, 8, a0 # .. e1 :
/* Zero the remaining a2 whole words: one peeled store when the count is
   odd, then two stores per iteration.  */
beq a2, 1f # e1 :
blbc a2, 0f # e1 :
stq_u zero, 0(a0) # e0 : zero one word
subq a2, 1, a2 # .. e1 :
addq a0, 8, a0 # e0 :
beq a2, 1f # .. e1 :
0: stq_u zero, 0(a0) # e0 : zero two words
subq a2, 2, a2 # .. e1 :
stq_u zero, 8(a0) # e0 :
addq a0, 16, a0 # .. e1 :
bne a2, 0b # e1 :
unop
/* Final word: zap the byte positions up to and including the one marked
   by t10, keeping the memory bytes above it.  */
1: ldq_u t0, 0(a0) # e0 : clear the leading bits in the final
subq t10, 1, t7 # .. e1 : word
or t7, t10, t7 # e0 :
zap t0, t7, t0 # e1 (stall)
stq_u t0, 0(a0) # e0 :
$zerocount:
ret # .. e1 :
END(strncpy)

104
sysdeps/alpha/strrchr.S Normal file
View File

@ -0,0 +1,104 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Return the address of the last occurrence of a given character
within a null-terminated string, or null if it is not found.
This is generally scheduled for the EV5 (got to look out for my own
interests :-), but with EV4 needs in mind. There are, in fact, fewer
stalls on the EV4 than there are on the EV5.
*/
#include <sysdep.h>
.set noreorder
.set noat
ENTRY(strrchr)
.prologue 0
/* In: a0 = string, a1 = character.  Out: v0 = address of the last byte
   equal to the character at or before the terminator, or zero if there
   is none.
   t6 / t7 remember the aligned address and the byte-match mask of the
   most recent quadword that contained a match; both start at zero.
   The setup also replicates the character into all eight bytes of a1 and
   builds a mask (t4) of the bytes that precede the string.  */
zapnot a1, 1, a1 # e0 : zero extend our test character
mov zero, t6 # .. e1 : t6 is last match aligned addr
sll a1, 8, t5 # e0 : replicate our test character
mov zero, t7 # .. e1 : t7 is last match byte compare mask
or t5, a1, a1 # e0 :
ldq_u t0, 0(a0) # .. e1 : load first quadword
sll a1, 16, t5 # e0 :
andnot a0, 7, v0 # .. e1 : align source addr
or t5, a1, a1 # e0 :
lda t4, -1 # .. e1 : build garbage mask
sll a1, 32, t5 # e0 :
cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero
mskqh t4, a0, t4 # e0 :
or t5, a1, a1 # .. e1 : character replication complete
xor t0, a1, t2 # e0 : make bytes == c zero
cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage
cmpbge zero, t2, t3 # e0 : bits set iff byte == c
andnot t1, t4, t1 # .. e1 : clear garbage from null test
andnot t3, t4, t3 # e0 : clear garbage from char test
bne t1, $eos # .. e1 : did we already hit the terminator?
/* Character search main loop */
/* The two cmovne instructions record the match state (t3) of the
   quadword examined previously, using v0 before it is advanced.  */
$loop:
ldq t0, 8(v0) # e0 : load next quadword
cmovne t3, v0, t6 # .. e1 : save previous comparisons match
cmovne t3, t3, t7 # e0 :
addq v0, 8, v0 # .. e1 :
xor t0, a1, t2 # e0 :
cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero
cmpbge zero, t2, t3 # e0 : bits set iff byte == c
beq t1, $loop # .. e1 : if we havnt seen a null, loop
/* Mask out character matches after terminator */
$eos:
negq t1, t4 # e0 : isolate first null byte match
and t1, t4, t4 # e1 :
subq t4, 1, t5 # e0 : build a mask of the bytes upto...
or t4, t5, t4 # e1 : ... and including the null
and t3, t4, t3 # e0 : mask out char matches after null
cmovne t3, t3, t7 # .. e1 : save it, if match found
cmovne t3, v0, t6 # e0 :
/* Locate the address of the last matched character */
/* Retain the early exit for the ev4 -- the ev5 mispredict penalty
is 5 cycles -- the same as just falling through. */
beq t7, $retnull # .. e1 :
/* Find the index of the highest set bit of t7: at each step, if the
   upper half of the remaining candidates has a bit set, narrow t7 to
   that half and take the corresponding weight (4, 2, then 1).  */
and t7, 0xf0, t2 # e0 : binary search for the high bit set
cmovne t2, t2, t7 # .. e1 (zdb)
cmovne t2, 4, t2 # e0 :
and t7, 0xcc, t1 # .. e1 :
cmovne t1, t1, t7 # e0 :
cmovne t1, 2, t1 # .. e1 :
and t7, 0xaa, t0 # e0 :
cmovne t0, 1, t0 # .. e1 (zdb)
addq t2, t1, t1 # e0 :
addq t6, t0, v0 # .. e1 : add our aligned base ptr to the mix
addq v0, t1, v0 # e0 :
ret # .. e1 :
$retnull:
mov zero, v0 # e0 :
ret # .. e1 :
END(strrchr)
weak_alias (strrchr, rindex)

307
sysdeps/alpha/stxcpy.S Normal file
View File

@ -0,0 +1,307 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Copy a null-terminated string from SRC to DST.
This is an internal routine used by strcpy, stpcpy, and strcat.
As such, it uses special linkage conventions to make implementation
of these public functions more efficient.
On input:
t9 = return address
a0 = DST
a1 = SRC
On output:
t8 = bitmask (with one bit set) indicating the last byte written
a0 = unaligned address of the last *word* written
Furthermore, v0, a3-a5, t11, and t12 are untouched.
*/
/* This is generally scheduled for the EV5, but should still be pretty
good for the EV4 too. */
#include <sysdep.h>
.set noat
.set noreorder
.text
/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
doesn't like putting the entry point for a procedure somewhere in the
middle of the procedure descriptor. Work around this by putting the
aligned copy in its own procedure descriptor */
.ent stxcpy_aligned
.align 3
stxcpy_aligned:
.frame sp, 0, t9
.prologue 0
/* Copy path used when SRC and DST have the same offset within a
   quadword.  Entered by branch from __stxcpy, which has already loaded
   the first source word and advanced a1 by 8; returns through t9.  */
/* On entry to this basic block:
t0 == the first destination word for masking back in
t1 == the first source word. */
/* Create the 1st output word and detect 0's in the 1st input word. */
/* t2 ends up as the source word with the bytes below the start offset
   forced to 0xff, so only real string bytes can test as zero.  t1 ends
   up as destination bytes below the offset merged with source bytes
   from the offset upward.  */
lda t2, -1 # e1 : build a mask against false zero
mskqh t2, a1, t2 # e0 : detection in the src word
mskqh t1, a1, t3 # e0 :
ornot t1, t2, t2 # .. e1 :
mskql t0, a1, t0 # e0 : assemble the first output word
cmpbge zero, t2, t7 # .. e1 : bits set iff null found
or t0, t3, t1 # e0 :
bne t7, $a_eos # .. e1 :
/* On entry to this basic block:
t0 == the first destination word for masking back in
t1 == a source word not containing a null. */
$a_loop:
stq_u t1, 0(a0) # e0 :
addq a0, 8, a0 # .. e1 :
ldq_u t1, 0(a1) # e0 :
addq a1, 8, a1 # .. e1 :
cmpbge zero, t1, t7 # e0 (stall)
beq t7, $a_loop # .. e1 (zdb)
/* Take care of the final (partial) word store.
On entry to this basic block we have:
t1 == the source word containing the null
t7 == the cmpbge mask that found it. */
/* t8 (the documented output) is the lowest set bit of t7, i.e. the
   position of the terminator, which is the last byte written.  */
$a_eos:
negq t7, t6 # e0 : find low bit set
and t7, t6, t8 # e1 (stall)
/* For the sake of the cache, don't read a destination word
if we're not going to need it. */
/* A terminator in byte 7 means the whole word is string data.  */
and t8, 0x80, t6 # e0 :
bne t6, 1f # .. e1 (zdb)
/* We're doing a partial word store and so need to combine
our source and original destination words. */
/* t6 = mask of byte positions below the null; t7 = positions up to and
   including the null.  */
ldq_u t0, 0(a0) # e0 :
subq t8, 1, t6 # .. e1 :
zapnot t1, t6, t1 # e0 : clear src bytes >= null
or t8, t6, t7 # .. e1 :
zap t0, t7, t0 # e0 : clear dst bytes <= null
or t0, t1, t1 # e1 :
1: stq_u t1, 0(a0) # e0 :
ret (t9) # .. e1 :
.end stxcpy_aligned
.align 3
.ent __stxcpy
.globl __stxcpy
__stxcpy:
.frame sp, 0, t9
.prologue 0
/* Entry point.  In: a0 = DST, a1 = SRC, t9 = return address.
   Out: t8 = one-bit mask of the last byte written (the terminator),
   expressed as a byte position within the destination quadword;
   a0 = unaligned address of the last word written.  */
/* Are source and destination co-aligned? */
xor a0, a1, t0 # e0 :
unop # :
and t0, 7, t0 # e0 :
bne t0, $unaligned # .. e1 :
/* We are co-aligned; take care of a partial first word. */
ldq_u t1, 0(a1) # e0 : load first src word
and a0, 7, t0 # .. e1 : take care not to load a word ...
addq a1, 8, a1 # e0 :
beq t0, stxcpy_aligned # .. e1 : ... if we wont need it
ldq_u t0, 0(a0) # e0 :
br stxcpy_aligned # .. e1 :
/* The source and destination are not co-aligned. Align the destination
and cope. We have to be very careful about not reading too much and
causing a SEGV. */
.align 3
$u_head:
/* We know just enough now to be able to assemble the first
full source word. We can still find a zero at the end of it
that prevents us from outputting the whole thing.
On entry to this basic block:
t0 == the first dest word, for masking back in, if needed else 0
t1 == the low bits of the first source word
t6 == bytemask that is -1 in dest word bytes */
ldq_u t2, 8(a1) # e0 :
addq a1, 8, a1 # .. e1 :
extql t1, a1, t1 # e0 :
extqh t2, a1, t4 # e0 :
mskql t0, a0, t0 # e0 :
or t1, t4, t1 # .. e1 :
mskqh t1, a0, t1 # e0 :
or t0, t1, t1 # e1 :
or t1, t6, t6 # e0 :
cmpbge zero, t6, t7 # .. e1 :
lda t6, -1 # e0 : for masking just below
bne t7, $u_final # .. e1 :
mskql t6, a1, t6 # e0 : mask out the bits we have
or t6, t2, t2 # e1 : already extracted before
cmpbge zero, t2, t7 # e0 : testing eos
bne t7, $u_late_head_exit # .. e1 (zdb)
/* Finally, we've got all the stupid leading edge cases taken care
of and we can set up to enter the main loop. */
stq_u t1, 0(a0) # e0 : store first output word
addq a0, 8, a0 # .. e1 :
extql t2, a1, t0 # e0 : position ho-bits of lo word
ldq_u t2, 8(a1) # .. e1 : read next high-order source word
addq a1, 8, a1 # e0 :
cmpbge zero, t2, t7 # .. e1 :
nop # e0 :
bne t7, $u_eos # .. e1 :
/* Unaligned copy main loop. In order to avoid reading too much,
the loop is structured to detect zeros in aligned source words.
This has, unfortunately, effectively pulled half of a loop
iteration out into the head and half into the tail, but it does
prevent nastiness from accumulating in the very thing we want
to run as fast as possible.
On entry to this basic block:
t0 == the shifted high-order bits from the previous source word
t2 == the unshifted current source word
We further know that t2 does not contain a null terminator. */
.align 3
$u_loop:
extqh t2, a1, t1 # e0 : extract high bits for current word
addq a1, 8, a1 # .. e1 :
extql t2, a1, t3 # e0 : extract low bits for next time
addq a0, 8, a0 # .. e1 :
or t0, t1, t1 # e0 : current dst word now complete
ldq_u t2, 0(a1) # .. e1 : load high word for next time
stq_u t1, -8(a0) # e0 : save the current word
mov t3, t0 # .. e1 :
cmpbge zero, t2, t7 # e0 : test new word for eos
beq t7, $u_loop # .. e1 :
/* We've found a zero somewhere in the source word we just read.
If it resides in the lower half, we have one (probably partial)
word to write out, and if it resides in the upper half, we
have one full and one partial word left to write out.
On entry to this basic block:
t0 == the shifted high-order bits from the previous source word
t2 == the unshifted current source word. */
$u_eos:
extqh t2, a1, t1 # e0 :
or t0, t1, t1 # e1 : first (partial) source word complete
cmpbge zero, t1, t7 # e0 : is the null in this first bit?
bne t7, $u_final # .. e1 (zdb)
$u_late_head_exit:
stq_u t1, 0(a0) # e0 : the null was in the high-order bits
addq a0, 8, a0 # .. e1 :
extql t2, a1, t1 # e0 :
cmpbge zero, t1, t7 # .. e1 :
/* Take care of a final (probably partial) result word.
On entry to this basic block:
t1 == assembled source word
t7 == cmpbge mask that found the null. */
$u_final:
negq t7, t6 # e0 : isolate low bit set
and t6, t7, t8 # e1 :
and t8, 0x80, t6 # e0 : avoid dest word load if we can
bne t6, 1f # .. e1 (zdb)
ldq_u t0, 0(a0) # e0 :
subq t8, 1, t6 # .. e1 :
or t6, t8, t7 # e0 :
zapnot t1, t6, t1 # .. e1 : kill source bytes >= null
zap t0, t7, t0 # e0 : kill dest bytes <= null
or t0, t1, t1 # e1 :
1: stq_u t1, 0(a0) # e0 :
ret (t9) # .. e1 :
/* Unaligned copy entry point. */
.align 3
$unaligned:
ldq_u t1, 0(a1) # e0 : load first source word
and a0, 7, t4 # .. e1 : find dest misalignment
and a1, 7, t5 # e0 : find src misalignment
/* Conditionally load the first destination word and a bytemask
with 0xff indicating that the destination byte is sacrosanct. */
mov zero, t0 # .. e1 :
mov zero, t6 # e0 :
beq t4, 1f # .. e1 :
ldq_u t0, 0(a0) # e0 :
lda t6, -1 # .. e1 :
mskql t6, a0, t6 # e0 :
1:
subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr
/* If source misalignment is larger than dest misalignment, we need
extra startup checks to avoid SEGV. */
cmplt t4, t5, t8 # e0 :
beq t8, $u_head # .. e1 (zdb)
lda t2, -1 # e1 : mask out leading garbage in source
mskqh t2, t5, t2 # e0 :
nop # e0 :
ornot t1, t2, t3 # .. e1 :
cmpbge zero, t3, t7 # e0 : is there a zero?
beq t7, $u_head # .. e1 (zdb)
/* At this point we've found a zero in the first partial word of
the source. We need to isolate the valid source data and mask
it into the original destination data. (Incidentally, we know
that we'll need at least one byte of that original dest word.) */
/* The null was located in SOURCE-word byte positions, but the data is
shifted right by (a1 & 7) bytes (a1 was rebased by the destination
misalignment above) before it is stored.  t8 is documented as the
position of the last byte written, so it must get the same shift;
otherwise callers that turn t8 into an address (stpcpy) point past the
real terminator.  The zap masks t6/t7 are built from the unshifted
value first because they are applied to the unshifted source word.  */
ldq_u t0, 0(a0) # e0 :
negq t7, t6 # .. e1 : build bitmask of bytes <= zero
and t6, t7, t8 # e0 :
and a1, 7, t5 # .. e1 : t5 = byte shift from src word to dst word
subq t8, 1, t6 # e0 :
or t6, t8, t7 # e1 :
srl t8, t5, t8 # e0 : express the null position in dst word bytes
zapnot t2, t7, t2 # .. e1 : prepare source word; mirror changes
and t1, t2, t1 # e1 : to source validity mask
extql t2, a1, t2 # e0 :
extql t1, a1, t1 # e0 :
andnot t0, t2, t0 # e0 : zero place for source to reside
or t0, t1, t1 # e1 : and put it there
stq_u t1, 0(a0) # e0 :
ret (t9) # .. e1 :
.end __stxcpy

350
sysdeps/alpha/stxncpy.S Normal file
View File

@ -0,0 +1,350 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Copy no more than COUNT bytes of the null-terminated string from
SRC to DST.
This is an internal routine used by strncpy, stpncpy, and strncat.
As such, it uses special linkage conventions to make implementation
of these public functions more efficient.
On input:
t9 = return address
a0 = DST
a1 = SRC
a2 = COUNT
Furthermore, COUNT may not be zero.
On output:
t0 = last word written
t8 = bitmask (with one bit set) indicating the last byte written
t10 = bitmask (with one bit set) indicating the byte position of
the end of the range specified by COUNT
a0 = unaligned address of the last *word* written
a2 = the number of full words left in COUNT
Furthermore, v0, a3-a5, t11, and t12 are untouched.
*/
/* This is generally scheduled for the EV5, but should still be pretty
good for the EV4 too. */
#include <sysdep.h>
.set noat
.set noreorder
.text
/* Either gdb (as of 4.16) or gas (as of 2.7) does not cope with the
entry point for a procedure being placed somewhere in the middle of
the procedure descriptor. Work around this by putting the aligned
copy in its own procedure descriptor. */
.ent stxncpy_aligned
.align 3
/* stxncpy_aligned: bounded string copy for the case where SRC and DST
have the same offset within a quadword. It is reached only by the two
branches in __stxncpy, which has already loaded the first source word
into t1 and advanced a1 by 8.
Register state on entry, as set up by __stxncpy:
a0 = DST (not yet advanced)
a1 = SRC + 8 (same low three bits as SRC)
a2 = (COUNT + dest misalignment - 1) / 8
t9 = return address
t10 = single-bit bytemask marking the last byte allowed by COUNT
Returns through t9 after storing the final word; t0 holds that word
and t8 the single-bit mask of the last byte written. */
stxncpy_aligned:
.frame sp, 0, t9, 0
.prologue 0
/* On entry to this basic block:
t0 == the first destination word for masking back in
(zero when DST is quadword aligned and the word was not loaded)
t1 == the first source word. */
/* Create the 1st output word and detect 0's in the 1st input word. */
lda t2, -1 # e1 : build a mask against false zero
mskqh t2, a1, t2 # e0 : detection in the src word
mskqh t1, a1, t3 # e0 : t3 = src bytes at/after the start offset
ornot t1, t2, t2 # .. e1 : bytes before the start forced to 0xff
mskql t0, a1, t0 # e0 : assemble the first output word
cmpbge zero, t2, t7 # .. e1 : bits set iff null found
or t0, t3, t0 # e0 : old dst bytes below start + src bytes above
beq a2, $a_eoc # .. e1 : no full words left: count ends in this word
bne t7, $a_eos # .. e1 : terminator is in the first word
/* On entry to this basic block:
t0 == a source word not containing a null. */
$a_loop:
stq_u t0, 0(a0) # e0 : store the completed word
addq a0, 8, a0 # .. e1 :
ldq_u t0, 0(a1) # e0 : fetch the next source word
addq a1, 8, a1 # .. e1 :
subq a2, 1, a2 # e0 : one fewer full word left in COUNT
cmpbge zero, t0, t7 # .. e1 (stall)
beq a2, $a_eoc # e1 : count runs out in the word just loaded
beq t7, $a_loop # e1 : no null yet, keep copying
/* Take care of the final (partial) word store. At this point
the end-of-count bit is set in t7 iff it applies.
On entry to this basic block we have:
t0 == the source word containing the null and/or the last byte
permitted by COUNT
t7 == the cmpbge mask that found it (with t10 merged in when this
block is entered from $a_eoc). */
$a_eos:
negq t7, t8 # e0 : find low bit set
and t7, t8, t8 # e1 (stall)
/* For the sake of the cache, don't read a destination word
if we're not going to need it. Bit 7 set in t8 means the last byte
to write is byte 7, so the whole word is stored. */
and t8, 0x80, t6 # e0 :
bne t6, 1f # .. e1 (zdb)
/* We're doing a partial word store and so need to combine
our source and original destination words. */
ldq_u t1, 0(a0) # e0 : original destination word
subq t8, 1, t6 # .. e1 : bits for the bytes below the last byte
or t8, t6, t7 # e0 : t7 = bytes up to and including the last byte
unop # no-op between the mask and its users
zapnot t0, t7, t0 # e0 : clear src bytes > null
zap t1, t7, t1 # .. e1 : clear dst bytes <= null
or t0, t1, t0 # e1 : merged final word
1: stq_u t0, 0(a0) # e0 :
ret (t9) # e1 :
/* Add the end-of-count bit to the eos detection bitmask. The lowest
set bit then selects whichever comes first, the null or the end of
COUNT. */
$a_eoc:
or t10, t7, t7
br $a_eos
.end stxncpy_aligned
.align 3
.ent __stxncpy
.globl __stxncpy
/* __stxncpy: copy at most COUNT bytes of the null-terminated string at
SRC to DST, using aligned quadword accesses only.
Special linkage (not the standard calling convention):
On input:
t9 = return address
a0 = DST
a1 = SRC
a2 = COUNT (must be non-zero)
On output:
t0 = last word written
t8 = single-bit bytemask of the last byte written
t10 = single-bit bytemask of the last byte permitted by COUNT
a0 = unaligned address of the last word written
a2 = number of full words left in COUNT
The entry code biases COUNT by the destination misalignment, derives
the word counter (a2) and the end-of-count bit (t10), and then either
branches to stxncpy_aligned (SRC and DST share the same offset within a
quadword) or continues at $unaligned.
Scratch registers used on the unaligned path: t0-t8. */
__stxncpy:
.frame sp, 0, t9, 0
.prologue 0
/* Are source and destination co-aligned? */
xor a0, a1, t1 # e0 :
and a0, 7, t0 # .. e1 : find dest misalignment
and t1, 7, t1 # e0 : non-zero iff the low 3 address bits differ
addq a2, t0, a2 # .. e1 : bias count by dest misalignment
subq a2, 1, a2 # e0 : a2 = offset of the last permitted byte
and a2, 7, t2 # e1 : ... its byte index within the last word
srl a2, 3, a2 # e0 : a2 = loop counter = (count - 1)/8
addq zero, 1, t10 # .. e1 :
sll t10, t2, t10 # e0 : t10 = bitmask of last count byte
bne t1, $unaligned # .. e1 :
/* We are co-aligned; take care of a partial first word. */
ldq_u t1, 0(a1) # e0 : load first src word
addq a1, 8, a1 # .. e1 :
beq t0, stxncpy_aligned # avoid loading dest word if not needed
ldq_u t0, 0(a0) # e0 :
br stxncpy_aligned # .. e1 :
/* The source and destination are not co-aligned. Align the destination
and cope. We have to be very careful about not reading too much and
causing a SEGV. */
.align 3
$u_head:
/* We know just enough now to be able to assemble the first
full source word. We can still find a zero at the end of it
that prevents us from outputting the whole thing.
On entry to this basic block:
t0 == the first dest word, unmasked
t1 == the shifted low bits of the first source word
t6 == bytemask that is -1 in dest word bytes
(0xff in every byte below DST that must be preserved; zero
when DST is quadword aligned) */
ldq_u t2, 8(a1) # e0 : load second src word
addq a1, 8, a1 # .. e1 :
mskql t0, a0, t0 # e0 : mask trailing garbage in dst
extqh t2, a1, t4 # e0 :
or t1, t4, t1 # e1 : first aligned src word complete
mskqh t1, a0, t1 # e0 : mask leading garbage in src
or t0, t1, t0 # e0 : first output word complete
or t0, t6, t6 # e1 : mask original data for zero test
cmpbge zero, t6, t7 # e0 :
beq a2, $u_eocfin # .. e1 : count ends inside this first word
bne t7, $u_final # e1 : null found in the first word
lda t6, -1 # e1 : mask out the bits we have
mskql t6, a1, t6 # e0 : already seen
stq_u t0, 0(a0) # e0 : store first output word
or t6, t2, t2 # .. e1 :
cmpbge zero, t2, t7 # e0 : find nulls in second partial
addq a0, 8, a0 # .. e1 :
subq a2, 1, a2 # e0 :
bne t7, $u_late_head_exit # .. e1 :
/* Finally, we've got all the stupid leading edge cases taken care
of and we can set up to enter the main loop. */
/* NOTE(review): the load below is issued before the a2 == 0 test that
follows it, so the next source quadword is read even when the count is
already exhausted. Confirm this cannot touch an unmapped page when the
source is not null-terminated within COUNT. */
extql t2, a1, t1 # e0 : position hi-bits of lo word
ldq_u t2, 8(a1) # .. e1 : read next high-order source word
addq a1, 8, a1 # e0 :
cmpbge zero, t2, t7 # e1 (stall)
beq a2, $u_eoc # e1 :
bne t7, $u_eos # e1 :
/* Unaligned copy main loop. In order to avoid reading too much,
the loop is structured to detect zeros in aligned source words.
This has, unfortunately, effectively pulled half of a loop
iteration out into the head and half into the tail, but it does
prevent nastiness from accumulating in the very thing we want
to run as fast as possible.
On entry to this basic block:
t1 == the shifted high-order bits from the previous source word
t2 == the unshifted current source word
We further know that t2 does not contain a null terminator. */
/* NOTE(review): as in the loop setup, the ldq_u for the next word
precedes the a2 == 0 test at the bottom of the loop. */
.align 3
$u_loop:
extqh t2, a1, t0 # e0 : extract high bits for current word
addq a1, 8, a1 # .. e1 :
extql t2, a1, t3 # e0 : extract low bits for next time
addq a0, 8, a0 # .. e1 :
or t0, t1, t0 # e0 : current dst word now complete
ldq_u t2, 0(a1) # .. e1 : load high word for next time
stq_u t0, -8(a0) # e0 : save the current word
mov t3, t1 # .. e1 :
subq a2, 1, a2 # e0 :
cmpbge zero, t2, t7 # .. e1 : test new word for eos
beq a2, $u_eoc # e1 :
beq t7, $u_loop # e1 :
/* We've found a zero somewhere in the source word we just read.
If it resides in the lower half, we have one (probably partial)
word to write out, and if it resides in the upper half, we
have one full and one partial word left to write out.
On entry to this basic block:
t1 == the shifted high-order bits from the previous source word
t2 == the unshifted current source word. */
$u_eos:
extqh t2, a1, t0 # e0 :
or t0, t1, t0 # e1 : first (partial) source word complete
cmpbge zero, t0, t7 # e0 : is the null in this first bit?
bne t7, $u_final # .. e1 (zdb)
stq_u t0, 0(a0) # e0 : the null was in the high-order bits
addq a0, 8, a0 # .. e1 :
subq a2, 1, a2 # e1 :
$u_late_head_exit:
extql t2, a1, t0 # .. e0 :
cmpbge zero, t0, t7 # e0 :
or t7, t10, t6 # e1 : t6 is free scratch from here on
cmoveq a2, t6, t7 # e0 : add the end-of-count bit iff a2 == 0
nop # .. e1 :
/* Take care of a final (probably partial) result word.
On entry to this basic block:
t0 == assembled source word
t7 == cmpbge mask that found the null (with the end-of-count bit
merged in where it applies). */
$u_final:
negq t7, t6 # e0 : isolate low bit set
and t6, t7, t8 # e1 :
and t8, 0x80, t6 # e0 : avoid dest word load if we can
bne t6, 1f # .. e1 (zdb)
ldq_u t1, 0(a0) # e0 : original destination word
subq t8, 1, t6 # .. e1 :
or t6, t8, t7 # e0 : t7 = bytes up to and including the last byte
zapnot t0, t7, t0 # .. e1 : kill source bytes > null
zap t1, t7, t1 # e0 : kill dest bytes <= null
or t0, t1, t0 # e1 :
1: stq_u t0, 0(a0) # e0 :
ret (t9) # .. e1 :
$u_eoc: # end-of-count
extqh t2, a1, t0
or t0, t1, t0
cmpbge zero, t0, t7
$u_eocfin: # end-of-count, final word
or t10, t7, t7
br $u_final
/* Unaligned copy entry point. */
.align 3
$unaligned:
ldq_u t1, 0(a1) # e0 : load first source word
and a0, 7, t4 # .. e1 : find dest misalignment
and a1, 7, t5 # e0 : find src misalignment
/* Conditionally load the first destination word and a bytemask
with 0xff indicating that the destination byte is sacrosanct. */
mov zero, t0 # .. e1 :
mov zero, t6 # e0 :
beq t4, 1f # .. e1 :
ldq_u t0, 0(a0) # e0 :
lda t6, -1 # .. e1 :
mskql t6, a0, t6 # e0 :
1:
subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr
/* If source misalignment is larger than dest misalignment, we need
extra startup checks to avoid SEGV. */
cmplt t4, t5, t8 # e1 :
extql t1, a1, t1 # .. e0 : shift src into place
lda t2, -1 # e0 : for creating masks later
beq t8, $u_head # e1 :
/* The end-of-count test below needs a scratch register. It must not
be t6: both exits to $u_head rely on t6 still holding the mask of
destination bytes to preserve, otherwise a zero byte sitting below DST
in the first destination word is mistaken for the string terminator.
t5 is dead once the mskqh has consumed it, so use that instead. */
mskqh t2, t5, t2 # e0 : begin src byte validity mask
cmpbge zero, t1, t7 # .. e1 : is there a zero?
extql t2, a1, t2 # e0 :
or t7, t10, t5 # .. e1 : test for end-of-count too
cmpbge zero, t2, t3 # e0 : t3 = positions with no valid src byte
cmoveq a2, t5, t7 # .. e1 : add the end-of-count bit iff a2 == 0
andnot t7, t3, t7 # e0 : ignore hits outside the valid bytes
beq t7, $u_head # .. e1 (zdb)
/* At this point we've found a zero in the first partial word of
the source. We need to isolate the valid source data and mask
it into the original destination data. (Incidentally, we know
that we'll need at least one byte of that original dest word.) */
ldq_u t0, 0(a0) # e0 :
negq t7, t6 # .. e1 : build bitmask of bytes <= zero
mskqh t1, t4, t1 # e0 : drop src bytes below the dest offset
and t6, t7, t8 # .. e1 :
subq t8, 1, t6 # e0 :
or t6, t8, t7 # e1 :
zapnot t2, t7, t2 # e0 : prepare source word; mirror changes
zapnot t1, t7, t1 # .. e1 : to source validity mask
andnot t0, t2, t0 # e0 : zero place for source to reside
or t0, t1, t0 # e1 : and put it there
stq_u t0, 0(a0) # e0 :
ret (t9) # .. e1 :
.end __stxncpy