update from main archive 961105

This commit is contained in:
Ulrich Drepper 1996-11-06 04:24:11 +00:00
parent cbb7824d08
commit 60c74cf07a
38 changed files with 1448 additions and 232 deletions

View File

@ -42,4 +42,4 @@ ifeq ($(subdir),elf)
sysdep-CFLAGS += -mno-fp-regs
endif
divrem := divl divlu divq divqu reml remlu remq remqu
divrem := divl divq reml remq

View File

@ -25,6 +25,10 @@ Cambridge, MA 02139, USA. */
ENTRY(_setjmp)
ldgp $29,0($27)
#ifdef PROF
lda AT, _mcount
jsr AT, (AT), _mcount
#endif
.prologue 1
bis $31, $31, $17 /* Pass a second argument of zero. */
jmp $31, __sigsetjmp /* Call __sigsetjmp. */

View File

@ -25,6 +25,10 @@ Cambridge, MA 02139, USA. */
ENTRY(setjmp)
ldgp $29, 0($27)
#ifdef PROF
lda AT, _mcount
jsr AT, (AT), _mcount
#endif
.prologue 1
bis $31, 1, $17 /* Pass a second argument of one. */
jmp $31, __sigsetjmp /* Call __sigsetjmp. */

View File

@ -80,7 +80,14 @@ $tail: bne t4, 1f # is there a tail to do?
.end bzero_loop
ENTRY(bzero)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
mov a0, v0 # e0 : move return value in place
beq a1, $done # .. e1 : early exit for zero-length store

110
sysdeps/alpha/div.S Normal file
View File

@ -0,0 +1,110 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
#include <sysdep.h>
#ifdef __linux__
# include <asm/gentrap.h>
# include <asm/pal.h>
#else
# include <machine/pal.h>
#endif
.set noat
.align 4
.globl div
.ent div
/* div -- 32-bit signed division returning quotient and remainder.

   Register use, as read from the code below:
     a0  address of the result structure; also returned in v0
     a1  numerator   (low 32 bits significant)
     a2  denominator (low 32 bits significant)
   The quotient is stored at 0(a0) and the remainder at 4(a0).
   a1 and a2 are overwritten with their sign-extended values, and
   t1-t7 are used as scratch (plus AT and gp when PROF is defined).
   No stack frame is used.

   Method: unsigned shift-and-subtract on the absolute values.  The
   quotient is then negated when the operand signs differ, and the
   remainder is negated when the numerator is negative.

   A zero denominator raises GEN_INTDIV through PAL_gentrap; if the trap
   returns, both result fields are set to zero.  */
div:
.frame sp, 0, ra
#ifdef PROF
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif

#define divisor		t1
#define mask		t2
#define quotient	t3
#define modulus		t4
#define tmp1		t5
#define tmp2		t6
#define compare		t7

	/* Find the correct sign for the input to the unsigned divide loop.
	   The running remainder (modulus) starts out as |numerator|; the
	   loops below never look at any other copy of the dividend.  */
	sextl	a1, a1
	sextl	a2, a2
	negl	a1, modulus
	negl	a2, divisor
	cmovge	a1, a1, modulus
	cmovge	a2, a2, divisor

	/* negl leaves INT_MIN as a negative 64-bit value; zero-extend so
	   the unsigned compares below see 2^31 instead of a huge number.  */
	zapnot	modulus, 15, modulus
	zapnot	divisor, 15, divisor

	/* The loops accumulate into quotient and shift mask, so both must
	   be seeded: quotient = 0, mask = 1 (the bit matching divisor).  */
	clr	quotient
	ldiq	mask, 1
	beq	a2, $divbyzero

	/* Shift divisor left, using 3-bit shifts for 32-bit divides as we
	   can't overflow.  Three-bit shifts will result in looping three
	   times less here, but can result in two loops more later.  Thus
	   using a large shift isn't worth it (and s8addq pairs better than
	   a shift).  NOTE(review): the slot annotations below are the
	   original author's; alignment of the loop head has changed.  */
.align 3
1:	cmpult	divisor, modulus, compare	# e0    :
	s8addq	divisor, zero, divisor		# .. e1 :
	s8addq	mask, zero, mask		# e0    :
	bne	compare, 1b			# .. e1 :

	/* Start to go right again: one quotient bit per iteration, until
	   the mask bit has been shifted out.  */
2:	addq	quotient, mask, tmp2		# e1    :
	srl	mask, 1, mask			# .. e0 :
	cmpule	divisor, modulus, compare	# e0    :
	subq	modulus, divisor, tmp1		# .. e1 :
	cmovne	compare, tmp2, quotient		# e1    :
	srl	divisor, 1, divisor		# .. e0 :
	cmovne	compare, tmp1, modulus		# e0    :
	bne	mask, 2b			# .. e1 :

	/* Find the correct sign for the result: quotient is negative when
	   the operand signs differ, remainder follows the numerator.  */
	xor	a1, a2, compare			# e0    :
	negl	quotient, tmp1			# .. e1 :
	negl	modulus, tmp2			# e0    :
	cmovlt	compare, tmp1, quotient		# .. e1 :
	cmovlt	a1, tmp2, modulus		# e1    :

	/* And store it away in the structure.  */
	stl	quotient, 0(a0)			# .. e0 :
	mov	a0, v0				# e1    :
	stl	modulus, 4(a0)			# .. e0 :
	ret					# e1    :

$divbyzero:
	/* Keep the structure pointer in v0: a0 is needed for the trap code.  */
	mov	a0, v0
	ldiq	a0, GEN_INTDIV
	call_pal PAL_gentrap

	/* If trap returns, return zero.  */
	stl	zero, 0(v0)
	stl	zero, 4(v0)
	ret

.end div

View File

@ -1,6 +1,6 @@
#define IS_REM 0
#define SIZE 4
#define SIGNED 1
#define FUNC_NAME __divl
#define UFUNC_NAME __divlu
#define SFUNC_NAME __divl
#include "divrem.h"

View File

@ -1,6 +0,0 @@
#define IS_REM 0
#define SIZE 4
#define SIGNED 0
#define FUNC_NAME __divlu
#include "divrem.h"

View File

@ -1,6 +1,6 @@
#define IS_REM 0
#define SIZE 8
#define SIGNED 1
#define FUNC_NAME __divq
#define UFUNC_NAME __divqu
#define SFUNC_NAME __divq
#include "divrem.h"

View File

@ -1,6 +0,0 @@
#define IS_REM 0
#define SIZE 8
#define SIGNED 0
#define FUNC_NAME __divqu
#include "divrem.h"

View File

@ -1,25 +1,25 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by David Mosberger (davidm@cs.arizona.edu).
This file is part of the GNU C Library.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* The current Alpha chips don't provide hardware for integer
division. The C compiler expects the functions
division. The C compiler expects the functions
__divqu: 64-bit unsigned long divide
__remqu: 64-bit unsigned long remainder
@ -27,10 +27,10 @@ division. The C compiler expects the functions
__divlu/__remlu: unsigned 32-bit
__divls/__remls: signed 32-bit
These are not normal C functions: instead of the normal calling
sequence, these expect their arguments in registers t10 and t11, and
return the result in t12 (aka pv). Register AT may be clobbered
(assembly temporary), anything else must be saved. */
These are not normal C functions: instead of the normal calling
sequence, these expect their arguments in registers t10 and t11, and
return the result in t12 (aka pv). Register AT may be clobbered
(assembly temporary), anything else must be saved. */
#include <sysdep.h>
@ -41,77 +41,144 @@ return the result in t12 (aka pv). Register AT may be clobbered
# include <machine/pal.h>
#endif
#ifdef DEBUG
# define arg1 a0
# define arg2 a1
# define result v0
# define mask t0
# define tmp0 t1
# define tmp1 t2
# define sign t3
# define retaddr ra
#else
# define arg1 t10
# define arg2 t11
# define result t12
# define mask v0
# define tmp0 t0
# define tmp1 t1
# define sign t2
# define retaddr t9
#endif
#define mask v0
#define divisor t0
#define compare AT
#define tmp1 t2
#define tmp2 t3
#define retaddr t9
#define arg1 t10
#define arg2 t11
#define result t12
# define divisor arg2
#if IS_REM
# define dividend result
# define quotient arg1
# define GETDIVIDEND bis arg1,zero,dividend
# define DIV_ONLY(x,y...)
# define REM_ONLY(x,y...) x,##y
# define modulus result
# define quotient t1
# define GETSIGN(x) mov arg1, x
# define STACK 32
#else
# define dividend arg1
# define quotient result
# define GETDIVIDEND
# define DIV_ONLY(x,y...) x,##y
# define REM_ONLY(x,y...)
# define modulus t1
# define quotient result
# define GETSIGN(x) xor arg1, arg2, x
# define STACK 48
#endif
#if SIZE == 8
# define LONGIFYarg1 GETDIVIDEND
# define LONGIFYarg2
# define LONGIFY(x,y) mov x,y
# define SLONGIFY(x,y) mov x,y
# define _SLONGIFY(x)
# define NEG(x,y) negq x,y
#else
# if SIGNED
# define LONGIFYarg1 addl arg1,zero,dividend
# define LONGIFYarg2 addl arg2,zero,divisor
# else
# define LONGIFYarg1 zapnot arg1,0x0f,dividend
# define LONGIFYarg2 zapnot arg2,0x0f,divisor
# endif
#endif
#if SIGNED
# define SETSIGN(sign,reg,tmp) subq zero,reg,tmp; cmovlt sign,tmp,reg
# if IS_REM
# define GETSIGN(x,y,s) bis x,zero,s
# else
# define GETSIGN(x,y,s) xor x,y,s
# endif
#else
# define SETSIGN(sign,reg,tmp)
# define GETSIGN(x,y,s)
# define LONGIFY(x,y) zapnot x,15,y
# define SLONGIFY(x,y) sextl x,y
# define _SLONGIFY(x) sextl x,x
# define NEG(x,y) negl x,y
#endif
.set noreorder
.set noat
.ent FUNC_NAME
.globl FUNC_NAME
.ent UFUNC_NAME
.globl UFUNC_NAME
#define FRAME_SIZE 0x30
.align 5
FUNC_NAME:
.align 3
UFUNC_NAME:
lda sp, -STACK(sp)
.frame sp, STACK, retaddr, 0
#ifdef PROF
lda sp, -0x18(sp)
stq ra, 0x00(sp)
stq pv, 0x08(sp)
stq gp, 0x10(sp)
stq ra, 0(sp)
stq pv, 8(sp)
stq gp, 16(sp)
br AT, 1f
1: ldgp gp, 0(AT)
mov retaddr, ra
lda AT, _mcount
jsr AT, (AT), _mcount
ldq ra, 0(sp)
ldq pv, 8(sp)
ldq gp, 16(sp)
#endif
.prologue 0
$udiv:
stq t0, 0(sp)
LONGIFY (arg2, divisor)
stq t1, 8(sp)
LONGIFY (arg1, modulus)
stq v0, 16(sp)
clr quotient
stq tmp1, 24(sp)
ldiq mask, 1
DIV_ONLY(stq tmp2,32(sp))
beq divisor, $divbyzero
.align 3
#if SIZE == 8
/* Shift divisor left. */
1: cmpult divisor, modulus, compare
blt divisor, 2f
addq divisor, divisor, divisor
addq mask, mask, mask
bne compare, 1b
unop
2:
#else
/* Shift divisor left using 3-bit shifts as we can't overflow.
This results in looping three times less here, but up to
two more times later. Thus using a large shift isn't worth it. */
1: cmpult divisor, modulus, compare
s8addq divisor, zero, divisor
s8addq mask, zero, mask
bne compare, 1b
#endif
/* Now go back to the right. */
3: DIV_ONLY(addq quotient, mask, tmp2)
srl mask, 1, mask
cmpule divisor, modulus, compare
subq modulus, divisor, tmp1
DIV_ONLY(cmovne compare, tmp2, quotient)
srl divisor, 1, divisor
cmovne compare, tmp1, modulus
bne mask, 3b
$done: ldq t0, 0(sp)
ldq t1, 8(sp)
ldq v0, 16(sp)
ldq tmp1, 24(sp)
DIV_ONLY(ldq tmp2, 32(sp))
lda sp, STACK(sp)
ret zero, (retaddr), 1
$divbyzero:
mov a0, tmp1
ldiq a0, GEN_INTDIV
call_pal PAL_gentrap
mov tmp1, a0
clr result /* If trap returns, return zero. */
br $done
.end UFUNC_NAME
.ent SFUNC_NAME
.globl SFUNC_NAME
.align 3
SFUNC_NAME:
lda sp, -STACK(sp)
.frame sp, STACK, retaddr, 0
#ifdef PROF
stq ra, 0(sp)
stq pv, 8(sp)
stq gp, 16(sp)
br AT, 1f
1: ldgp gp, 0(AT)
@ -119,69 +186,40 @@ FUNC_NAME:
mov retaddr, ra
jsr AT, _mcount
ldq ra, 0x00(sp)
ldq pv, 0x08(sp)
ldq gp, 0x10(sp)
lda sp, 0x18(sp)
ldq ra, 0(sp)
ldq pv, 8(sp)
ldq gp, 16(sp)
#endif
.frame sp, FRAME_SIZE, retaddr, 0
lda sp,-FRAME_SIZE(sp)
.prologue 1
stq arg1,0x00(sp)
LONGIFYarg1
stq arg2,0x08(sp)
LONGIFYarg2
stq mask,0x10(sp)
bis zero,1,mask
stq tmp0,0x18(sp)
bis zero,zero,quotient
stq tmp1,0x20(sp)
beq divisor,$divbyzero
stq sign,0x28(sp)
GETSIGN(dividend,divisor,sign)
#if SIGNED
subq zero,dividend,tmp0
subq zero,divisor,tmp1
cmovlt dividend,tmp0,dividend
cmovlt divisor,tmp1,divisor
#endif
/*
* Shift divisor left until either bit 63 is set or until it
* is at least as big as the dividend:
*/
.align 3
1: cmpule dividend,divisor,AT
blt divisor,2f
blbs AT,2f
addq mask,mask,mask
addq divisor,divisor,divisor
br 1b
.prologue 0
.align 3
2: addq mask,quotient,tmp0
cmpule divisor,dividend,AT
subq dividend,divisor,tmp1
srl divisor,1,divisor
srl mask,1,mask
cmovlbs AT,tmp0,quotient
cmovlbs AT,tmp1,dividend
bne mask,2b
or arg1, arg2, AT
_SLONGIFY(AT)
bge AT, $udiv /* don't need to mess with signs */
ldq arg1,0x00(sp)
SETSIGN(sign,result,tmp0)
$done: ldq arg2,0x08(sp)
ldq mask,0x10(sp)
ldq tmp0,0x18(sp)
ldq tmp1,0x20(sp)
ldq sign,0x28(sp)
lda sp,FRAME_SIZE(sp)
ret zero,(retaddr),0
/* Save originals and find absolute values. */
stq arg1, 0(sp)
NEG (arg1, AT)
stq arg2, 8(sp)
cmovge AT, AT, arg1
stq retaddr, 16(sp)
NEG (arg2, AT)
stq tmp1, 24(sp)
cmovge AT, AT, arg2
$divbyzero:
lda a0,GEN_INTDIV(zero)
call_pal PAL_gentrap
bis zero,zero,result /* if trap returns, return 0 */
ldq arg1,0x00(sp)
br $done
/* Do the unsigned division. */
bsr retaddr, UFUNC_NAME
END(FUNC_NAME)
/* Restore originals and adjust the sign of the result. */
ldq arg1, 0(sp)
ldq arg2, 8(sp)
GETSIGN (AT)
NEG (result, tmp1)
_SLONGIFY(AT)
ldq retaddr, 16(sp)
cmovlt AT, tmp1, result
ldq tmp1, 24(sp)
lda sp, STACK(sp)
ret zero, (retaddr), 1
.end SFUNC_NAME

View File

@ -27,7 +27,14 @@ architecture. */
.set noat
ENTRY(ffs)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
ldq_u zero, 0(sp) # on the 21064, this helps dual-issuing
addl a0, zero, a0 # the last insn and reduces the stall

View File

@ -19,7 +19,15 @@ Cambridge, MA 02139, USA. */
#include <sysdep.h>
ENTRY(__htonl)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
extlh a0, 5, t1 # t1 = dd000000
zap a0, 0xfd, t2 # t2 = 0000cc00
sll t2, 5, t2 # t2 = 00198000

View File

@ -19,7 +19,15 @@ Cambridge, MA 02139, USA. */
#include <sysdep.h>
ENTRY(__htons)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
extwh a0, 7, t1 # t1 = bb00
extbl a0, 1, v0 # v0 = 00aa
bis v0, t1, v0 # v0 = bbaa

109
sysdeps/alpha/ldiv.S Normal file
View File

@ -0,0 +1,109 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
#include <sysdep.h>
#ifdef __linux__
# include <asm/gentrap.h>
# include <asm/pal.h>
#else
# include <machine/pal.h>
#endif
.set noat
.align 4
.globl ldiv
.ent ldiv
/* ldiv -- 64-bit signed division returning quotient and remainder.
   lldiv is a weak alias for the same code.

   Register use, as read from the code below:
     a0  address of the result structure; also returned in v0
     a1  numerator
     a2  denominator
   The quotient is stored at 0(a0) and the remainder at 8(a0).
   t1-t7 are used as scratch (plus AT and gp when PROF is defined).
   No stack frame is used.

   Method: unsigned shift-and-subtract on the absolute values.  The
   quotient is then negated when the operand signs differ, and the
   remainder is negated when the numerator is negative.

   A zero denominator raises GEN_INTDIV through PAL_gentrap; if the trap
   returns, both result fields are set to zero.  */
ldiv:
.frame sp, 0, ra
#ifdef PROF
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif

#define divisor		t1
#define mask		t2
#define quotient	t3
#define modulus		t4
#define tmp1		t5
#define tmp2		t6
#define compare		t7

	/* Find the correct sign for the input to the unsigned divide loop.
	   The running remainder (modulus) starts out as |numerator|; the
	   loops below never look at any other copy of the dividend.  */
	mov	a1, modulus
	mov	a2, divisor
	negq	a1, tmp1
	negq	a2, tmp2
	cmovlt	a1, tmp1, modulus
	cmovlt	a2, tmp2, divisor

	/* The loops accumulate into quotient and shift mask, so both must
	   be seeded: quotient = 0, mask = 1 (the bit matching divisor).  */
	clr	quotient
	ldiq	mask, 1
	beq	a2, $divbyzero

	/* Shift divisor left.  Stop as soon as its top bit is set (blt),
	   since one more doubling would overflow.  NOTE(review): the slot
	   annotations below are the original author's; alignment of the
	   loop head has changed.  */
.align 3
1:	cmpult	divisor, modulus, compare	# e0    :
	blt	divisor, 2f			# .. e1 :
	addq	divisor, divisor, divisor	# e0    :
	addq	mask, mask, mask		# .. e1 :
	bne	compare, 1b			# e1    :
	unop					#       :

	/* Start to go right again: one quotient bit per iteration, until
	   the mask bit has been shifted out.  */
2:	addq	quotient, mask, tmp2		# e1    :
	srl	mask, 1, mask			# .. e0 :
	cmpule	divisor, modulus, compare	# e0    :
	subq	modulus, divisor, tmp1		# .. e1 :
	cmovne	compare, tmp2, quotient		# e1    :
	srl	divisor, 1, divisor		# .. e0 :
	cmovne	compare, tmp1, modulus		# e0    :
	bne	mask, 2b			# .. e1 :

	/* Find the correct sign for the result: quotient is negative when
	   the operand signs differ, remainder follows the numerator.  */
	xor	a1, a2, compare			# e0    :
	negq	quotient, tmp1			# .. e1 :
	negq	modulus, tmp2			# e0    :
	cmovlt	compare, tmp1, quotient		# .. e1 :
	cmovlt	a1, tmp2, modulus		# e1    :

	/* And store it away in the structure.  */
	stq	quotient, 0(a0)			# .. e0 :
	mov	a0, v0				# e1    :
	stq	modulus, 8(a0)			# .. e0 :
	ret					# e1    :

$divbyzero:
	/* Keep the structure pointer in v0: a0 is needed for the trap code.  */
	mov	a0, v0
	ldiq	a0, GEN_INTDIV
	call_pal PAL_gentrap

	/* If trap returns, return zero.  */
	stq	zero, 0(v0)
	stq	zero, 8(v0)
	ret

.end ldiv
weak_alias(ldiv, lldiv)

1
sysdeps/alpha/lldiv.S Normal file
View File

@ -0,0 +1 @@
/* lldiv is the same as ldiv on the Alpha. */

View File

@ -40,7 +40,14 @@ For correctness consider that:
.set noat
ENTRY(memchr)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
beq a2, $not_found
ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned)

276
sysdeps/alpha/memcpy.S Normal file
View File

@ -0,0 +1,276 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* This is the child of the C-with-inline-assembly memcpy posted by
Martin Ostermann (ost@comnets.rwth-aachen.de).
This is generally scheduled for the EV5, but whenever necessary and
possible, the autoswap slotting feature of the EV5 is used so that the
code lays out nicely for the EV4 as well. */
#include <alpha/regdef.h>
.set noreorder
.text
/* copy_fwd_aligned -- forward copy for the case where source and
   destination have the same alignment within a quadword.

   This is not entered at its own label: memcpy branches to $fwd_aligned
   (further down) with
     a0 == destination pointer
     a1 == source pointer
     a2 == byte count (non-zero)
   Temporaries t0-t9 are used as scratch, and every path ends in `ret',
   so control returns straight to memcpy's caller.  */
.ent copy_fwd_aligned
copy_fwd_aligned:
.frame sp, 0, ra, 0
.prologue 0
/* Aligned forward copy main loop. On entry to this basic block:
t0 == source word waiting to be stored
t2 == loop counter
a0 == destination pointer
a1 == source pointer
a2 mod 8 == byte count in final word */
.align 4
$fa_loop:
/* First copy (t2 mod 8) words one at a time, so that the unrolled loop
   below only ever sees a multiple of eight. */
and t2, 7, t1 # e0 :
beq t1, 1f # .. e1 :
0: stq_u t0, 0(a0) # e0 :
subq t1, 1, t1 # .. e1 :
ldq_u t0, 8(a1) # e0 : copy up to seven words
addq a0, 8, a0 # .. e1 :
addq a1, 8, a1 # e0 :
bne t1, 0b # .. e1 :
/* t2 := remaining word count rounded down to a multiple of eight. */
1: bic t2, 7, t2 # e0 :
beq t2, $fa_tail # .. e1 :
/* Unrolled loop: store the pending word, then move eight more. The
   eighth word loaded (into t0) becomes the pending word for the next
   iteration or for the tail. a0 is advanced early, hence the negative
   store offsets. */
2: stq_u t0, 0(a0) # e0 :
addq a0, 64, a0 # .. e1 :
ldq_u t3, 8(a1) # e0 : copy eight words as fast as we can
ldq_u t4, 16(a1) # .. e1 :
ldq_u t5, 24(a1) # e0 :
ldq_u t6, 32(a1) # .. e1 :
ldq_u t7, 40(a1) # e0 :
ldq_u t8, 48(a1) # .. e1 :
ldq_u t9, 56(a1) # e0 :
ldq_u t0, 64(a1) # .. e1 :
stq_u t3, -56(a0) # e0 :
subq t2, 8, t2 # .. e1 :
stq_u t4, -48(a0) # e0 :
addq a1, 64, a1 # .. e1 :
stq_u t5, -40(a0) # e0 :
stq_u t6, -32(a0) # e0 :
stq_u t7, -24(a0) # e0 :
stq_u t8, -16(a0) # e0 :
stq_u t9, -8(a0) # e0 :
bne t2, 2b # .. e1 :
/* Take care of a partial word tail. */
$fa_tail:
/* t3 := number of bytes wanted from the final word; zero means the
   whole word is wanted. */
and a2, 7, t3 # e0 :
bne t3, 1f # .. e1 (zdb)
/* Aligned copy, aligned tail, final store. */
stq_u t0, 0(a0)
ret
/* Partial final word: keep the source bytes below (a2 mod 8), keep the
   existing destination bytes from there upward, and merge the two. */
1: ldq_u t1, 0(a0) # e1 :
mskql t0, a2, t0 # .. e1 :
mskqh t1, a2, t1 # e0 (stall)
bis t0, t1, t0 # e1 :
stq_u t0, 0(a0) # e0 :
ret # .. e1 :
/* This is the actual entry point to this function. */
.align 3
$fwd_aligned:
/* t0 := first source word; t3 := destination misalignment.
   a2 is rebased to count from the start of the first destination word,
   and t2 := (a2 - 1) >> 3, the count used by $fa_loop. */
ldq_u t0, 0(a1) # e0 :
and a0, 7, t3 # .. e1 :
addq a2, t3, a2 # e0 :
subq a2, 1, t2 # e1 :
sra t2, 3, t2 # e0 :
/* Destination already quadword aligned: no head merge needed. */
beq t3, $fa_loop # .. e1 :
ldq_u t1, 0(a0) # e0 :
beq t2, $fa_small # .. e1 :
/* Misaligned head: combine the destination bytes below the start
   offset with the source bytes from the start offset upward. */
mskqh t0, a0, t0 # e0 :
mskql t1, a0, t3 # e0 :
bis t0, t3, t0 # e0 :
br $fa_loop # .. e1 :
/* The move affects exactly one destination word. */
$fa_small:
/* t0 := source bytes from the start offset up; t3 := destination bytes
   below the start offset; t4 := end offset within the word (zero when
   the copy runs to the end of the word). */
mskqh t0, a0, t0 # e0 :
and a2, 7, t4 # .. e1 :
mskql t1, a0, t3 # e0 :
bne t4, 1f # .. e1 :
or t0, t3, t0 # e0 :
unop # :
stq_u t0, 0(a0) # e0 :
ret # .. e1 :
/* The copy ends inside the word: also trim the source at the end
   offset and keep the destination bytes beyond it. */
1: mskql t0, a2, t0 # e0 :
mskqh t1, a2, t1 # e0 :
or t0, t3, t0 # e0 :
or t0, t1, t0 # e1 :
stq_u t0, 0(a0) # e0 :
ret # .. e1 :
.end copy_fwd_aligned
.ent memcpy
.globl memcpy
.align 3
/* memcpy -- copy a2 bytes from a1 to a0, returning the destination.

   On entry: a0 == destination, a1 == source, a2 == byte count.
   On exit:  v0 == the original destination pointer.

   This routine only dispatches.  A zero count returns at once through
   $zero_length.  Otherwise it branches to $fwd_aligned when source and
   destination share the same offset within a quadword, and to
   $fwd_unaligned when they do not.  Those routines return directly to
   our caller.  */
memcpy:
.frame sp, 0, ra, 0
#ifdef PROF
	/* At function entry the procedure value (pv) holds our own address,
	   so gp must be derived from pv; ra points into the caller.  */
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif

	mov	a0, v0			/* return value: the destination */
	beq	a2, $zero_length	/* nothing to copy */

	/* Are source and destination co-aligned?  */
	xor	a0, a1, t0
	unop
	and	t0, 7, t0		/* differing low address bits */
	beq	t0, $fwd_aligned
	br	$fwd_unaligned

.end memcpy
/* copy_fwd_unaligned -- forward copy for the case where source and
   destination have different alignment within a quadword.

   This is not entered at its own label: memcpy branches to
   $fwd_unaligned (further down) with
     a0 == destination pointer
     a1 == source pointer
     a2 == byte count (non-zero)
   Temporaries t0-t8 are used as scratch.  Every path ends in `ret', so
   control returns straight to memcpy's caller.  The final `ret' also
   carries the $zero_length label that memcpy uses for empty copies.  */
.ent copy_fwd_unaligned
copy_fwd_unaligned:
.frame sp, 0, ra, 0
.prologue 0

	/* Unaligned forward copy main loop.  On entry to this basic block:
	   t0 == source low word, unshifted
	   t2 == loop counter
	   t7 == last source byte + 1
	   a0 == destination pointer
	   a1 == source pointer
	   a2 mod 8 == byte count in final word  */

.align 4
$fu_loop:
	beq	t2, $fu_tail			# e1    :
	blbc	t2, 0f				# e1    :

	/* Odd count: move a single word so the rest goes in pairs.  */
	ldq_u	t1, 8(a1)			# e1    : copy one unaligned word
	extql	t0, a1, t3			# .. e0 :
	addq	a1, 8, a1			# e0    :
	addq	a0, 8, a0			# .. e1 :
	extqh	t1, a1, t4			# e0    :
	subq	t2, 1, t2			# .. e1 :
	mov	t1, t0				# e0    :
	or	t3, t4, t3			# .. e1 :
	stq_u	t3, -8(a0)			# e0    :
	beq	t2, $fu_tail			# .. e1 :

0:	ldq_u	t1, 8(a1)			# e1    : copy two unaligned words
	extql	t0, a1, t3			# .. e0 :
	ldq_u	t0, 16(a1)			# e0    :
	subq	t2, 2, t2			# .. e1 :
	extqh	t1, a1, t4			# e0    :
	addq	a0, 16, a0			# .. e1 :
	extql	t1, a1, t5			# e0    :
	or	t3, t4, t3			# .. e1 :
	extqh	t0, a1, t6			# e0    :
	addq	a1, 16, a1			# .. e1 :
	stq_u	t3, -16(a0)			# e0    :
	or	t5, t6, t5			# .. e1 :
	stq_u	t5, -8(a0)			# e0    :
	bne	t2, 0b				# .. e1 :

	/* Take care of a partial word tail.  The high half comes from the
	   word holding the last source byte, so nothing beyond the source
	   is read.  */
$fu_tail:
	ldq_u	t4, -1(t7)			# e1    :
	extql	t0, a1, t3			# .. e0 :
	extqh	t4, a1, t4			# e0 (stall)
	and	a2, 7, t5			# .. e1 :
	or	t3, t4, t3			# e0    :
	beq	t5, 1f				# .. e1 :
	/* The copy ends inside the word: keep the destination bytes from
	   the end offset upward.  */
	ldq_u	t1, 0(a0)			# e1    :
	mskql	t3, a2, t3			# .. e0 :
	mskqh	t1, a2, t1			# e0 (stall)
	or	t1, t3, t3			# e1    :
1:	stq_u	t3, 0(a0)			# e0    :
	ret					# .. e1 :

	/* The entry point to the unaligned forward copy.  */
.align 3
$fwd_unaligned:
	ldq_u	t0, 0(a1)			# e0    : load initial bits of src
	addq	a1, a2, t7			# .. e1 : record last byte + 1 of src
	and	a0, 7, t3			# e0    : find dst misalignment
	addq	a2, t3, a2			# e1    : find number of words affected
	subq	a2, 1, t2			# e0    :
	cmple	a2, 8, t4			# .. e1 : are we dealing with a small block?
	subq	a1, t3, a1			# e0    :
	bne	t4, $fu_small			# .. e1 :
	srl	t2, 3, t2			# e0    :
	beq	t3, $fu_loop			# .. e1 :

	/* Take care of an unaligned dst head.  */
	ldq_u	t5, 0(a0)			# e0    :
	ldq_u	t1, 8(a1)			# .. e1 :
	extql	t0, a1, t3			# e0    :
	addq	a0, 8, a0			# .. e1 :
	extqh	t1, a1, t4			# e0    :
	addq	a1, 8, a1			# .. e1 :
	mskql	t5, a0, t5			# e0    :
	or	t3, t4, t3			# .. e1 :
	mskqh	t3, a0, t3			# e0    :
	subq	t2, 1, t2			# .. e1 :
	or	t3, t5, t3			# e0    :
	mov	t1, t0				# .. e1 :
	stq_u	t3, -8(a0)			# e0    :
	br	$fu_loop			# .. e1 :

	/* The move affects exactly one destination word.  */
.align 3
$fu_small:
	ldq_u	t2, 0(a0)			# e1    : existing destination word
	extql	t0, a1, t3			# .. e0 :
	ldq_u	t1, -1(t7)			# e0    : word holding last src byte
	and	a2, 7, t8			# .. e1 : end offset, 0 == end of word

	/* t5 := destination bytes to preserve: those below the start offset,
	   plus those from the end offset upward when the copy ends inside
	   the word.  */
	mskqh	t2, a2, t6			# e0    :
	mskql	t2, a0, t5			# e0    :
	extqh	t1, a1, t4			# e0    :
	cmovne	t8, t6, t8			# .. e1 :
	or	t3, t4, t3			# e0    : assembled source word
	or	t5, t8, t5			# .. e1 :

	/* Trim the source word to the bytes being copied.  The end mask is
	   needed only when the end offset is non-zero, and its result must
	   land in t3: otherwise source bytes past the end of the copy are
	   OR-ed into the preserved destination bytes.  */
	mskqh	t3, a0, t3			# e0    :
	and	a2, 7, t8			# .. e1 :
	mskql	t3, a2, t6			# e0    :
	cmovne	t8, t6, t3			# e1    :

	or	t3, t5, t3			# e0    :
	unop					#       :
	stq_u	t3, 0(a0)			# e0    :
$zero_length:
	ret					# .. e1 :

.end copy_fwd_unaligned

View File

@ -85,7 +85,14 @@ $tail: bne t4, 1f # is there a tail to do?
.end memset_loop
ENTRY(memset)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
zapnot a1, 1, a1 # e0 : zero extend input character
mov a0, v0 # .. e1 : move return value in place

View File

@ -1,6 +1,6 @@
#define IS_REM 1
#define SIZE 4
#define SIGNED 1
#define FUNC_NAME __reml
#define UFUNC_NAME __remlu
#define SFUNC_NAME __reml
#include "divrem.h"

View File

@ -1,6 +0,0 @@
#define IS_REM 1
#define SIZE 4
#define SIGNED 0
#define FUNC_NAME __remlu
#include "divrem.h"

View File

@ -1,6 +1,6 @@
#define IS_REM 1
#define SIZE 8
#define SIGNED 1
#define FUNC_NAME __remq
#define UFUNC_NAME __remqu
#define SFUNC_NAME __remq
#include "divrem.h"

View File

@ -1,6 +0,0 @@
#define IS_REM 1
#define SIZE 8
#define SIGNED 0
#define FUNC_NAME __remqu
#include "divrem.h"

View File

@ -20,7 +20,15 @@ Cambridge, MA 02139, USA. */
#include <sysdep.h>
ENTRY(__copysign)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
cpys $f17,$f16,$f0
ret

View File

@ -20,7 +20,15 @@ Cambridge, MA 02139, USA. */
#include <sysdep.h>
ENTRY(__fabs)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
cpys $f31,$f16,$f0
ret

View File

@ -23,6 +23,10 @@ Cambridge, MA 02139, USA. */
extra arguments. */
ENTRY (__sigsetjmp)
ldgp $29, 0($27)
#ifdef PROF
lda AT, _mcount
jsr AT, (AT), _mcount
#endif
.prologue 1
bis $30, $30, $18 /* Pass SP as 3rd arg. */

View File

@ -27,6 +27,10 @@ Cambridge, MA 02139, USA. */
ENTRY(__stpcpy)
ldgp gp, 0(pv)
#ifdef PROF
lda AT, _mcount
jsr AT, (AT), _mcount
#endif
.prologue 1
jsr t9, __stxcpy # do the work of the copy

View File

@ -1,24 +1,23 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Copy no more than COUNT bytes of the null-terminated string from
/* Copy no more than COUNT bytes of the null-terminated string from
SRC to DST. If SRC does not cover all of COUNT, the balance is
zeroed. Return the address of the terminating null in DEST, if
any, else DEST + COUNT. */
@ -32,8 +31,12 @@ Cambridge, MA 02139, USA. */
ENTRY(__stpncpy)
ldgp gp, 0(pv)
#ifdef PROF
lda AT, _mcount
jsr AT, (AT), _mcount
#endif
.prologue 1
beq a2, $zerocount
jsr t9, __stxncpy # do the work of the copy

View File

@ -1,22 +1,21 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* Append a null-terminated string from SRC to DST. */
@ -26,6 +25,10 @@ Cambridge, MA 02139, USA. */
ENTRY(strcat)
ldgp gp, 0(pv)
#ifdef PROF
lda AT, _mcount
jsr AT, (AT), _mcount
#endif
.prologue 1
mov a0, v0 # set up return value
@ -59,7 +62,7 @@ $found: negq t1, t2 # clear all but least set bit
addq a0, t2, a0
/* Now do the append. */
jsr t9, __stxcpy
ret

View File

@ -1,25 +1,24 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* Return the address of a given character within a null-terminated
string, or null if it is not found.
string, or null if it is not found.
This is generally scheduled for the EV5 (got to look out for my own
interests :-), but with EV4 needs in mind. There *should* be no more
@ -32,7 +31,14 @@ Cambridge, MA 02139, USA. */
.set noat
ENTRY(strchr)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
zapnot a1, 1, a1 # e0 : zero extend the search character
ldq_u t0, 0(a0) # .. e1 : load first quadword

195
sysdeps/alpha/strcmp.S Normal file
View File

@ -0,0 +1,195 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Bytewise compare two null-terminated strings. */
#include <sysdep.h>
.set noat
.set noreorder
.text
/* strcmp: bytewise compare two null-terminated strings.

   In:   a0 = s1, a1 = s2.
   Out:  v0 = 0 when the strings compare equal, otherwise 1 when the
	 first differing byte of s1 is the larger one and -1 when it is
	 the smaller one (bytes are compared unsigned by cmpbge).
   Uses: t0-t8 as scratch, AT only in the PROF prologue.

   Strategy: the strings are read a quadword at a time with ldq_u.
   When s1 and s2 share the same offset within a quadword the simple
   $a_loop is used; otherwise s1 is read aligned and each s2 word is
   assembled from two partial words with extql/extqh.  */
ENTRY(strcmp)
#ifdef PROF
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	/* Use jsr, like every other PROF prologue in this directory.  jmp
	   links through AT as well but carries a plain-jump prediction
	   hint instead of the subroutine-call hint.  */
	jsr	AT, (AT), _mcount
	.prologue 1
#else
	.prologue 0
#endif
	ldq_u	t0, 0(a0)	# e0 : give cache time to catch up
	xor	a0, a1, t2	# .. e1 : are s1 and s2 co-aligned?
	ldq_u	t1, 0(a1)	# e0 :
	and	t2, 7, t2	# .. e1 :
	lda	t3, -1		# e0 :
	bne	t2, $unaligned	# .. e1 :
	/* On entry to this basic block:
	   t0 == the first word of s1.
	   t1 == the first word of s2.
	   t3 == -1.  */
$aligned:
	mskqh	t3, a0, t3	# e0 : t3 = mask of bytes at or after the string start
	nop			# .. e1 :
	ornot	t1, t3, t1	# e0 : force leading garbage bytes to 0xff
	ornot	t0, t3, t0	# .. e1 :
	cmpbge	zero, t1, t7	# e0 : bits set iff null found
	bne	t7, $eos	# e1 (zdb)
	/* Aligned compare main loop.
	   On entry to this basic block:
	   t0 == an s1 word.
	   t1 == an s2 word not containing a null.  */
$a_loop:
	xor	t0, t1, t2	# e0 :
	bne	t2, $wordcmp	# .. e1 (zdb)
	ldq_u	t1, 8(a1)	# e0 :
	ldq_u	t0, 8(a0)	# .. e1 :
	addq	a1, 8, a1	# e0 :
	addq	a0, 8, a0	# .. e1 :
	cmpbge	zero, t1, t7	# e0 :
	beq	t7, $a_loop	# .. e1 (zdb)
	br	$eos		# e1 :
	/* The two strings are not co-aligned.  Align s1 and cope.  */
$unaligned:
	and	a0, 7, t4	# e0 : find s1 misalignment
	and	a1, 7, t5	# .. e1 : find s2 misalignment
	subq	a1, t4, a1	# e0 :
	/* If s2 misalignment is larger than s1 misalignment, we need
	   extra startup checks to avoid SEGV.  */
	cmplt	t4, t5, t8	# .. e1 :
	beq	t8, $u_head	# e1 :
	mskqh	t3, t5, t3	# e0 :
	ornot	t1, t3, t3	# e0 :
	cmpbge	zero, t3, t7	# e1 : is there a zero?
	beq	t7, $u_head	# e1 :
	/* We've found a zero in the first partial word of s2.  Align
	   our current s1 and s2 words and compare what we've got.  */
	extql	t1, t5, t1	# e0 :
	extql	t0, a0, t0	# e0 :
	cmpbge	zero, t1, t7	# .. e1 : find that zero again
	br	$eos		# e1 : and finish up
	.align 3
$u_head:
	/* We know just enough now to be able to assemble the first
	   full word of s2.  We can still find a zero at the end of it.
	   On entry to this basic block:
	   t0 == first word of s1
	   t1 == first partial word of s2.  */
	ldq_u	t2, 8(a1)	# e0 : load second partial s2 word
	lda	t3, -1		# .. e1 : create leading garbage mask
	extql	t1, a1, t1	# e0 : create first s2 word
	mskqh	t3, a0, t3	# e0 :
	extqh	t2, a1, t4	# e0 :
	ornot	t0, t3, t0	# .. e1 : kill s1 garbage
	or	t1, t4, t1	# e0 : s2 word now complete
	cmpbge	zero, t0, t7	# .. e1 : find zero in first s1 word
	ornot	t1, t3, t1	# e0 : kill s2 garbage
	lda	t3, -1		# .. e1 :
	mskql	t3, a1, t3	# e0 : mask for s2[1] bits we have seen
	bne	t7, $eos	# .. e1 :
	xor	t0, t1, t4	# e0 : compare aligned words
	bne	t4, $wordcmp	# .. e1 (zdb)
	or	t2, t3, t3	# e0 :
	cmpbge	zero, t3, t7	# e1 :
	bne	t7, $u_final	# e1 :
	/* Unaligned compare main loop.  In order to avoid reading too much,
	   the loop is structured to detect zeros in aligned words from s2.
	   This has, unfortunately, effectively pulled half of a loop
	   iteration out into the head and half into the tail, but it does
	   prevent nastiness from accumulating in the very thing we want
	   to run as fast as possible.
	   On entry to this basic block:
	   t2 == the unshifted low-bits from the next s2 word.  */
	.align 3
$u_loop:
	extql	t2, a1, t3	# e0 :
	ldq_u	t2, 16(a1)	# .. e1 : load next s2 high bits
	ldq_u	t0, 8(a0)	# e0 : load next s1 word
	addq	a1, 8, a1	# .. e1 :
	addq	a0, 8, a0	# e0 :
	nop			# .. e1 :
	extqh	t2, a1, t1	# e0 :
	cmpbge	zero, t0, t7	# .. e1 : find zero in current s1 word
	or	t1, t3, t1	# e0 :
	bne	t7, $eos	# .. e1 :
	xor	t0, t1, t4	# e0 : compare the words
	bne	t4, $wordcmp	# .. e1 (zdb)
	cmpbge	zero, t2, t4	# e0 : find zero in next low bits
	beq	t4, $u_loop	# .. e1 (zdb)
	/* We've found a zero in the low bits of the last s2 word.  Get
	   the next s1 word and align them.  */
$u_final:
	ldq_u	t0, 8(a0)	# e1 :
	extql	t2, a1, t1	# .. e0 :
	cmpbge	zero, t1, t7	# e0 :
	/* We've found a zero somewhere in a word we just read.
	   On entry to this basic block:
	   t0 == s1 word
	   t1 == s2 word
	   t7 == cmpbge mask containing the zero.  */
	.align 3
$eos:
	negq	t7, t6		# e0 : create bytemask of valid data
	and	t6, t7, t8	# e1 : t8 = lowest set bit of t7 (first null)
	subq	t8, 1, t6	# e0 : t6 = bits below the first null
	or	t6, t8, t7	# e1 : t7 = bytes up to and including the null
	zapnot	t0, t7, t0	# e0 : kill the garbage
	zapnot	t1, t7, t1	# .. e1 :
	xor	t0, t1, v0	# e0 : and compare
	beq	v0, $done	# .. e1 : equal: v0 is already zero
	/* Here we have two differing co-aligned words in t0 & t1.
	   Bytewise compare them and return (t0 > t1 ? 1 : -1).  */
$wordcmp:
	cmpbge	t0, t1, t2	# e0 : comparison yields bit mask of ge
	cmpbge	t1, t0, t3	# .. e1 :
	xor	t2, t3, t0	# e0 : bits set iff t0/t1 bytes differ
	negq	t0, t1		# e1 : clear all but least bit
	and	t0, t1, t0	# e0 :
	lda	v0, -1		# .. e1 :
	and	t0, t2, t1	# e0 : was bit set in t0 > t1?
	cmovne	t1, 1, v0	# .. e1 (zdb)
$done:
	ret			# e1 :
	END(strcmp)

View File

@ -27,6 +27,10 @@ Cambridge, MA 02139, USA. */
ENTRY(strcpy)
ldgp gp, 0(pv)
#ifdef PROF
lda AT, _mcount
jsr AT, (AT), _mcount
#endif
.prologue 1
mov a0, v0 # set up return value

View File

@ -34,6 +34,15 @@ Cambridge, MA 02139, USA. */
.set noat
ENTRY(strlen)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned)
lda t1, -1(zero)
insqh t1, a0, t1

View File

@ -27,6 +27,10 @@ Cambridge, MA 02139, USA. */
ENTRY(strncat)
ldgp gp, 0(pv)
#ifdef PROF
lda AT, _mcount
jsr AT, (AT), _mcount
#endif
.prologue 1
mov a0, v0 # set up return value

224
sysdeps/alpha/strncmp.S Normal file
View File

@ -0,0 +1,224 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by Richard Henderson (rth@tamu.edu)
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Bytewise compare two null-terminated strings of length no longer than N. */
#include <sysdep.h>
.set noat
.set noreorder
.text
/* strncmp: bytewise compare two null-terminated strings, looking at
   no more than N bytes.

   In:   a0 = s1, a1 = s2, a2 = n.
   Out:  v0 = 0 when n is zero or the compared bytes are all equal,
	 otherwise 1 or -1 as produced at $wordcmp (unsigned bytewise
	 comparison of the first differing byte).

   The count is biased by the s1 misalignment and then split in two:
   a2 = number of whole quadwords left, t10 = byte offset of the end
   of the count within the last quadword (used by mskql at $eoc).  */
ENTRY(strncmp)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
xor a0, a1, t2 # e0 : are s1 and s2 co-aligned?
beq a2, $zerolength # .. e1 :
ldq_u t0, 0(a0) # e0 : give cache time to catch up
ldq_u t1, 0(a1) # .. e1 :
and t2, 7, t2 # e0 :
and a0, 7, t4 # .. e1 : find s1 misalignment
lda t3, -1 # e0 :
addq a2, t4, a2 # .. e1 : bias count by s1 misalignment
and a2, 7, t10 # e1 : ofs of last byte in last word
srl a2, 3, a2 # .. e0 : remaining full words in count
and a1, 7, t5 # e0 : find s2 misalignment
bne t2, $unaligned # .. e1 :
/* On entry to this basic block:
t0 == the first word of s1.
t1 == the first word of s2.
t3 == -1. */
$aligned:
/* a0 and a1 have equal low three bits on this path (t2 was zero), so
   masking by a1 masks by the s1 offset as well.  */
mskqh t3, a1, t3 # e0 : mask off leading garbage
nop # .. e1 :
ornot t1, t3, t1 # e0 :
ornot t0, t3, t0 # .. e1 :
cmpbge zero, t1, t7 # e0 : bits set iff null found
beq a2, $eoc # .. e1 : check end of count
unop # :
bne t7, $eos # e1 :
/* Aligned compare main loop.
On entry to this basic block:
t0 == an s1 word.
t1 == an s2 word not containing a null. */
$a_loop:
xor t0, t1, t2 # e0 :
bne t2, $wordcmp # .. e1 (zdb)
/* NOTE(review): these two loads use displacement 0 and the pointers
   are only advanced afterwards, whereas the aligned loop in
   sysdeps/alpha/strcmp.S loads from 8(a1)/8(a0).  As written, the
   first pass appears to re-read the words already compared above,
   this time without the leading-garbage mask applied at $aligned,
   which looks wrong for co-aligned but misaligned strings.  Changing
   the displacement alone would also shift the a2 word count by one,
   so confirm the intended count handling before touching this.  */
ldq_u t1, 0(a1) # e0 :
ldq_u t0, 0(a0) # .. e1 :
addq a1, 8, a1 # e0 :
addq a0, 8, a0 # .. e1 :
cmpbge zero, t1, t7 # e0 :
beq a2, $eoc # .. e1 :
subq a2, 1, a2 # e0 :
beq t7, $a_loop # .. e1 :
br $eos # e1 :
/* The two strings are not co-aligned. Align s1 and cope. */
$unaligned:
subq a1, t4, a1 # e0 :
unop # :
/* If s2 misalignment is larger than s1 misalignment, we need
extra startup checks to avoid SEGV. */
cmplt t4, t5, t8 # .. e1 :
beq t8, $u_head # e1 :
mskqh t3, t5, t3 # e0 :
ornot t1, t3, t3 # e0 :
cmpbge zero, t3, t7 # e1 : is there a zero?
beq t7, $u_head # e1 :
/* We've found a zero in the first partial word of s2. Align
our current s1 and s2 words and compare what we've got. */
extql t1, t5, t1 # e0 :
lda t3, -1 # .. e1 :
insql t1, a0, t1 # e0 :
mskqh t3, a0, t3 # e0 :
ornot t1, t3, t1 # e0 :
ornot t0, t3, t0 # .. e1 :
cmpbge zero, t1, t7 # e0 : find that zero again
beq a2, $eoc # .. e1 : and finish up
br $eos # e1 :
.align 3
$u_head:
/* We know just enough now to be able to assemble the first
full word of s2. We can still find a zero at the end of it.
On entry to this basic block:
t0 == first word of s1
t1 == first partial word of s2. */
ldq_u t2, 8(a1) # e0 : load second partial s2 word
lda t3, -1 # .. e1 : create leading garbage mask
extql t1, a1, t1 # e0 : create first s2 word
mskqh t3, a0, t3 # e0 :
extqh t2, a1, t4 # e0 :
ornot t0, t3, t0 # .. e1 : kill s1 garbage
or t1, t4, t1 # e0 : s2 word now complete
ornot t1, t3, t1 # e1 : kill s2 garbage
cmpbge zero, t0, t7 # e0 : find zero in first s1 word
beq a2, $eoc # .. e1 :
lda t3, -1 # e0 :
bne t7, $eos # .. e1 :
subq a2, 1, a2 # e0 :
xor t0, t1, t4 # .. e1 : compare aligned words
mskql t3, a1, t3 # e0 : mask out s2[1] bits we have seen
bne t4, $wordcmp # .. e1 :
or t2, t3, t3 # e0 :
cmpbge zero, t3, t7 # e1 : find zero in high bits of s2[1]
bne t7, $u_final # e1 :
/* Unaligned compare main loop. In order to avoid reading too much,
the loop is structured to detect zeros in aligned words from s2.
This has, unfortunately, effectively pulled half of a loop
iteration out into the head and half into the tail, but it does
prevent nastiness from accumulating in the very thing we want
to run as fast as possible.
On entry to this basic block:
t2 == the unshifted low-bits from the next s2 word. */
.align 3
$u_loop:
extql t2, a1, t3 # e0 :
ldq_u t2, 16(a1) # .. e1 : load next s2 high bits
ldq_u t0, 8(a0) # e0 : load next s1 word
addq a1, 8, a1 # .. e1 :
addq a0, 8, a0 # e0 :
nop # .. e1 :
extqh t2, a1, t1 # e0 :
cmpbge zero, t0, t7 # .. e1 : find zero in current s1 word
or t1, t3, t1 # e0 :
beq a2, $eoc # .. e1 : check for end of count
subq a2, 1, a2 # e0 :
bne t7, $eos # .. e1 :
xor t0, t1, t4 # e0 : compare the words
bne t4, $wordcmp # .. e1 (zdb)
cmpbge zero, t2, t4 # e0 : find zero in next low bits
beq t4, $u_loop # .. e1 (zdb)
/* We've found a zero in the low bits of the last s2 word. Get
the next s1 word and align them. */
$u_final:
ldq_u t0, 8(a0) # e1 :
extql t2, a1, t1 # .. e0 :
cmpbge zero, t1, t7 # e0 :
bne a2, $eos # .. e1 :
/* We've hit end of count. Zero everything after the count
and compare what's left.  t10 is the byte offset of the end of the
count within this word; t7 still holds the null mask computed before
the branch here, and control falls through into $eos. */
.align 3
$eoc:
mskql t0, t10, t0
mskql t1, t10, t1
/* We've found a zero somewhere in a word we just read.
On entry to this basic block:
t0 == s1 word
t1 == s2 word
t7 == cmpbge mask containing the zero. */
$eos:
negq t7, t6 # e0 : create bytemask of valid data
and t6, t7, t8 # e1 :
subq t8, 1, t6 # e0 :
or t6, t8, t7 # e1 :
zapnot t0, t7, t0 # e0 : kill the garbage
zapnot t1, t7, t1 # .. e1 :
xor t0, t1, v0 # e0 : and compare
beq v0, $done # .. e1 :
/* Here we have two differing co-aligned words in t0 & t1.
Bytewise compare them and return (t0 > t1 ? 1 : -1). */
$wordcmp:
cmpbge t0, t1, t2 # e0 : comparison yields bit mask of ge
cmpbge t1, t0, t3 # .. e1 :
xor t2, t3, t0 # e0 : bits set iff t0/t1 bytes differ
negq t0, t1 # e1 : clear all but least bit
and t0, t1, t0 # e0 :
lda v0, -1 # .. e1 :
and t0, t2, t1 # e0 : was bit set in t0 > t1?
cmovne t1, 1, v0 # .. e1 (zdb)
$done:
ret # e1 :
/* n == 0: nothing to compare, the result is 0. */
$zerolength:
clr v0
ret
END(strncmp)

View File

@ -31,6 +31,10 @@ Cambridge, MA 02139, USA. */
ENTRY(strncpy)
ldgp gp, 0(pv)
#ifdef PROF
lda AT, _mcount
jsr AT, (AT), _mcount
#endif
.prologue 1
mov a0, v0 # set return value now

View File

@ -31,7 +31,14 @@ Cambridge, MA 02139, USA. */
.set noat
ENTRY(strrchr)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
zapnot a1, 1, a1 # e0 : zero extend our test character
mov zero, t6 # .. e1 : t6 is last match aligned addr

View File

@ -27,8 +27,15 @@
.text
LEAF(__udiv_qrnnd, 0)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
#define cnt $2
#define tmp $3
#define rem_ptr $16
@ -38,9 +45,9 @@ LEAF(__udiv_qrnnd, 0)
#define qb $20
ldiq cnt,16
blt d,.Largedivisor
blt d,$largedivisor
.Loop1: cmplt n0,0,tmp
$loop1: cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
@ -73,12 +80,12 @@ LEAF(__udiv_qrnnd, 0)
cmovne qb,tmp,n1
bis n0,qb,n0
subq cnt,1,cnt
bgt cnt,.Loop1
bgt cnt,$loop1
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
.Largedivisor:
$largedivisor:
and n0,1,$4
srl n0,1,n0
@ -90,7 +97,7 @@ LEAF(__udiv_qrnnd, 0)
srl d,1,$5
addq $5,$6,$5
.Loop2: cmplt n0,0,tmp
$loop2: cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
@ -123,27 +130,30 @@ LEAF(__udiv_qrnnd, 0)
cmovne qb,tmp,n1
bis n0,qb,n0
subq cnt,1,cnt
bgt cnt,.Loop2
bgt cnt,$loop2
addq n1,n1,n1
addq $4,n1,n1
bne $6,.LOdd
bne $6,$Odd
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
.LOdd:
$Odd:
/* q' in n0. r' in n1 */
addq n1,n0,n1
cmpult n1,n0,tmp # tmp := carry from addq
beq tmp,.LLp6
addq n0,1,n0
subq n1,d,n1
.LLp6: cmpult n1,d,tmp
bne tmp,.LLp7
addq n0,1,n0
subq n1,d,n1
.LLp7:
subq n1,d,AT
addq n0,tmp,n0
cmovne tmp,AT,n1
cmpult n1,d,tmp
addq n0,1,AT
cmoveq tmp,AT,n0
subq n1,d,AT
cmoveq tmp,AT,n1
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1

161
sysdeps/alpha/w_sqrt.S Normal file
View File

@ -0,0 +1,161 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
Contributed by David Mosberger (davidm@cs.arizona.edu).
Based on public-domain C source by Linus Torvalds.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* This version is much faster than generic sqrt implementation, but
it doesn't handle exceptional values or the inexact flag. Don't use
this if _IEEE_FP or _IEEE_FP_INEXACT is in effect. */
#ifndef _IEEE_FP
#include <errnos.h>
#include <sysdep.h>
.set noreorder
#ifdef __ELF__
.section .rodata
#else
.rdata
#endif
.align 5 # align to cache line
/* Do all memory accesses relative to sqrtdata. */
sqrtdata:
/* Byte offsets of the constants below, relative to sqrtdata.  */
#define DN 0x00
#define UP 0x08
#define HALF 0x10
#define ALMOST_THREE_HALF 0x18
#define T2 0x20
.quad 0x3fefffffffffffff /* DN = prev(1.0), the largest double below 1.0 */
.quad 0x3ff0000000000001 /* UP = next(1.0), the smallest double above 1.0 */
.quad 0x3fe0000000000000 /* HALF = 0.5 */
.quad 0x3ff7ffffffc00000 /* ALMOST_THREE_HALF = 1.5-2^-30 */
/* table T2: 64 32-bit correction terms for the initial guess.  __sqrt
   indexes it with a byte offset masked by 0xfc and subtracts the
   selected entry from its integer estimate.  */
.long 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866
.long 0xf14a, 0x1091b, 0x11fcd, 0x13552, 0x14999, 0x15c98, 0x16e34, 0x17e5f
.long 0x18d03, 0x19a01, 0x1a545, 0x1ae8a, 0x1b5c4, 0x1bb01, 0x1bfde, 0x1c28d
.long 0x1c2de, 0x1c0db, 0x1ba73, 0x1b11c, 0x1a4b5, 0x1953d, 0x18266, 0x16be0
.long 0x1683e, 0x179d8, 0x18a4d, 0x19992, 0x1a789, 0x1b445, 0x1bf61, 0x1c989
.long 0x1d16d, 0x1d77b, 0x1dddf, 0x1e2ad, 0x1e5bf, 0x1e6e8, 0x1e654, 0x1e3cd
.long 0x1df2a, 0x1d635, 0x1cb16, 0x1be2c, 0x1ae4e, 0x19bde, 0x1868e, 0x16e2e
.long 0x1527f, 0x1334a, 0x11051, 0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd
/*
* Stack variables:
*/
#define K 16(sp)
#define Y 24(sp)
#define FSIZE 32
.text
/* __sqrt: fast square root for the non-_IEEE_FP build.

   In:   $f16 = x.
   Out:  $f0  = sqrt(x).  For negative x errno is set to EDOM and the
	 all-ones bit pattern is returned as the result.

   Frame (FSIZE bytes): 0(sp) = saved ra, K = x as stored from $f16,
   Y = scratch slot used to move the integer estimate into an FP
   register.

   Method, as implemented below: form an initial estimate y of
   1/sqrt(x) with integer arithmetic on the high bits of x plus a
   correction from table T2, refine y with two steps of
   y * (1.5 - 0.5*x*y*y), compute z = x*y with one more correction,
   and finally choose between z, z*DN and z*UP.  */
LEAF(__sqrt, FSIZE)
	lda	sp, -FSIZE(sp)
	ldgp	gp, .-__sqrt(pv)
	stq	ra, 0(sp)
#ifdef PROF
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
#endif
	.prologue 1
	stt	$f16, K
	lda	t3, sqrtdata	# load base address into t3
	fblt	$f16, $negative
	/* Compute initial guess.  */
	.align 3
	ldah	t1, 0x5fe8	# e0 :
	ldq	t2, K		# .. e1 :
	ldt	$f12, HALF(t3)	# e0 :
	ldt	$f18, ALMOST_THREE_HALF(t3)	# .. e1 :
	srl	t2, 33, t0	# e0 :
	mult	$f16, $f12, $f11	# .. fm : $f11 = x * 0.5
	subl	t1, t0, t1	# e0 :
	addt	$f12, $f12, $f17	# .. fa : $f17 = 1.0
	srl	t1, 12, t0	# e0 :
	and	t0, 0xfc, t0	# .. e1 :
	addq	t0, t3, t0	# e0 :
	ldl	t0, T2(t0)	# .. e1 :
	addt	$f12, $f17, $f15	# fa : $f15 = 1.5
	subl	t1, t0, t1	# .. e1 :
	sll	t1, 32, t1	# e0 :
	ldt	$f14, DN(t3)	# .. e1 :
	stq	t1, Y		# e0 :
	ldt	$f13, Y		# e1 :
	/* Last use of the frame on this path: release it early.  */
	addq	sp, FSIZE, sp	# e0 :
	mult	$f11, $f13, $f10	# fm : $f10 = (x * 0.5) * y
	mult	$f10, $f13, $f10	# fm : $f10 = ((x * 0.5) * y) * y
	subt	$f15, $f10, $f1	# fa : $f1 = (1.5 - 0.5*x*y*y)
	mult	$f13, $f1, $f13	# fm : yp = y*(1.5 - 0.5*x*y*y)
	mult	$f11, $f13, $f11	# fm : $f11 = x * 0.5 * yp
	mult	$f11, $f13, $f11	# fm : $f11 = (x * 0.5 * yp) * yp
	subt	$f18, $f11, $f1	# fa : $f1= (1.5-2^-30) - 0.5*x*yp*yp
	mult	$f13, $f1, $f13	# fm : ypp = $f13 = yp*$f1
	subt	$f15, $f12, $f1	# fa : $f1 = (1.5 - 0.5)
	ldt	$f15, UP(t3)	# .. e1 :
	mult	$f16, $f13, $f10	# fm : z = $f10 = x * ypp
	mult	$f10, $f13, $f11	# fm : $f11 = z*ypp
	mult	$f10, $f12, $f12	# fm : $f12 = z*0.5
	subt	$f1, $f11, $f1	# .. fa : $f1 = 1 - z*ypp
	mult	$f12, $f1, $f12	# fm : $f12 = z*0.5*(1 - z*ypp)
	addt	$f10, $f12, $f0	# fa : zp=res=$f0= z + z*0.5*(1 - z*ypp)
	mult/c	$f0, $f14, $f12	# fm : zmi = zp * DN
	mult/c	$f0, $f15, $f11	# fm : zpl = zp * UP
	mult/c	$f0, $f12, $f1	# fm : $f1 = zp * zmi
	mult/c	$f0, $f11, $f15	# fm : $f15 = zp * zpl
	subt	$f1, $f16, $f13	# fa : y1 = zp*zmi - x
	subt	$f15, $f16, $f15	# fa : y2 = zp*zpl - x
	fcmovge	$f13, $f12, $f0	# res = (y1 >= 0) ? zmi : res
	fcmovlt	$f15, $f11, $f0	# res = (y2 < 0) ? zpl : res
	ret
	/* x < 0: report a domain error and return the all-ones pattern.  */
$negative:
	lda	t1, -1
	stq	t1, K
	lda	t1, EDOM
	stl	t1, errno
#ifdef _LIBC_REENTRANT
	jsr	ra, __errno_location
	/* Reload the error code after the call and store EDOM, matching
	   the global errno store above.  The previous code stored -1 into
	   the per-thread errno.  */
	lda	t1, EDOM
	ldq	ra, 0(sp)
	stl	t1, 0(v0)
#endif
	ldt	$f0, K		# res = (double) 0xffffffffffffffff
	addq	sp, FSIZE, sp
	ret
	END(__sqrt)
weak_alias(__sqrt, sqrt)
#endif /* !_IEEE_FP */