* sysdeps/alpha/Makefile <gnulib> (sysdep_routines): Merge divrem
variable, add unsigned variants. * sysdeps/alpha/divrem.h: Remove file. * sysdeps/alpha/div_libc.h: New file. * sysdeps/alpha/divl.S: Rewrite from scratch. * sysdeps/alpha/reml.S: Likewise. * sysdeps/alpha/divq.S: Likewise. * sysdeps/alpha/remq.S: Likewise. * sysdeps/alpha/divlu.S: New file. * sysdeps/alpha/remlu.S: New file. * sysdeps/alpha/divqu.S: New file. * sysdeps/alpha/remqu.S: New file.
This commit is contained in:
parent
a66be89006
commit
08e3c578ca
|
@ -26,7 +26,7 @@ sysdep_routines += _mcount
|
|||
endif
|
||||
|
||||
ifeq ($(subdir),gnulib)
|
||||
sysdep_routines += $(divrem)
|
||||
sysdep_routines += divl divlu divq divqu reml remlu remq remqu
|
||||
endif
|
||||
|
||||
ifeq ($(subdir),string)
|
||||
|
@ -38,8 +38,6 @@ ifeq ($(subdir),elf)
|
|||
CFLAGS-rtld.c = -mbuild-constants
|
||||
endif
|
||||
|
||||
divrem := divl divq reml remq
|
||||
|
||||
# For now, build everything with full IEEE math support.
|
||||
# TODO: build separate libm and libm-ieee.
|
||||
sysdep-CFLAGS += -mieee
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Common bits for implementing software divide. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#ifdef __linux__
|
||||
# include <asm/gentrap.h>
|
||||
# include <asm/pal.h>
|
||||
#else
|
||||
# include <machine/pal.h>
|
||||
#endif
|
||||
|
||||
/* These are not normal C functions. Argument registers are t10 and t11;
|
||||
the result goes in t12; the return address is in t9. Only t12 and AT
|
||||
may be clobbered. */
|
||||
#define X t10
|
||||
#define Y t11
|
||||
#define RV t12
|
||||
#define RA t9
|
||||
|
||||
/* None of these functions should use implicit anything. */
|
||||
.set nomacro
|
||||
.set noat
|
||||
|
||||
/* Code fragment to invoke _mcount for profiling. This should be invoked
|
||||
directly after allocation of the stack frame. */
|
||||
.macro CALL_MCOUNT
|
||||
#ifdef PROF
|
||||
stq ra, 0(sp)
|
||||
stq pv, 8(sp)
|
||||
stq gp, 16(sp)
|
||||
cfi_rel_offset (ra, 0)
|
||||
cfi_rel_offset (pv, 8)
|
||||
cfi_rel_offset (gp, 16)
|
||||
br AT, 1f
|
||||
.set macro
|
||||
1: ldgp gp, 0(AT)
|
||||
mov RA, ra
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
.set nomacro
|
||||
ldq ra, 0(sp)
|
||||
ldq pv, 8(sp)
|
||||
ldq gp, 16(sp)
|
||||
cfi_restore (ra)
|
||||
cfi_restore (pv)
|
||||
cfi_restore (gp)
|
||||
/* Realign subsequent code with what we'd have without this
|
||||
macro at all. This means aligned with one arithmetic insn
|
||||
used within the bundle. */
|
||||
.align 4
|
||||
nop
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/* In order to make the below work, all top-level divide routines must
|
||||
use the same frame size. */
|
||||
#define FRAME 48
|
||||
|
||||
/* Code fragment to generate an integer divide-by-zero fault. When
|
||||
building libc.so, we arrange for there to be one copy of this code
|
||||
placed late in the dso, such that all branches are forward. When
|
||||
building libc.a, we use multiple copies to avoid having an out of
|
||||
range branch. Users should jump to DIVBYZERO. */
|
||||
|
||||
.macro DO_DIVBYZERO
|
||||
#ifdef PIC
|
||||
#define DIVBYZERO __divbyzero
|
||||
.section .gnu.linkonce.t.divbyzero, "ax", @progbits
|
||||
.globl __divbyzero
|
||||
.type __divbyzero, @function
|
||||
.usepv __divbyzero, no
|
||||
.hidden __divbyzero
|
||||
#else
|
||||
#define DIVBYZERO $divbyzero
|
||||
#endif
|
||||
|
||||
.align 4
|
||||
DIVBYZERO:
|
||||
cfi_startproc
|
||||
cfi_return_column (RA)
|
||||
cfi_def_cfa_offset (FRAME)
|
||||
|
||||
mov a0, RV
|
||||
unop
|
||||
lda a0, GEN_INTDIV
|
||||
call_pal PAL_gentrap
|
||||
|
||||
mov RV, a0
|
||||
clr RV
|
||||
lda sp, FRAME(sp)
|
||||
cfi_def_cfa_offset (0)
|
||||
ret $31, (RA), 1
|
||||
|
||||
cfi_endproc
|
||||
.size DIVBYZERO, .-DIVBYZERO
|
||||
.endm
|
|
@ -1,6 +1,75 @@
|
|||
#define IS_REM 0
|
||||
#define SIZE 4
|
||||
#define UFUNC_NAME __divlu
|
||||
#define SFUNC_NAME __divl
|
||||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
#include "divrem.h"
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include "div_libc.h"
|
||||
|
||||
/* 32-bit signed int divide. This is not a normal C function. Argument
|
||||
registers are t10 and t11, the result goes in t12. Only t12 and AT may
|
||||
be clobbered.
|
||||
|
||||
The FPU can handle all input values except zero. Whee! */
|
||||
|
||||
#ifndef EXTEND
|
||||
#define EXTEND(S,D) sextl S, D
|
||||
#endif
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.globl __divl
|
||||
.type __divl, @function
|
||||
.usepv __divl, no
|
||||
|
||||
cfi_startproc
|
||||
cfi_return_column (RA)
|
||||
__divl:
|
||||
lda sp, -FRAME(sp)
|
||||
cfi_def_cfa_offset (FRAME)
|
||||
CALL_MCOUNT
|
||||
stt $f0, 0(sp)
|
||||
stt $f1, 8(sp)
|
||||
beq Y, DIVBYZERO
|
||||
cfi_rel_offset ($f0, 0)
|
||||
cfi_rel_offset ($f1, 8)
|
||||
|
||||
EXTEND (X, RV)
|
||||
EXTEND (Y, AT)
|
||||
stq RV, 16(sp)
|
||||
stq AT, 24(sp)
|
||||
|
||||
ldt $f0, 16(sp)
|
||||
ldt $f1, 24(sp)
|
||||
cvtqt $f0, $f0
|
||||
cvtqt $f1, $f1
|
||||
|
||||
divt/c $f0, $f1, $f0
|
||||
cvttq/c $f0, $f0
|
||||
stt $f0, 16(sp)
|
||||
ldt $f0, 0(sp)
|
||||
|
||||
ldt $f1, 8(sp)
|
||||
ldl RV, 16(sp)
|
||||
lda sp, FRAME(sp)
|
||||
cfi_restore ($f0)
|
||||
cfi_restore ($f1)
|
||||
cfi_def_cfa_offset (0)
|
||||
ret $31, (RA), 1
|
||||
|
||||
cfi_endproc
|
||||
.size __divl, .-__divl
|
||||
|
||||
DO_DIVBYZERO
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
#define UNSIGNED
|
||||
#define EXTEND(S,D) zapnot S, 15, D
|
||||
#define __divl __divlu
|
||||
#include <divl.S>
|
|
@ -1,6 +1,265 @@
|
|||
#define IS_REM 0
|
||||
#define SIZE 8
|
||||
#define UFUNC_NAME __divqu
|
||||
#define SFUNC_NAME __divq
|
||||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
#include "divrem.h"
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include "div_libc.h"
|
||||
|
||||
|
||||
/* 64-bit signed long divide. These are not normal C functions. Argument
|
||||
registers are t10 and t11, the result goes in t12. Only t12 and AT may
|
||||
be clobbered.
|
||||
|
||||
Theory of operation here is that we can use the FPU divider for virtually
|
||||
all operands that we see: all dividend values between -2**53 and 2**53-1
|
||||
can be computed directly. Note that divisor values need not be checked
|
||||
against that range because the rounded fp value will be close enough such
|
||||
that the quotient is < 1, which will properly be truncated to zero when we
|
||||
convert back to integer.
|
||||
|
||||
When the dividend is outside the range for which we can compute exact
|
||||
results, we use the fp quotent as an estimate from which we begin refining
|
||||
an exact integral value. This reduces the number of iterations in the
|
||||
shift-and-subtract loop significantly. */
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.globl __divq
|
||||
.type __divq, @function
|
||||
.usepv __divq, no
|
||||
|
||||
cfi_startproc
|
||||
cfi_return_column (RA)
|
||||
__divq:
|
||||
lda sp, -FRAME(sp)
|
||||
cfi_def_cfa_offset (FRAME)
|
||||
CALL_MCOUNT
|
||||
|
||||
/* Get the fp divide insn issued as quickly as possible. After
|
||||
that's done, we have at least 22 cycles until its results are
|
||||
ready -- all the time in the world to figure out how we're
|
||||
going to use the results. */
|
||||
stq X, 16(sp)
|
||||
stq Y, 24(sp)
|
||||
beq Y, DIVBYZERO
|
||||
|
||||
stt $f0, 0(sp)
|
||||
stt $f1, 8(sp)
|
||||
cfi_rel_offset ($f0, 0)
|
||||
cfi_rel_offset ($f1, 8)
|
||||
ldt $f0, 16(sp)
|
||||
ldt $f1, 24(sp)
|
||||
|
||||
cvtqt $f0, $f0
|
||||
cvtqt $f1, $f1
|
||||
divt/c $f0, $f1, $f0
|
||||
|
||||
/* Check to see if X fit in the double as an exact value. */
|
||||
sll X, (64-53), AT
|
||||
ldt $f1, 8(sp)
|
||||
sra AT, (64-53), AT
|
||||
cmpeq X, AT, AT
|
||||
beq AT, $x_big
|
||||
|
||||
/* If we get here, we're expecting exact results from the division.
|
||||
Do nothing else besides convert and clean up. */
|
||||
cvttq/c $f0, $f0
|
||||
stt $f0, 16(sp)
|
||||
|
||||
ldq RV, 16(sp)
|
||||
ldt $f0, 0(sp)
|
||||
cfi_restore ($f1)
|
||||
cfi_remember_state
|
||||
cfi_restore ($f0)
|
||||
cfi_def_cfa_offset (0)
|
||||
lda sp, FRAME(sp)
|
||||
ret $31, (RA), 1
|
||||
|
||||
.align 4
|
||||
cfi_restore_state
|
||||
$x_big:
|
||||
/* If we get here, X is large enough that we don't expect exact
|
||||
results, and neither X nor Y got mis-translated for the fp
|
||||
division. Our task is to take the fp result, figure out how
|
||||
far it's off from the correct result and compute a fixup. */
|
||||
stq t0, 16(sp)
|
||||
stq t1, 24(sp)
|
||||
stq t2, 32(sp)
|
||||
stq t5, 40(sp)
|
||||
cfi_rel_offset (t0, 16)
|
||||
cfi_rel_offset (t1, 24)
|
||||
cfi_rel_offset (t2, 32)
|
||||
cfi_rel_offset (t5, 40)
|
||||
|
||||
#define Q RV /* quotient */
|
||||
#define R t0 /* remainder */
|
||||
#define SY t1 /* scaled Y */
|
||||
#define S t2 /* scalar */
|
||||
#define QY t3 /* Q*Y */
|
||||
|
||||
/* The fixup code below can only handle unsigned values. */
|
||||
or X, Y, AT
|
||||
mov $31, t5
|
||||
blt AT, $fix_sign_in
|
||||
$fix_sign_in_ret1:
|
||||
cvttq/c $f0, $f0
|
||||
|
||||
stt $f0, 8(sp)
|
||||
ldq Q, 8(sp)
|
||||
$fix_sign_in_ret2:
|
||||
mulq Q, Y, QY
|
||||
stq t4, 8(sp)
|
||||
|
||||
ldt $f0, 0(sp)
|
||||
unop
|
||||
cfi_rel_offset (t4, 8)
|
||||
cfi_restore ($f0)
|
||||
stq t3, 0(sp)
|
||||
unop
|
||||
cfi_rel_offset (t3, 0)
|
||||
|
||||
subq QY, X, R
|
||||
mov Y, SY
|
||||
mov 1, S
|
||||
bgt R, $q_high
|
||||
|
||||
$q_high_ret:
|
||||
subq X, QY, R
|
||||
mov Y, SY
|
||||
mov 1, S
|
||||
bgt R, $q_low
|
||||
|
||||
$q_low_ret:
|
||||
ldq t0, 16(sp)
|
||||
ldq t1, 24(sp)
|
||||
ldq t2, 32(sp)
|
||||
bne t5, $fix_sign_out
|
||||
|
||||
$fix_sign_out_ret:
|
||||
ldq t3, 0(sp)
|
||||
ldq t4, 8(sp)
|
||||
ldq t5, 40(sp)
|
||||
lda sp, FRAME(sp)
|
||||
cfi_remember_state
|
||||
cfi_restore (t0)
|
||||
cfi_restore (t1)
|
||||
cfi_restore (t2)
|
||||
cfi_restore (t3)
|
||||
cfi_restore (t4)
|
||||
cfi_restore (t5)
|
||||
cfi_def_cfa_offset (0)
|
||||
ret $31, (RA), 1
|
||||
|
||||
.align 4
|
||||
cfi_restore_state
|
||||
/* The quotient that we computed was too large. We need to reduce
|
||||
it by S such that Y*S >= R. Obviously the closer we get to the
|
||||
correct value the better, but overshooting high is ok, as we'll
|
||||
fix that up later. */
|
||||
0:
|
||||
addq SY, SY, SY
|
||||
addq S, S, S
|
||||
$q_high:
|
||||
cmpult SY, R, AT
|
||||
bne AT, 0b
|
||||
|
||||
subq Q, S, Q
|
||||
unop
|
||||
subq QY, SY, QY
|
||||
br $q_high_ret
|
||||
|
||||
.align 4
|
||||
/* The quotient that we computed was too small. Divide Y by the
|
||||
current remainder (R) and add that to the existing quotient (Q).
|
||||
The expectation, of course, is that R is much smaller than X. */
|
||||
/* Begin with a shift-up loop. Compute S such that Y*S >= R. We
|
||||
already have a copy of Y in SY and the value 1 in S. */
|
||||
0:
|
||||
addq SY, SY, SY
|
||||
addq S, S, S
|
||||
$q_low:
|
||||
cmpult SY, R, AT
|
||||
bne AT, 0b
|
||||
|
||||
/* Shift-down and subtract loop. Each iteration compares our scaled
|
||||
Y (SY) with the remainder (R); if SY <= R then X is divisible by
|
||||
Y's scalar (S) so add it to the quotient (Q). */
|
||||
2: addq Q, S, t3
|
||||
srl S, 1, S
|
||||
cmpule SY, R, AT
|
||||
subq R, SY, t4
|
||||
|
||||
cmovne AT, t3, Q
|
||||
cmovne AT, t4, R
|
||||
srl SY, 1, SY
|
||||
bne S, 2b
|
||||
|
||||
br $q_low_ret
|
||||
|
||||
.align 4
|
||||
$fix_sign_in:
|
||||
/* If we got here, then X|Y is negative. Need to adjust everything
|
||||
such that we're doing unsigned division in the fixup loop. */
|
||||
/* T5 records the changes we had to make:
|
||||
bit 0: set if result should be negative.
|
||||
bit 2: set if X was negated.
|
||||
bit 3: set if Y was negated.
|
||||
*/
|
||||
xor X, Y, AT
|
||||
cmplt AT, 0, t5
|
||||
cmplt X, 0, AT
|
||||
negq X, t0
|
||||
|
||||
s4addq AT, t5, t5
|
||||
cmovne AT, t0, X
|
||||
cmplt Y, 0, AT
|
||||
negq Y, t0
|
||||
|
||||
s8addq AT, t5, t5
|
||||
cmovne AT, t0, Y
|
||||
unop
|
||||
blbc t5, $fix_sign_in_ret1
|
||||
|
||||
cvttq/c $f0, $f0
|
||||
stt $f0, 8(sp)
|
||||
ldq Q, 8(sp)
|
||||
unop
|
||||
|
||||
negq Q, Q
|
||||
br $fix_sign_in_ret2
|
||||
|
||||
.align 4
|
||||
$fix_sign_out:
|
||||
/* Now we get to undo what we did above. */
|
||||
/* ??? Is this really faster than just increasing the size of
|
||||
the stack frame and storing X and Y in memory? */
|
||||
and t5, 8, AT
|
||||
negq Y, t4
|
||||
cmovne AT, t4, Y
|
||||
|
||||
and t5, 4, AT
|
||||
negq X, t4
|
||||
cmovne AT, t4, X
|
||||
|
||||
negq RV, t4
|
||||
cmovlbs t5, t4, RV
|
||||
|
||||
br $fix_sign_out_ret
|
||||
|
||||
cfi_endproc
|
||||
.size __divq, .-__divq
|
||||
|
||||
DO_DIVBYZERO
|
||||
|
|
|
@ -0,0 +1,244 @@
|
|||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include "div_libc.h"
|
||||
|
||||
|
||||
/* 64-bit unsigned long divide. These are not normal C functions. Argument
|
||||
registers are t10 and t11, the result goes in t12. Only t12 and AT may be
|
||||
clobbered.
|
||||
|
||||
Theory of operation here is that we can use the FPU divider for virtually
|
||||
all operands that we see: all dividend values between -2**53 and 2**53-1
|
||||
can be computed directly. Note that divisor values need not be checked
|
||||
against that range because the rounded fp value will be close enough such
|
||||
that the quotient is < 1, which will properly be truncated to zero when we
|
||||
convert back to integer.
|
||||
|
||||
When the dividend is outside the range for which we can compute exact
|
||||
results, we use the fp quotent as an estimate from which we begin refining
|
||||
an exact integral value. This reduces the number of iterations in the
|
||||
shift-and-subtract loop significantly. */
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.globl __divqu
|
||||
.type __divqu, @function
|
||||
.usepv __divqu, no
|
||||
|
||||
cfi_startproc
|
||||
cfi_return_column (RA)
|
||||
__divqu:
|
||||
lda sp, -FRAME(sp)
|
||||
cfi_def_cfa_offset (FRAME)
|
||||
CALL_MCOUNT
|
||||
|
||||
/* Get the fp divide insn issued as quickly as possible. After
|
||||
that's done, we have at least 22 cycles until its results are
|
||||
ready -- all the time in the world to figure out how we're
|
||||
going to use the results. */
|
||||
stq X, 16(sp)
|
||||
stq Y, 24(sp)
|
||||
beq Y, DIVBYZERO
|
||||
|
||||
stt $f0, 0(sp)
|
||||
stt $f1, 8(sp)
|
||||
cfi_rel_offset ($f0, 0)
|
||||
cfi_rel_offset ($f1, 8)
|
||||
ldt $f0, 16(sp)
|
||||
ldt $f1, 24(sp)
|
||||
|
||||
cvtqt $f0, $f0
|
||||
cvtqt $f1, $f1
|
||||
blt X, $x_is_neg
|
||||
divt/c $f0, $f1, $f0
|
||||
|
||||
/* Check to see if Y was mis-converted as signed value. */
|
||||
ldt $f1, 8(sp)
|
||||
unop
|
||||
nop
|
||||
blt Y, $y_is_neg
|
||||
|
||||
/* Check to see if X fit in the double as an exact value. */
|
||||
srl X, 53, AT
|
||||
bne AT, $x_big
|
||||
|
||||
/* If we get here, we're expecting exact results from the division.
|
||||
Do nothing else besides convert and clean up. */
|
||||
cvttq/c $f0, $f0
|
||||
stt $f0, 16(sp)
|
||||
|
||||
ldq RV, 16(sp)
|
||||
ldt $f0, 0(sp)
|
||||
cfi_remember_state
|
||||
cfi_restore ($f0)
|
||||
cfi_restore ($f1)
|
||||
cfi_def_cfa_offset (0)
|
||||
lda sp, FRAME(sp)
|
||||
ret $31, (RA), 1
|
||||
|
||||
.align 4
|
||||
cfi_restore_state
|
||||
$x_is_neg:
|
||||
/* If we get here, X is so big that bit 63 is set, which made the
|
||||
conversion come out negative. Fix it up lest we not even get
|
||||
a good estimate. */
|
||||
ldah AT, 0x5f80 /* 2**64 as float. */
|
||||
stt $f2, 24(sp)
|
||||
cfi_rel_offset ($f2, 24)
|
||||
stl AT, 16(sp)
|
||||
lds $f2, 16(sp)
|
||||
|
||||
addt $f0, $f2, $f0
|
||||
unop
|
||||
divt/c $f0, $f1, $f0
|
||||
unop
|
||||
|
||||
/* Ok, we've now the divide issued. Continue with other checks. */
|
||||
ldt $f1, 8(sp)
|
||||
unop
|
||||
ldt $f2, 24(sp)
|
||||
blt Y, $y_is_neg
|
||||
cfi_restore ($f1)
|
||||
cfi_restore ($f2)
|
||||
cfi_remember_state /* for y_is_neg */
|
||||
|
||||
.align 4
|
||||
$x_big:
|
||||
/* If we get here, X is large enough that we don't expect exact
|
||||
results, and neither X nor Y got mis-translated for the fp
|
||||
division. Our task is to take the fp result, figure out how
|
||||
far it's off from the correct result and compute a fixup. */
|
||||
stq t0, 16(sp)
|
||||
stq t1, 24(sp)
|
||||
stq t2, 32(sp)
|
||||
stq t3, 40(sp)
|
||||
cfi_rel_offset (t0, 16)
|
||||
cfi_rel_offset (t1, 24)
|
||||
cfi_rel_offset (t2, 32)
|
||||
cfi_rel_offset (t3, 40)
|
||||
|
||||
#define Q RV /* quotient */
|
||||
#define R t0 /* remainder */
|
||||
#define SY t1 /* scaled Y */
|
||||
#define S t2 /* scalar */
|
||||
#define QY t3 /* Q*Y */
|
||||
|
||||
cvttq/c $f0, $f0
|
||||
stt $f0, 8(sp)
|
||||
ldq Q, 8(sp)
|
||||
mulq Q, Y, QY
|
||||
|
||||
stq t4, 8(sp)
|
||||
unop
|
||||
ldt $f0, 0(sp)
|
||||
unop
|
||||
cfi_rel_offset (t4, 8)
|
||||
cfi_restore ($f0)
|
||||
|
||||
subq QY, X, R
|
||||
mov Y, SY
|
||||
mov 1, S
|
||||
bgt R, $q_high
|
||||
|
||||
$q_high_ret:
|
||||
subq X, QY, R
|
||||
mov Y, SY
|
||||
mov 1, S
|
||||
bgt R, $q_low
|
||||
|
||||
$q_low_ret:
|
||||
ldq t4, 8(sp)
|
||||
ldq t0, 16(sp)
|
||||
ldq t1, 24(sp)
|
||||
ldq t2, 32(sp)
|
||||
|
||||
ldq t3, 40(sp)
|
||||
lda sp, FRAME(sp)
|
||||
cfi_remember_state
|
||||
cfi_restore (t0)
|
||||
cfi_restore (t1)
|
||||
cfi_restore (t2)
|
||||
cfi_restore (t3)
|
||||
cfi_restore (t4)
|
||||
cfi_def_cfa_offset (0)
|
||||
ret $31, (RA), 1
|
||||
|
||||
.align 4
|
||||
cfi_restore_state
|
||||
/* The quotient that we computed was too large. We need to reduce
|
||||
it by S such that Y*S >= R. Obviously the closer we get to the
|
||||
correct value the better, but overshooting high is ok, as we'll
|
||||
fix that up later. */
|
||||
0:
|
||||
addq SY, SY, SY
|
||||
addq S, S, S
|
||||
$q_high:
|
||||
cmpult SY, R, AT
|
||||
bne AT, 0b
|
||||
|
||||
subq Q, S, Q
|
||||
unop
|
||||
subq QY, SY, QY
|
||||
br $q_high_ret
|
||||
|
||||
.align 4
|
||||
/* The quotient that we computed was too small. Divide Y by the
|
||||
current remainder (R) and add that to the existing quotient (Q).
|
||||
The expectation, of course, is that R is much smaller than X. */
|
||||
/* Begin with a shift-up loop. Compute S such that Y*S >= R. We
|
||||
already have a copy of Y in SY and the value 1 in S. */
|
||||
0:
|
||||
addq SY, SY, SY
|
||||
addq S, S, S
|
||||
$q_low:
|
||||
cmpult SY, R, AT
|
||||
bne AT, 0b
|
||||
|
||||
/* Shift-down and subtract loop. Each iteration compares our scaled
|
||||
Y (SY) with the remainder (R); if SY <= R then X is divisible by
|
||||
Y's scalar (S) so add it to the quotient (Q). */
|
||||
2: addq Q, S, t3
|
||||
srl S, 1, S
|
||||
cmpule SY, R, AT
|
||||
subq R, SY, t4
|
||||
|
||||
cmovne AT, t3, Q
|
||||
cmovne AT, t4, R
|
||||
srl SY, 1, SY
|
||||
bne S, 2b
|
||||
|
||||
br $q_low_ret
|
||||
|
||||
.align 4
|
||||
cfi_restore_state
|
||||
$y_is_neg:
|
||||
/* If we get here, Y is so big that bit 63 is set. The results
|
||||
from the divide will be completely wrong. Fortunately, the
|
||||
quotient must be either 0 or 1, so just compute it directly. */
|
||||
cmpult Y, X, RV
|
||||
ldt $f0, 0(sp)
|
||||
lda sp, FRAME(sp)
|
||||
cfi_restore ($f0)
|
||||
cfi_def_cfa_offset (0)
|
||||
ret $31, (RA), 1
|
||||
|
||||
cfi_endproc
|
||||
.size __divqu, .-__divqu
|
||||
|
||||
DO_DIVBYZERO
|
|
@ -1,225 +0,0 @@
|
|||
/* Copyright (C) 1996,97,2002 Free Software Foundation, Inc.
|
||||
Contributed by David Mosberger (davidm@cs.arizona.edu).
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* The current Alpha chips don't provide hardware for integer
|
||||
division. The C compiler expects the functions
|
||||
|
||||
__divqu: 64-bit unsigned long divide
|
||||
__remqu: 64-bit unsigned long remainder
|
||||
__divqs/__remqs: signed 64-bit
|
||||
__divlu/__remlu: unsigned 32-bit
|
||||
__divls/__remls: signed 32-bit
|
||||
|
||||
These are not normal C functions: instead of the normal calling
|
||||
sequence, these expect their arguments in registers t10 and t11, and
|
||||
return the result in t12 (aka pv). Register AT may be clobbered
|
||||
(assembly temporary), anything else must be saved. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
#ifdef __linux__
|
||||
# include <asm/gentrap.h>
|
||||
# include <asm/pal.h>
|
||||
#else
|
||||
# include <machine/pal.h>
|
||||
#endif
|
||||
|
||||
#define mask v0
|
||||
#define divisor t0
|
||||
#define compare AT
|
||||
#define tmp1 t2
|
||||
#define tmp2 t3
|
||||
#define retaddr t9
|
||||
#define arg1 t10
|
||||
#define arg2 t11
|
||||
#define result t12
|
||||
|
||||
#if IS_REM
|
||||
# define DIV_ONLY(x,y...)
|
||||
# define REM_ONLY(x,y...) x,##y
|
||||
# define modulus result
|
||||
# define quotient t1
|
||||
# define GETSIGN(x) mov arg1, x
|
||||
# define STACK 32
|
||||
#else
|
||||
# define DIV_ONLY(x,y...) x,##y
|
||||
# define REM_ONLY(x,y...)
|
||||
# define modulus t1
|
||||
# define quotient result
|
||||
# define GETSIGN(x) xor arg1, arg2, x
|
||||
# define STACK 48
|
||||
#endif
|
||||
|
||||
#if SIZE == 8
|
||||
# define LONGIFY(x,y) mov x,y
|
||||
# define SLONGIFY(x,y) mov x,y
|
||||
# define _SLONGIFY(x)
|
||||
# define NEG(x,y) negq x,y
|
||||
#else
|
||||
# define LONGIFY(x,y) zapnot x,15,y
|
||||
# define SLONGIFY(x,y) sextl x,y
|
||||
# define _SLONGIFY(x) sextl x,x
|
||||
# define NEG(x,y) negl x,y
|
||||
#endif
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
.ent UFUNC_NAME
|
||||
.globl UFUNC_NAME
|
||||
|
||||
.align 3
|
||||
UFUNC_NAME:
|
||||
$udiv_entry:
|
||||
lda sp, -STACK(sp)
|
||||
.frame sp, STACK, retaddr, 0
|
||||
#ifdef PROF
|
||||
stq ra, 0(sp)
|
||||
stq pv, 8(sp)
|
||||
stq gp, 16(sp)
|
||||
|
||||
br AT, 1f
|
||||
1: ldgp gp, 0(AT)
|
||||
|
||||
mov retaddr, ra
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
|
||||
ldq ra, 0(sp)
|
||||
ldq pv, 8(sp)
|
||||
ldq gp, 16(sp)
|
||||
#endif
|
||||
.prologue 0
|
||||
|
||||
$udiv:
|
||||
stq t0, 0(sp)
|
||||
LONGIFY (arg2, divisor)
|
||||
stq t1, 8(sp)
|
||||
LONGIFY (arg1, modulus)
|
||||
stq v0, 16(sp)
|
||||
clr quotient
|
||||
stq tmp1, 24(sp)
|
||||
ldiq mask, 1
|
||||
DIV_ONLY(stq tmp2,32(sp))
|
||||
|
||||
beq divisor, $divbyzero
|
||||
|
||||
.align 3
|
||||
#if SIZE == 8
|
||||
/* Shift divisor left. */
|
||||
1: cmpult divisor, modulus, compare
|
||||
blt divisor, 2f
|
||||
addq divisor, divisor, divisor
|
||||
addq mask, mask, mask
|
||||
bne compare, 1b
|
||||
unop
|
||||
2:
|
||||
#else
|
||||
/* Shift divisor left using 3-bit shifts as we can't overflow.
|
||||
This results in looping three times less here, but up to
|
||||
two more times later. Thus using a large shift isn't worth it. */
|
||||
1: cmpult divisor, modulus, compare
|
||||
s8addq divisor, zero, divisor
|
||||
s8addq mask, zero, mask
|
||||
bne compare, 1b
|
||||
#endif
|
||||
|
||||
/* Now go back to the right. */
|
||||
3: DIV_ONLY(addq quotient, mask, tmp2)
|
||||
srl mask, 1, mask
|
||||
cmpule divisor, modulus, compare
|
||||
subq modulus, divisor, tmp1
|
||||
DIV_ONLY(cmovne compare, tmp2, quotient)
|
||||
srl divisor, 1, divisor
|
||||
cmovne compare, tmp1, modulus
|
||||
bne mask, 3b
|
||||
|
||||
$done: ldq t0, 0(sp)
|
||||
ldq t1, 8(sp)
|
||||
ldq v0, 16(sp)
|
||||
ldq tmp1, 24(sp)
|
||||
DIV_ONLY(ldq tmp2, 32(sp))
|
||||
lda sp, STACK(sp)
|
||||
ret zero, (retaddr), 1
|
||||
|
||||
$divbyzero:
|
||||
mov a0, tmp1
|
||||
ldiq a0, GEN_INTDIV
|
||||
call_pal PAL_gentrap
|
||||
mov tmp1, a0
|
||||
clr result /* If trap returns, return zero. */
|
||||
br $done
|
||||
|
||||
.end UFUNC_NAME
|
||||
|
||||
.ent SFUNC_NAME
|
||||
.globl SFUNC_NAME
|
||||
|
||||
.align 3
|
||||
SFUNC_NAME:
|
||||
lda sp, -STACK(sp)
|
||||
.frame sp, STACK, retaddr, 0
|
||||
#ifdef PROF
|
||||
stq ra, 0(sp)
|
||||
stq pv, 8(sp)
|
||||
stq gp, 16(sp)
|
||||
|
||||
br AT, 1f
|
||||
1: ldgp gp, 0(AT)
|
||||
|
||||
mov retaddr, ra
|
||||
jsr AT, _mcount
|
||||
|
||||
ldq ra, 0(sp)
|
||||
ldq pv, 8(sp)
|
||||
ldq gp, 16(sp)
|
||||
#endif
|
||||
.prologue 0
|
||||
|
||||
or arg1, arg2, AT
|
||||
_SLONGIFY(AT)
|
||||
bge AT, $udiv /* don't need to mess with signs */
|
||||
|
||||
/* Save originals and find absolute values. */
|
||||
stq arg1, 0(sp)
|
||||
NEG (arg1, AT)
|
||||
stq arg2, 8(sp)
|
||||
cmovge AT, AT, arg1
|
||||
stq retaddr, 16(sp)
|
||||
NEG (arg2, AT)
|
||||
stq tmp1, 24(sp)
|
||||
cmovge AT, AT, arg2
|
||||
|
||||
/* Do the unsigned division. */
|
||||
bsr retaddr, $udiv_entry
|
||||
|
||||
/* Restore originals and adjust the sign of the result. */
|
||||
ldq arg1, 0(sp)
|
||||
ldq arg2, 8(sp)
|
||||
GETSIGN (AT)
|
||||
NEG (result, tmp1)
|
||||
_SLONGIFY(AT)
|
||||
ldq retaddr, 16(sp)
|
||||
cmovlt AT, tmp1, result
|
||||
ldq tmp1, 24(sp)
|
||||
|
||||
lda sp, STACK(sp)
|
||||
ret zero, (retaddr), 1
|
||||
|
||||
.end SFUNC_NAME
|
|
@ -1,6 +1,80 @@
|
|||
#define IS_REM 1
|
||||
#define SIZE 4
|
||||
#define UFUNC_NAME __remlu
|
||||
#define SFUNC_NAME __reml
|
||||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@twiddle.net>
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
#include "divrem.h"
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include "div_libc.h"
|
||||
|
||||
/* 32-bit signed int remainder. This is not a normal C function. Argument
|
||||
registers are t10 and t11, the result goes in t12. Only t12 and AT may
|
||||
be clobbered.
|
||||
|
||||
The FPU can handle the division for all input values except zero.
|
||||
All we have to do is compute the remainder via multiply-and-subtract. */
|
||||
|
||||
#ifndef EXTEND
|
||||
#define EXTEND(S,D) sextl S, D
|
||||
#endif
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.globl __reml
|
||||
.type __reml, @function
|
||||
.usepv __reml, no
|
||||
|
||||
cfi_startproc
|
||||
cfi_return_column (RA)
|
||||
__reml:
|
||||
lda sp, -FRAME(sp)
|
||||
cfi_def_cfa_offset (FRAME)
|
||||
CALL_MCOUNT
|
||||
stt $f0, 0(sp)
|
||||
stt $f1, 8(sp)
|
||||
beq Y, DIVBYZERO
|
||||
cfi_rel_offset ($f0, 0)
|
||||
cfi_rel_offset ($f1, 8)
|
||||
|
||||
EXTEND (X, RV)
|
||||
EXTEND (Y, AT)
|
||||
stq RV, 16(sp)
|
||||
stq AT, 24(sp)
|
||||
|
||||
ldt $f0, 16(sp)
|
||||
ldt $f1, 24(sp)
|
||||
cvtqt $f0, $f0
|
||||
cvtqt $f1, $f1
|
||||
|
||||
divt/c $f0, $f1, $f0
|
||||
cvttq/c $f0, $f0
|
||||
stt $f0, 16(sp)
|
||||
ldq RV, 16(sp)
|
||||
|
||||
ldt $f0, 0(sp)
|
||||
mull RV, Y, RV
|
||||
ldt $f1, 8(sp)
|
||||
lda sp, FRAME(sp)
|
||||
cfi_restore ($f0)
|
||||
cfi_restore ($f1)
|
||||
cfi_def_cfa_offset (0)
|
||||
|
||||
subl X, RV, RV
|
||||
ret $31, (RA), 1
|
||||
|
||||
cfi_endproc
|
||||
.size __reml, .-__reml
|
||||
|
||||
DO_DIVBYZERO
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
#define UNSIGNED
|
||||
#define EXTEND(S,D) zapnot S, 15, D
|
||||
#define __reml __remlu
|
||||
#include <reml.S>
|
|
@ -1,6 +1,261 @@
|
|||
#define IS_REM 1
|
||||
#define SIZE 8
|
||||
#define UFUNC_NAME __remqu
|
||||
#define SFUNC_NAME __remq
|
||||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
#include "divrem.h"
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include "div_libc.h"
|
||||
|
||||
|
||||
/* 64-bit signed long remainder. These are not normal C functions. Argument
|
||||
registers are t10 and t11, the result goes in t12. Only t12 and AT may
|
||||
be clobbered.
|
||||
|
||||
Theory of operation here is that we can use the FPU divider for virtually
|
||||
all operands that we see: all dividend values between -2**53 and 2**53-1
|
||||
can be computed directly. Note that divisor values need not be checked
|
||||
against that range because the rounded fp value will be close enough such
|
||||
that the quotient is < 1, which will properly be truncated to zero when we
|
||||
convert back to integer.
|
||||
|
||||
When the dividend is outside the range for which we can compute exact
|
||||
results, we use the fp quotent as an estimate from which we begin refining
|
||||
an exact integral value. This reduces the number of iterations in the
|
||||
shift-and-subtract loop significantly. */
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.globl __remq
|
||||
.type __remq, @function
|
||||
.usepv __remq, no
|
||||
|
||||
cfi_startproc
|
||||
cfi_return_column (RA)
|
||||
__remq:
|
||||
lda sp, -FRAME(sp)
|
||||
cfi_def_cfa_offset (FRAME)
|
||||
CALL_MCOUNT
|
||||
|
||||
/* Get the fp divide insn issued as quickly as possible. After
|
||||
that's done, we have at least 22 cycles until its results are
|
||||
ready -- all the time in the world to figure out how we're
|
||||
going to use the results. */
|
||||
stq X, 16(sp)
|
||||
stq Y, 24(sp)
|
||||
beq Y, DIVBYZERO
|
||||
|
||||
stt $f0, 0(sp)
|
||||
stt $f1, 8(sp)
|
||||
cfi_rel_offset ($f0, 0)
|
||||
cfi_rel_offset ($f1, 8)
|
||||
ldt $f0, 16(sp)
|
||||
ldt $f1, 24(sp)
|
||||
|
||||
cvtqt $f0, $f0
|
||||
cvtqt $f1, $f1
|
||||
divt/c $f0, $f1, $f0
|
||||
|
||||
/* Check to see if X fit in the double as an exact value. */
|
||||
sll X, (64-53), AT
|
||||
ldt $f1, 8(sp)
|
||||
sra AT, (64-53), AT
|
||||
cmpeq X, AT, AT
|
||||
beq AT, $x_big
|
||||
|
||||
/* If we get here, we're expecting exact results from the division.
|
||||
Do nothing else besides convert, compute remainder, clean up. */
|
||||
cvttq/c $f0, $f0
|
||||
stt $f0, 16(sp)
|
||||
|
||||
ldq AT, 16(sp)
|
||||
mulq AT, Y, AT
|
||||
ldt $f0, 0(sp)
|
||||
cfi_restore ($f1)
|
||||
cfi_remember_state
|
||||
cfi_restore ($f0)
|
||||
cfi_def_cfa_offset (0)
|
||||
lda sp, FRAME(sp)
|
||||
|
||||
subq X, AT, RV
|
||||
ret $31, (RA), 1
|
||||
|
||||
.align 4
|
||||
cfi_restore_state
|
||||
$x_big:
|
||||
/* If we get here, X is large enough that we don't expect exact
|
||||
results, and neither X nor Y got mis-translated for the fp
|
||||
division. Our task is to take the fp result, figure out how
|
||||
far it's off from the correct result and compute a fixup. */
|
||||
stq t0, 16(sp)
|
||||
stq t1, 24(sp)
|
||||
stq t2, 32(sp)
|
||||
stq t5, 40(sp)
|
||||
cfi_rel_offset (t0, 16)
|
||||
cfi_rel_offset (t1, 24)
|
||||
cfi_rel_offset (t2, 32)
|
||||
cfi_rel_offset (t5, 40)
|
||||
|
||||
#define Q t0 /* quotient */
|
||||
#define R RV /* remainder */
|
||||
#define SY t1 /* scaled Y */
|
||||
#define S t2 /* scalar */
|
||||
#define QY t3 /* Q*Y */
|
||||
|
||||
/* The fixup code below can only handle unsigned values. */
|
||||
or X, Y, AT
|
||||
mov $31, t5
|
||||
blt AT, $fix_sign_in
|
||||
$fix_sign_in_ret1:
|
||||
cvttq/c $f0, $f0
|
||||
|
||||
stt $f0, 8(sp)
|
||||
ldq Q, 8(sp)
|
||||
$fix_sign_in_ret2:
|
||||
mulq Q, Y, QY
|
||||
stq t4, 8(sp)
|
||||
|
||||
ldt $f0, 0(sp)
|
||||
unop
|
||||
cfi_rel_offset (t4, 8)
|
||||
cfi_restore ($f0)
|
||||
stq t3, 0(sp)
|
||||
unop
|
||||
cfi_rel_offset (t3, 0)
|
||||
|
||||
subq QY, X, R
|
||||
mov Y, SY
|
||||
mov 1, S
|
||||
bgt R, $q_high
|
||||
|
||||
$q_high_ret:
|
||||
subq X, QY, R
|
||||
mov Y, SY
|
||||
mov 1, S
|
||||
bgt R, $q_low
|
||||
|
||||
$q_low_ret:
|
||||
ldq t0, 16(sp)
|
||||
ldq t1, 24(sp)
|
||||
ldq t2, 32(sp)
|
||||
bne t5, $fix_sign_out
|
||||
|
||||
$fix_sign_out_ret:
|
||||
ldq t3, 0(sp)
|
||||
ldq t4, 8(sp)
|
||||
ldq t5, 40(sp)
|
||||
lda sp, FRAME(sp)
|
||||
cfi_remember_state
|
||||
cfi_restore (t0)
|
||||
cfi_restore (t1)
|
||||
cfi_restore (t2)
|
||||
cfi_restore (t3)
|
||||
cfi_restore (t4)
|
||||
cfi_restore (t5)
|
||||
cfi_def_cfa_offset (0)
|
||||
ret $31, (RA), 1
|
||||
|
||||
.align 4
|
||||
cfi_restore_state
|
||||
/* The quotient that we computed was too large. We need to reduce
|
||||
it by S such that Y*S >= R. Obviously the closer we get to the
|
||||
correct value the better, but overshooting high is ok, as we'll
|
||||
fix that up later. */
|
||||
0:
|
||||
addq SY, SY, SY
|
||||
addq S, S, S
|
||||
$q_high:
|
||||
cmpult SY, R, AT
|
||||
bne AT, 0b
|
||||
|
||||
subq Q, S, Q
|
||||
unop
|
||||
subq QY, SY, QY
|
||||
br $q_high_ret
|
||||
|
||||
.align 4
|
||||
/* The quotient that we computed was too small. Divide Y by the
|
||||
current remainder (R) and add that to the existing quotient (Q).
|
||||
The expectation, of course, is that R is much smaller than X. */
|
||||
/* Begin with a shift-up loop. Compute S such that Y*S >= R. We
|
||||
already have a copy of Y in SY and the value 1 in S. */
|
||||
0:
|
||||
addq SY, SY, SY
|
||||
addq S, S, S
|
||||
$q_low:
|
||||
cmpult SY, R, AT
|
||||
bne AT, 0b
|
||||
|
||||
/* Shift-down and subtract loop. Each iteration compares our scaled
|
||||
Y (SY) with the remainder (R); if SY <= R then X is divisible by
|
||||
Y's scalar (S) so add it to the quotient (Q). */
|
||||
2: addq Q, S, t3
|
||||
srl S, 1, S
|
||||
cmpule SY, R, AT
|
||||
subq R, SY, t4
|
||||
|
||||
cmovne AT, t3, Q
|
||||
cmovne AT, t4, R
|
||||
srl SY, 1, SY
|
||||
bne S, 2b
|
||||
|
||||
br $q_low_ret
|
||||
|
||||
.align 4
|
||||
$fix_sign_in:
|
||||
/* If we got here, then X|Y is negative. Need to adjust everything
|
||||
such that we're doing unsigned division in the fixup loop. */
|
||||
/* T5 records the changes we had to make:
|
||||
bit 0: set if X was negated. Note that the sign of the
|
||||
remainder follows the sign of the divisor.
|
||||
bit 2: set if Y was negated.
|
||||
*/
|
||||
xor X, Y, t1
|
||||
cmplt X, 0, t5
|
||||
negq X, t0
|
||||
cmovne t5, t0, X
|
||||
|
||||
cmplt Y, 0, AT
|
||||
negq Y, t0
|
||||
s4addq AT, t5, t5
|
||||
cmovne AT, t0, Y
|
||||
|
||||
bge t1, $fix_sign_in_ret1
|
||||
cvttq/c $f0, $f0
|
||||
stt $f0, 8(sp)
|
||||
ldq Q, 8(sp)
|
||||
|
||||
negq Q, Q
|
||||
br $fix_sign_in_ret2
|
||||
|
||||
.align 4
|
||||
$fix_sign_out:
|
||||
/* Now we get to undo what we did above. */
|
||||
/* ??? Is this really faster than just increasing the size of
|
||||
the stack frame and storing X and Y in memory? */
|
||||
and t5, 4, AT
|
||||
negq Y, t4
|
||||
cmovne AT, t4, Y
|
||||
|
||||
negq X, t4
|
||||
cmovlbs t5, t4, X
|
||||
negq RV, t4
|
||||
cmovlbs t5, t4, RV
|
||||
|
||||
br $fix_sign_out_ret
|
||||
|
||||
cfi_endproc
|
||||
.size __remq, .-__remq
|
||||
|
||||
DO_DIVBYZERO
|
||||
|
|
|
@ -0,0 +1,251 @@
|
|||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include "div_libc.h"
|
||||
|
||||
|
||||
/* 64-bit unsigned long remainder. These are not normal C functions. Argument
|
||||
registers are t10 and t11, the result goes in t12. Only t12 and AT may be
|
||||
clobbered.
|
||||
|
||||
Theory of operation here is that we can use the FPU divider for virtually
|
||||
all operands that we see: all dividend values between -2**53 and 2**53-1
|
||||
can be computed directly. Note that divisor values need not be checked
|
||||
against that range because the rounded fp value will be close enough such
|
||||
that the quotient is < 1, which will properly be truncated to zero when we
|
||||
convert back to integer.
|
||||
|
||||
When the dividend is outside the range for which we can compute exact
|
||||
results, we use the fp quotent as an estimate from which we begin refining
|
||||
an exact integral value. This reduces the number of iterations in the
|
||||
shift-and-subtract loop significantly. */
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.globl __remqu
|
||||
.type __remqu, @function
|
||||
.usepv __remqu, no
|
||||
|
||||
cfi_startproc
|
||||
cfi_return_column (RA)
|
||||
__remqu:
|
||||
lda sp, -FRAME(sp)
|
||||
cfi_def_cfa_offset (FRAME)
|
||||
CALL_MCOUNT
|
||||
|
||||
/* Get the fp divide insn issued as quickly as possible. After
|
||||
that's done, we have at least 22 cycles until its results are
|
||||
ready -- all the time in the world to figure out how we're
|
||||
going to use the results. */
|
||||
stq X, 16(sp)
|
||||
stq Y, 24(sp)
|
||||
beq Y, DIVBYZERO
|
||||
|
||||
stt $f0, 0(sp)
|
||||
stt $f1, 8(sp)
|
||||
cfi_rel_offset ($f0, 0)
|
||||
cfi_rel_offset ($f1, 8)
|
||||
ldt $f0, 16(sp)
|
||||
ldt $f1, 24(sp)
|
||||
|
||||
cvtqt $f0, $f0
|
||||
cvtqt $f1, $f1
|
||||
blt X, $x_is_neg
|
||||
divt/c $f0, $f1, $f0
|
||||
|
||||
/* Check to see if Y was mis-converted as signed value. */
|
||||
ldt $f1, 8(sp)
|
||||
unop
|
||||
nop
|
||||
blt Y, $y_is_neg
|
||||
|
||||
/* Check to see if X fit in the double as an exact value. */
|
||||
srl X, 53, AT
|
||||
bne AT, $x_big
|
||||
|
||||
/* If we get here, we're expecting exact results from the division.
|
||||
Do nothing else besides convert, compute remainder, clean up. */
|
||||
cvttq/c $f0, $f0
|
||||
stt $f0, 16(sp)
|
||||
|
||||
ldq AT, 16(sp)
|
||||
mulq AT, Y, AT
|
||||
ldt $f0, 0(sp)
|
||||
lda sp, FRAME(sp)
|
||||
cfi_remember_state
|
||||
cfi_restore ($f0)
|
||||
cfi_restore ($f1)
|
||||
cfi_def_cfa_offset (0)
|
||||
|
||||
subq X, AT, RV
|
||||
ret $31, (RA), 1
|
||||
|
||||
.align 4
|
||||
cfi_restore_state
|
||||
$x_is_neg:
|
||||
/* If we get here, X is so big that bit 63 is set, which made the
|
||||
conversion come out negative. Fix it up lest we not even get
|
||||
a good estimate. */
|
||||
ldah AT, 0x5f80 /* 2**64 as float. */
|
||||
stt $f2, 24(sp)
|
||||
cfi_rel_offset ($f2, 24)
|
||||
stl AT, 16(sp)
|
||||
lds $f2, 16(sp)
|
||||
|
||||
addt $f0, $f2, $f0
|
||||
unop
|
||||
divt/c $f0, $f1, $f0
|
||||
unop
|
||||
|
||||
/* Ok, we've now the divide issued. Continue with other checks. */
|
||||
ldt $f1, 8(sp)
|
||||
unop
|
||||
ldt $f2, 24(sp)
|
||||
blt Y, $y_is_neg
|
||||
cfi_restore ($f1)
|
||||
cfi_restore ($f2)
|
||||
cfi_remember_state /* for y_is_neg */
|
||||
|
||||
.align 4
|
||||
$x_big:
|
||||
/* If we get here, X is large enough that we don't expect exact
|
||||
results, and neither X nor Y got mis-translated for the fp
|
||||
division. Our task is to take the fp result, figure out how
|
||||
far it's off from the correct result and compute a fixup. */
|
||||
stq t0, 16(sp)
|
||||
stq t1, 24(sp)
|
||||
stq t2, 32(sp)
|
||||
stq t3, 40(sp)
|
||||
cfi_rel_offset (t0, 16)
|
||||
cfi_rel_offset (t1, 24)
|
||||
cfi_rel_offset (t2, 32)
|
||||
cfi_rel_offset (t3, 40)
|
||||
|
||||
#define Q t0 /* quotient */
|
||||
#define R RV /* remainder */
|
||||
#define SY t1 /* scaled Y */
|
||||
#define S t2 /* scalar */
|
||||
#define QY t3 /* Q*Y */
|
||||
|
||||
cvttq/c $f0, $f0
|
||||
stt $f0, 8(sp)
|
||||
ldq Q, 8(sp)
|
||||
mulq Q, Y, QY
|
||||
|
||||
stq t4, 8(sp)
|
||||
unop
|
||||
ldt $f0, 0(sp)
|
||||
unop
|
||||
cfi_rel_offset (t4, 8)
|
||||
cfi_restore ($f0)
|
||||
|
||||
subq QY, X, R
|
||||
mov Y, SY
|
||||
mov 1, S
|
||||
bgt R, $q_high
|
||||
|
||||
$q_high_ret:
|
||||
subq X, QY, R
|
||||
mov Y, SY
|
||||
mov 1, S
|
||||
bgt R, $q_low
|
||||
|
||||
$q_low_ret:
|
||||
ldq t4, 8(sp)
|
||||
ldq t0, 16(sp)
|
||||
ldq t1, 24(sp)
|
||||
ldq t2, 32(sp)
|
||||
|
||||
ldq t3, 40(sp)
|
||||
lda sp, FRAME(sp)
|
||||
cfi_remember_state
|
||||
cfi_restore (t0)
|
||||
cfi_restore (t1)
|
||||
cfi_restore (t2)
|
||||
cfi_restore (t3)
|
||||
cfi_restore (t4)
|
||||
cfi_def_cfa_offset (0)
|
||||
ret $31, (RA), 1
|
||||
|
||||
.align 4
|
||||
cfi_restore_state
|
||||
/* The quotient that we computed was too large. We need to reduce
|
||||
it by S such that Y*S >= R. Obviously the closer we get to the
|
||||
correct value the better, but overshooting high is ok, as we'll
|
||||
fix that up later. */
|
||||
0:
|
||||
addq SY, SY, SY
|
||||
addq S, S, S
|
||||
$q_high:
|
||||
cmpult SY, R, AT
|
||||
bne AT, 0b
|
||||
|
||||
subq Q, S, Q
|
||||
unop
|
||||
subq QY, SY, QY
|
||||
br $q_high_ret
|
||||
|
||||
.align 4
|
||||
/* The quotient that we computed was too small. Divide Y by the
|
||||
current remainder (R) and add that to the existing quotient (Q).
|
||||
The expectation, of course, is that R is much smaller than X. */
|
||||
/* Begin with a shift-up loop. Compute S such that Y*S >= R. We
|
||||
already have a copy of Y in SY and the value 1 in S. */
|
||||
0:
|
||||
addq SY, SY, SY
|
||||
addq S, S, S
|
||||
$q_low:
|
||||
cmpult SY, R, AT
|
||||
bne AT, 0b
|
||||
|
||||
/* Shift-down and subtract loop. Each iteration compares our scaled
|
||||
Y (SY) with the remainder (R); if SY <= R then X is divisible by
|
||||
Y's scalar (S) so add it to the quotient (Q). */
|
||||
2: addq Q, S, t3
|
||||
srl S, 1, S
|
||||
cmpule SY, R, AT
|
||||
subq R, SY, t4
|
||||
|
||||
cmovne AT, t3, Q
|
||||
cmovne AT, t4, R
|
||||
srl SY, 1, SY
|
||||
bne S, 2b
|
||||
|
||||
br $q_low_ret
|
||||
|
||||
.align 4
|
||||
cfi_restore_state
|
||||
$y_is_neg:
|
||||
/* If we get here, Y is so big that bit 63 is set. The results
|
||||
from the divide will be completely wrong. Fortunately, the
|
||||
quotient must be either 0 or 1, so the remainder must be X
|
||||
or X-Y, so just compute it directly. */
|
||||
cmpult Y, X, AT
|
||||
subq X, Y, RV
|
||||
ldt $f0, 0(sp)
|
||||
cmoveq AT, X, RV
|
||||
|
||||
lda sp, FRAME(sp)
|
||||
cfi_restore ($f0)
|
||||
cfi_def_cfa_offset (0)
|
||||
ret $31, (RA), 1
|
||||
|
||||
cfi_endproc
|
||||
.size __remqu, .-__remqu
|
||||
|
||||
DO_DIVBYZERO
|
Loading…
Reference in New Issue