a554497024
From-SVN: r267494
2592 lines
53 KiB
ArmAsm
2592 lines
53 KiB
ArmAsm
/* IEEE-754 double-precision functions for Xtensa
|
|
Copyright (C) 2006-2019 Free Software Foundation, Inc.
|
|
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3, or (at your option)
|
|
any later version.
|
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
|
License for more details.
|
|
|
|
Under Section 7 of GPL version 3, you are granted additional
|
|
permissions described in the GCC Runtime Library Exception, version
|
|
3.1, as published by the Free Software Foundation.
|
|
|
|
You should have received a copy of the GNU General Public License and
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
/* Register aliases for the two double-precision operands.  Operand x
   lives in the a2/a3 pair and operand y in the a4/a5 pair.  "h" names
   the word holding the sign and exponent, "l" the low mantissa word;
   which physical register holds which word depends on the byte order
   selected by __XTENSA_EB__.  */
#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif
|
|
|
|
/* Warning! The branch displacements for some Xtensa branch instructions
|
|
are quite small, and this code has been carefully laid out to keep
|
|
branch targets in range. If you change anything, be sure to check that
|
|
the assembler is not relaxing anything to branch over a jump. */
|
|
|
|
#ifdef L_negdf2

/* __negdf2: return -x.

   In:	   x in xh/xl.
   Out:	   xh/xl = x with bit 31 of xh (the sign bit) inverted.  xl is
	   not modified, and no special handling is applied to NaNs,
	   Infinities or zeros -- only the sign bit changes.
   Clobbers: a4.  */

	.align	4
	.global	__negdf2
	.type	__negdf2, @function
__negdf2:
	leaf_entry sp, 16
	/* Build the sign mask 0x80000000 in a4 with a shift instead of a
	   32-bit "movi", so that no literal-pool entry is needed (this
	   section has no .literal_position directive).  */
	movi	a4, 1
	slli	a4, a4, 31
	xor	xh, xh, a4
	leaf_return

#endif /* L_negdf2 */
|
|
|
|
#ifdef L_addsubdf3
|
|
|
|
.literal_position
|
|
/* Addition */
|
|
__adddf3_aux:
|
|
|
|
/* Handle NaNs and Infinities. (This code is placed before the
|
|
start of the function just to keep it in range of the limited
|
|
branch displacements.) */
|
|
|
|
.Ladd_xnan_or_inf:
|
|
/* If y is neither Infinity nor NaN, return x. */
|
|
bnall yh, a6, .Ladd_return_nan_or_inf
|
|
/* If x is a NaN, return it. Otherwise, return y. */
|
|
slli a7, xh, 12
|
|
or a7, a7, xl
|
|
bnez a7, .Ladd_return_nan
|
|
|
|
.Ladd_ynan_or_inf:
|
|
/* Return y. */
|
|
mov xh, yh
|
|
mov xl, yl
|
|
|
|
.Ladd_return_nan_or_inf:
|
|
slli a7, xh, 12
|
|
or a7, a7, xl
|
|
bnez a7, .Ladd_return_nan
|
|
leaf_return
|
|
|
|
.Ladd_return_nan:
|
|
movi a4, 0x80000 /* make it a quiet NaN */
|
|
or xh, xh, a4
|
|
leaf_return
|
|
|
|
.Ladd_opposite_signs:
|
|
/* Operand signs differ. Do a subtraction. */
|
|
slli a7, a6, 11
|
|
xor yh, yh, a7
|
|
j .Lsub_same_sign
|
|
|
|
/* __adddf3: return x + y for IEEE-754 double-precision values.

   In:	 x in xh/xl, y in yh/yl.
   Out:	 result in xh/xl.

   Register roles inside the function:
     a6	  0x7ff00000, the exponent-field mask
     a7	  sign+exponent field (bits 31..20) of x
     a8	  sign+exponent field of y; in .Ladd_carry, the original
	  sign+exponent of the result
     a9	  bits shifted out of the smaller operand, used for rounding
     a10  exponent difference, then scratch
     a11  scratch

   Results are rounded to nearest, with ties going to the even value.
   NaN/Infinity operands and operands of opposite sign are dispatched to
   the handlers placed before this function.  */

	.align	4
	.global	__adddf3
	.type	__adddf3, @function
__adddf3:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000

	/* Check if the two operands have the same sign.  */
	xor	a7, xh, yh
	bltz	a7, .Ladd_opposite_signs

.Ladd_same_sign:
	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
	ball	xh, a6, .Ladd_xnan_or_inf
	ball	yh, a6, .Ladd_ynan_or_inf

	/* Compare the exponents.  The smaller operand will be shifted
	   right by the exponent difference and added to the larger
	   one.  (The 12-bit fields extracted here include the sign bit,
	   which is the same for both operands on this path.)  */
	extui	a7, xh, 20, 12
	extui	a8, yh, 20, 12
	bltu	a7, a8, .Ladd_shiftx

.Ladd_shifty:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	yh, a6, .Ladd_yexpzero

	/* Replace yh sign/exponent with 0x001.  */
	or	yh, yh, a6
	slli	yh, yh, 11
	srli	yh, yh, 11

.Ladd_yexpdiff:
	/* Compute the exponent difference.  Optimize for difference < 32.  */
	sub	a10, a7, a8
	bgeui	a10, 32, .Ladd_bigshifty

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out of yl are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, yl, a9
	src	yl, yh, yl
	srl	yh, yh

.Ladd_addy:
	/* Do the 64-bit addition.  */
	add	xl, xl, yl
	add	xh, xh, yh
	bgeu	xl, yl, 1f		/* no carry out of the low word */
	addi	xh, xh, 1
1:
	/* Check if the add overflowed into the exponent.  */
	extui	a10, xh, 20, 12
	beq	a10, a7, .Ladd_round
	mov	a8, a7			/* .Ladd_carry expects the exponent in a8 */
	j	.Ladd_carry

.Ladd_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0", and increment the apparent exponent
	   because subnormals behave as if they had the minimum (nonzero)
	   exponent.  Test for the case when both exponents are zero.  */
	slli	yh, yh, 12
	srli	yh, yh, 12
	bnone	xh, a6, .Ladd_bothexpzero
	addi	a8, a8, 1
	j	.Ladd_yexpdiff

.Ladd_bothexpzero:
	/* Both exponents are zero.  Handle this as a special case.  There
	   is no need to shift or round, and the normal code for handling
	   a carry into the exponent field will not work because it
	   assumes there is an implicit "1.0" that needs to be added.  */
	add	xl, xl, yl
	add	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, 1
1:	leaf_return

.Ladd_bigshifty:
	/* Exponent difference >= 64 -- just return the bigger value
	   (x, which has not been modified on this path).  */
	bgeui	a10, 64, 1b

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out are saved in a9 for rounding the result.  */
	ssr	a10
	sll	a11, yl		/* lost bits shifted out of yl */
	src	a9, yh, yl
	srl	yl, yh
	movi	yh, 0
	beqz	a11, .Ladd_addy
	or	a9, a9, a10	/* any positive, nonzero value will work */
	j	.Ladd_addy

.Ladd_xexpzero:
	/* Same as "yexpzero" except skip handling the case when both
	   exponents are zero.  */
	slli	xh, xh, 12
	srli	xh, xh, 12
	addi	a7, a7, 1
	j	.Ladd_xexpdiff

.Ladd_shiftx:
	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
	   because the exponent difference is always nonzero in this version,
	   the shift sequence can use SLL and skip loading a constant zero.  */
	bnone	xh, a6, .Ladd_xexpzero

	or	xh, xh, a6
	slli	xh, xh, 11
	srli	xh, xh, 11

.Ladd_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Ladd_bigshiftx

	ssr	a10
	sll	a9, xl
	src	xl, xh, xl
	srl	xh, xh

.Ladd_addx:
	add	xl, xl, yl
	add	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, 1
1:
	/* Check if the add overflowed into the exponent.  */
	extui	a10, xh, 20, 12
	bne	a10, a8, .Ladd_carry

.Ladd_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	xl, xl, 1
	beqz	xl, .Ladd_roundcarry

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_bigshiftx:
	/* Mostly the same thing as "bigshifty"....  */
	bgeui	a10, 64, .Ladd_returny

	ssr	a10
	sll	a11, xl
	src	a9, xh, xl
	srl	xl, xh
	movi	xh, 0
	beqz	a11, .Ladd_addx
	or	a9, a9, a10
	j	.Ladd_addx

.Ladd_returny:
	mov	xh, yh
	mov	xl, yl
	leaf_return

.Ladd_carry:
	/* The addition has overflowed into the exponent field, so the
	   value needs to be renormalized.  The mantissa of the result
	   can be recovered by subtracting the original exponent and
	   adding 0x100000 (which is the explicit "1.0" for the
	   mantissa of the non-shifted operand -- the "1.0" for the
	   shifted operand was already added).  The mantissa can then
	   be shifted right by one bit.  The explicit "1.0" of the
	   shifted mantissa then needs to be replaced by the exponent,
	   incremented by one to account for the normalizing shift.
	   It is faster to combine these operations: do the shift first
	   and combine the additions and subtractions.  If x is the
	   original exponent, the result is:
	       shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
	   or:
	       shifted mantissa + ((x + 1) << 19)
	   Note that the exponent is incremented here by leaving the
	   explicit "1.0" of the mantissa in the exponent field.  */

	/* Shift xh/xl right by one bit.  Save the lsb of xl.  */
	mov	a10, xl
	ssai	1
	src	xl, xh, xl
	srl	xh, xh

	/* See explanation above.  The original exponent is in a8.  */
	addi	a8, a8, 1
	slli	a8, a8, 19
	add	xh, xh, a8

	/* Return an Infinity if the exponent overflowed.  */
	ball	xh, a6, .Ladd_infinity

	/* Same thing as the "round" code except the msb of the leftover
	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
	bbci.l	a10, 0, 1f
	addi	xl, xl, 1
	beqz	xl, .Ladd_roundcarry
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_infinity:
	/* Clear the mantissa.  */
	movi	xl, 0
	srli	xh, xh, 20
	slli	xh, xh, 20

	/* The sign bit may have been lost in a carry-out.  Put it back.  */
	slli	a8, a8, 1
	or	xh, xh, a8
	leaf_return

.Ladd_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	leaf_return

.Ladd_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow to the exponent is OK.  */
	leaf_return
|
|
|
|
|
|
/* Subtraction */
__subdf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)

	   These handlers are only reached by branches from __subdf3 (or
	   from the addition code after it has flipped y's sign).  On
	   entry a6 holds 0x7ff00000, the exponent-field mask, x is in
	   xh/xl and y is in yh/yl.  The result is returned in xh/xl;
	   a7 and a4 are used as scratch.  */

.Lsub_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	yh, a6, .Lsub_return_nan_or_inf

.Lsub_return_nan:
	/* Both x and y are either NaN or Inf, so the result is NaN.  */
	movi	a4, 0x80000	/* make it a quiet NaN */
	or	xh, xh, a4
	leaf_return

.Lsub_ynan_or_inf:
	/* Negate y and return it.  */
	slli	a7, a6, 11		/* 0x7ff00000 << 11 == 0x80000000 */
	xor	xh, yh, a7
	mov	xl, yl

.Lsub_return_nan_or_inf:
	/* A nonzero mantissa means the value in xh/xl is a NaN.  */
	slli	a7, xh, 12
	or	a7, a7, xl
	bnez	a7, .Lsub_return_nan
	leaf_return

.Lsub_opposite_signs:
	/* Operand signs differ.  Do an addition.  */
	slli	a7, a6, 11
	xor	yh, yh, a7		/* flip the sign of y */
	j	.Ladd_same_sign
|
|
|
|
/* __subdf3: return x - y for IEEE-754 double-precision values.

   In:	 x in xh/xl, y in yh/yl.
   Out:	 result in xh/xl.

   Register roles inside the function:
     a6	  0x7ff00000, the exponent-field mask (reused as the
	  normalization shift count from .Lsub_borrow onwards)
     a7	  exponent field (bits 30..20) of x
     a8	  exponent field of y (reused as the high mantissa word during
	  normalization)
     a9	  bits shifted out of the smaller operand, used for rounding
     a10  exponent difference, then the exponent field of the raw result
     a11, a5  scratch

   Results are rounded to nearest, with ties going to the even value.
   NaN/Infinity operands and operands of opposite sign are dispatched to
   the handlers placed before this function.  */

	.align	4
	.global	__subdf3
	.type	__subdf3, @function
__subdf3:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000

	/* Check if the two operands have the same sign.  */
	xor	a7, xh, yh
	bltz	a7, .Lsub_opposite_signs

.Lsub_same_sign:
	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
	ball	xh, a6, .Lsub_xnan_or_inf
	ball	yh, a6, .Lsub_ynan_or_inf

	/* Compare the operands.  In contrast to addition, the entire
	   value matters here.  */
	extui	a7, xh, 20, 11
	extui	a8, yh, 20, 11
	bltu	xh, yh, .Lsub_xsmaller
	beq	xh, yh, .Lsub_compare_low

.Lsub_ysmaller:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	yh, a6, .Lsub_yexpzero

	/* Replace yh sign/exponent with 0x001.  */
	or	yh, yh, a6
	slli	yh, yh, 11
	srli	yh, yh, 11

.Lsub_yexpdiff:
	/* Compute the exponent difference.  Optimize for difference < 32.  */
	sub	a10, a7, a8
	bgeui	a10, 32, .Lsub_bigshifty

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out of yl are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, yl, a9
	src	yl, yh, yl
	srl	yh, yh

.Lsub_suby:
	/* Do the 64-bit subtraction.  */
	sub	xh, xh, yh
	bgeu	xl, yl, 1f		/* no borrow from the low word */
	addi	xh, xh, -1
1:	sub	xl, xl, yl

	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from xh/xl.  */
	neg	a9, a9
	beqz	a9, 1f
	addi	a5, xh, -1
	moveqz	xh, a5, xl		/* borrow from xh only if xl == 0 */
	addi	xl, xl, -1
1:
	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, xh, 20, 11
	beq	a10, a7, .Lsub_round
	j	.Lsub_borrow

.Lsub_compare_low:
	/* The high words are equal.  Compare the low words.  */
	bltu	xl, yl, .Lsub_xsmaller
	bltu	yl, xl, .Lsub_ysmaller
	/* The operands are equal.  Return 0.0.  */
	movi	xh, 0
	movi	xl, 0
1:	leaf_return

.Lsub_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
	   y's apparent exponent because subnormals behave as if they had
	   the minimum (nonzero) exponent.  */
	slli	yh, yh, 12
	srli	yh, yh, 12
	bnone	xh, a6, .Lsub_yexpdiff
	addi	a8, a8, 1
	j	.Lsub_yexpdiff

.Lsub_bigshifty:
	/* Exponent difference >= 64 -- just return the bigger value
	   (x, which has not been modified on this path).  */
	bgeui	a10, 64, 1b

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out are saved in a9 for rounding the result.  */
	ssr	a10
	sll	a11, yl		/* lost bits shifted out of yl */
	src	a9, yh, yl
	srl	yl, yh
	movi	yh, 0
	beqz	a11, .Lsub_suby
	or	a9, a9, a10	/* any positive, nonzero value will work */
	j	.Lsub_suby

.Lsub_xsmaller:
	/* Same thing as the "ysmaller" code, but with x and y swapped and
	   with y negated.  */
	bnone	xh, a6, .Lsub_xexpzero

	or	xh, xh, a6
	slli	xh, xh, 11
	srli	xh, xh, 11

.Lsub_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Lsub_bigshiftx

	ssr	a10
	movi	a9, 0
	src	a9, xl, a9
	src	xl, xh, xl
	srl	xh, xh

	/* Negate y.  */
	slli	a11, a6, 11
	xor	yh, yh, a11

.Lsub_subx:
	sub	xl, yl, xl
	sub	xh, yh, xh
	bgeu	yl, xl, 1f
	addi	xh, xh, -1
1:
	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from xh/xl.  */
	neg	a9, a9
	beqz	a9, 1f
	addi	a5, xh, -1
	moveqz	xh, a5, xl
	addi	xl, xl, -1
1:
	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, xh, 20, 11
	bne	a10, a8, .Lsub_borrow

.Lsub_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	xl, xl, 1
	beqz	xl, .Lsub_roundcarry

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Lsub_exactlyhalf
1:	leaf_return

.Lsub_xexpzero:
	/* Same as "yexpzero".  */
	slli	xh, xh, 12
	srli	xh, xh, 12
	bnone	yh, a6, .Lsub_xexpdiff
	addi	a7, a7, 1
	j	.Lsub_xexpdiff

.Lsub_bigshiftx:
	/* Mostly the same thing as "bigshifty", but with the sign bit of the
	   shifted value set so that the subsequent subtraction flips the
	   sign of y.  */
	bgeui	a10, 64, .Lsub_returny

	ssr	a10
	sll	a11, xl
	src	a9, xh, xl
	srl	xl, xh
	slli	xh, a6, 11	/* set sign bit of xh */
	beqz	a11, .Lsub_subx
	or	a9, a9, a10
	j	.Lsub_subx

.Lsub_returny:
	/* Negate and return y.  */
	slli	a7, a6, 11
	xor	xh, yh, a7
	mov	xl, yl
	leaf_return

.Lsub_borrow:
	/* The subtraction has underflowed into the exponent field, so the
	   value needs to be renormalized.  Shift the mantissa left as
	   needed to remove any leading zeros and adjust the exponent
	   accordingly.  If the exponent is not large enough to remove
	   all the leading zeros, the result will be a subnormal value.  */

	slli	a8, xh, 12
	beqz	a8, .Lsub_xhzero
	do_nsau	a6, a8, a7, a11
	srli	a8, a8, 12
	bge	a6, a10, .Lsub_subnormal
	addi	a6, a6, 1

.Lsub_shift_lt32:
	/* Shift the mantissa (a8/xl/a9) left by a6.  */
	ssl	a6
	src	a8, a8, xl
	src	xl, xl, a9
	sll	a9, a9

	/* Combine the shifted mantissa with the sign and exponent,
	   decrementing the exponent by a6.  (The exponent has already
	   been decremented by one due to the borrow from the subtraction,
	   but adding the mantissa will increment the exponent by one.)  */
	srli	xh, xh, 20
	sub	xh, xh, a6
	slli	xh, xh, 20
	add	xh, xh, a8
	j	.Lsub_round

.Lsub_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	leaf_return

.Lsub_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow to the exponent is OK.  */
	leaf_return

.Lsub_xhzero:
	/* When normalizing the result, all the mantissa bits in the high
	   word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
	do_nsau	a6, xl, a7, a11
	addi	a6, a6, 21
	blt	a10, a6, .Lsub_subnormal

.Lsub_normalize_shift:
	bltui	a6, 32, .Lsub_shift_lt32

	/* Shift count is 32 or more: xl becomes the high mantissa word.  */
	ssl	a6
	src	a8, xl, a9
	sll	xl, a9
	movi	a9, 0

	srli	xh, xh, 20
	sub	xh, xh, a6
	slli	xh, xh, 20
	add	xh, xh, a8
	j	.Lsub_round

.Lsub_subnormal:
	/* The exponent is too small to shift away all the leading zeros.
	   Set a6 to the current exponent (which has already been
	   decremented by the borrow) so that the exponent of the result
	   will be zero.  Do not add 1 to a6 in this case, because: (1)
	   adding the mantissa will not increment the exponent, so there is
	   no need to subtract anything extra from the exponent to
	   compensate, and (2) the effective exponent of a subnormal is 1
	   not 0 so the shift amount must be 1 smaller than normal. */
	mov	a6, a10
	j	.Lsub_normalize_shift
|
|
|
#endif /* L_addsubdf3 */
|
|
|
|
#ifdef L_muldf3
|
|
|
|
/* Multiplication */

/* XCHAL_NO_MUL is defined when the configuration provides none of the
   MUL16, MUL32 or MAC16 options; the multiply code below then falls back
   to a software helper (.Lmul_mulsi3).  */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif
|
|
|
|
.literal_position
|
|
__muldf3_aux:
|
|
|
|
/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
|
|
(This code is placed before the start of the function just to
|
|
keep it in range of the limited branch displacements.) */
|
|
|
|
.Lmul_xexpzero:
|
|
/* Clear the sign bit of x. */
|
|
slli xh, xh, 1
|
|
srli xh, xh, 1
|
|
|
|
/* If x is zero, return zero. */
|
|
or a10, xh, xl
|
|
beqz a10, .Lmul_return_zero
|
|
|
|
/* Normalize x. Adjust the exponent in a8. */
|
|
beqz xh, .Lmul_xh_zero
|
|
do_nsau a10, xh, a11, a12
|
|
addi a10, a10, -11
|
|
ssl a10
|
|
src xh, xh, xl
|
|
sll xl, xl
|
|
movi a8, 1
|
|
sub a8, a8, a10
|
|
j .Lmul_xnormalized
|
|
.Lmul_xh_zero:
|
|
do_nsau a10, xl, a11, a12
|
|
addi a10, a10, -11
|
|
movi a8, -31
|
|
sub a8, a8, a10
|
|
ssl a10
|
|
bltz a10, .Lmul_xl_srl
|
|
sll xh, xl
|
|
movi xl, 0
|
|
j .Lmul_xnormalized
|
|
.Lmul_xl_srl:
|
|
srl xh, xl
|
|
sll xl, xl
|
|
j .Lmul_xnormalized
|
|
|
|
.Lmul_yexpzero:
|
|
/* Clear the sign bit of y. */
|
|
slli yh, yh, 1
|
|
srli yh, yh, 1
|
|
|
|
/* If y is zero, return zero. */
|
|
or a10, yh, yl
|
|
beqz a10, .Lmul_return_zero
|
|
|
|
/* Normalize y. Adjust the exponent in a9. */
|
|
beqz yh, .Lmul_yh_zero
|
|
do_nsau a10, yh, a11, a12
|
|
addi a10, a10, -11
|
|
ssl a10
|
|
src yh, yh, yl
|
|
sll yl, yl
|
|
movi a9, 1
|
|
sub a9, a9, a10
|
|
j .Lmul_ynormalized
|
|
.Lmul_yh_zero:
|
|
do_nsau a10, yl, a11, a12
|
|
addi a10, a10, -11
|
|
movi a9, -31
|
|
sub a9, a9, a10
|
|
ssl a10
|
|
bltz a10, .Lmul_yl_srl
|
|
sll yh, yl
|
|
movi yl, 0
|
|
j .Lmul_ynormalized
|
|
.Lmul_yl_srl:
|
|
srl yh, yl
|
|
sll yl, yl
|
|
j .Lmul_ynormalized
|
|
|
|
.Lmul_return_zero:
|
|
/* Return zero with the appropriate sign bit. */
|
|
srli xh, a7, 31
|
|
slli xh, xh, 31
|
|
movi xl, 0
|
|
j .Lmul_done
|
|
|
|
.Lmul_xnan_or_inf:
|
|
/* If y is zero, return NaN. */
|
|
bnez yl, 1f
|
|
slli a8, yh, 1
|
|
beqz a8, .Lmul_return_nan
|
|
1:
|
|
/* If y is NaN, return y. */
|
|
bnall yh, a6, .Lmul_returnx
|
|
slli a8, yh, 12
|
|
or a8, a8, yl
|
|
beqz a8, .Lmul_returnx
|
|
|
|
.Lmul_returny:
|
|
mov xh, yh
|
|
mov xl, yl
|
|
|
|
.Lmul_returnx:
|
|
slli a8, xh, 12
|
|
or a8, a8, xl
|
|
bnez a8, .Lmul_return_nan
|
|
/* Set the sign bit and return. */
|
|
extui a7, a7, 31, 1
|
|
slli xh, xh, 1
|
|
ssai 1
|
|
src xh, a7, xh
|
|
j .Lmul_done
|
|
|
|
.Lmul_ynan_or_inf:
|
|
/* If x is zero, return NaN. */
|
|
bnez xl, .Lmul_returny
|
|
slli a8, xh, 1
|
|
bnez a8, .Lmul_returny
|
|
mov xh, yh
|
|
|
|
.Lmul_return_nan:
|
|
movi a4, 0x80000 /* make it a quiet NaN */
|
|
or xh, xh, a4
|
|
j .Lmul_done
|
|
|
|
/* __muldf3: return x * y for IEEE-754 double-precision values.

   In:	 x in xh/xl, y in yh/yl.
   Out:	 result in xh/xl.

   Register roles inside the function:
     a6	  0x7ff00000 on entry; later the third (least significant kept)
	  word of the product, used for rounding
     a7	  xh ^ yh; bit 31 is the sign of the result
     a8	  exponent of x, then the sum of the exponents
     a9	  exponent of y, then scratch / carry counter
     a10, a11  scratch
     a12-a15   scratch (16-bit halves of the inputs when MUL16/MUL32
	       is used; helper arguments when there is no multiplier)

   Stack use: with the CALL0 ABI a 32-byte frame is allocated and
   a12-a15 are saved at offsets 16..28 and restored at .Lmul_done.  When
   MUL32_HIGH is not available a7 is spilled at offset 4, and with CALL0
   and no multiply hardware a0 and a8 are also spilled at offsets 0 and 8
   around the helper calls.

   Results are rounded to nearest, with ties going to the even value.
   Zero, subnormal, NaN and Infinity operands are dispatched to the
   handlers placed before this function.  */

	.align	4
	.global	__muldf3
	.type	__muldf3, @function
__muldf3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 64
#else
	leaf_entry sp, 32
#endif
	movi	a6, 0x7ff00000

	/* Get the sign of the result.  */
	xor	a7, xh, yh

	/* Check for NaN and infinity.  */
	ball	xh, a6, .Lmul_xnan_or_inf
	ball	yh, a6, .Lmul_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, xh, 20, 11
	extui	a9, yh, 20, 11

	beqz	a8, .Lmul_xexpzero
.Lmul_xnormalized:
	beqz	a9, .Lmul_yexpzero
.Lmul_ynormalized:

	/* Add the exponents.  */
	add	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0x1fffff
	or	xh, xh, a6
	and	xh, xh, a10
	or	yh, yh, a6
	and	yh, yh, a10

	/* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
	   The least-significant word of the result is thrown away except
	   that if it is nonzero, the lsb of a6 is set to 1.  */
#if XCHAL_HAVE_MUL32_HIGH

	/* Compute a6 with any carry-outs in a10.  */
	movi	a10, 0
	mull	a6, xl, yh
	mull	a11, xh, yl
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a10, a10, 1
1:
	muluh	a11, xl, yl
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a10, a10, 1
1:
	/* If the low word of the result is nonzero, set the lsb of a6.  */
	mull	a11, xl, yl
	beqz	a11, 1f
	movi	a9, 1
	or	a6, a6, a9
1:
	/* Compute xl with any carry-outs in a9.  */
	movi	a9, 0
	mull	a11, xh, yh
	add	a10, a10, a11
	bgeu	a10, a11, 1f
	addi	a9, a9, 1
1:
	muluh	a11, xh, yl
	add	a10, a10, a11
	bgeu	a10, a11, 1f
	addi	a9, a9, 1
1:
	muluh	xl, xl, yh
	add	xl, xl, a10
	bgeu	xl, a10, 1f
	addi	a9, a9, 1
1:
	/* Compute xh.  */
	muluh	xh, xh, yh
	add	xh, xh, a9

#else /* ! XCHAL_HAVE_MUL32_HIGH */

	/* Break the inputs into 16-bit chunks and compute 16 32-bit partial
	   products.  These partial products are:

		0 xll * yll

		1 xll * ylh
		2 xlh * yll

		3 xll * yhl
		4 xlh * ylh
		5 xhl * yll

		6 xll * yhh
		7 xlh * yhl
		8 xhl * ylh
		9 xhh * yll

		10 xlh * yhh
		11 xhl * yhl
		12 xhh * ylh

		13 xhl * yhh
		14 xhh * yhl

		15 xhh * yhh

	   where the input chunks are (hh, hl, lh, ll).  If using the Mul16
	   or Mul32 multiplier options, these input chunks must be stored in
	   separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
	   that the inputs come from either half of the registers, so there
	   is no need to shift them out ahead of time.  If there is no
	   multiply hardware, the 16-bit chunks can be extracted when setting
	   up the arguments to the separate multiply function.  */

	/* Save a7 since it is needed to hold a temporary value.  */
	s32i	a7, sp, 4
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Calling a separate multiply function will clobber a0 and requires
	   use of a8 as a temporary, so save those values now.  (The function
	   uses a custom ABI so nothing else needs to be saved.)  */
	s32i	a0, sp, 0
	s32i	a8, sp, 8
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define xlh a12
#define ylh a13
#define xhh a14
#define yhh a15

	/* Get the high halves of the inputs into registers.  */
	srli	xlh, xl, 16
	srli	ylh, yl, 16
	srli	xhh, xh, 16
	srli	yhh, yh, 16

#define xll xl
#define yll yl
#define xhl xh
#define yhl yh

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	xl, xl, 0, 16
	extui	xh, xh, 0, 16
	extui	yl, yl, 0, 16
	extui	yh, yh, 0, 16
#endif
#endif /* MUL16 || MUL32 */


	/* do_mul(dst, xreg, xhalf, yreg, yhalf) computes the 32-bit product
	   of the selected 16-bit halves ("l" or "h") of xreg and yreg into
	   dst, using whichever multiplier the configuration provides.  */
#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a10 with carry-out in a9.  */
	do_mul(a10, xl, l, yl, h)	/* pp 1 */
	do_mul(a11, xl, h, yl, l)	/* pp 2 */
	movi	a9, 0
	add	a10, a10, a11
	bgeu	a10, a11, 1f
	addi	a9, a9, 1
1:
	/* Initialize a6 with a9/a10 shifted into position.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a6, a9, a10

	/* Compute the low word into a10.  */
	do_mul(a11, xl, l, yl, l)	/* pp 0 */
	sll	a10, a10
	add	a10, a10, a11
	bgeu	a10, a11, 1f
	addi	a6, a6, 1
1:
	/* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
	   This is good enough to determine the low half of a6, so that any
	   nonzero bits from the low word of the result can be collapsed
	   into a6, freeing up a register.  */
	movi	a9, 0
	do_mul(a11, xl, l, yh, l)	/* pp 3 */
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	do_mul(a11, xl, h, yl, h)	/* pp 4 */
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	do_mul(a11, xh, l, yl, l)	/* pp 5 */
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Collapse any nonzero bits from the low word into a6.  */
	beqz	a10, 1f
	movi	a11, 1
	or	a6, a6, a11
1:
	/* Add pp6-9 into a11 with carry-outs in a10.  */
	do_mul(a7, xl, l, yh, h)	/* pp 6 */
	do_mul(a11, xh, h, yl, l)	/* pp 9 */
	movi	a10, 0
	add	a11, a11, a7
	bgeu	a11, a7, 1f
	addi	a10, a10, 1
1:
	do_mul(a7, xl, h, yh, l)	/* pp 7 */
	add	a11, a11, a7
	bgeu	a11, a7, 1f
	addi	a10, a10, 1
1:
	do_mul(a7, xh, l, yl, h)	/* pp 8 */
	add	a11, a11, a7
	bgeu	a11, a7, 1f
	addi	a10, a10, 1
1:
	/* Shift a10/a11 into position, and add low half of a11 to a6.  */
	src	a10, a10, a11
	add	a10, a10, a9
	sll	a11, a11
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a10, a10, 1
1:
	/* Add pp10-12 into xl with carry-outs in a9.  */
	movi	a9, 0
	do_mul(xl, xl, h, yh, h)	/* pp 10 */
	add	xl, xl, a10
	bgeu	xl, a10, 1f
	addi	a9, a9, 1
1:
	do_mul(a10, xh, l, yh, l)	/* pp 11 */
	add	xl, xl, a10
	bgeu	xl, a10, 1f
	addi	a9, a9, 1
1:
	do_mul(a10, xh, h, yl, h)	/* pp 12 */
	add	xl, xl, a10
	bgeu	xl, a10, 1f
	addi	a9, a9, 1
1:
	/* Add pp13-14 into a11 with carry-outs in a10.  */
	do_mul(a11, xh, l, yh, h)	/* pp 13 */
	do_mul(a7, xh, h, yh, l)	/* pp 14 */
	movi	a10, 0
	add	a11, a11, a7
	bgeu	a11, a7, 1f
	addi	a10, a10, 1
1:
	/* Shift a10/a11 into position, and add low half of a11 to xl.  */
	src	a10, a10, a11
	add	a10, a10, a9
	sll	a11, a11
	add	xl, xl, a11
	bgeu	xl, a11, 1f
	addi	a10, a10, 1
1:
	/* Compute xh.  */
	do_mul(xh, xh, h, yh, h)	/* pp 15 */
	add	xh, xh, a10

	/* Restore values saved on the stack during the multiplication.  */
	l32i	a7, sp, 4
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	l32i	a0, sp, 0
	l32i	a8, sp, 8
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

	/* Shift left by 12 bits, unless there was a carry-out from the
	   multiply, in which case, shift by 11 bits and increment the
	   exponent.  Note: It is convenient to use the constant 0x3ff
	   instead of 0x400 when removing the extra exponent bias (so that
	   it is easy to construct 0x7fe for the overflow check).  Reverse
	   the logic here to decrement the exponent sum by one unless there
	   was a carry-out.  */
	movi	a4, 11
	srli	a5, xh, 21 - 12
	bnez	a5, 1f
	addi	a4, a4, 1
	addi	a8, a8, -1
1:	ssl	a4
	src	xh, xh, xl
	src	xl, xl, a6
	sll	a6, a6

	/* Subtract the extra bias from the exponent sum (plus one to account
	   for the explicit "1.0" of the mantissa that will be added to the
	   exponent in the final result).  */
	movi	a4, 0x3ff
	sub	a8, a8, a4

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..7fd are OK here.  */
	slli	a4, a4, 1	/* 0x7fe */
	bgeu	a8, a4, .Lmul_overflow

.Lmul_round:
	/* Round.  */
	bgez	a6, .Lmul_rounded
	addi	xl, xl, 1
	beqz	xl, .Lmul_roundcarry
	slli	a6, a6, 1
	beqz	a6, .Lmul_exactlyhalf

.Lmul_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 20
	add	xh, xh, a8

.Lmul_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	xh, xh, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

.Lmul_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	j	.Lmul_rounded

.Lmul_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow is OK -- it will be added to the exponent.  */
	j	.Lmul_rounded

.Lmul_overflow:
	/* The unsigned range check above also catches negative values.  */
	bltz	a8, .Lmul_underflow
	/* Return +/- Infinity.  */
	addi	a8, a4, 1	/* 0x7ff */
	slli	xh, a8, 20
	movi	xl, 0
	j	.Lmul_addsign

.Lmul_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	mov	a9, a6
	ssr	a8
	bgeui	a8, 32, .Lmul_bigshift

	/* Shift xh/xl right.  Any bits that are shifted out of xl are saved
	   in a6 (combined with the shifted-out bits currently in a6) for
	   rounding the result.  */
	sll	a6, xl
	src	xl, xh, xl
	srl	xh, xh
	j	1f

.Lmul_bigshift:
	bgeui	a8, 64, .Lmul_flush_to_zero
	sll	a10, xl		/* lost bits shifted out of xl */
	src	a6, xh, xl
	srl	xl, xh
	movi	xh, 0
	or	a9, a9, a10

	/* Set the exponent to zero.  */
1:	movi	a8, 0

	/* Pack any nonzero bits shifted out into a6.  */
	beqz	a9, .Lmul_round
	movi	a9, 1
	or	a6, a6, a9
	j	.Lmul_round

.Lmul_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	xh, a7, 31
	slli	xh, xh, 31
	movi	xl, 0
	j	.Lmul_done
|
|
|
|
#if XCHAL_NO_MUL
|
|
|
|
/* For Xtensa processors with no multiply hardware, this simplified
|
|
version of _mulsi3 is used for multiplying 16-bit chunks of
|
|
the floating-point mantissas. When using CALL0, this function
|
|
uses a custom ABI: the inputs are passed in a13 and a14, the
|
|
result is returned in a12, and a8 and a15 are clobbered. */
/* In the non-CALL0 build the inputs are a2 and a3 and the result is
   written to a2; a4, a5 and a6 are used as scratch.  */
|
|
.align 4
|
|
.Lmul_mulsi3:
|
|
leaf_entry sp, 16
|
|
/* mul_mulsi3_body: shift-and-add multiply that consumes four bits of
   \src1 per iteration.  \dst accumulates the product; \src1 is shifted
   right and \src2 shifted left by 4 each pass, so both inputs are
   destroyed.  The loop ends when no set bits remain in \src1.
   NOTE(review): do_addx2/4/8 are macros defined outside this chunk;
   presumably they compute (\src2 << 1/2/3) + \dst into \tmp1 -- confirm.  */
.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
|
|
movi \dst, 0
|
|
/* Bit 0: candidate = dst + src2, committed only if the bit is set.  */
1: add \tmp1, \src2, \dst
|
|
extui \tmp2, \src1, 0, 1
|
|
movnez \dst, \tmp1, \tmp2
|
|
|
|
/* Bit 1.  */
do_addx2 \tmp1, \src2, \dst, \tmp1
|
|
extui \tmp2, \src1, 1, 1
|
|
movnez \dst, \tmp1, \tmp2
|
|
|
|
/* Bit 2.  */
do_addx4 \tmp1, \src2, \dst, \tmp1
|
|
extui \tmp2, \src1, 2, 1
|
|
movnez \dst, \tmp1, \tmp2
|
|
|
|
/* Bit 3.  */
do_addx8 \tmp1, \src2, \dst, \tmp1
|
|
extui \tmp2, \src1, 3, 1
|
|
movnez \dst, \tmp1, \tmp2
|
|
|
|
/* Advance to the next four multiplier bits.  */
srli \src1, \src1, 4
|
|
slli \src2, \src2, 4
|
|
bnez \src1, 1b
|
|
.endm
|
|
#if __XTENSA_CALL0_ABI__
|
|
mul_mulsi3_body a12, a13, a14, a15, a8
|
|
#else
|
|
/* The result will be written into a2, so save that argument in a4. */
|
|
mov a4, a2
|
|
mul_mulsi3_body a2, a4, a3, a5, a6
|
|
#endif
|
|
leaf_return
|
|
#endif /* XCHAL_NO_MUL */
|
|
#endif /* L_muldf3 */
|
|
|
|
#ifdef L_divdf3
|
|
|
|
/* Division */
|
|
|
|
#if XCHAL_HAVE_DFP_DIV
|
|
|
|
/* __divdf3, variant selected when XCHAL_HAVE_DFP_DIV is set.
   Input:  x in xh/xl, y in yh/yl.  Output: result in xh/xl.
   The operands are moved into FP registers f1 (x) and f2 (y), a fixed
   straight-line sequence of double-precision helper instructions is
   run, and the result in f0 is moved back to xh/xl.
   NOTE(review): the exact semantics of div0.d, nexp01.d, mkdadj.d,
   maddn.d, addexp.d, addexpm.d and divn.d are defined by the Xtensa
   ISA and cannot be derived from this file; the sequence presumably
   implements a seed + iterative-refinement divide -- confirm against
   the ISA manual before changing any instruction or its order.  */
.text
|
|
.align 4
|
|
.global __divdf3
|
|
.type __divdf3, @function
|
|
__divdf3:
|
|
leaf_entry sp, 16
|
|
|
|
/* f1 = x, f2 = y.  */
wfrd f1, xh, xl
|
|
wfrd f2, yh, yl
|
|
|
|
div0.d f3, f2
|
|
nexp01.d f4, f2
|
|
const.d f0, 1
|
|
maddn.d f0, f4, f3
|
|
const.d f5, 0
|
|
mov.d f7, f2
|
|
mkdadj.d f7, f1
|
|
maddn.d f3, f0, f3
|
|
maddn.d f5, f0, f0
|
|
nexp01.d f1, f1
|
|
div0.d f2, f2
|
|
maddn.d f3, f5, f3
|
|
const.d f5, 1
|
|
const.d f0, 0
|
|
neg.d f6, f1
|
|
maddn.d f5, f4, f3
|
|
maddn.d f0, f6, f2
|
|
maddn.d f3, f5, f3
|
|
maddn.d f6, f4, f0
|
|
const.d f2, 1
|
|
maddn.d f2, f4, f3
|
|
maddn.d f0, f6, f3
|
|
neg.d f1, f1
|
|
maddn.d f3, f2, f3
|
|
maddn.d f1, f4, f0
|
|
addexpm.d f0, f7
|
|
addexp.d f3, f7
|
|
divn.d f0, f1, f3
|
|
|
|
/* Move the result in f0 back to the integer return registers.  */
rfr xl, f0
|
|
rfrd xh, f0
|
|
|
|
leaf_return
|
|
|
|
#else
|
|
|
|
.literal_position
|
|
|
|
__divdf3_aux:
|
|
|
|
/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
|
|
(This code is placed before the start of the function just to
|
|
keep it in range of the limited branch displacements.) */
|
|
|
|
/* .Ldiv_yexpzero: reached from __divdf3 when the exponent field of y
   (a9) is zero, i.e. y is zero or subnormal.  A zero y is dispatched
   to .Ldiv_yzero; otherwise y's mantissa is shifted left until it is
   normalized, a9 is set to the matching effective exponent, and
   control returns to .Ldiv_ynormalized.  Uses a10 and a11 as scratch.
   NOTE(review): do_nsau is a macro defined outside this chunk;
   presumably it returns in a10 the count of leading zero bits of its
   source operand -- confirm.  */
.Ldiv_yexpzero:
|
|
/* Clear the sign bit of y. */
|
|
slli yh, yh, 1
|
|
srli yh, yh, 1
|
|
|
|
/* Check for division by zero. */
|
|
or a10, yh, yl
|
|
beqz a10, .Ldiv_yzero
|
|
|
|
/* Normalize y. Adjust the exponent in a9. */
|
|
beqz yh, .Ldiv_yh_zero
|
|
do_nsau a10, yh, a11, a9
|
|
addi a10, a10, -11
|
|
ssl a10
|
|
src yh, yh, yl
|
|
sll yl, yl
|
|
/* a9 = 1 - shift count.  */
movi a9, 1
|
|
sub a9, a9, a10
|
|
j .Ldiv_ynormalized
|
|
/* The high word is zero: all significant bits are in yl.  */
.Ldiv_yh_zero:
|
|
do_nsau a10, yl, a11, a9
|
|
addi a10, a10, -11
|
|
/* a9 = -31 - a10.  */
movi a9, -31
|
|
sub a9, a9, a10
|
|
ssl a10
|
|
/* A negative a10 means the bits moving into yh must be shifted right
   rather than left.  */
bltz a10, .Ldiv_yl_srl
|
|
sll yh, yl
|
|
movi yl, 0
|
|
j .Ldiv_ynormalized
|
|
.Ldiv_yl_srl:
|
|
srl yh, yl
|
|
sll yl, yl
|
|
j .Ldiv_ynormalized
|
|
|
|
/* .Ldiv_yzero: division by zero.  On entry a6 = 0x7ff00000 and bit 31
   of a7 holds the sign of the result (both set up in __divdf3).
   Returns sign | Infinity, or sign | quiet NaN when x is also zero.  */
.Ldiv_yzero:
|
|
/* y is zero. Return NaN if x is also zero; otherwise, infinity. */
|
|
/* xl = |x| folded into one word; it is nonzero iff x is nonzero.  */
slli xh, xh, 1
|
|
srli xh, xh, 1
|
|
or xl, xl, xh
|
|
/* xh = sign bit | all-ones exponent.  */
srli xh, a7, 31
|
|
slli xh, xh, 31
|
|
or xh, xh, a6
|
|
bnez xl, 1f
|
|
movi a4, 0x80000 /* make it a quiet NaN */
|
|
or xh, xh, a4
|
|
1: movi xl, 0
|
|
leaf_return
|
|
|
|
/* .Ldiv_xexpzero: reached from __divdf3 when the exponent field of x
   (a8) is zero, i.e. x is zero or subnormal.  A zero x returns a
   signed zero via .Ldiv_return_zero; otherwise x's mantissa is
   normalized, a8 is set to the matching effective exponent, and
   control returns to .Ldiv_xnormalized.  Mirrors .Ldiv_yexpzero with
   x/a8 in place of y/a9.  Uses a10 and a11 as scratch.
   NOTE(review): do_nsau is a macro defined outside this chunk;
   presumably it yields a leading-zero count in a10 -- confirm.  */
.Ldiv_xexpzero:
|
|
/* Clear the sign bit of x. */
|
|
slli xh, xh, 1
|
|
srli xh, xh, 1
|
|
|
|
/* If x is zero, return zero. */
|
|
or a10, xh, xl
|
|
beqz a10, .Ldiv_return_zero
|
|
|
|
/* Normalize x. Adjust the exponent in a8. */
|
|
beqz xh, .Ldiv_xh_zero
|
|
do_nsau a10, xh, a11, a8
|
|
addi a10, a10, -11
|
|
ssl a10
|
|
src xh, xh, xl
|
|
sll xl, xl
|
|
/* a8 = 1 - shift count.  */
movi a8, 1
|
|
sub a8, a8, a10
|
|
j .Ldiv_xnormalized
|
|
/* The high word is zero: all significant bits are in xl.  */
.Ldiv_xh_zero:
|
|
do_nsau a10, xl, a11, a8
|
|
addi a10, a10, -11
|
|
/* a8 = -31 - a10.  */
movi a8, -31
|
|
sub a8, a8, a10
|
|
ssl a10
|
|
/* A negative a10 means the bits moving into xh must be shifted right
   rather than left.  */
bltz a10, .Ldiv_xl_srl
|
|
sll xh, xl
|
|
movi xl, 0
|
|
j .Ldiv_xnormalized
|
|
.Ldiv_xl_srl:
|
|
srl xh, xl
|
|
sll xl, xl
|
|
j .Ldiv_xnormalized
|
|
|
|
/* .Ldiv_return_zero: result is +/-0.  Only bit 31 of a7 (the result
   sign computed in __divdf3) is kept.  */
.Ldiv_return_zero:
|
|
/* Return zero with the appropriate sign bit. */
|
|
srli xh, a7, 31
|
|
slli xh, xh, 31
|
|
movi xl, 0
|
|
leaf_return
|
|
|
|
/* .Ldiv_xnan_or_inf: x has an all-ones exponent (a6 = 0x7ff00000).
   The sign of y is folded into x.  If y also has an all-ones exponent,
   or x has a nonzero mantissa (x is a NaN), the result is x made into
   a quiet NaN; otherwise x is Infinity and is returned as is with the
   combined sign.  Clobbers a7 and a8.  */
.Ldiv_xnan_or_inf:
|
|
/* Set the sign bit of the result. */
|
|
srli a7, yh, 31
|
|
slli a7, a7, 31
|
|
xor xh, xh, a7
|
|
/* If y is NaN or Inf, return NaN. */
|
|
ball yh, a6, .Ldiv_return_nan
|
|
/* The shift drops sign and exponent, so a8 is nonzero iff x's
   mantissa is nonzero (x is a NaN).  */
slli a8, xh, 12
|
|
or a8, a8, xl
|
|
bnez a8, .Ldiv_return_nan
|
|
leaf_return
|
|
|
|
/* .Ldiv_ynan_or_inf: y has an all-ones exponent while x does not.
   x / Infinity yields a signed zero; a NaN y is copied to the result
   and falls through to .Ldiv_return_nan.  */
.Ldiv_ynan_or_inf:
|
|
/* If y is Infinity, return zero. */
|
|
slli a8, yh, 12
|
|
or a8, a8, yl
|
|
beqz a8, .Ldiv_return_zero
|
|
/* y is NaN; return it. */
|
|
mov xh, yh
|
|
mov xl, yl
|
|
|
|
/* .Ldiv_return_nan: set mantissa bit 0x80000 in xh and return xh/xl.
   Also entered from .Ldiv_xnan_or_inf.  Clobbers a4.  */
.Ldiv_return_nan:
|
|
movi a4, 0x80000 /* make it a quiet NaN */
|
|
or xh, xh, a4
|
|
leaf_return
|
|
|
|
/* .Ldiv_highequal1: out-of-line tie-break for the first-digit test in
   __divdf3 when xh == yh.  Compares the low words: if x < y go to
   label 2 (pre-shift x and decrement the exponent), otherwise to
   label 3 (start the first subtraction).  Both numeric labels live in
   __divdf3, which follows.  */
.Ldiv_highequal1:
|
|
bltu xl, yl, 2f
|
|
j 3f
|
|
|
|
/* __divdf3: software double-precision division, x / y.
   Input:  x in xh/xl, y in yh/yl.  Output: quotient in xh/xl.
   Register roles inside this function:
     a6      0x7ff00000 (exponent mask)
     a7      xh ^ yh; bit 31 is the sign of the result
     a8      exponent of x, then the result exponent
     a9      exponent of y, then loop counter / scratch
     a10/a11 quotient (high/low word)
     xh/xl   dividend mantissa, then the running remainder
     yh/yl   divisor mantissa
   Special operands (NaN, Infinity, zero, subnormal) branch to the
   .Ldiv_* helper sections placed before and after this body.  */
.align 4
|
|
.global __divdf3
|
|
.type __divdf3, @function
|
|
__divdf3:
|
|
leaf_entry sp, 16
|
|
movi a6, 0x7ff00000
|
|
|
|
/* Get the sign of the result. */
|
|
xor a7, xh, yh
|
|
|
|
/* Check for NaN and infinity. */
|
|
ball xh, a6, .Ldiv_xnan_or_inf
|
|
ball yh, a6, .Ldiv_ynan_or_inf
|
|
|
|
/* Extract the exponents. */
|
|
extui a8, xh, 20, 11
|
|
extui a9, yh, 20, 11
|
|
|
|
/* A zero exponent field means zero or subnormal; the helpers jump
   back to the *normalized labels once the operand is fixed up.  */
beqz a9, .Ldiv_yexpzero
|
|
.Ldiv_ynormalized:
|
|
beqz a8, .Ldiv_xexpzero
|
|
.Ldiv_xnormalized:
|
|
|
|
/* Subtract the exponents. */
|
|
sub a8, a8, a9
|
|
|
|
/* Replace sign/exponent fields with explicit "1.0". */
|
|
/* OR-ing in a6 sets bit 20; the 0x1fffff mask then clears bits 21..31.  */
movi a10, 0x1fffff
|
|
or xh, xh, a6
|
|
and xh, xh, a10
|
|
or yh, yh, a6
|
|
and yh, yh, a10
|
|
|
|
/* Set SAR for left shift by one. */
|
|
ssai (32 - 1)
|
|
|
|
/* The first digit of the mantissa division must be a one.
|
|
Shift x (and adjust the exponent) as needed to make this true. */
|
|
bltu yh, xh, 3f
|
|
beq yh, xh, .Ldiv_highequal1
|
|
2: src xh, xh, xl
|
|
sll xl, xl
|
|
addi a8, a8, -1
|
|
3:
|
|
/* Do the first subtraction and shift. */
|
|
sub xh, xh, yh
|
|
/* Borrow from the high word when xl < yl.  */
bgeu xl, yl, 1f
|
|
addi xh, xh, -1
|
|
1: sub xl, xl, yl
|
|
src xh, xh, xl
|
|
sll xl, xl
|
|
|
|
/* Put the quotient into a10/a11. */
|
|
movi a10, 0
|
|
movi a11, 1
|
|
|
|
/* Divide one bit at a time for 52 bits. */
|
|
movi a9, 52
|
|
#if XCHAL_HAVE_LOOPS
|
|
loop a9, .Ldiv_loopend
|
|
#endif
|
|
.Ldiv_loop:
|
|
/* Shift the quotient << 1. */
|
|
src a10, a10, a11
|
|
sll a11, a11
|
|
|
|
/* Is this digit a 0 or 1? */
|
|
bltu xh, yh, 3f
|
|
beq xh, yh, .Ldiv_highequal2
|
|
|
|
/* Output a 1 and subtract. */
|
|
2: addi a11, a11, 1
|
|
sub xh, xh, yh
|
|
bgeu xl, yl, 1f
|
|
addi xh, xh, -1
|
|
1: sub xl, xl, yl
|
|
|
|
/* Shift the dividend << 1. */
|
|
3: src xh, xh, xl
|
|
sll xl, xl
|
|
|
|
#if !XCHAL_HAVE_LOOPS
|
|
addi a9, a9, -1
|
|
bnez a9, .Ldiv_loop
|
|
#endif
|
|
.Ldiv_loopend:
|
|
|
|
/* Add the exponent bias (less one to account for the explicit "1.0"
|
|
of the mantissa that will be added to the exponent in the final
|
|
result). */
|
|
movi a9, 0x3fe
|
|
add a8, a8, a9
|
|
|
|
/* Check for over/underflow. The value in a8 is one less than the
|
|
final exponent, so values in the range 0..7fd are OK here. */
|
|
/* The unsigned compare also catches negative a8 (underflow).  */
addmi a9, a9, 0x400 /* 0x7fe */
|
|
bgeu a8, a9, .Ldiv_overflow
|
|
|
|
.Ldiv_round:
|
|
/* Round. The remainder (<< 1) is in xh/xl. */
|
|
bltu xh, yh, .Ldiv_rounded
|
|
beq xh, yh, .Ldiv_highequal3
|
|
.Ldiv_roundup:
|
|
addi a11, a11, 1
|
|
beqz a11, .Ldiv_roundcarry
|
|
|
|
.Ldiv_rounded:
|
|
mov xl, a11
|
|
/* Add the exponent to the mantissa. */
|
|
slli a8, a8, 20
|
|
add xh, a10, a8
|
|
|
|
.Ldiv_addsign:
|
|
/* Add the sign bit. */
|
|
srli a7, a7, 31
|
|
slli a7, a7, 31
|
|
or xh, xh, a7
|
|
leaf_return
|
|
|
|
/* .Ldiv_highequal2: out-of-line tie-break for the per-bit test in the
   __divdf3 divide loop when xh == yh.  If xl >= yl the digit is 1
   (label 2: subtract), otherwise it is 0 (label 3: shift only).  Both
   numeric labels are in the loop body that precedes this section.  */
.Ldiv_highequal2:
|
|
bgeu xl, yl, 2b
|
|
j 3b
|
|
|
|
/* .Ldiv_highequal3: rounding tie-break when the high words of the
   doubled remainder (xh/xl) and the divisor (yh/yl) are equal.
   Remainder < divisor: no rounding.  Remainder > divisor: round up.
   Exactly equal: round to even by incrementing a11 and then clearing
   its low bit.  */
.Ldiv_highequal3:
|
|
bltu xl, yl, .Ldiv_rounded
|
|
bne xl, yl, .Ldiv_roundup
|
|
|
|
/* Remainder is exactly half the divisor. Round even. */
|
|
addi a11, a11, 1
|
|
beqz a11, .Ldiv_roundcarry
|
|
srli a11, a11, 1
|
|
slli a11, a11, 1
|
|
j .Ldiv_rounded
|
|
|
|
/* .Ldiv_overflow: the biased exponent in a8 is outside 0..0x7fd.
   A negative a8 is an underflow and is handled by .Ldiv_underflow.
   Otherwise the result is Infinity; on entry a9 = 0x7fe (set in
   __divdf3), so a9 + 1 is the all-ones exponent.  The sign is added
   at .Ldiv_addsign.  */
.Ldiv_overflow:
|
|
bltz a8, .Ldiv_underflow
|
|
/* Return +/- Infinity. */
|
|
addi a8, a9, 1 /* 0x7ff */
|
|
slli xh, a8, 20
|
|
movi xl, 0
|
|
j .Ldiv_addsign
|
|
|
|
/* .Ldiv_underflow: build a subnormal result.  The quotient in a10/a11
   is shifted right by -a8 bits; bits shifted out are collected in a6
   (msb = the 1/2 bit) together with a sticky bit for any nonzero
   remainder, and the result is rounded to nearest-even before
   rejoining .Ldiv_rounded with a zero exponent.  Shifts of 64 or more
   flush to zero.  Clobbers a6, a8, a9, xh and xl.  */
.Ldiv_underflow:
|
|
/* Create a subnormal value, where the exponent field contains zero,
|
|
but the effective exponent is 1. The value of a8 is one less than
|
|
the actual exponent, so just negate it to get the shift amount. */
|
|
neg a8, a8
|
|
ssr a8
|
|
bgeui a8, 32, .Ldiv_bigshift
|
|
|
|
/* Shift a10/a11 right. Any bits that are shifted out of a11 are
|
|
saved in a6 for rounding the result. */
|
|
sll a6, a11
|
|
src a11, a10, a11
|
|
srl a10, a10
|
|
j 1f
|
|
|
|
/* Shift amounts of 32..63: the whole low word is shifted out.  */
.Ldiv_bigshift:
|
|
bgeui a8, 64, .Ldiv_flush_to_zero
|
|
sll a9, a11 /* lost bits shifted out of a11 */
|
|
src a6, a10, a11
|
|
srl a11, a10
|
|
movi a10, 0
|
|
/* Fold the lost bits into the remainder so they count as sticky.  */
or xl, xl, a9
|
|
|
|
/* Set the exponent to zero. */
|
|
1: movi a8, 0
|
|
|
|
/* Pack any nonzero remainder (in xh/xl) into a6. */
|
|
or xh, xh, xl
|
|
beqz xh, 1f
|
|
movi a9, 1
|
|
or a6, a6, a9
|
|
|
|
/* Round a10/a11 based on the bits shifted out into a6. */
|
|
/* msb of a6 clear: the discarded fraction is below 1/2.  */
1: bgez a6, .Ldiv_rounded
|
|
addi a11, a11, 1
|
|
beqz a11, .Ldiv_roundcarry
|
|
/* If nothing remains below the 1/2 bit this is an exact tie, so
   clear the low bit to round to even.  */
slli a6, a6, 1
|
|
bnez a6, .Ldiv_rounded
|
|
srli a11, a11, 1
|
|
slli a11, a11, 1
|
|
j .Ldiv_rounded
|
|
|
|
/* .Ldiv_roundcarry: the rounding increment wrapped a11 to zero, so
   propagate the carry into the high quotient word a10.  */
.Ldiv_roundcarry:
|
|
/* a11 is always zero when the rounding increment overflows, so
|
|
there's no need to round it to an even value. */
|
|
addi a10, a10, 1
|
|
/* Overflow to the exponent field is OK. */
|
|
j .Ldiv_rounded
|
|
|
|
/* .Ldiv_flush_to_zero: the subnormal shift count was 64 or more, so
   the result is +/-0 with the sign taken from bit 31 of a7.  */
.Ldiv_flush_to_zero:
|
|
/* Return zero with the appropriate sign bit. */
|
|
srli xh, a7, 31
|
|
slli xh, xh, 31
|
|
movi xl, 0
|
|
leaf_return
|
|
|
|
#endif /* XCHAL_HAVE_DFP_DIV */
|
|
|
|
#endif /* L_divdf3 */
|
|
|
|
#ifdef L_cmpdf2
|
|
|
|
/* Equal and Not Equal */
|
|
|
|
/* __eqdf2 / __nedf2 (aliases): compare x (xh/xl) with y (yh/yl).
   Returns 0 in a2 when x == y and 1 otherwise.  A NaN never compares
   equal, not even to itself; +0 and -0 compare equal.
   Clobbers a3, a6 and a7.  */
.align 4
|
|
.global __eqdf2
|
|
.global __nedf2
|
|
.set __nedf2, __eqdf2
|
|
.type __eqdf2, @function
|
|
__eqdf2:
|
|
leaf_entry sp, 16
|
|
bne xl, yl, 2f
|
|
bne xh, yh, 4f
|
|
|
|
/* The values are equal but NaN != NaN. Check the exponent. */
|
|
movi a6, 0x7ff00000
|
|
ball xh, a6, 3f
|
|
|
|
/* Equal. */
|
|
movi a2, 0
|
|
leaf_return
|
|
|
|
/* Not equal. */
|
|
2: movi a2, 1
|
|
leaf_return
|
|
|
|
/* Check if the mantissas are nonzero. */
|
|
3: slli a7, xh, 12
|
|
or a7, a7, xl
|
|
j 5f
|
|
|
|
/* Check if x and y are zero with different signs. */
|
|
4: or a7, xh, yh
|
|
slli a7, a7, 1
|
|
or a7, a7, xl /* xl == yl here */
|
|
|
|
/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
|
|
of x when exponent(x) = 0x7ff and x == y. */
|
|
5: movi a2, 0
|
|
movi a3, 1
|
|
movnez a2, a3, a7
|
|
leaf_return
|
|
|
|
|
|
/* Greater Than */
|
|
|
|
/* __gtdf2: x > y test.  If either operand is a NaN, returns 0 in a2.
   Otherwise continues at .Lle_cmp, which returns 0 when x <= y and 1
   when x > y.  Clobbers a6 and a7.  */
.align 4
|
|
.global __gtdf2
|
|
.type __gtdf2, @function
|
|
__gtdf2:
|
|
leaf_entry sp, 16
|
|
movi a6, 0x7ff00000
|
|
/* Only an all-ones exponent can be a NaN; skip the mantissa checks
   otherwise.  */
ball xh, a6, 2f
|
|
1: bnall yh, a6, .Lle_cmp
|
|
|
|
/* Check if y is a NaN. */
|
|
slli a7, yh, 12
|
|
or a7, a7, yl
|
|
beqz a7, .Lle_cmp
|
|
movi a2, 0
|
|
leaf_return
|
|
|
|
/* Check if x is a NaN. */
|
|
2: slli a7, xh, 12
|
|
or a7, a7, xl
|
|
beqz a7, 1b
|
|
movi a2, 0
|
|
leaf_return
|
|
|
|
|
|
/* Less Than or Equal */
|
|
|
|
/* __ledf2: x <= y test.  If either operand is a NaN, returns 1 in a2.
   Otherwise continues at .Lle_cmp, which returns 0 when x <= y and 1
   when x > y.  Clobbers a6 and a7.  */
.align 4
|
|
.global __ledf2
|
|
.type __ledf2, @function
|
|
__ledf2:
|
|
leaf_entry sp, 16
|
|
movi a6, 0x7ff00000
|
|
/* Only an all-ones exponent can be a NaN; skip the mantissa checks
   otherwise.  */
ball xh, a6, 2f
|
|
1: bnall yh, a6, .Lle_cmp
|
|
|
|
/* Check if y is a NaN. */
|
|
slli a7, yh, 12
|
|
or a7, a7, yl
|
|
beqz a7, .Lle_cmp
|
|
movi a2, 1
|
|
leaf_return
|
|
|
|
/* Check if x is a NaN. */
|
|
2: slli a7, xh, 12
|
|
or a7, a7, xl
|
|
beqz a7, 1b
|
|
movi a2, 1
|
|
leaf_return
|
|
|
|
/* .Lle_cmp: shared tail of __gtdf2 and __ledf2 for operands that are
   not NaNs.  Returns 0 in a2 when x <= y and 1 when x > y.
   Same-sign operands are compared as unsigned magnitudes, with the
   operand order swapped when both are negative.
   Clobbers a3 and a7.  */
.Lle_cmp:
|
|
/* Check if x and y have different signs. */
|
|
xor a7, xh, yh
|
|
bltz a7, .Lle_diff_signs
|
|
|
|
/* Check if x is negative. */
|
|
bltz xh, .Lle_xneg
|
|
|
|
/* Check if x <= y. */
|
|
bltu xh, yh, 4f
|
|
bne xh, yh, 5f
|
|
bltu yl, xl, 5f
|
|
4: movi a2, 0
|
|
leaf_return
|
|
|
|
.Lle_xneg:
|
|
/* Check if y <= x. */
|
|
bltu yh, xh, 4b
|
|
bne yh, xh, 5f
|
|
bgeu xl, yl, 4b
|
|
5: movi a2, 1
|
|
leaf_return
|
|
|
|
.Lle_diff_signs:
|
|
/* x negative and y non-negative: x <= y.  */
bltz xh, 4b
|
|
|
|
/* Check if both x and y are zero. */
|
|
/* Here x >= 0 > y by sign, so x > y unless both are zero (+0, -0).
   a7 is zero iff both operands are zero once the signs are shifted
   out.  */
or a7, xh, yh
|
|
slli a7, a7, 1
|
|
or a7, a7, xl
|
|
or a7, a7, yl
|
|
movi a2, 1
|
|
movi a3, 0
|
|
moveqz a2, a3, a7
|
|
leaf_return
|
|
|
|
|
|
/* Greater Than or Equal */
|
|
|
|
/* __gedf2: x >= y test.  If either operand is a NaN, returns -1 in
   a2.  Otherwise continues at .Llt_cmp, which returns -1 when x < y
   and 0 when x >= y.  Clobbers a6 and a7.  */
.align 4
|
|
.global __gedf2
|
|
.type __gedf2, @function
|
|
__gedf2:
|
|
leaf_entry sp, 16
|
|
movi a6, 0x7ff00000
|
|
/* Only an all-ones exponent can be a NaN; skip the mantissa checks
   otherwise.  */
ball xh, a6, 2f
|
|
1: bnall yh, a6, .Llt_cmp
|
|
|
|
/* Check if y is a NaN. */
|
|
slli a7, yh, 12
|
|
or a7, a7, yl
|
|
beqz a7, .Llt_cmp
|
|
movi a2, -1
|
|
leaf_return
|
|
|
|
/* Check if x is a NaN. */
|
|
2: slli a7, xh, 12
|
|
or a7, a7, xl
|
|
beqz a7, 1b
|
|
movi a2, -1
|
|
leaf_return
|
|
|
|
|
|
/* Less Than */
|
|
|
|
/* __ltdf2: x < y test.  If either operand is a NaN, returns 0 in a2.
   Otherwise continues at .Llt_cmp, which returns -1 when x < y and 0
   when x >= y.  Clobbers a6 and a7.  */
.align 4
|
|
.global __ltdf2
|
|
.type __ltdf2, @function
|
|
__ltdf2:
|
|
leaf_entry sp, 16
|
|
movi a6, 0x7ff00000
|
|
/* Only an all-ones exponent can be a NaN; skip the mantissa checks
   otherwise.  */
ball xh, a6, 2f
|
|
1: bnall yh, a6, .Llt_cmp
|
|
|
|
/* Check if y is a NaN. */
|
|
slli a7, yh, 12
|
|
or a7, a7, yl
|
|
beqz a7, .Llt_cmp
|
|
movi a2, 0
|
|
leaf_return
|
|
|
|
/* Check if x is a NaN. */
|
|
2: slli a7, xh, 12
|
|
or a7, a7, xl
|
|
beqz a7, 1b
|
|
movi a2, 0
|
|
leaf_return
|
|
|
|
/* .Llt_cmp: shared tail of __gedf2 and __ltdf2 for operands that are
   not NaNs.  Returns -1 in a2 when x < y and 0 when x >= y.
   Same-sign operands are compared as unsigned magnitudes, with the
   operand order swapped when both are negative.
   Clobbers a3 and a7.  */
.Llt_cmp:
|
|
/* Check if x and y have different signs. */
|
|
xor a7, xh, yh
|
|
bltz a7, .Llt_diff_signs
|
|
|
|
/* Check if x is negative. */
|
|
bltz xh, .Llt_xneg
|
|
|
|
/* Check if x < y. */
|
|
bltu xh, yh, 4f
|
|
bne xh, yh, 5f
|
|
bgeu xl, yl, 5f
|
|
4: movi a2, -1
|
|
leaf_return
|
|
|
|
.Llt_xneg:
|
|
/* Check if y < x. */
|
|
bltu yh, xh, 4b
|
|
bne yh, xh, 5f
|
|
bltu yl, xl, 4b
|
|
5: movi a2, 0
|
|
leaf_return
|
|
|
|
.Llt_diff_signs:
|
|
/* x non-negative and y negative: x >= y.  */
bgez xh, 5b
|
|
|
|
/* x < 0 <= y by sign, so x < y unless both x and y are zero. */
|
|
/* a7 is nonzero iff at least one operand is nonzero once the signs
   are shifted out.  */
or a7, xh, yh
|
|
slli a7, a7, 1
|
|
or a7, a7, xl
|
|
or a7, a7, yl
|
|
movi a2, 0
|
|
movi a3, -1
|
|
movnez a2, a3, a7
|
|
leaf_return
|
|
|
|
|
|
/* Unordered */
|
|
|
|
/* __unorddf2: returns 1 in a2 if x or y is a NaN (all-ones exponent
   with a nonzero mantissa), 0 otherwise.  Clobbers a6 and a7.  */
.align 4
|
|
.global __unorddf2
|
|
.type __unorddf2, @function
|
|
__unorddf2:
|
|
leaf_entry sp, 16
|
|
movi a6, 0x7ff00000
|
|
ball xh, a6, 3f
|
|
1: ball yh, a6, 4f
|
|
2: movi a2, 0
|
|
leaf_return
|
|
|
|
/* x has an all-ones exponent: NaN iff its mantissa is nonzero.  */
3: slli a7, xh, 12
|
|
or a7, a7, xl
|
|
beqz a7, 1b
|
|
movi a2, 1
|
|
leaf_return
|
|
|
|
/* y has an all-ones exponent: NaN iff its mantissa is nonzero.  */
4: slli a7, yh, 12
|
|
or a7, a7, yl
|
|
beqz a7, 2b
|
|
movi a2, 1
|
|
leaf_return
|
|
|
|
#endif /* L_cmpdf2 */
|
|
|
|
#ifdef L_fixdfsi
|
|
|
|
/* __fixdfsi: convert the double in xh/xl to a signed 32-bit integer
   in a2.  Fraction bits are discarded by the right shift.
   Magnitudes below 1 return 0.  Values whose exponent is too large,
   and Infinities, saturate to 0x7fffffff or 0x80000000 according to
   the sign; NaN returns 0x7fffffff.  Clobbers a4-a7.  */
.align 4
|
|
.global __fixdfsi
|
|
.type __fixdfsi, @function
|
|
__fixdfsi:
|
|
leaf_entry sp, 16
|
|
|
|
/* Check for NaN and Infinity. */
|
|
movi a6, 0x7ff00000
|
|
ball xh, a6, .Lfixdfsi_nan_or_inf
|
|
|
|
/* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
|
|
extui a4, xh, 20, 11
|
|
extui a5, a6, 19, 10 /* 0x3fe */
|
|
sub a4, a4, a5
|
|
bgei a4, 32, .Lfixdfsi_maxint
|
|
blti a4, 1, .Lfixdfsi_zero
|
|
|
|
/* Add explicit "1.0" and shift << 11. */
|
|
/* a7 keeps the sign of x in bit 31 for the final negate.  */
or a7, xh, a6
|
|
ssai (32 - 11)
|
|
src a5, a7, xl
|
|
|
|
/* Shift back to the right, based on the exponent. */
|
|
ssl a4 /* shift by 32 - a4 */
|
|
srl a5, a5
|
|
|
|
/* Negate the result if sign != 0. */
|
|
neg a2, a5
|
|
movgez a2, a5, a7
|
|
leaf_return
|
|
|
|
.Lfixdfsi_nan_or_inf:
|
|
/* Handle Infinity and NaN. */
|
|
slli a4, xh, 12
|
|
or a4, a4, xl
|
|
beqz a4, .Lfixdfsi_maxint
|
|
|
|
/* Translate NaN to +maxint. */
|
|
movi xh, 0
|
|
|
|
/* Saturate: pick 0x7fffffff when xh is non-negative, else 0x80000000.  */
.Lfixdfsi_maxint:
|
|
slli a4, a6, 11 /* 0x80000000 */
|
|
addi a5, a4, -1 /* 0x7fffffff */
|
|
movgez a4, a5, xh
|
|
mov a2, a4
|
|
leaf_return
|
|
|
|
.Lfixdfsi_zero:
|
|
movi a2, 0
|
|
leaf_return
|
|
|
|
#endif /* L_fixdfsi */
|
|
|
|
#ifdef L_fixdfdi
|
|
|
|
/* __fixdfdi: convert the double in xh/xl to a signed 64-bit integer
   returned in xh/xl.  Fraction bits are discarded by the right shift.
   Magnitudes below 1 return 0.  Values whose exponent is too large,
   and Infinities, saturate to 0x7fffffffffffffff or
   0x8000000000000000 according to the sign; NaN returns the positive
   maximum.  Clobbers a4-a7.  */
.align 4
|
|
.global __fixdfdi
|
|
.type __fixdfdi, @function
|
|
__fixdfdi:
|
|
leaf_entry sp, 16
|
|
|
|
/* Check for NaN and Infinity. */
|
|
movi a6, 0x7ff00000
|
|
ball xh, a6, .Lfixdfdi_nan_or_inf
|
|
|
|
/* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
|
|
extui a4, xh, 20, 11
|
|
extui a5, a6, 19, 10 /* 0x3fe */
|
|
sub a4, a4, a5
|
|
bgei a4, 64, .Lfixdfdi_maxint
|
|
blti a4, 1, .Lfixdfdi_zero
|
|
|
|
/* Add explicit "1.0" and shift << 11. */
|
|
/* a7 keeps the sign of x in bit 31 for the final negate.  */
or a7, xh, a6
|
|
ssai (32 - 11)
|
|
src xh, a7, xl
|
|
sll xl, xl
|
|
|
|
/* Shift back to the right, based on the exponent. */
|
|
ssl a4 /* shift by 64 - a4 */
|
|
bgei a4, 32, .Lfixdfdi_smallshift
|
|
/* a4 < 32: the result fits in the low word.  */
srl xl, xh
|
|
movi xh, 0
|
|
|
|
.Lfixdfdi_shifted:
|
|
/* Negate the result if sign != 0. */
|
|
bgez a7, 1f
|
|
/* 64-bit negate: negate both words, then borrow from the high word
   when the low word is nonzero.  */
neg xl, xl
|
|
neg xh, xh
|
|
beqz xl, 1f
|
|
addi xh, xh, -1
|
|
1: leaf_return
|
|
|
|
.Lfixdfdi_smallshift:
|
|
src xl, xh, xl
|
|
srl xh, xh
|
|
j .Lfixdfdi_shifted
|
|
|
|
.Lfixdfdi_nan_or_inf:
|
|
/* Handle Infinity and NaN. */
|
|
slli a4, xh, 12
|
|
or a4, a4, xl
|
|
beqz a4, .Lfixdfdi_maxint
|
|
|
|
/* Translate NaN to +maxint. */
|
|
movi xh, 0
|
|
|
|
/* Saturate according to the sign still held in xh.  */
.Lfixdfdi_maxint:
|
|
slli a7, a6, 11 /* 0x80000000 */
|
|
bgez xh, 1f
|
|
mov xh, a7
|
|
movi xl, 0
|
|
leaf_return
|
|
|
|
1: addi xh, a7, -1 /* 0x7fffffff */
|
|
movi xl, -1
|
|
leaf_return
|
|
|
|
.Lfixdfdi_zero:
|
|
movi xh, 0
|
|
movi xl, 0
|
|
leaf_return
|
|
|
|
#endif /* L_fixdfdi */
|
|
|
|
#ifdef L_fixunsdfsi
|
|
|
|
/* __fixunsdfsi: convert the double in xh/xl to an unsigned 32-bit
   integer in a2.  Fraction bits are discarded by the right shift.
   Magnitudes below 1 return 0.  In-range negative inputs return the
   negated magnitude.  Too-large values and Infinities return
   0xffffffff when non-negative and 0x80000000 when negative; NaN
   returns 0xffffffff.  Clobbers a4-a7.  */
.align 4
|
|
.global __fixunsdfsi
|
|
.type __fixunsdfsi, @function
|
|
__fixunsdfsi:
|
|
leaf_entry sp, 16
|
|
|
|
/* Check for NaN and Infinity. */
|
|
movi a6, 0x7ff00000
|
|
ball xh, a6, .Lfixunsdfsi_nan_or_inf
|
|
|
|
/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
|
|
extui a4, xh, 20, 11
|
|
extui a5, a6, 20, 10 /* 0x3ff */
|
|
sub a4, a4, a5
|
|
bgei a4, 32, .Lfixunsdfsi_maxint
|
|
bltz a4, .Lfixunsdfsi_zero
|
|
|
|
/* Add explicit "1.0" and shift << 11. */
|
|
/* a7 keeps the sign of x in bit 31 for the final negate.  */
or a7, xh, a6
|
|
ssai (32 - 11)
|
|
src a5, a7, xl
|
|
|
|
/* Shift back to the right, based on the exponent. */
|
|
/* a4 + 1 == 32 would need a right shift of zero, which is handled
   separately at .Lfixunsdfsi_bigexp.  */
addi a4, a4, 1
|
|
beqi a4, 32, .Lfixunsdfsi_bigexp
|
|
ssl a4 /* shift by 32 - a4 */
|
|
srl a5, a5
|
|
|
|
/* Negate the result if sign != 0. */
|
|
neg a2, a5
|
|
movgez a2, a5, a7
|
|
leaf_return
|
|
|
|
.Lfixunsdfsi_nan_or_inf:
|
|
/* Handle Infinity and NaN. */
|
|
slli a4, xh, 12
|
|
or a4, a4, xl
|
|
beqz a4, .Lfixunsdfsi_maxint
|
|
|
|
/* Translate NaN to 0xffffffff. */
|
|
movi a2, -1
|
|
leaf_return
|
|
|
|
/* Saturate: 0xffffffff when xh is non-negative, else 0x80000000.  */
.Lfixunsdfsi_maxint:
|
|
slli a4, a6, 11 /* 0x80000000 */
|
|
movi a5, -1 /* 0xffffffff */
|
|
movgez a4, a5, xh
|
|
mov a2, a4
|
|
leaf_return
|
|
|
|
.Lfixunsdfsi_zero:
|
|
movi a2, 0
|
|
leaf_return
|
|
|
|
.Lfixunsdfsi_bigexp:
|
|
/* Handle unsigned maximum exponent case. */
|
|
bltz xh, 1f
|
|
mov a2, a5 /* no shift needed */
|
|
leaf_return
|
|
|
|
/* Return 0x80000000 if negative. */
|
|
1: slli a2, a6, 11
|
|
leaf_return
|
|
|
|
#endif /* L_fixunsdfsi */
|
|
|
|
#ifdef L_fixunsdfdi
|
|
|
|
/* __fixunsdfdi: convert the double in xh/xl to an unsigned 64-bit
   integer returned in xh/xl.  Fraction bits are discarded by the
   right shift.  Magnitudes below 1 return 0.  In-range negative
   inputs return the negated magnitude.  Too-large values and
   Infinities return all-ones when non-negative and
   0x8000000000000000 when negative; NaN returns all-ones.
   Clobbers a4-a7.  */
.align 4
|
|
.global __fixunsdfdi
|
|
.type __fixunsdfdi, @function
|
|
__fixunsdfdi:
|
|
leaf_entry sp, 16
|
|
|
|
/* Check for NaN and Infinity. */
|
|
movi a6, 0x7ff00000
|
|
ball xh, a6, .Lfixunsdfdi_nan_or_inf
|
|
|
|
/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
|
|
extui a4, xh, 20, 11
|
|
extui a5, a6, 20, 10 /* 0x3ff */
|
|
sub a4, a4, a5
|
|
bgei a4, 64, .Lfixunsdfdi_maxint
|
|
bltz a4, .Lfixunsdfdi_zero
|
|
|
|
/* Add explicit "1.0" and shift << 11. */
|
|
/* a7 keeps the sign of x in bit 31 for the final negate.  */
or a7, xh, a6
|
|
ssai (32 - 11)
|
|
src xh, a7, xl
|
|
sll xl, xl
|
|
|
|
/* Shift back to the right, based on the exponent. */
|
|
/* a4 + 1 == 64 would need a right shift of zero, which is handled
   separately at .Lfixunsdfdi_bigexp.  */
addi a4, a4, 1
|
|
beqi a4, 64, .Lfixunsdfdi_bigexp
|
|
ssl a4 /* shift by 64 - a4 */
|
|
bgei a4, 32, .Lfixunsdfdi_smallshift
|
|
/* a4 < 32: the result fits in the low word.  */
srl xl, xh
|
|
movi xh, 0
|
|
|
|
.Lfixunsdfdi_shifted:
|
|
/* Negate the result if sign != 0. */
|
|
bgez a7, 1f
|
|
/* 64-bit negate: negate both words, then borrow from the high word
   when the low word is nonzero.  */
neg xl, xl
|
|
neg xh, xh
|
|
beqz xl, 1f
|
|
addi xh, xh, -1
|
|
1: leaf_return
|
|
|
|
.Lfixunsdfdi_smallshift:
|
|
src xl, xh, xl
|
|
srl xh, xh
|
|
j .Lfixunsdfdi_shifted
|
|
|
|
.Lfixunsdfdi_nan_or_inf:
|
|
/* Handle Infinity and NaN. */
|
|
slli a4, xh, 12
|
|
or a4, a4, xl
|
|
beqz a4, .Lfixunsdfdi_maxint
|
|
|
|
/* Translate NaN to 0xffffffff.... */
|
|
1: movi xh, -1
|
|
movi xl, -1
|
|
leaf_return
|
|
|
|
/* Saturate: all-ones when xh is non-negative, else 0x80000000:0.  */
.Lfixunsdfdi_maxint:
|
|
bgez xh, 1b
|
|
2: slli xh, a6, 11 /* 0x80000000 */
|
|
movi xl, 0
|
|
leaf_return
|
|
|
|
.Lfixunsdfdi_zero:
|
|
movi xh, 0
|
|
movi xl, 0
|
|
leaf_return
|
|
|
|
.Lfixunsdfdi_bigexp:
|
|
/* Handle unsigned maximum exponent case. */
|
|
/* xh has already been overwritten, so the sign is read from a7.  */
bltz a7, 2b
|
|
leaf_return /* no shift needed */
|
|
|
|
#endif /* L_fixunsdfdi */
|
|
|
|
#ifdef L_floatsidf
|
|
|
|
/* __floatunsidf: convert the unsigned 32-bit integer in a2 to a
   double in xh/xl.  Zero is handled at .Lfloatsidf_return_zero;
   any other value sets the sign (a7) to 0 and shares the
   normalization code of __floatsidf at .Lfloatsidf_normalize.  */
.align 4
|
|
.global __floatunsidf
|
|
.type __floatunsidf, @function
|
|
__floatunsidf:
|
|
leaf_entry sp, 16
|
|
beqz a2, .Lfloatsidf_return_zero
|
|
|
|
/* Set the sign to zero and jump to the floatsidf code. */
|
|
movi a7, 0
|
|
j .Lfloatsidf_normalize
|
|
|
|
/* __floatsidf: convert the signed 32-bit integer in a2 to a double in
   xh/xl.  No rounding step is present: the 32-bit magnitude is only
   shifted into the mantissa.
   Register roles: a7 = sign (0 or 1), a4 = normalization shift count,
   a5 = normalized magnitude and then the exponent.
   NOTE(review): do_nsau is a macro defined outside this chunk;
   presumably it yields in a4 the number of leading zero bits of a2,
   using a5/a6 as scratch -- confirm.  */
.align 4
|
|
.global __floatsidf
|
|
.type __floatsidf, @function
|
|
__floatsidf:
|
|
leaf_entry sp, 16
|
|
|
|
/* Check for zero. */
|
|
beqz a2, .Lfloatsidf_return_zero
|
|
|
|
/* Save the sign. */
|
|
extui a7, a2, 31, 1
|
|
|
|
/* Get the absolute value. */
|
|
#if XCHAL_HAVE_ABS
|
|
abs a2, a2
|
|
#else
|
|
neg a4, a2
|
|
movltz a2, a4, a2
|
|
#endif
|
|
|
|
/* Also the entry point used by __floatunsidf (with a7 = 0).  */
.Lfloatsidf_normalize:
|
|
/* Normalize with the first 1 bit in the msb. */
|
|
do_nsau a4, a2, a5, a6
|
|
ssl a4
|
|
sll a5, a2
|
|
|
|
/* Shift the mantissa into position. */
|
|
srli xh, a5, 11
|
|
slli xl, a5, (32 - 11)
|
|
|
|
/* Set the exponent. */
|
|
/* The leading 1 lands in bit 20 of xh, so the add below carries it
   into the exponent field; hence 0x3fe rather than 0x3ff.  */
movi a5, 0x41d /* 0x3fe + 31 */
|
|
sub a5, a5, a4
|
|
slli a5, a5, 20
|
|
add xh, xh, a5
|
|
|
|
/* Add the sign and return. */
|
|
slli a7, a7, 31
|
|
or xh, xh, a7
|
|
leaf_return
|
|
|
|
/* Input was zero: a2 is already 0, so clearing a3 makes the whole
   64-bit result +0.0.  */
.Lfloatsidf_return_zero:
|
|
movi a3, 0
|
|
leaf_return
|
|
|
|
#endif /* L_floatsidf */
|
|
|
|
#ifdef L_floatdidf
|
|
|
|
/* __floatundidf: convert the unsigned 64-bit integer in xh/xl to a
   double in xh/xl.  Zero returns immediately through the "2:"
   leaf_return inside __floatdidf (the input registers already hold
   +0.0); any other value sets the sign (a7) to 0 and shares the code
   at .Lfloatdidf_normalize.  */
.align 4
|
|
.global __floatundidf
|
|
.type __floatundidf, @function
|
|
__floatundidf:
|
|
leaf_entry sp, 16
|
|
|
|
/* Check for zero. */
|
|
or a4, xh, xl
|
|
beqz a4, 2f
|
|
|
|
/* Set the sign to zero and jump to the floatdidf code. */
|
|
movi a7, 0
|
|
j .Lfloatdidf_normalize
|
|
|
|
/* __floatdidf: convert the signed 64-bit integer in xh/xl to a double
   in xh/xl, rounding to nearest with ties to even.
   Register roles: a7 = sign (0 or 1), a4 = normalization shift count,
   a6 = bits shifted out of the mantissa (msb = the 1/2 bit),
   a5 = scratch / exponent.
   NOTE(review): do_nsau is a macro defined outside this chunk;
   presumably it yields in a4 the number of leading zero bits of its
   source operand, using a5/a6 as scratch -- confirm.  */
.align 4
|
|
.global __floatdidf
|
|
.type __floatdidf, @function
|
|
__floatdidf:
|
|
leaf_entry sp, 16
|
|
|
|
/* Check for zero. */
|
|
or a4, xh, xl
|
|
beqz a4, 2f
|
|
|
|
/* Save the sign. */
|
|
extui a7, xh, 31, 1
|
|
|
|
/* Get the absolute value. */
|
|
bgez xh, .Lfloatdidf_normalize
|
|
/* 64-bit negate: negate both words, then borrow from the high word
   when the low word is nonzero.  */
neg xl, xl
|
|
neg xh, xh
|
|
beqz xl, .Lfloatdidf_normalize
|
|
addi xh, xh, -1
|
|
|
|
/* Also the entry point used by __floatundidf (with a7 = 0).  */
.Lfloatdidf_normalize:
|
|
/* Normalize with the first 1 bit in the msb of xh. */
|
|
beqz xh, .Lfloatdidf_bigshift
|
|
do_nsau a4, xh, a5, a6
|
|
ssl a4
|
|
src xh, xh, xl
|
|
sll xl, xl
|
|
|
|
.Lfloatdidf_shifted:
|
|
/* Shift the mantissa into position, with rounding bits in a6. */
|
|
ssai 11
|
|
sll a6, xl
|
|
src xl, xh, xl
|
|
srl xh, xh
|
|
|
|
/* Set the exponent. */
|
|
/* The leading 1 lands in bit 20 of xh, so the add below carries it
   into the exponent field; hence 0x3fe rather than 0x3ff.  */
movi a5, 0x43d /* 0x3fe + 63 */
|
|
sub a5, a5, a4
|
|
slli a5, a5, 20
|
|
add xh, xh, a5
|
|
|
|
/* Add the sign. */
|
|
slli a7, a7, 31
|
|
or xh, xh, a7
|
|
|
|
/* Round up if the leftover fraction is >= 1/2. */
|
|
bgez a6, 2f
|
|
addi xl, xl, 1
|
|
beqz xl, .Lfloatdidf_roundcarry
|
|
|
|
/* Check if the leftover fraction is exactly 1/2. */
|
|
slli a6, a6, 1
|
|
beqz a6, .Lfloatdidf_exactlyhalf
|
|
2: leaf_return
|
|
|
|
.Lfloatdidf_bigshift:
|
|
/* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
|
|
do_nsau a4, xl, a5, a6
|
|
ssl a4
|
|
sll xh, xl
|
|
movi xl, 0
|
|
/* Account for the 32 bits skipped by moving xl into xh.  */
addi a4, a4, 32
|
|
j .Lfloatdidf_shifted
|
|
|
|
.Lfloatdidf_exactlyhalf:
|
|
/* Round down to the nearest even value. */
|
|
srli xl, xl, 1
|
|
slli xl, xl, 1
|
|
leaf_return
|
|
|
|
.Lfloatdidf_roundcarry:
|
|
/* xl is always zero when the rounding increment overflows, so
|
|
there's no need to round it to an even value. */
|
|
addi xh, xh, 1
|
|
/* Overflow to the exponent is OK. */
|
|
leaf_return
|
|
|
|
#endif /* L_floatdidf */
|
|
|
|
#ifdef L_truncdfsf2
|
|
|
|
/* __truncdfsf2: convert the double in xh/xl to a single-precision
   value in a2, rounding to nearest with ties to even.
   Exponents too large for single precision return a signed Infinity,
   or a quiet NaN when the input is a NaN; exponents too small produce
   a subnormal, or a signed zero when the required shift is 32 or
   more.
   Register roles: a5 = result high bits, a4 = bits shifted out
   (msb = the 1/2 bit), a6/a7 = scratch.  */
.align 4
|
|
.global __truncdfsf2
|
|
.type __truncdfsf2, @function
|
|
__truncdfsf2:
|
|
leaf_entry sp, 16
|
|
|
|
/* Adjust the exponent bias. */
|
|
movi a4, (0x3ff - 0x7f) << 20
|
|
sub a5, xh, a4
|
|
|
|
/* Check for underflow. */
|
|
/* A sign flip from the subtraction means the exponent went negative.  */
xor a6, xh, a5
|
|
bltz a6, .Ltrunc_underflow
|
|
extui a6, a5, 20, 11
|
|
beqz a6, .Ltrunc_underflow
|
|
|
|
/* Check for overflow. */
|
|
movi a4, 255
|
|
bge a6, a4, .Ltrunc_overflow
|
|
|
|
/* Shift a5/xl << 3 into a5/a4. */
|
|
ssai (32 - 3)
|
|
src a5, a5, xl
|
|
sll a4, xl
|
|
|
|
.Ltrunc_addsign:
|
|
/* Add the sign bit. */
|
|
extui a6, xh, 31, 1
|
|
slli a6, a6, 31
|
|
or a2, a6, a5
|
|
|
|
/* Round up if the leftover fraction is >= 1/2. */
|
|
bgez a4, 1f
|
|
addi a2, a2, 1
|
|
/* Overflow to the exponent is OK. The answer will be correct. */
|
|
|
|
/* Check if the leftover fraction is exactly 1/2. */
|
|
slli a4, a4, 1
|
|
beqz a4, .Ltrunc_exactlyhalf
|
|
1: leaf_return
|
|
|
|
.Ltrunc_exactlyhalf:
|
|
/* Round down to the nearest even value. */
|
|
srli a2, a2, 1
|
|
slli a2, a2, 1
|
|
leaf_return
|
|
|
|
.Ltrunc_overflow:
|
|
/* Check if exponent == 0x7ff. */
|
|
movi a4, 0x7ff00000
|
|
bnall xh, a4, 1f
|
|
|
|
/* Check if mantissa is nonzero. */
|
|
slli a5, xh, 12
|
|
or a5, a5, xl
|
|
beqz a5, 1f
|
|
|
|
/* Shift a4 to set a bit in the mantissa, making a quiet NaN. */
|
|
srli a4, a4, 1
|
|
|
|
1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */
|
|
/* Add the sign bit. */
|
|
/* Funnel-shift sign:a4 right by one: the sign lands in bit 31 and a4
   moves down to the exponent/mantissa position.  */
extui a6, xh, 31, 1
|
|
ssai 1
|
|
src a2, a6, a4
|
|
leaf_return
|
|
|
|
.Ltrunc_underflow:
|
|
/* Find shift count for a subnormal. Flush to zero if >= 32. */
|
|
extui a6, xh, 20, 11
|
|
movi a5, 0x3ff - 0x7f
|
|
sub a6, a5, a6
|
|
addi a6, a6, 1
|
|
bgeui a6, 32, 1f
|
|
|
|
/* Replace the exponent with an explicit "1.0". */
|
|
/* Bit 20 of the constant is set, so the OR inserts the implicit one;
   the shift pair then clears bits 21..31.  */
slli a5, a5, 13 /* 0x700000 */
|
|
or a5, a5, xh
|
|
slli a5, a5, 11
|
|
srli a5, a5, 11
|
|
|
|
/* Shift the mantissa left by 3 bits (into a5/a4). */
|
|
ssai (32 - 3)
|
|
src a5, a5, xl
|
|
sll a4, xl
|
|
|
|
/* Shift right by a6. */
|
|
ssr a6
|
|
sll a7, a4
|
|
src a4, a5, a4
|
|
srl a5, a5
|
|
/* a7 holds the bits that fell off the bottom of a4; if any are set,
   make a4 sticky so the tie check at .Ltrunc_addsign sees them.  */
beqz a7, .Ltrunc_addsign
|
|
or a4, a4, a6 /* any positive, nonzero value will work */
|
|
j .Ltrunc_addsign
|
|
|
|
/* Return +/- zero. */
|
|
1: extui a2, xh, 31, 1
|
|
slli a2, a2, 31
|
|
leaf_return
|
|
|
|
#endif /* L_truncdfsf2 */
|
|
|
|
#ifdef L_extendsfdf2
|
|
|
|
/* __extendsfdf2: convert the single-precision value in a2 to a double
   in xh/xl.  Zero, subnormal, Infinity and NaN inputs are handled
   explicitly; a NaN input gets mantissa bit 0x80000 set in the result
   (quiet NaN) and its original payload is not carried over.
   Register roles: a5 = sign bit (bit 31), a4 = input with the sign
   shifted off and then the result high word, a6 = exponent / scratch,
   a7 = scratch.
   NOTE(review): do_nsau is a macro defined outside this chunk;
   presumably it yields in a7 the number of leading zero bits of a4,
   using a2/a3 as scratch -- confirm.  */
.align 4
|
|
.global __extendsfdf2
|
|
.type __extendsfdf2, @function
|
|
__extendsfdf2:
|
|
leaf_entry sp, 16
|
|
|
|
/* Save the sign bit and then shift it off. */
|
|
extui a5, a2, 31, 1
|
|
slli a5, a5, 31
|
|
slli a4, a2, 1
|
|
|
|
/* Extract and check the exponent. */
|
|
extui a6, a2, 23, 8
|
|
beqz a6, .Lextend_expzero
|
|
/* exponent + 1 == 256 means the exponent field was all ones.  */
addi a6, a6, 1
|
|
beqi a6, 256, .Lextend_nan_or_inf
|
|
|
|
/* Shift >> 3 into a4/xl. */
|
|
/* a4 was already shifted left by one, hence the shift by 4 here.  */
srli a4, a4, 4
|
|
slli xl, a2, (32 - 3)
|
|
|
|
/* Adjust the exponent bias. */
|
|
movi a6, (0x3ff - 0x7f) << 20
|
|
add a4, a4, a6
|
|
|
|
/* Add the sign bit. */
|
|
or xh, a4, a5
|
|
leaf_return
|
|
|
|
.Lextend_nan_or_inf:
|
|
movi a4, 0x7ff00000
|
|
|
|
/* Check for NaN. */
|
|
slli a7, a2, 9
|
|
beqz a7, 1f
|
|
|
|
/* a6 is 256 here, so the shift yields the quiet-NaN bit.  */
slli a6, a6, 11 /* 0x80000 */
|
|
or a4, a4, a6
|
|
|
|
/* Add the sign and return. */
|
|
1: or xh, a4, a5
|
|
movi xl, 0
|
|
leaf_return
|
|
|
|
.Lextend_expzero:
|
|
/* a4 == 0 means the input is +/-0; label 1 above returns sign | 0.  */
beqz a4, 1b
|
|
|
|
/* Normalize it to have 8 zero bits before the first 1 bit. */
|
|
do_nsau a7, a4, a2, a3
|
|
addi a7, a7, -8
|
|
ssl a7
|
|
sll a4, a4
|
|
|
|
/* Shift >> 3 into a4/xl. */
|
|
slli xl, a4, (32 - 3)
|
|
srli a4, a4, 3
|
|
|
|
/* Set the exponent. */
|
|
movi a6, 0x3fe - 0x7f
|
|
sub a6, a6, a7
|
|
slli a6, a6, 20
|
|
add a4, a4, a6
|
|
|
|
/* Add the sign and return. */
|
|
or xh, a4, a5
|
|
leaf_return
|
|
|
|
#endif /* L_extendsfdf2 */
|
|
|
|
|
|
#if XCHAL_HAVE_DFP_SQRT
|
|
#ifdef L_sqrt
|
|
|
|
/* __ieee754_sqrt: square root of the double in xh/xl, result in
   xh/xl.  Built only when XCHAL_HAVE_DFP_SQRT is set.
   The operand is moved into f1, a fixed straight-line sequence of
   double-precision helper instructions is run, and the result in f0
   is moved back to xh/xl.
   NOTE(review): the exact semantics of sqrt0.d, nexp01.d, mksadj.d,
   maddn.d, addexp.d, addexpm.d and divn.d are defined by the Xtensa
   ISA and cannot be derived from this file; the sequence presumably
   implements a seed + iterative-refinement square root -- confirm
   against the ISA manual before changing any instruction or its
   order.  */
.text
|
|
.align 4
|
|
.global __ieee754_sqrt
|
|
.type __ieee754_sqrt, @function
|
|
__ieee754_sqrt:
|
|
leaf_entry sp, 16
|
|
|
|
/* f1 = x.  */
wfrd f1, xh, xl
|
|
|
|
sqrt0.d f2, f1
|
|
const.d f4, 0
|
|
maddn.d f4, f2, f2
|
|
nexp01.d f3, f1
|
|
const.d f0, 3
|
|
addexp.d f3, f0
|
|
maddn.d f0, f4, f3
|
|
nexp01.d f4, f1
|
|
maddn.d f2, f0, f2
|
|
const.d f5, 0
|
|
maddn.d f5, f2, f3
|
|
const.d f0, 3
|
|
maddn.d f0, f5, f2
|
|
neg.d f6, f4
|
|
maddn.d f2, f0, f2
|
|
const.d f0, 0
|
|
const.d f5, 0
|
|
const.d f7, 0
|
|
maddn.d f0, f6, f2
|
|
maddn.d f5, f2, f3
|
|
const.d f3, 3
|
|
maddn.d f7, f3, f2
|
|
maddn.d f4, f0, f0
|
|
maddn.d f3, f5, f2
|
|
neg.d f2, f7
|
|
maddn.d f0, f4, f2
|
|
maddn.d f7, f3, f7
|
|
mksadj.d f2, f1
|
|
nexp01.d f1, f1
|
|
maddn.d f1, f0, f0
|
|
neg.d f3, f7
|
|
addexpm.d f0, f2
|
|
addexp.d f3, f2
|
|
divn.d f0, f1, f3
|
|
|
|
/* Move the result in f0 back to the integer return registers.  */
rfr xl, f0
|
|
rfrd xh, f0
|
|
|
|
leaf_return
|
|
|
|
#endif /* L_sqrt */
|
|
#endif /* XCHAL_HAVE_DFP_SQRT */
|
|
|
|
#if XCHAL_HAVE_DFP_RECIP
|
|
#ifdef L_recipdf2
|
|
/* Reciprocal */
|
|
|
|
/* __recipdf2: reciprocal of the double in xh/xl, result in xh/xl.
   Built only when XCHAL_HAVE_DFP_RECIP is set.
   The operand is moved into f1, recip0.d produces a starting value in
   f0, and the following mul.d / msub.d / maddn.d steps update it
   before it is moved back to xh/xl.
   NOTE(review): presumably recip0.d is a low-precision seed and the
   remaining steps are Newton-Raphson style refinements -- the opcode
   semantics are defined by the Xtensa ISA, not by this file; confirm
   before changing the sequence.  */
.align 4
|
|
.global __recipdf2
|
|
.type __recipdf2, @function
|
|
__recipdf2:
|
|
leaf_entry sp, 16
|
|
|
|
/* f1 = x.  */
wfrd f1, xh, xl
|
|
|
|
recip0.d f0, f1
|
|
const.d f2, 2
|
|
msub.d f2, f1, f0
|
|
mul.d f3, f1, f0
|
|
const.d f4, 2
|
|
mul.d f5, f0, f2
|
|
msub.d f4, f3, f2
|
|
const.d f2, 1
|
|
mul.d f0, f5, f4
|
|
msub.d f2, f1, f0
|
|
maddn.d f0, f0, f2
|
|
|
|
/* Move the result in f0 back to the integer return registers.  */
rfr xl, f0
|
|
rfrd xh, f0
|
|
|
|
leaf_return
|
|
|
|
#endif /* L_recipdf2 */
|
|
#endif /* XCHAL_HAVE_DFP_RECIP */
|
|
|
|
#if XCHAL_HAVE_DFP_RSQRT
|
|
#ifdef L_rsqrtdf2
|
|
/* Reciprocal square root */
|
|
|
|
/* __rsqrtdf2: reciprocal square root of the double in xh/xl, result
   in xh/xl.  Built only when XCHAL_HAVE_DFP_RSQRT is set.
   The operand is moved into f1, rsqrt0.d produces a starting value in
   f0, and three identical-shaped groups of mul.d / msub.d / maddn.d
   update it before it is moved back to xh/xl.
   NOTE(review): presumably rsqrt0.d is a low-precision seed and each
   group is one refinement iteration -- the opcode semantics are
   defined by the Xtensa ISA, not by this file; confirm before
   changing the sequence.  */
.align 4
|
|
.global __rsqrtdf2
|
|
.type __rsqrtdf2, @function
|
|
__rsqrtdf2:
|
|
leaf_entry sp, 16
|
|
|
|
/* f1 = x.  */
wfrd f1, xh, xl
|
|
|
|
rsqrt0.d f0, f1
|
|
mul.d f2, f1, f0
|
|
const.d f3, 3
|
|
mul.d f4, f3, f0
|
|
const.d f5, 1
|
|
msub.d f5, f2, f0
|
|
maddn.d f0, f4, f5
|
|
const.d f2, 1
|
|
mul.d f4, f1, f0
|
|
mul.d f5, f3, f0
|
|
msub.d f2, f4, f0
|
|
maddn.d f0, f5, f2
|
|
const.d f2, 1
|
|
mul.d f1, f1, f0
|
|
mul.d f3, f3, f0
|
|
msub.d f2, f1, f0
|
|
maddn.d f0, f3, f2
|
|
|
|
/* Move the result in f0 back to the integer return registers.  */
rfr xl, f0
|
|
rfrd xh, f0
|
|
|
|
leaf_return
|
|
|
|
#endif /* L_rsqrtdf2 */
|
|
#endif /* XCHAL_HAVE_DFP_RSQRT */
|