ia64: strip trailing whitespace

Many ia64 files have trailing whitespace which gets in the way and
annoys me.  So strip it away:

	find `find sysdeps/ -name ia64` -type f -exec sed -i 's:[[:space:]]*$::' {} +

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
This commit is contained in:
Mike Frysinger 2012-04-16 22:08:04 -04:00
parent d5efd131d4
commit 0347518d63
105 changed files with 5415 additions and 5308 deletions

View File

@ -1,3 +1,110 @@
2012-04-22 Mike Frysinger <vapier@gentoo.org>
* sysdeps/ia64/fpu/e_acosf.S: Trim trailing whitespace.
* sysdeps/ia64/fpu/e_acoshl.S: Likewise.
* sysdeps/ia64/fpu/e_acosl.S: Likewise.
* sysdeps/ia64/fpu/e_asinf.S: Likewise.
* sysdeps/ia64/fpu/e_asinl.S: Likewise.
* sysdeps/ia64/fpu/e_atan2f.S: Likewise.
* sysdeps/ia64/fpu/e_atanhl.S: Likewise.
* sysdeps/ia64/fpu/e_coshl.S: Likewise.
* sysdeps/ia64/fpu/e_exp.S: Likewise.
* sysdeps/ia64/fpu/e_expf.S: Likewise.
* sysdeps/ia64/fpu/e_fmodl.S: Likewise.
* sysdeps/ia64/fpu/e_hypot.S: Likewise.
* sysdeps/ia64/fpu/e_hypotf.S: Likewise.
* sysdeps/ia64/fpu/e_hypotl.S: Likewise.
* sysdeps/ia64/fpu/e_log.S: Likewise.
* sysdeps/ia64/fpu/e_log2.S: Likewise.
* sysdeps/ia64/fpu/e_log2f.S: Likewise.
* sysdeps/ia64/fpu/e_log2l.S: Likewise.
* sysdeps/ia64/fpu/e_logl.S: Likewise.
* sysdeps/ia64/fpu/e_powf.S: Likewise.
* sysdeps/ia64/fpu/e_remainder.S: Likewise.
* sysdeps/ia64/fpu/e_remainderf.S: Likewise.
* sysdeps/ia64/fpu/e_remainderl.S: Likewise.
* sysdeps/ia64/fpu/e_scalb.S: Likewise.
* sysdeps/ia64/fpu/e_scalbf.S: Likewise.
* sysdeps/ia64/fpu/e_scalbl.S: Likewise.
* sysdeps/ia64/fpu/e_sinhl.S: Likewise.
* sysdeps/ia64/fpu/e_sqrt.S: Likewise.
* sysdeps/ia64/fpu/e_sqrtf.S: Likewise.
* sysdeps/ia64/fpu/e_sqrtl.S: Likewise.
* sysdeps/ia64/fpu/libm_cpu_defs.h: Likewise.
* sysdeps/ia64/fpu/libm_error_codes.h: Likewise.
* sysdeps/ia64/fpu/libm_frexp.S: Likewise.
* sysdeps/ia64/fpu/libm_frexpf.S: Likewise.
* sysdeps/ia64/fpu/libm_frexpl.S: Likewise.
* sysdeps/ia64/fpu/libm_scalblnf.S: Likewise.
* sysdeps/ia64/fpu/libm_tan.S: Likewise.
* sysdeps/ia64/fpu/s_asinhl.S: Likewise.
* sysdeps/ia64/fpu/s_atanf.S: Likewise.
* sysdeps/ia64/fpu/s_atanl.S: Likewise.
* sysdeps/ia64/fpu/s_cbrtl.S: Likewise.
* sysdeps/ia64/fpu/s_cos.S: Likewise.
* sysdeps/ia64/fpu/s_cosf.S: Likewise.
* sysdeps/ia64/fpu/s_erf.S: Likewise.
* sysdeps/ia64/fpu/s_erfc.S: Likewise.
* sysdeps/ia64/fpu/s_erfcf.S: Likewise.
* sysdeps/ia64/fpu/s_erfcl.S: Likewise.
* sysdeps/ia64/fpu/s_erff.S: Likewise.
* sysdeps/ia64/fpu/s_erfl.S: Likewise.
* sysdeps/ia64/fpu/s_expm1.S: Likewise.
* sysdeps/ia64/fpu/s_expm1f.S: Likewise.
* sysdeps/ia64/fpu/s_expm1l.S: Likewise.
* sysdeps/ia64/fpu/s_fabs.S: Likewise.
* sysdeps/ia64/fpu/s_fabsf.S: Likewise.
* sysdeps/ia64/fpu/s_fabsl.S: Likewise.
* sysdeps/ia64/fpu/s_finite.S: Likewise.
* sysdeps/ia64/fpu/s_fma.S: Likewise.
* sysdeps/ia64/fpu/s_fmaf.S: Likewise.
* sysdeps/ia64/fpu/s_fmal.S: Likewise.
* sysdeps/ia64/fpu/s_fmax.S: Likewise.
* sysdeps/ia64/fpu/s_fmaxf.S: Likewise.
* sysdeps/ia64/fpu/s_fmaxl.S: Likewise.
* sysdeps/ia64/fpu/s_fpclassify.S: Likewise.
* sysdeps/ia64/fpu/s_frexp.c: Likewise.
* sysdeps/ia64/fpu/s_frexpf.c: Likewise.
* sysdeps/ia64/fpu/s_frexpl.c: Likewise.
* sysdeps/ia64/fpu/s_ldexp.c: Likewise.
* sysdeps/ia64/fpu/s_ldexpf.c: Likewise.
* sysdeps/ia64/fpu/s_ldexpl.c: Likewise.
* sysdeps/ia64/fpu/s_log1pl.S: Likewise.
* sysdeps/ia64/fpu/s_modf.S: Likewise.
* sysdeps/ia64/fpu/s_modff.S: Likewise.
* sysdeps/ia64/fpu/s_modfl.S: Likewise.
* sysdeps/ia64/fpu/s_nextafter.S: Likewise.
* sysdeps/ia64/fpu/s_nextafterf.S: Likewise.
* sysdeps/ia64/fpu/s_nextafterl.S: Likewise.
* sysdeps/ia64/fpu/s_nexttoward.S: Likewise.
* sysdeps/ia64/fpu/s_nexttowardf.S: Likewise.
* sysdeps/ia64/fpu/s_nexttowardl.S: Likewise.
* sysdeps/ia64/fpu/s_round.S: Likewise.
* sysdeps/ia64/fpu/s_roundf.S: Likewise.
* sysdeps/ia64/fpu/s_roundl.S: Likewise.
* sysdeps/ia64/fpu/s_scalblnf.c: Likewise.
* sysdeps/ia64/fpu/s_scalbn.c: Likewise.
* sysdeps/ia64/fpu/s_scalbnf.c: Likewise.
* sysdeps/ia64/fpu/s_scalbnl.c: Likewise.
* sysdeps/ia64/fpu/s_signbit.S: Likewise.
* sysdeps/ia64/fpu/s_significand.S: Likewise.
* sysdeps/ia64/fpu/s_significandf.S: Likewise.
* sysdeps/ia64/fpu/s_significandl.S: Likewise.
* sysdeps/ia64/fpu/s_tan.S: Likewise.
* sysdeps/ia64/fpu/s_tanf.S: Likewise.
* sysdeps/ia64/fpu/s_tanh.S: Likewise.
* sysdeps/ia64/fpu/s_tanhf.S: Likewise.
* sysdeps/ia64/fpu/s_tanhl.S: Likewise.
* sysdeps/ia64/fpu/s_tanl.S: Likewise.
* sysdeps/ia64/fpu/w_tgamma.S: Likewise.
* sysdeps/ia64/fpu/w_tgammaf.S: Likewise.
* sysdeps/ia64/fpu/w_tgammal.S: Likewise.
* sysdeps/ia64/softpipe.h: Likewise.
* sysdeps/ia64/strchr.S: Likewise.
* sysdeps/ia64/strlen.S: Likewise.
* sysdeps/ia64/strncmp.S: Likewise.
* sysdeps/unix/sysv/linux/ia64/register-dump.h: Likewise.
2012-04-22 Mike Frysinger <vapier@gentoo.org>
* sysdeps/ia64/Implies: Copied from the main tree.

View File

@ -61,7 +61,7 @@
// The acosf function returns the arc cosine in the range [0, +pi] radians.
// acos(1) returns +0
// acos(x) returns a Nan and raises the invalid exception for |x| >1
// acos(x) returns a Nan and raises the invalid exception for |x| >1
// |x| <= sqrt(2)/2. get Ax and Bx
@ -249,355 +249,355 @@ LOCAL_OBJECT_END(acosf_coeff_2_table)
.section .text
GLOBAL_LIBM_ENTRY(acosf)
// Load the addresses of the two tables.
// Then, load the coefficients and other constants.
{ .mfi
{ .mfi
alloc r32 = ar.pfs,1,8,4,0
fnma.s1 acosf_t = f8,f8,f1
dep.z ACOSF_GR_1by2 = 0x3f,24,8 // 0x3f000000
}
{ .mfi
}
{ .mfi
addl ACOSF_Addr1 = @ltoff(acosf_coeff_1_table),gp
fma.s1 acosf_x2 = f8,f8,f0
addl ACOSF_Addr2 = @ltoff(acosf_coeff_2_table),gp ;;
}
{ .mfi
{ .mfi
ld8 ACOSF_Addr1 = [ACOSF_Addr1]
fmerge.s acosf_abs_x = f1,f8
dep ACOSF_GR_3by2 = -1,r0,22,8 // 0x3fc00000
}
{ .mlx
}
{ .mlx
nop.m 999
movl ACOSF_GR_5by2 = 0x40200000;;
}
{ .mfi
{ .mfi
setf.s acosf_1by2 = ACOSF_GR_1by2
fmerge.s acosf_sgn_x = f8,f1
nop.i 999
}
{ .mfi
}
{ .mfi
ld8 ACOSF_Addr2 = [ACOSF_Addr2]
nop.f 0
nop.i 999;;
}
{ .mfi
{ .mfi
setf.s acosf_5by2 = ACOSF_GR_5by2
fcmp.lt.s1 p11,p12 = f8,f0
nop.i 999;;
}
{ .mmf
{ .mmf
ldfpd acosf_coeff_P1,acosf_coeff_P4 = [ACOSF_Addr1],16
setf.s acosf_3by2 = ACOSF_GR_3by2
fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan
}
{ .mfi
{ .mfi
ldfpd acosf_coeff_P7,acosf_coeff_P6 = [ACOSF_Addr1],16
fma.s1 acosf_t2 = acosf_t,acosf_t,f0
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd acosf_coeff_P3,acosf_coeff_P8 = [ACOSF_Addr2],16
fma.s1 acosf_x4 = acosf_x2,acosf_x2,f0
nop.i 999;;
}
{ .mfi
{ .mfi
ldfpd acosf_coeff_P9,acosf_const_sqrt2by2 = [ACOSF_Addr1]
fclass.m.unc p10,p0 = f8, 0x07 //@zero
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd acosf_coeff_P5,acosf_coeff_P2 = [ACOSF_Addr2],16
fma.s1 acosf_x3 = f8,acosf_x2,f0
nop.i 999;;
}
{ .mfi
{ .mfi
ldfd acosf_const_piby2 = [ACOSF_Addr2]
frsqrta.s1 acosf_B,p0 = acosf_t
nop.i 999
}
{ .mfb
}
{ .mfb
nop.m 999
(p8) fma.s.s0 f8 = f8,f1,f0
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
{ .mfb
{ .mfb
nop.m 999
fcmp.eq.s1 p6,p0 = acosf_abs_x,f1
(p10) br.cond.spnt ACOSF_ZERO ;; // Branch if x=0
}
{ .mfi
}
{ .mfi
nop.m 999
fcmp.gt.s1 p9,p0 = acosf_abs_x,f1
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 acosf_x8 = acosf_x4,acosf_x4,f0
nop.i 999
}
{ .mfb
}
{ .mfb
nop.m 999
fma.s1 acosf_t4 = acosf_t2,acosf_t2,f0
(p6) br.cond.spnt ACOSF_ABS_ONE ;; // Branch if |x|=1
}
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 acosf_x5 = acosf_x2,acosf_x3,f0
nop.i 999
}
{ .mfb
{ .mfb
(p9) mov GR_Parameter_TAG = 59
fma.s1 acosf_yby2 = acosf_t,acosf_1by2,f0
(p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 acosf_Az = acosf_t,acosf_B,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 acosf_B2 = acosf_B,acosf_B,f0
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 acosf_poly_p1 = f8,acosf_coeff_P1,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p1 = acosf_coeff_P1,acosf_t,f1
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 acosf_poly_p3 = acosf_coeff_P4,acosf_x2,acosf_coeff_P3
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p6 = acosf_coeff_P7,acosf_t,acosf_coeff_P6
nop.i 999;;
}
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 acosf_poly_p7 = acosf_x2,acosf_coeff_P8,acosf_coeff_P7
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p2 = acosf_coeff_P3,acosf_t,acosf_coeff_P2
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 acosf_poly_p5 = acosf_x2,acosf_coeff_P6,acosf_coeff_P5
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p4 = acosf_coeff_P5,acosf_t,acosf_coeff_P4
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 acosf_x11 = acosf_x8,acosf_x3,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fnma.s1 acosf_dz = acosf_B2,acosf_yby2,acosf_1by2
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 acosf_poly_p1a = acosf_x2,acosf_poly_p1,f8
nop.i 999
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p8 = acosf_coeff_P9,acosf_t,acosf_coeff_P8
nop.i 999;;
}
// Get the absolute value of x and determine the region in which x lies
{ .mfi
{ .mfi
nop.m 999
fcmp.le.s1 p7,p8 = acosf_abs_x,acosf_const_sqrt2by2
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 acosf_poly_p2 = acosf_x2,acosf_poly_p3,acosf_coeff_P2
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 acosf_poly_p7a = acosf_x4,acosf_coeff_P9,acosf_poly_p7
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p2a = acosf_2poly_p2,acosf_t2,acosf_2poly_p1
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.s1 acosf_sgnx_t4 = acosf_sgn_x,acosf_t4,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p8) fma.s1 acosf_2poly_p4a = acosf_2poly_p6,acosf_t2,acosf_2poly_p4
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.s1 acosf_Sz = acosf_5by2,acosf_dz,acosf_3by2
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p8) fma.s1 acosf_d2z = acosf_dz,acosf_dz,f0
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
(p8) fnma.d.s1 acosf_sgn_x_piby2 = acosf_sgn_x,acosf_const_piby2,acosf_const_piby2
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p7) fma.s1 acosf_poly_Ax = acosf_x5,acosf_poly_p2,acosf_poly_p1a
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
(p7) fma.s1 acosf_poly_Bx = acosf_x4,acosf_poly_p7a,acosf_poly_p5
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p8) fma.s1 acosf_sgnx_2poly_p2 = acosf_sgn_x,acosf_2poly_p2a,f0
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal
nop.i 999
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.s1 acosf_2poly_p4b = acosf_2poly_p8,acosf_t4,acosf_2poly_p4a
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.s1 acosf_Fz = acosf_d2z,acosf_Sz,acosf_dz
nop.i 999;;
}
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.d.s1 acosf_Pt = acosf_2poly_p4b,acosf_sgnx_t4,acosf_sgnx_2poly_p2
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
(p8) fma.d.s1 acosf_z = acosf_Az,acosf_Fz,acosf_Az
nop.i 999 ;;
}
{ .mfi
}
{ .mfi
nop.m 999
(p7) fma.d.s1 acosf_sinf1 = acosf_x11,acosf_poly_Bx,acosf_poly_Ax
nop.i 999;;
}
}
.pred.rel "mutex",p8,p7 //acosf_pred_GTsqrt2by2,acosf_pred_LEsqrt2by2
{ .mfi
{ .mfi
nop.m 999
(p8) fma.s.s0 f8 = acosf_z,acosf_Pt,acosf_sgn_x_piby2
nop.i 999
}
{ .mfb
}
{ .mfb
nop.m 999
(p7) fms.s.s0 f8 = acosf_const_piby2,f1,acosf_sinf1
br.ret.sptk b0 ;;
}
}
ACOSF_ZERO:
// Here if x=0
{ .mfb
{ .mfb
nop.m 999
fma.s.s0 f8 = acosf_const_piby2,f1,f0 // acosf(0)=pi/2
br.ret.sptk b0 ;;
}
}
ACOSF_ABS_ONE:
.pred.rel "mutex",p11,p12
// Here if |x|=1
{ .mfi
{ .mfi
nop.m 999
(p11) fma.s.s0 f8 = acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi
nop.i 999
}
{ .mfb
}
{ .mfb
nop.m 999
(p12) fma.s.s0 f8 = f1,f0,f0 // acosf(1)=0
br.ret.sptk b0 ;;
}
}
GLOBAL_LIBM_END(acosf)

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
//
// History:
// History:
// 10/01/01 Initial version
// 10/10/01 Performance improved
// 12/11/01 Changed huges_logp to not be global
@ -57,7 +57,7 @@
//
// Overview of operation
//==============================================================
//
//
// There are 6 paths:
// 1. x = 1
// Return acoshl(x) = 0;
@ -67,37 +67,37 @@
//
// 3. x = [S,Q]Nan or +INF
// Return acoshl(x) = x + x;
//
//
// 4. 'Near 1': 1 < x < 1+1/8
// Return acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)),
// Return acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)),
// where y = x - 1, P(y)/Q(y) - rational approximation
//
// 5. 'Huges': x > 0.5*2^64
// Return acoshl(x) = (logl(2*x-1));
//
//
// 6. 'Main path': 1+1/8 < x < 0.5*2^64
// b_hi + b_lo = x + sqrt(x^2 - 1);
// acoshl(x) = logl_special(b_hi, b_lo);
//
// Algorithm description
//
// Algorithm description
//==============================================================
//
// I. Near 1 path algorithm
// **************************************************************
// The formula is acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)),
// The formula is acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)),
// where y = x - 1, P(y)/Q(y) - rational approximation
//
// 1) y = x - 1, y2 = 2 * y
//
// 2) Compute in parallel sqrtl(2*y) and P(y)/Q(y)
// a) sqrtl computation method described below (main path algorithm, item 2))
// As result we obtain (gg+gl) - multiprecision result
// As result we obtain (gg+gl) - multiprecision result
// as pair of double extended values
// b) P(y) and Q(y) calculated without any extra precision manipulations
// c) P/Q division:
// y = frcpa(Q) initial approximation of 1/Q
// z = P*y initial approximation of P/Q
//
//
// e = 1 - b*y
// e2 = e + e^2
// e1 = e^2
@ -121,7 +121,7 @@
// b) res = ((((gl + ll) + lh) + hl) + hh) + gg;
// (exactly in this order)
//
// II. Main path algorithm
// II. Main path algorithm
// ( thanks to Peter Markstein for the idea of sqrt(x^2+1) computation! )
// **********************************************************************
//
@ -130,19 +130,19 @@
// 1) m2 = (m2_hi+m2_lo) = x^2-1 obtaining
// ------------------------------------
// m2_hi = x2_hi - 1, where x2_hi = x * x;
// m2_lo = x2_lo + p1_lo, where
// x2_lo = FMS(x*x-x2_hi),
// m2_lo = x2_lo + p1_lo, where
// x2_lo = FMS(x*x-x2_hi),
// p1_lo = (1 + m2_hi) - x2_hi;
//
// 2) g = (g_hi+g_lo) = sqrt(m2) = sqrt(m2_hi+m2_lo)
// ----------------------------------------------
// r = invsqrt(m2_hi) (8-bit reciprocal square root approximation);
// g = m2_hi * r (first 8 bit-approximation of sqrt);
//
//
// h = 0.5 * r;
// e = 0.5 - g * h;
// g = g * e + g (second 16 bit-approximation of sqrt);
//
//
// h = h * e + h;
// e = 0.5 - g * h;
// g = g * e + g (third 32 bit-approximation of sqrt);
@ -150,7 +150,7 @@
// h = h * e + h;
// e = 0.5 - g * h;
// g_hi = g * e + g (fourth 64 bit-approximation of sqrt);
//
//
// Remainder computation:
// h = h * e + h;
// d = (m2_hi - g_hi * g_hi) + m2_lo;
@ -160,15 +160,15 @@
// -------------------------------------------------------------------
// b_hi = (g_hi + x) + gl;
// b_lo = (x - b_hi) + g_hi + gl;
//
//
// Now we pass b presented as sum b_hi + b_lo to special version
// of logl function which accepts a pair of arguments as
// multiprecision value.
//
// multiprecision value.
//
// Special log algorithm overview
// ================================
// Here we use a table lookup method. The basic idea is that in
// order to compute logl(Arg) for an argument Arg in [1,2),
// order to compute logl(Arg) for an argument Arg in [1,2),
// we construct a value G such that G*Arg is close to 1 and that
// logl(1/G) is obtainable easily from a table of values calculated
// beforehand. Thus
@ -198,7 +198,7 @@
// G := G_1 * G_2 * G_3
// r := (G * S_hi - 1) + G * S_lo
//
// These G_j's have the property that the product is exactly
// These G_j's have the property that the product is exactly
// representable and that |r| < 2^(-12) as a result.
//
// Step 2: Approximation
@ -217,11 +217,11 @@
//
// Registers used
//==============================================================
// Floating Point registers used:
// Floating Point registers used:
// f8, input
// f32 -> f95 (64 registers)
// General registers used:
// General registers used:
// r32 -> r67 (36 registers)
// Predicate registers used:
@ -229,15 +229,15 @@
// p7 for 'NaNs, Inf' path
// p8 for 'near 1' path
// p9 for 'huges' path
// p10 for x = 1
// p10 for x = 1
// p11 for x < 1
//
//*********************************************************************
// IEEE Special Conditions:
//
// acoshl(+inf) = +inf
// acoshl(-inf) = QNaN
// acoshl(1) = 0
// acoshl(-inf) = QNaN
// acoshl(1) = 0
// acoshl(x<1) = QNaN
// acoshl(SNaN) = QNaN
// acoshl(QNaN) = QNaN
@ -245,38 +245,38 @@
// Data tables
//==============================================================
RODATA
.align 64
// Near 1 path rational approximation coefficients
LOCAL_OBJECT_START(Poly_P)
data8 0xB0978143F695D40F, 0x3FF1 // .84205539791447100108478906277453574946e-4
data8 0xB9800D841A8CAD29, 0x3FF6 // .28305085180397409672905983082168721069e-2
data8 0xC889F455758C1725, 0x3FF9 // .24479844297887530847660233111267222945e-1
data8 0x9BE1DFF006F45F12, 0x3FFB // .76114415657565879842941751209926938306e-1
data8 0x9E34AF4D372861E0, 0x3FFB // .77248925727776366270605984806795850504e-1
data8 0xF3DC502AEE14C4AE, 0x3FA6 // .3077953476682583606615438814166025592e-26
data8 0xB0978143F695D40F, 0x3FF1 // .84205539791447100108478906277453574946e-4
data8 0xB9800D841A8CAD29, 0x3FF6 // .28305085180397409672905983082168721069e-2
data8 0xC889F455758C1725, 0x3FF9 // .24479844297887530847660233111267222945e-1
data8 0x9BE1DFF006F45F12, 0x3FFB // .76114415657565879842941751209926938306e-1
data8 0x9E34AF4D372861E0, 0x3FFB // .77248925727776366270605984806795850504e-1
data8 0xF3DC502AEE14C4AE, 0x3FA6 // .3077953476682583606615438814166025592e-26
LOCAL_OBJECT_END(Poly_P)
//
LOCAL_OBJECT_START(Poly_Q)
data8 0xF76E3FD3C7680357, 0x3FF1 // .11798413344703621030038719253730708525e-3
data8 0xD107D2E7273263AE, 0x3FF7 // .63791065024872525660782716786703188820e-2
data8 0xB609BE5CDE206AEF, 0x3FFB // .88885771950814004376363335821980079985e-1
data8 0xF7DEACAC28067C8A, 0x3FFD // .48412074662702495416825113623936037072302
data8 0x8F9BE5890CEC7E38, 0x3FFF // 1.1219450873557867470217771071068369729526
data8 0xED4F06F3D2BC92D1, 0x3FFE // .92698710873331639524734537734804056798748
data8 0xF76E3FD3C7680357, 0x3FF1 // .11798413344703621030038719253730708525e-3
data8 0xD107D2E7273263AE, 0x3FF7 // .63791065024872525660782716786703188820e-2
data8 0xB609BE5CDE206AEF, 0x3FFB // .88885771950814004376363335821980079985e-1
data8 0xF7DEACAC28067C8A, 0x3FFD // .48412074662702495416825113623936037072302
data8 0x8F9BE5890CEC7E38, 0x3FFF // 1.1219450873557867470217771071068369729526
data8 0xED4F06F3D2BC92D1, 0x3FFE // .92698710873331639524734537734804056798748
LOCAL_OBJECT_END(Poly_Q)
// Q coeffs
// Q coeffs
LOCAL_OBJECT_START(Constants_Q)
data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
LOCAL_OBJECT_END(Constants_Q)
// Z1 - 16 bit fixed
@ -391,7 +391,7 @@ data4 0x3F71D488,0x3D693B9D
data8 0xBE049391B6B7C239
LOCAL_OBJECT_END(Constants_G_H_h2)
// G3 and H3 - IEEE single and h3 - IEEE double
// G3 and H3 - IEEE single and h3 - IEEE double
LOCAL_OBJECT_START(Constants_G_H_h3)
data4 0x3F7FFC00,0x38800100
data8 0x3D355595562224CD
@ -481,16 +481,16 @@ FR_QQ3 = f41
FR_QQ4 = f42
FR_QQ5 = f43
FR_Q1 = f44
FR_Q2 = f45
FR_Q3 = f46
FR_Q4 = f47
FR_Q1 = f44
FR_Q2 = f45
FR_Q3 = f46
FR_Q4 = f47
FR_Half = f48
FR_Two = f49
FR_log2_hi = f50
FR_log2_lo = f51
FR_log2_hi = f50
FR_log2_lo = f51
FR_X2 = f52
@ -512,14 +512,14 @@ FR_XM12 = f64
// Special logl registers
FR_XLog_Hi = f65
FR_XLog_Lo = f66
FR_XLog_Hi = f65
FR_XLog_Lo = f66
FR_Y_hi = f67
FR_Y_hi = f67
FR_Y_lo = f68
FR_S_hi = f69
FR_S_lo = f70
FR_S_hi = f69
FR_S_lo = f70
FR_poly_lo = f71
FR_poly_hi = f72
@ -530,19 +530,19 @@ FR_h = f75
FR_G2 = f76
FR_H2 = f77
FR_h2 = f78
FR_h2 = f78
FR_r = f79
FR_rsq = f80
FR_rcub = f81
FR_r = f79
FR_rsq = f80
FR_rcub = f81
FR_float_N = f82
FR_float_N = f82
FR_G3 = f83
FR_H3 = f84
FR_h3 = f85
FR_G3 = f83
FR_H3 = f84
FR_h3 = f85
FR_2_to_minus_N = f86
FR_2_to_minus_N = f86
// Near 1 registers
@ -561,7 +561,7 @@ FR_QV3 = f75
FR_QV2 = f76
FR_Y0 = f77
FR_Q0 = f78
FR_Q0 = f78
FR_E0 = f79
FR_E2 = f80
FR_E1 = f81
@ -601,23 +601,23 @@ GR_Poly_P = r37
GR_Poly_Q = r38
// Special logl registers
GR_Index1 = r39
GR_Index2 = r40
GR_signif = r41
GR_X_0 = r42
GR_X_1 = r43
GR_X_2 = r44
GR_Index1 = r39
GR_Index2 = r40
GR_signif = r41
GR_X_0 = r42
GR_X_1 = r43
GR_X_2 = r44
GR_minus_N = r45
GR_Z_1 = r46
GR_Z_2 = r47
GR_N = r48
GR_Bias = r49
GR_M = r50
GR_Index3 = r51
GR_exp_2tom80 = r52
GR_exp_mask = r53
GR_exp_2tom7 = r54
GR_ad_ln10 = r55
GR_Z_1 = r46
GR_Z_2 = r47
GR_N = r48
GR_Bias = r49
GR_M = r50
GR_Index3 = r51
GR_exp_2tom80 = r52
GR_exp_mask = r53
GR_exp_2tom7 = r54
GR_ad_ln10 = r55
GR_ad_tbl_1 = r56
GR_ad_tbl_2 = r57
GR_ad_tbl_3 = r58
@ -652,29 +652,29 @@ GLOBAL_LIBM_ENTRY(acoshl)
addl GR_Poly_Q = @ltoff(Poly_Q), gp // Address of Q-coeff table
fma.s1 FR_X2 = FR_Arg, FR_Arg, f0 // Obtain x^2
addl GR_Poly_P = @ltoff(Poly_P), gp // Address of P-coeff table
};;
};;
{ .mfi
{ .mfi
getf.d GR_Arg = FR_Arg // get arument as double (int64)
fma.s0 FR_Two = f1, f1, f1 // construct 2.0
addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp // logl tables
}
{ .mlx
nop.m 0
{ .mlx
nop.m 0
movl GR_TwoP63 = 0x43E8000000000000 // 0.5*2^63 (huge arguments)
};;
};;
{ .mfi
{ .mfi
ld8 GR_Poly_P = [GR_Poly_P] // get actual P-coeff table address
fcmp.eq.s1 p10, p0 = FR_Arg, f1 // if arg == 1 (return 0)
nop.i 0
}
{ .mlx
{ .mlx
ld8 GR_Poly_Q = [GR_Poly_Q] // get actual Q-coeff table address
movl GR_OneP125 = 0x3FF2000000000000 // 1.125 (near 1 path bound)
};;
{ .mfi
{ .mfi
ld8 GR_ad_z_1 = [GR_ad_z_1] // Get pointer to Constants_Z_1
fclass.m p7,p0 = FR_Arg, 0xe3 // if arg NaN inf
cmp.le p9, p0 = GR_TwoP63, GR_Arg // if arg > 0.5*2^63 ('huges')
@ -683,31 +683,31 @@ GLOBAL_LIBM_ENTRY(acoshl)
cmp.ge p8, p0 = GR_OneP125, GR_Arg // if arg<1.125 -near 1 path
fms.s1 FR_XM1 = FR_Arg, f1, f1 // X0 = X-1 (for near 1 path)
(p11) br.cond.spnt acoshl_lt_pone // error branch (less than 1)
};;
};;
{ .mmi
{ .mmi
setf.exp FR_Half = GR_Half // construct 0.5
(p9) setf.s FR_XLog_Lo = r0 // Low of logl arg=0 (Huges path)
mov GR_exp_mask = 0x1FFFF // Create exponent mask
};;
};;
{ .mmf
{ .mmf
(p8) ldfe FR_PP5 = [GR_Poly_P],16 // Load P5
(p8) ldfe FR_QQ5 = [GR_Poly_Q],16 // Load Q5
fms.s1 FR_M2 = FR_X2, f1, f1 // m2 = x^2 - 1
};;
{ .mfi
{ .mfi
(p8) ldfe FR_QQ4 = [GR_Poly_Q],16 // Load Q4
fms.s1 FR_M2L = FR_Arg, FR_Arg, FR_X2 // low part of
fms.s1 FR_M2L = FR_Arg, FR_Arg, FR_X2 // low part of
// m2 = fma(X*X - m2)
add GR_ad_tbl_1 = 0x040, GR_ad_z_1 // Point to Constants_G_H_h1
}
{ .mfb
(p8) ldfe FR_PP4 = [GR_Poly_P],16 // Load P4
(p8) ldfe FR_PP4 = [GR_Poly_P],16 // Load P4
(p7) fma.s0 FR_Res = FR_Arg,f1,FR_Arg // r = a + a (Nan, Inf)
(p7) br.ret.spnt b0 // return (Nan, Inf)
};;
};;
{ .mfi
(p8) ldfe FR_PP3 = [GR_Poly_P],16 // Load P3
@ -719,9 +719,9 @@ GLOBAL_LIBM_ENTRY(acoshl)
(p9) fms.s1 FR_XLog_Hi = FR_Two, FR_Arg, f1 // Hi of log arg = 2*X-1
(p9) br.cond.spnt huges_logl // special version of log
}
;;
;;
{ .mfi
{ .mfi
(p8) ldfe FR_PP2 = [GR_Poly_P],16 // Load P2
(p8) fma.s1 FR_2XM1 = FR_Two, FR_XM1, f0 // 2X0 = 2 * X0
add GR_ad_z_2 = 0x140, GR_ad_z_1 // Point to Constants_Z_2
@ -729,18 +729,18 @@ GLOBAL_LIBM_ENTRY(acoshl)
{ .mfb
(p8) ldfe FR_QQ2 = [GR_Poly_Q],16 // Load Q2
(p10) fma.s0 FR_Res = f0,f1,f0 // r = 0 (arg = 1)
(p10) br.ret.spnt b0 // return (arg = 1)
};;
(p10) br.ret.spnt b0 // return (arg = 1)
};;
{ .mmi
{ .mmi
(p8) ldfe FR_PP1 = [GR_Poly_P],16 // Load P1
(p8) ldfe FR_QQ1 = [GR_Poly_Q],16 // Load Q1
add GR_ad_tbl_2 = 0x180, GR_ad_z_1 // Point to Constants_G_H_h2
}
;;
{ .mfi
(p8) ldfe FR_PP0 = [GR_Poly_P] // Load P0
{ .mfi
(p8) ldfe FR_PP0 = [GR_Poly_P] // Load P0
fma.s1 FR_Tmp = f1, f1, FR_M2 // Tmp = 1 + m2
add GR_ad_tbl_3 = 0x280, GR_ad_z_1 // Point to Constants_G_H_h3
}
@ -748,17 +748,17 @@ GLOBAL_LIBM_ENTRY(acoshl)
(p8) ldfe FR_QQ0 = [GR_Poly_Q]
nop.f 0
(p8) br.cond.spnt near_1 // near 1 path
};;
{ .mfi
};;
{ .mfi
ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi
nop.f 0
mov GR_Bias = 0x0FFFF // Create exponent bias
};;
{ .mfi
{ .mfi
nop.m 0
frsqrta.s1 FR_Rcp, p0 = FR_M2 // Rcp = 1/m2 reciprocal appr.
nop.i 0
};;
};;
{ .mfi
ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo
@ -773,7 +773,7 @@ GLOBAL_LIBM_ENTRY(acoshl)
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_HH = FR_Half, FR_Rcp, f0 // h = 0.5 * Rcp
nop.i 0
};;
@ -783,14 +783,14 @@ GLOBAL_LIBM_ENTRY(acoshl)
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_M2L = FR_Tmp, f1, FR_M2L // low part of m2 = Tmp+m2l
nop.i 0
};;
{ .mfi
ldfe FR_Q2 = [GR_ad_q],16 // Load Q2
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
// 16 bit Newton Raphson iteration
nop.i 0
}
@ -807,7 +807,7 @@ GLOBAL_LIBM_ENTRY(acoshl)
};;
{ .mfi
nop.m 0
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
// 32 bit Newton Raphson iteration
nop.i 0
}
@ -825,7 +825,7 @@ GLOBAL_LIBM_ENTRY(acoshl)
{ .mfi
nop.m 0
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
// 64 bit Newton Raphson iteration
nop.i 0
}
@ -920,7 +920,7 @@ GLOBAL_LIBM_ENTRY(acoshl)
{ .mfi
nop.m 0
nop.f 0
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
};;
{ .mfi
@ -952,7 +952,7 @@ GLOBAL_LIBM_ENTRY(acoshl)
pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1 * Z_2
};;
// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!)
// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!)
// BECAUSE OF POSSIBLE 10 CLOCKS STALL!
// (Just nops added - nothing to do here)
@ -1093,7 +1093,7 @@ GLOBAL_LIBM_ENTRY(acoshl)
{ .mfi
nop.m 0
fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo
fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo
// Y_lo=poly_hi+poly_lo
nop.i 0
};;
@ -1166,7 +1166,7 @@ huges_logl:
{ .mmi
ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo
sub GR_N = GR_N, GR_Bias
sub GR_N = GR_N, GR_Bias
mov GR_exp_2tom80 = 0x0ffaf // Exponent of 2^-80
};;
@ -1185,7 +1185,7 @@ huges_logl:
{ .mmi
ldfe FR_Q2 = [GR_ad_q],16 // Load Q2
nop.m 0
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
};;
{ .mmi
@ -1218,7 +1218,7 @@ huges_logl:
pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1*Z_2
};;
// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!)
// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!)
// BECAUSE OF POSSIBLE 10 CLOCKS STALL!
// (Just nops added - nothing to do here)
@ -1344,7 +1344,7 @@ huges_logl:
};;
{ .mfi
nop.m 0
fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo=poly_hi+poly_lo
fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo=poly_hi+poly_lo
nop.i 0
};;
{ .mfb
@ -1356,279 +1356,279 @@ huges_logl:
// NEAR ONE INTERVAL
near_1:
{ .mfi
nop.m 0
{ .mfi
nop.m 0
frsqrta.s1 FR_Rcp, p0 = FR_2XM1 // Rcp = 1/x reciprocal appr. &SQRT&
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
{ .mfi
nop.m 0
fma.s1 FR_PV6 = FR_PP5, FR_XM1, FR_PP4 // pv6 = P5*xm1+P4 $POLY$
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_QV6 = FR_QQ5, FR_XM1, FR_QQ4 // qv6 = Q5*xm1+Q4 $POLY$
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
{ .mfi
nop.m 0
fma.s1 FR_PV4 = FR_PP3, FR_XM1, FR_PP2 // pv4 = P3*xm1+P2 $POLY$
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_QV4 = FR_QQ3, FR_XM1, FR_QQ2 // qv4 = Q3*xm1+Q2 $POLY$
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
{ .mfi
nop.m 0
fma.s1 FR_XM12 = FR_XM1, FR_XM1, f0 // xm1^2 = xm1 * xm1 $POLY$
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
{ .mfi
nop.m 0
fma.s1 FR_PV2 = FR_PP1, FR_XM1, FR_PP0 // pv2 = P1*xm1+P0 $POLY$
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_QV2 = FR_QQ1, FR_XM1, FR_QQ0 // qv2 = Q1*xm1+Q0 $POLY$
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
fma.s1 FR_GG = FR_Rcp, FR_2XM1, f0 // g = Rcp * x &SQRT&
nop.i 0
{ .mfi
nop.m 0
fma.s1 FR_GG = FR_Rcp, FR_2XM1, f0 // g = Rcp * x &SQRT&
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_HH = FR_Half, FR_Rcp, f0 // h = 0.5 * Rcp &SQRT&
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
{ .mfi
nop.m 0
fma.s1 FR_PV3 = FR_XM12, FR_PV6, FR_PV4//pv3=pv6*xm1^2+pv4 $POLY$
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_QV3 = FR_XM12, FR_QV6, FR_QV4//qv3=qv6*xm1^2+qv4 $POLY$
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
{ .mfi
nop.m 0
fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h &SQRT&
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
{ .mfi
nop.m 0
fma.s1 FR_PP = FR_XM12, FR_PV3, FR_PV2 //pp=pv3*xm1^2+pv2 $POLY$
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_QQ = FR_XM12, FR_QV3, FR_QV2 //qq=qv3*xm1^2+qv2 $POLY$
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT&
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT&
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
frcpa.s1 FR_Y0,p0 = f1,FR_QQ // y = frcpa(b) #DIV#
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g*h &SQRT&
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_Q0 = FR_PP,FR_Y0,f0 // q = a*y #DIV#
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fnma.s1 FR_E0 = FR_Y0,FR_QQ,f1 // e = 1 - b*y #DIV#
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT&
nop.i 0
nop.m 0
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT&
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT&
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_E2 = FR_E0,FR_E0,FR_E0 // e2 = e+e^2 #DIV#
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_E1 = FR_E0,FR_E0,f0 // e1 = e^2 #DIV#
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h &SQRT&
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT&
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_Y1 = FR_Y0,FR_E2,FR_Y0 // y1 = y+y*e2 #DIV#
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_E3 = FR_E1,FR_E1,FR_E0 // e3 = e+e1^2 #DIV#
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_GG = FR_DD, FR_HH, FR_GG // g = d * h + g &SQRT&
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT&
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_Y2 = FR_Y1,FR_E3,FR_Y0 // y2 = y+y1*e3 #DIV#
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fnma.s1 FR_R0 = FR_QQ,FR_Q0,FR_PP // r = a-b*q #DIV#
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT&
nop.i 0
nop.m 0
fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT&
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fnma.s1 FR_E4 = FR_QQ,FR_Y2,f1 // e4 = 1-b*y2 #DIV#
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_X_Hi = FR_R0,FR_Y2,FR_Q0 // x = q+r*y2 #DIV#
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_GL = FR_DD, FR_HH, f0 // gl = d * h &SQRT&
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_Y3 = FR_Y2,FR_E4,FR_Y2 // y3 = y2+y2*e4 #DIV#
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fnma.s1 FR_R1 = FR_QQ,FR_X_Hi,FR_PP // r1 = a-b*x #DIV#
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_HH = FR_GG, FR_X_Hi, f0 // hh = gg * x_hi
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_LH = FR_GL, FR_X_Hi, f0 // lh = gl * x_hi
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_X_lo = FR_R1,FR_Y3,f0 // x_lo = r1*y3 #DIV#
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_LL = FR_GL, FR_X_lo, f0 // ll = gl*x_lo
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_HL = FR_GG, FR_X_lo, f0 // hl = gg * x_lo
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fms.s1 FR_Res = FR_GL, f1, FR_LL // res = gl + ll
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fms.s1 FR_Res = FR_Res, f1, FR_LH // res = res + lh
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fms.s1 FR_Res = FR_Res, f1, FR_HL // res = res + hl
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fms.s1 FR_Res = FR_Res, f1, FR_HH // res = res + hh
nop.i 0
nop.i 0
};;
{ .mfb
nop.m 0
nop.m 0
fma.s0 FR_Res = FR_Res, f1, FR_GG // result = res + gg
br.ret.sptk b0 // Exit for near 1 path
};;
@ -1639,9 +1639,9 @@ near_1:
acoshl_lt_pone:
{ .mfi
nop.m 0
nop.m 0
fmerge.s FR_Arg_X = FR_Arg, FR_Arg
nop.i 0
nop.i 0
};;
{ .mfb
mov GR_Parameter_TAG = 135
@ -1679,7 +1679,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mib
stfe [GR_Parameter_X] = FR_Arg_X // Parameter 1 to stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfe [GR_Parameter_Y] = FR_Res // Parameter 3 to stack

View File

@ -690,70 +690,70 @@ F_CS6 = f36
F_CS7 = f37
F_CS8 = f38
F_CS9 = f39
F_S23 = f40
F_S45 = f41
F_S67 = f42
F_S89 = f43
F_S25 = f44
F_S69 = f45
F_S29 = f46
F_X2 = f47
F_X4 = f48
F_TSQRT = f49
F_DTX = f50
F_R = f51
F_R2 = f52
F_R3 = f53
F_R4 = f54
F_S23 = f40
F_S45 = f41
F_S67 = f42
F_S89 = f43
F_S25 = f44
F_S69 = f45
F_S29 = f46
F_X2 = f47
F_X4 = f48
F_TSQRT = f49
F_DTX = f50
F_R = f51
F_R2 = f52
F_R3 = f53
F_R4 = f54
F_C3 = f55
F_C5 = f56
F_C7 = f57
F_C9 = f58
F_P79 = f59
F_P35 = f60
F_P39 = f61
F_C3 = f55
F_C5 = f56
F_C7 = f57
F_C9 = f58
F_P79 = f59
F_P35 = f60
F_P39 = f61
F_ATHI = f62
F_ATLO = f63
F_ATHI = f62
F_ATLO = f63
F_T1 = f64
F_Y = f65
F_Y2 = f66
F_ANDMASK = f67
F_ORMASK = f68
F_S = f69
F_05 = f70
F_SQRT_1S2 = f71
F_DS = f72
F_Z = f73
F_1T2 = f74
F_DZ = f75
F_ZE = f76
F_YZ = f77
F_Y1S2 = f78
F_Y1S2X = f79
F_1X = f80
F_ST = f81
F_1T2_ST = f82
F_TSS = f83
F_Y1S2X2 = f84
F_DZ_TERM = f85
F_DTS = f86
F_DS2X = f87
F_T2 = f88
F_ZY1S2S = f89
F_Y1S2_1X = f90
F_T1 = f64
F_Y = f65
F_Y2 = f66
F_ANDMASK = f67
F_ORMASK = f68
F_S = f69
F_05 = f70
F_SQRT_1S2 = f71
F_DS = f72
F_Z = f73
F_1T2 = f74
F_DZ = f75
F_ZE = f76
F_YZ = f77
F_Y1S2 = f78
F_Y1S2X = f79
F_1X = f80
F_ST = f81
F_1T2_ST = f82
F_TSS = f83
F_Y1S2X2 = f84
F_DZ_TERM = f85
F_DTS = f86
F_DS2X = f87
F_T2 = f88
F_ZY1S2S = f89
F_Y1S2_1X = f90
F_TS = f91
F_PI2_LO = f92
F_PI2_HI = f93
F_S19 = f94
F_INV1T2_2 = f95
F_CORR = f96
F_DZ0 = f97
F_PI2_LO = f92
F_PI2_HI = f93
F_S19 = f94
F_INV1T2_2 = f95
F_CORR = f96
F_DZ0 = f97
F_C11 = f98
F_C13 = f99
F_C11 = f98
F_C13 = f99
F_C15 = f100
F_C17 = f101
F_P1113 = f102

View File

@ -40,9 +40,9 @@
// History
//==============================================================
// 02/02/00 Initial version
// 06/28/00 Improved speed
// 06/28/00 Improved speed
// 06/31/00 Changed register allocation because of some duplicate macros
// moved nan exit bundle up to gain a cycle.
// moved nan exit bundle up to gain a cycle.
// 08/08/00 Improved speed by avoiding SIR flush.
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
@ -53,13 +53,13 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align
// Description
//=========================================
// The asinf function computes the arc sine of x in the range [-pi/2,+pi/2].
// A domain error occurs for arguments not in the range [-1,+1].
// asinf(+-0) returns +-0
// asinf(x) returns a Nan and raises the invalid exception for |x| >1
// asinf(x) returns a Nan and raises the invalid exception for |x| >1
// The acosf function returns the arc cosine in the range [0, +pi] radians.
// A domain error occurs for arguments not in the range [-1,+1].
@ -252,351 +252,351 @@ LOCAL_OBJECT_END(asinf_coeff_2_table)
.section .text
GLOBAL_LIBM_ENTRY(asinf)
// Load the addresses of the two tables.
// Then, load the coefficients and other constants.
{ .mfi
{ .mfi
alloc r32 = ar.pfs,1,8,4,0
fnma.s1 asinf_t = f8,f8,f1
dep.z ASINF_GR_1by2 = 0x3f,24,8 // 0x3f000000
}
{ .mfi
}
{ .mfi
addl ASINF_Addr1 = @ltoff(asinf_coeff_1_table),gp
fma.s1 asinf_x2 = f8,f8,f0
addl ASINF_Addr2 = @ltoff(asinf_coeff_2_table),gp ;;
}
{ .mfi
{ .mfi
ld8 ASINF_Addr1 = [ASINF_Addr1]
fmerge.s asinf_abs_x = f1,f8
dep ASINF_GR_3by2 = -1,r0,22,8 // 0x3fc00000
}
{ .mlx
}
{ .mlx
nop.m 999
movl ASINF_GR_5by2 = 0x40200000;;
}
{ .mfi
{ .mfi
setf.s asinf_1by2 = ASINF_GR_1by2
fmerge.s asinf_sgn_x = f8,f1
nop.i 999
}
{ .mfi
}
{ .mfi
ld8 ASINF_Addr2 = [ASINF_Addr2]
nop.f 0
nop.i 999;;
}
{ .mfi
{ .mfi
setf.s asinf_5by2 = ASINF_GR_5by2
fcmp.lt.s1 p11,p12 = f8,f0
nop.i 999;;
}
{ .mmf
{ .mmf
ldfpd asinf_coeff_P1,asinf_coeff_P4 = [ASINF_Addr1],16
setf.s asinf_3by2 = ASINF_GR_3by2
fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan
}
{ .mfi
{ .mfi
ldfpd asinf_coeff_P7,asinf_coeff_P6 = [ASINF_Addr1],16
fma.s1 asinf_t2 = asinf_t,asinf_t,f0
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd asinf_coeff_P3,asinf_coeff_P8 = [ASINF_Addr2],16
fma.s1 asinf_x4 = asinf_x2,asinf_x2,f0
nop.i 999;;
}
{ .mfi
{ .mfi
ldfpd asinf_coeff_P9,asinf_const_sqrt2by2 = [ASINF_Addr1]
fclass.m.unc p10,p0 = f8, 0x07 //@zero
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd asinf_coeff_P5,asinf_coeff_P2 = [ASINF_Addr2],16
fma.s1 asinf_x3 = f8,asinf_x2,f0
nop.i 999;;
}
{ .mfi
{ .mfi
ldfd asinf_const_piby2 = [ASINF_Addr2]
frsqrta.s1 asinf_B,p0 = asinf_t
nop.i 999
}
{ .mfb
}
{ .mfb
nop.m 999
(p8) fma.s.s0 f8 = f8,f1,f0
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
{ .mfb
{ .mfb
nop.m 999
fcmp.eq.s1 p6,p0 = asinf_abs_x,f1
(p10) br.ret.spnt b0 ;; // Exit if x=0
}
{ .mfi
}
{ .mfi
nop.m 999
fcmp.gt.s1 p9,p0 = asinf_abs_x,f1
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 asinf_x8 = asinf_x4,asinf_x4,f0
nop.i 999
}
{ .mfb
}
{ .mfb
nop.m 999
fma.s1 asinf_t4 = asinf_t2,asinf_t2,f0
(p6) br.cond.spnt ASINF_ABS_ONE ;; // Branch if |x|=1
}
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 asinf_x5 = asinf_x2,asinf_x3,f0
nop.i 999
}
{ .mfb
{ .mfb
(p9) mov GR_Parameter_TAG = 62
fma.s1 asinf_yby2 = asinf_t,asinf_1by2,f0
(p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 asinf_Az = asinf_t,asinf_B,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 asinf_B2 = asinf_B,asinf_B,f0
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 asinf_poly_p1 = f8,asinf_coeff_P1,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p1 = asinf_coeff_P1,asinf_t,f1
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 asinf_poly_p3 = asinf_coeff_P4,asinf_x2,asinf_coeff_P3
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p6 = asinf_coeff_P7,asinf_t,asinf_coeff_P6
nop.i 999;;
}
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 asinf_poly_p7 = asinf_x2,asinf_coeff_P8,asinf_coeff_P7
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p2 = asinf_coeff_P3,asinf_t,asinf_coeff_P2
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 asinf_poly_p5 = asinf_x2,asinf_coeff_P6,asinf_coeff_P5
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p4 = asinf_coeff_P5,asinf_t,asinf_coeff_P4
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.d.s1 asinf_x11 = asinf_x8,asinf_x3,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fnma.s1 asinf_dz = asinf_B2,asinf_yby2,asinf_1by2
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 asinf_poly_p1a = asinf_x2,asinf_poly_p1,f8
nop.i 999
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p8 = asinf_coeff_P9,asinf_t,asinf_coeff_P8
nop.i 999;;
}
// Get the absolute value of x and determine the region in which x lies
{ .mfi
{ .mfi
nop.m 999
fcmp.le.s1 p7,p8 = asinf_abs_x,asinf_const_sqrt2by2
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 asinf_poly_p2 = asinf_x2,asinf_poly_p3,asinf_coeff_P2
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 asinf_poly_p7a = asinf_x4,asinf_coeff_P9,asinf_poly_p7
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p2a = asinf_2poly_p2,asinf_t2,asinf_2poly_p1
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.s1 asinf_sgnx_t4 = asinf_sgn_x,asinf_t4,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p8) fma.s1 asinf_2poly_p4a = asinf_2poly_p6,asinf_t2,asinf_2poly_p4
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.s1 asinf_Sz = asinf_5by2,asinf_dz,asinf_3by2
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p8) fma.s1 asinf_d2z = asinf_dz,asinf_dz,f0
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.s1 asinf_sgn_x_piby2 = asinf_sgn_x,asinf_const_piby2,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p7) fma.d.s1 asinf_poly_Ax = asinf_x5,asinf_poly_p2,asinf_poly_p1a
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
(p7) fma.d.s1 asinf_poly_Bx = asinf_x4,asinf_poly_p7a,asinf_poly_p5
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p8) fma.s1 asinf_sgnx_2poly_p2 = asinf_sgn_x,asinf_2poly_p2a,f0
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal
nop.i 999
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.s1 asinf_2poly_p4b = asinf_2poly_p8,asinf_t4,asinf_2poly_p4a
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.s1 asinf_Fz = asinf_d2z,asinf_Sz,asinf_dz
nop.i 999;;
}
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.d.s1 asinf_Pt = asinf_2poly_p4b,asinf_sgnx_t4,asinf_sgnx_2poly_p2
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
(p8) fma.d.s1 asinf_z = asinf_Az,asinf_Fz,asinf_Az
nop.i 999;;
}
}
.pred.rel "mutex",p8,p7 //asinf_pred_GTsqrt2by2,asinf_pred_LEsqrt2by2
{ .mfi
{ .mfi
nop.m 999
(p8) fnma.s.s0 f8 = asinf_z,asinf_Pt,asinf_sgn_x_piby2
nop.i 999
}
{ .mfb
}
{ .mfb
nop.m 999
(p7) fma.s.s0 f8 = asinf_x11,asinf_poly_Bx,asinf_poly_Ax
br.ret.sptk b0 ;;
}
}
ASINF_ABS_ONE:
// Here for short exit if |x|=1
{ .mfb
{ .mfb
nop.m 999
fma.s.s0 f8 = asinf_sgn_x,asinf_const_piby2,f0
br.ret.sptk b0
}
}
;;
GLOBAL_LIBM_END(asinf)
// Stack operations when calling error support.
// (1) (2)
// sp -> + psp -> +
// | |
// | | <- GR_Y
// | |
// | <-GR_Y Y2->|
// | |
// | | <- GR_X
// | |
// sp-64 -> + sp -> +
// save ar.pfs save b0
// save gp
// (1) (2)
// sp -> + psp -> +
// | |
// | | <- GR_Y
// | |
// | <-GR_Y Y2->|
// | |
// | | <- GR_X
// | |
// sp-64 -> + sp -> +
// save ar.pfs save b0
// save gp
// Stack operations when calling error support.

View File

@ -687,70 +687,70 @@ F_CS6 = f36
F_CS7 = f37
F_CS8 = f38
F_CS9 = f39
F_S23 = f40
F_S45 = f41
F_S67 = f42
F_S89 = f43
F_S25 = f44
F_S69 = f45
F_S29 = f46
F_X2 = f47
F_X4 = f48
F_TSQRT = f49
F_DTX = f50
F_R = f51
F_R2 = f52
F_R3 = f53
F_R4 = f54
F_S23 = f40
F_S45 = f41
F_S67 = f42
F_S89 = f43
F_S25 = f44
F_S69 = f45
F_S29 = f46
F_X2 = f47
F_X4 = f48
F_TSQRT = f49
F_DTX = f50
F_R = f51
F_R2 = f52
F_R3 = f53
F_R4 = f54
F_C3 = f55
F_C5 = f56
F_C7 = f57
F_C9 = f58
F_P79 = f59
F_P35 = f60
F_P39 = f61
F_C3 = f55
F_C5 = f56
F_C7 = f57
F_C9 = f58
F_P79 = f59
F_P35 = f60
F_P39 = f61
F_ATHI = f62
F_ATLO = f63
F_ATHI = f62
F_ATLO = f63
F_T1 = f64
F_Y = f65
F_Y2 = f66
F_ANDMASK = f67
F_ORMASK = f68
F_S = f69
F_05 = f70
F_SQRT_1S2 = f71
F_DS = f72
F_Z = f73
F_1T2 = f74
F_DZ = f75
F_ZE = f76
F_YZ = f77
F_Y1S2 = f78
F_Y1S2X = f79
F_1X = f80
F_ST = f81
F_1T2_ST = f82
F_TSS = f83
F_Y1S2X2 = f84
F_DZ_TERM = f85
F_DTS = f86
F_DS2X = f87
F_T2 = f88
F_ZY1S2S = f89
F_Y1S2_1X = f90
F_T1 = f64
F_Y = f65
F_Y2 = f66
F_ANDMASK = f67
F_ORMASK = f68
F_S = f69
F_05 = f70
F_SQRT_1S2 = f71
F_DS = f72
F_Z = f73
F_1T2 = f74
F_DZ = f75
F_ZE = f76
F_YZ = f77
F_Y1S2 = f78
F_Y1S2X = f79
F_1X = f80
F_ST = f81
F_1T2_ST = f82
F_TSS = f83
F_Y1S2X2 = f84
F_DZ_TERM = f85
F_DTS = f86
F_DS2X = f87
F_T2 = f88
F_ZY1S2S = f89
F_Y1S2_1X = f90
F_TS = f91
F_PI2_LO = f92
F_PI2_HI = f93
F_S19 = f94
F_INV1T2_2 = f95
F_CORR = f96
F_DZ0 = f97
F_PI2_LO = f92
F_PI2_HI = f93
F_S19 = f94
F_INV1T2_2 = f95
F_CORR = f96
F_DZ0 = f97
F_C11 = f98
F_C13 = f99
F_C11 = f98
F_C13 = f99
F_C15 = f100
F_C17 = f101
F_P1113 = f102

View File

@ -80,9 +80,9 @@
//..
//..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows:
//..A = y * frcpa(x) (so A = (y/x)(1 - beta))
//..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is
//..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is
//..a correction.
//..atan(A) is approximated by a polynomial
//..atan(A) is approximated by a polynomial
//..A + p1 A^3 + p2 A^5 + ... + p10 A^21,
//..atan(G) is approximated as follows:
//..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + g * p1
@ -90,9 +90,9 @@
//..
//..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows:
//..Z = x * frcpa(y) (so Z = (x/y)(1 - beta))
//..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is
//..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is
//..a correction.
//..atan(Z) is approximated by a polynomial
//..atan(Z) is approximated by a polynomial
//..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21,
//..atan(T) is approximated as follows:
//..Let T = (x - Ay)/(y + Ax), atan(T) can be approximated by T + t * p1
@ -103,7 +103,7 @@
//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
//..
//..This polynomial is computed as follows:
//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
//..
//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
@ -112,7 +112,7 @@
//..
//..poly_A4 = p1 * A
//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
//..poly_A5 = p2 + Asq * poly_A5
//..poly_A5 = p2 + Asq * poly_A5
//..poly_A4 = poly_A4 + A5 * poly_A5
//..
//..atan_A = poly_A4 + A11 * poly_A1
@ -132,7 +132,7 @@
//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
//..
//..This polynomial is computed as follows:
//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
//..
//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
@ -141,7 +141,7 @@
//..
//..poly_A4 = p1 * A
//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
//..poly_A5 = p2 + Asq * poly_A5
//..poly_A5 = p2 + Asq * poly_A5
//..poly_A4 = poly_A4 + A5 * poly_A5
//..
//..atan_A = poly_A4 + A11 * poly_A1
@ -154,34 +154,34 @@
//coef_pj, j = 1,2,...,10; atan(A) ~=~ A + p1 A^3 + p2 A^5 + ... + p10 A^21
//
// coef_p1 = -.3333332707155439167401311806315789E+00
// coef_p1 in dbl = BFD5 5555 1219 1621
// coef_p1 in dbl = BFD5 5555 1219 1621
//
// coef_p2 = .1999967670926658391827857030875748E+00
// coef_p2 in dbl = 3FC9 997E 7AFB FF4E
// coef_p2 in dbl = 3FC9 997E 7AFB FF4E
//
// coef_p3 = -.1427989384500152360161563301087296E+00
// coef_p3 in dbl = BFC2 473C 5145 EE38
// coef_p3 in dbl = BFC2 473C 5145 EE38
//
// coef_p4 = .1105852823460720770079031213661163E+00
// coef_p4 in dbl = 3FBC 4F51 2B18 65F5
// coef_p4 in dbl = 3FBC 4F51 2B18 65F5
//
// coef_p5 = -.8811839915595312348625710228448363E-01
// coef_p5 in dbl = BFB6 8EED 6A8C FA32
// coef_p5 in dbl = BFB6 8EED 6A8C FA32
//
// coef_p6 = .6742329836955067042153645159059714E-01
// coef_p6 in dbl = 3FB1 42A7 3D7C 54E3
// coef_p6 in dbl = 3FB1 42A7 3D7C 54E3
//
// coef_p7 = -.4468571068774672908561591262231909E-01
// coef_p7 in dbl = BFA6 E10B A401 393F
// coef_p7 in dbl = BFA6 E10B A401 393F
//
// coef_p8 = .2252333246746511135532726960586493E-01
// coef_p8 in dbl = 3F97 105B 4160 F86B
// coef_p8 in dbl = 3F97 105B 4160 F86B
//
// coef_p9 = -.7303884867007574742501716845542314E-02
// coef_p9 in dbl = BF7D EAAD AA33 6451
// coef_p9 in dbl = BF7D EAAD AA33 6451
//
// coef_p10 = .1109686868355312093949039454619058E-02
// coef_p10 in dbl = 3F52 2E5D 33BC 9BAA
// coef_p10 in dbl = 3F52 2E5D 33BC 9BAA
//
// Special values
@ -354,333 +354,333 @@ LOCAL_OBJECT_END(atan2f_coef_table2)
.section .text
GLOBAL_IEEE754_ENTRY(atan2f)
{ .mfi
{ .mfi
alloc r32 = ar.pfs,1,5,4,0
frcpa.s1 atan2f_Z0,p0 = f1,f8 // Approx to 1/y
nop.i 999
}
{ .mfi
}
{ .mfi
addl atan2f_GR_Addr_1 = @ltoff(atan2f_coef_table1),gp
fma.s1 atan2f_xsq = f9,f9,f0
nop.i 999 ;;
}
{ .mfi
{ .mfi
ld8 atan2f_GR_Addr_1 = [atan2f_GR_Addr_1]
frcpa.s1 atan2f_A0,p0 = f1,f9 // Approx to 1/x
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atan2f_ysq = f8,f8,f0
nop.i 999 ;;
}
{ .mfi
{ .mfi
nop.m 999
fcmp.ge.s1 p8,p9 = f9,f0 // Set p8 if x>=0, p9 if x<0
nop.i 999
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_xy = f9,f8,f0
nop.i 999 ;;
}
{ .mfi
{ .mfi
add atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1
fmerge.s atan2f_sgn_Y = f8,f1
nop.i 999 ;;
}
{ .mmf
}
{ .mmf
ldfpd atan2f_coef_p1,atan2f_coef_p10 = [atan2f_GR_Addr_1],16
ldfpd atan2f_coef_p9,atan2f_coef_p8 = [atan2f_GR_Addr_2],16
fclass.m p10,p0 = f9,0xe7 // Test x @inf|@snan|@qnan|@zero
}
}
;;
{ .mfi
{ .mfi
ldfpd atan2f_coef_p7,atan2f_coef_p6 = [atan2f_GR_Addr_1],16
fma.s1 atan2f_T_denom = atan2f_Z0,atan2f_xsq,f8
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd atan2f_coef_p5,atan2f_coef_p4 = [atan2f_GR_Addr_2],16
fma.s1 atan2f_Z = atan2f_Z0,f9,f0
nop.i 999 ;;
}
{ .mfi
{ .mfi
ldfpd atan2f_coef_p3,atan2f_coef_p2 = [atan2f_GR_Addr_1],16
fma.s1 atan2f_G_denom = atan2f_A0,atan2f_ysq,f9
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd atan2f_const_piby2,atan2f_const_pi = [atan2f_GR_Addr_2],16
fma.s1 atan2f_A = atan2f_A0,f8,f0
nop.i 999 ;;
}
{ .mfi
{ .mfi
ldfpd atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2]
fclass.m p11,p0 = f8,0xe7 // Test y @inf|@snan|@qnan|@zero
nop.i 999
}
{ .mfb
}
{ .mfb
nop.m 999
fnma.s1 atan2f_T_numer = atan2f_Z0,atan2f_xy,f9
(p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on x nan,inf,zero
}
}
// p6 if |y|>|x|, p7 if |x|>=|y| , use xsq and ysq for test
{ .mfi
{ .mfi
nop.m 999
fcmp.gt.s1 p6,p7 = atan2f_ysq,atan2f_xsq
nop.i 999
}
{ .mfb
{ .mfb
nop.m 999
fnma.s1 atan2f_G_numer = atan2f_A0,atan2f_xy,f8
(p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on y nan,inf,zero
}
{ .mfi
{ .mfi
nop.m 999
(p8) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f0,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p9) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f1,f0
nop.i 999 ;;
}
{ .mfi
{ .mfi
nop.m 999
(p6) fnma.s1 atan2f_U = atan2f_Z,f1,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p6) fma.s1 atan2f_Usq = atan2f_Z,atan2f_Z,f0
nop.i 999 ;;
}
}
{ .mfi
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_U = atan2f_A,f1,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_Usq = atan2f_A,atan2f_A,f0
nop.i 999 ;;
}
{ .mfi
{ .mfi
nop.m 999
(p6) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_T_denom
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p6) fma.s1 atan2f_R_denom = atan2f_T_denom,f1,f0
nop.i 999 ;;
}
}
{ .mfi
{ .mfi
nop.m 999
(p7) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_G_denom
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_R_denom = atan2f_G_denom,f1,f0
nop.i 999 ;;
}
}
{ .mfi
{ .mfi
nop.m 999
(p6) fnma.s1 atan2f_R_numer = atan2f_T_numer,f1,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_R_numer = atan2f_G_numer,f1,f0
nop.i 999 ;;
}
}
{ .mfi
{ .mfi
nop.m 999
(p6) fnma.s1 atan2f_p1rnum = atan2f_T_numer,atan2f_coef_p1,f0
nop.i 999 ;;
}
{ .mfi
}
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_p1rnum = atan2f_G_numer,atan2f_coef_p1,f0
nop.i 999 ;;
}
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_U4 = atan2f_Usq,atan2f_Usq,f0
nop.i 999
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u109 = atan2f_Usq,atan2f_coef_p10,atan2f_coef_p9
nop.i 999 ;;
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u87 = atan2f_Usq,atan2f_coef_p8,atan2f_coef_p7
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u65 = atan2f_Usq,atan2f_coef_p6,atan2f_coef_p5
nop.i 999 ;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u43 = atan2f_Usq,atan2f_coef_p4,atan2f_coef_p3
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fnma.s1 atan2f_Q_beta = atan2f_Q1,atan2f_R_denom,f1
nop.i 999 ;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u21 = atan2f_Usq,atan2f_coef_p2,atan2f_coef_p1
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atan2f_r = atan2f_Q1,atan2f_R_numer,f0
nop.i 999 ;;
}
{ .mfi
{ .mfi
nop.m 999
(p6) fma.s1 atan2f_C = atan2f_sgn_Y,atan2f_const_piby2,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_C = atan2f_const_1,atan2f_const_pi,f0
nop.i 999 ;;
}
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_U6 = atan2f_U4,atan2f_Usq,f0
nop.i 999
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_U8 = atan2f_U4,atan2f_U4,f0
nop.i 999 ;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u10to7 = atan2f_U4,atan2f_poly_u109,atan2f_poly_u87
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atan2f_pR = atan2f_p1rnum,atan2f_Q1,f0
nop.i 999 ;;
}
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u6to3 = atan2f_U4,atan2f_poly_u65,atan2f_poly_u43
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atan2f_Q2 = atan2f_Q1,atan2f_Q_beta,atan2f_Q1
nop.i 999 ;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_Q_beta2 = atan2f_Q_beta,atan2f_Q_beta,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atan2f_rsq = atan2f_r,atan2f_r,f0
nop.i 999 ;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u210 = atan2f_Usq,atan2f_poly_u21,f1
nop.i 999 ;;
}
}
{ .mfi
nop.m 999
fcmp.eq.s0 p8,p0 = f8,f9 // Dummy op to set flag on denormal inputs
nop.i 999
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u10to3 = atan2f_U8,atan2f_poly_u10to7,atan2f_poly_u6to3
nop.i 999 ;;
}
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_Q3 = atan2f_Q2,atan2f_Q_beta2,atan2f_Q2
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atan2f_pRC = atan2f_rsq,atan2f_pR,atan2f_C
nop.i 999 ;;
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u10to0 = atan2f_U6,atan2f_poly_u10to3,atan2f_poly_u210
nop.i 999 ;;
}
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atan2f_pQRC = atan2f_R_numer,atan2f_Q3,atan2f_pRC
nop.i 999 ;;
}
}
{ .mfb
{ .mfb
nop.m 999
fma.s.s0 f8 = atan2f_U,atan2f_poly_u10to0,atan2f_pQRC
br.ret.sptk b0 ;;
}
}

View File

@ -1,4 +1,4 @@
.file "atanhl.s"
.file "atanhl.s"
// Copyright (c) 2001 - 2003, Intel Corporation
@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code,and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
//
// History:
// History:
// 09/10/01 Initial version
// 12/11/01 Corrected .restore syntax
// 05/20/02 Cleaned up namespace and sf0 syntax
@ -50,7 +50,7 @@
//
//*********************************************************************
//
// Function: atanhl(x) computes the principal value of the inverse
// Function: atanhl(x) computes the principal value of the inverse
// hyperbolic tangent of x.
//
//*********************************************************************
@ -71,10 +71,10 @@
// IEEE Special Conditions:
//
// atanhl(inf) = QNaN
// atanhl(-inf) = QNaN
// atanhl(+/-0) = +/-0
// atanhl(1) = +inf
// atanhl(-1) = -inf
// atanhl(-inf) = QNaN
// atanhl(+/-0) = +/-0
// atanhl(1) = +inf
// atanhl(-1) = -inf
// atanhl(|x|>1) = QNaN
// atanhl(SNaN) = QNaN
// atanhl(QNaN) = QNaN
@ -96,8 +96,8 @@
// Case atanhl_regular:
//
// Here we use formula atanhl(x) = sign(x)*log1pl(2*|x|/(1-|x|))/2 and
// calculation is subdivided into two stages. The first stage is
// calculating of X = 2*|x|/(1-|x|). The second one is calculating of
// calculation is subdivided into two stages. The first stage is
// calculating of X = 2*|x|/(1-|x|). The second one is calculating of
// sign(x)*log1pl(X)/2. To obtain required accuracy we use precise division
// algorithm, the output of which is a pair of extended precision values that
// approximate the result of division with accuracy higher than working
@ -114,7 +114,7 @@
//
// y = frcpa(b) initial approximation of 1/b
// q = a*y initial approximation of a/b
//
//
// e = 1 - b*y
// e2 = e + e^2
// e1 = e^2
@ -131,12 +131,12 @@
// r1 = a - b*X
// r1 = r1 - b_lo*X
// X_lo = r1*y3 low part of a/b
//
//
// 2. special log1p algorithm overview
// ***********************************
//
// Here we use a table lookup method. The basic idea is that in
// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2),
// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2),
// we construct a value G such that G*Arg is close to 1 and that
// logl(1/G) is obtainable easily from a table of values calculated
// beforehand. Thus
@ -167,7 +167,7 @@
// G := G_1 * G_2 * G_3
// r := (G * S_hi - 1) + G * S_lo
//
// These G_j's have the property that the product is exactly
// These G_j's have the property that the product is exactly
// representable and that |r| < 2^(-12) as a result.
//
// Step 2: Approximation
@ -201,7 +201,7 @@ data8 0x9249249249249249,0x00003FFC // C7
data8 0xCCCCCCCCCCCCCCCD,0x00003FFC // C5
data8 0xAAAAAAAAAAAAAAAA,0x00003FFD // C3
data4 0x3f000000 // 1/2
data4 0x00000000 // pad
data4 0x00000000 // pad
data4 0x00000000
data4 0x00000000
LOCAL_OBJECT_END(Constants_TaylorSeries)
@ -328,7 +328,7 @@ data4 0x3F71D488,0x3D693B9D
data8 0xBE049391B6B7C239
LOCAL_OBJECT_END(Constants_G_H_h2)
// G3 and H3 - IEEE single and h3 - IEEE double
// G3 and H3 - IEEE single and h3 - IEEE double
LOCAL_OBJECT_START(Constants_G_H_h3)
data4 0x3F7FFC00,0x38800100
data8 0x3D355595562224CD
@ -538,78 +538,78 @@ GLOBAL_LIBM_ENTRY(atanhl)
alloc r32 = ar.pfs,0,17,4,0
fnma.s1 FR_Bp = f8,f1,f1 // b = 1 - |arg| (for x>0)
mov GR_ExpMask = 0x1ffff
}
{ .mfi
}
{ .mfi
addl GR_ad_taylor = @ltoff(Constants_TaylorSeries),gp
fma.s1 FR_Bn = f8,f1,f1 // b = 1 - |arg| (for x<0)
mov GR_NearZeroBound = 0xfffa // biased exp of 1/32
};;
{ .mfi
};;
{ .mfi
getf.exp GR_ArgExp = f8
fcmp.lt.s1 p6,p7 = f8,f0 // is negative?
nop.i 0
}
{ .mfi
}
{ .mfi
ld8 GR_ad_taylor = [GR_ad_taylor]
fmerge.s FR_abs_x = f1,f8
nop.i 0
};;
{ .mfi
};;
{ .mfi
nop.m 0
fclass.m p8,p0 = f8,0x1C7 // is arg NaT,Q/SNaN or +/-0 ?
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 FR_x2 = f8,f8,f0
nop.i 0
};;
{ .mfi
};;
{ .mfi
add GR_ad_z_1 = 0x0F0,GR_ad_taylor
fclass.m p9,p0 = f8,0x0a // is arg -denormal ?
add GR_ad_taylor_2 = 0x010,GR_ad_taylor
}
{ .mfi
}
{ .mfi
add GR_ad_05 = 0x080,GR_ad_taylor
nop.f 0
nop.i 0
};;
{ .mfi
};;
{ .mfi
ldfe FR_C17 = [GR_ad_taylor],32
fclass.m p10,p0 = f8,0x09 // is arg +denormal ?
add GR_ad_tbl_1 = 0x040,GR_ad_z_1 // point to Constants_G_H_h1
}
{ .mfb
}
{ .mfb
add GR_ad_z_2 = 0x140,GR_ad_z_1 // point to Constants_Z_2
(p8) fma.s0 f8 = f8,f1,f0 // NaN or +/-0
(p8) br.ret.spnt b0 // exit for Nan or +/-0
};;
{ .mfi
};;
{ .mfi
ldfe FR_C15 = [GR_ad_taylor_2],32
fclass.m p15,p0 = f8,0x23 // is +/-INF ?
add GR_ad_tbl_2 = 0x180,GR_ad_z_1 // point to Constants_G_H_h2
}
{ .mfb
}
{ .mfb
ldfe FR_C13 = [GR_ad_taylor],32
(p9) fnma.s0 f8 = f8,f8,f8 // -denormal
(p9) br.ret.spnt b0 // exit for -denormal
};;
{ .mfi
};;
{ .mfi
ldfe FR_C11 = [GR_ad_taylor_2],32
fcmp.eq.s0 p13,p0 = FR_abs_x,f1 // is |arg| = 1?
nop.i 0
}
{ .mfb
}
{ .mfb
ldfe FR_C9 = [GR_ad_taylor],32
(p10) fma.s0 f8 = f8,f8,f8 // +denormal
(p10) br.ret.spnt b0 // exit for +denormal
};;
{ .mfi
};;
{ .mfi
ldfe FR_C7 = [GR_ad_taylor_2],32
(p6) frcpa.s1 FR_Yn,p11 = f1,FR_Bn // y = frcpa(b)
and GR_ArgExp = GR_ArgExp,GR_ExpMask // biased exponent
}
{ .mfb
}
{ .mfb
ldfe FR_C5 = [GR_ad_taylor],32
fnma.s1 FR_B = FR_abs_x,f1,f1 // b = 1 - |arg|
(p15) br.cond.spnt atanhl_gt_one // |arg| > 1
@ -639,20 +639,20 @@ GLOBAL_LIBM_ENTRY(atanhl)
ldfs FR_Half = [GR_ad_05]
(p7) fnma.s1 FR_B_lo = FR_Bp,f1,f1
nop.i 0
};;
};;
{ .mfi
nop.m 0
(p6) fnma.s1 FR_E0 = FR_Yn,FR_Bn,f1 // e = 1-b*y
(p6) fnma.s1 FR_E0 = FR_Yn,FR_Bn,f1 // e = 1-b*y
nop.i 0
}
{ .mfb
}
{ .mfb
nop.m 0
(p6) fma.s1 FR_Y0 = FR_Yn,f1,f0
(p8) br.cond.spnt atanhl_gt_one // |arg| > 1
};;
{ .mfi
nop.m 0
(p7) fnma.s1 FR_E0 = FR_Yp,FR_Bp,f1
(p7) fnma.s1 FR_E0 = FR_Yp,FR_Bp,f1
nop.i 0
}
{ .mfi
@ -804,11 +804,11 @@ GLOBAL_LIBM_ENTRY(atanhl)
{ .mfi
ldfe FR_log2_lo = [GR_ad_q],16 // load log2_lo
nop.f 0
sub GR_N = GR_N,GR_Bias
sub GR_N = GR_N,GR_Bias
};;
{ .mfi
ldfe FR_Q4 = [GR_ad_q],16 // load Q4
fms.s1 FR_S_lo = FR_AA,f1,FR_Z // form S_lo = AA - Z
fms.s1 FR_S_lo = FR_AA,f1,FR_Z // form S_lo = AA - Z
sub GR_minus_N = GR_Bias,GR_N // form exponent of 2^(-N)
};;
{ .mmf
@ -820,7 +820,7 @@ GLOBAL_LIBM_ENTRY(atanhl)
{ .mfi
ldfe FR_Q2 = [GR_ad_q],16 // load Q2
nop.f 0
extr.u GR_Index2 = GR_X_1,6,4 // extract bits 6-9 of X_1
extr.u GR_Index2 = GR_X_1,6,4 // extract bits 6-9 of X_1
};;
{ .mmi
ldfe FR_Q1 = [GR_ad_q] // load Q1
@ -862,17 +862,17 @@ GLOBAL_LIBM_ENTRY(atanhl)
}
{ .mfi
nop.m 0
nop.f 0
nop.f 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.f 0
nop.f 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.f 0
nop.f 0
nop.i 0
};;
@ -1068,7 +1068,7 @@ atanhl_near_zero:
{ .mfb
nop.m 0
fma.s0 f8 = FR_C17,FR_x3,f8
br.ret.sptk b0
br.ret.sptk b0
};;
atanhl_eq_one:

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 02/02/00 Initial version
// 04/04/00 Unwind support added
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
@ -56,12 +56,12 @@
//
// Registers used
//==============================================================
// general registers:
// general registers:
// r14 -> r40
// predicate registers used:
// p6 -> p11
// floating-point registers used:
// f9 -> f15; f32 -> f90;
// f9 -> f15; f32 -> f90;
// f8 has input, then output
//
// Overview of operation
@ -82,7 +82,7 @@
// 1. COSH_BY_POLY 0 < |x| < 0.25
// ===============
// Evaluate cosh(x) by a 12th order polynomial
// Care is taken with the order of multiplication; and P2 is not exactly 1/4!,
// Care is taken with the order of multiplication; and P2 is not exactly 1/4!,
// P3 is not exactly 1/6!, etc.
// cosh(x) = 1 + (P1*x^2 + P2*x^4 + P3*x^6 + P4*x^8 + P5*x^10 + P6*x^12)
//
@ -90,18 +90,18 @@
// =============
// cosh(x) = cosh(B+R)
// = cosh(B)cosh(R) + sinh(B)sinh(R)
//
//
// ax = |x| = M*log2/64 + R
// B = M*log2/64
// M = 64*N + j
// M = 64*N + j
// We will calculate M and get N as (M-j)/64
// The division is a shift.
// exp(B) = exp(N*log2 + j*log2/64)
// = 2^N * 2^(j*log2/64)
// cosh(B) = 1/2(e^B + e^-B)
// = 1/2(2^N * 2^(j*log2/64) + 2^-N * 2^(-j*log2/64))
// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64))
// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64))
// = 1/2(2^N * 2^(j*log2/64) + 2^-N * 2^(-j*log2/64))
// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64))
// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64))
// 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
//
@ -109,7 +109,7 @@
// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
// = 1 + p_odd + p_even
// where the p_odd uses the A coefficients and the p_even uses
// where the p_odd uses the A coefficients and the p_even uses
// the B coefficients
//
// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
@ -173,7 +173,7 @@ GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
f_ABS_X = f9
f_ABS_X = f9
f_X2 = f10
f_X4 = f11
f_tmp = f14
@ -228,16 +228,16 @@ f_Tmjlo = f68
f_S_hi = f69
f_SC_hi_temp = f70
f_C_lo_temp1 = f71
f_C_lo_temp2 = f72
f_C_lo_temp3 = f73
f_C_lo_temp4 = f73
f_C_lo_temp1 = f71
f_C_lo_temp2 = f72
f_C_lo_temp3 = f73
f_C_lo_temp4 = f73
f_C_lo = f74
f_C_hi = f75
f_Y_hi = f77
f_Y_lo_temp = f78
f_Y_lo = f79
f_Y_hi = f77
f_Y_lo_temp = f78
f_Y_lo = f79
f_NORM_X = f80
f_P1 = f81
@ -442,7 +442,7 @@ GLOBAL_IEEE754_ENTRY(coshl)
}
{ .mfi
nop.m 0
fnorm.s1 f_NORM_X = f8
fnorm.s1 f_NORM_X = f8
mov r_exp_2tom57 = 0xffff-57
}
;;
@ -450,7 +450,7 @@ GLOBAL_IEEE754_ENTRY(coshl)
{ .mfi
setf.d f_RSHF_2TO57 = r_rshf_2to57 // Form const 1.100 * 2^120
fclass.m p10,p0 = f8, 0x0b // Test for denorm
mov r_exp_mask = 0x1ffff
mov r_exp_mask = 0x1ffff
}
{ .mlx
setf.sig f_INV_LN2_2TO63 = r_sig_inv_ln2 // Form 1/ln2 * 2^63
@ -490,7 +490,7 @@ COSH_COMMON:
add r_ad5 = 0x580, r_ad1 // Point to j_lo_table midpoint
}
{ .mib
ldfe f_log2by64_hi = [r_ad1],16
ldfe f_log2by64_hi = [r_ad1],16
and r_exp_x = r_exp_mask, r_signexp_x
(p7) br.ret.spnt b0 // Exit if x=0
}
@ -498,36 +498,36 @@ COSH_COMMON:
// Get the A coefficients for COSH_BY_TBL
{ .mfi
ldfe f_A1 = [r_ad3],16
ldfe f_A1 = [r_ad3],16
fcmp.lt.s1 p8,p9 = f8,f0 // Test for x<0
cmp.lt p7,p0 = r_exp_x, r_exp_0_25 // Test x < 0.25
}
{ .mfb
add r_ad2o = 0x30, r_ad2e // Point to p_table odd coeffs
(p6) fma.s0 f8 = f8,f8,f0 // Result for x nan, inf
(p6) fma.s0 f8 = f8,f8,f0 // Result for x nan, inf
(p6) br.ret.spnt b0 // Exit for x nan, inf
}
;;
// Calculate X2 = ax*ax for COSH_BY_POLY
{ .mfi
ldfe f_log2by64_lo = [r_ad1],16
ldfe f_log2by64_lo = [r_ad1],16
nop.f 0
nop.i 0
}
{ .mfb
ldfe f_A2 = [r_ad3],16
ldfe f_A2 = [r_ad3],16
fma.s1 f_X2 = f_NORM_X, f_NORM_X, f0
(p7) br.cond.spnt COSH_BY_POLY
}
;;
// Here if |x| >= 0.25
COSH_BY_TBL:
COSH_BY_TBL:
// ******************************************************
// STEP 1 (TBL and EXP) - Argument reduction
// ******************************************************
// Get the following constants.
// Get the following constants.
// Inv_log2by64
// log2by64_hi
// log2by64_lo
@ -581,20 +581,20 @@ COSH_BY_TBL:
// Subtract RSHF constant to get rounded M as a floating point value
// M_temp * 2^(63-6) - 2^63
{ .mfb
ldfe f_B3 = [r_ad3],16
ldfe f_B3 = [r_ad3],16
fms.s1 f_M = f_M_temp, f_2TOM57, f_RSHF
(p6) br.cond.spnt COSH_HUGE // Branch if result will overflow
}
;;
{ .mfi
getf.sig r_M = f_M_temp
getf.sig r_M = f_M_temp
nop.f 0
cmp.ge p7,p6 = r_exp_x, r_exp_32 // Test if x >= 32
}
;;
// Calculate j. j is the signed extension of the six lsb of M. It
// Calculate j. j is the signed extension of the six lsb of M. It
// has a range of -32 thru 31.
// Calculate R
@ -637,8 +637,8 @@ COSH_BY_TBL:
// N = (M-j)/64
{ .mfi
ldfe f_Tjhi = [r_ad_J_hi]
fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp
shr r_N = r_Mmj, 0x6 // N = (M-j)/64
fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp
shr r_N = r_Mmj, 0x6 // N = (M-j)/64
}
{ .mfi
shladd r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi
@ -713,8 +713,8 @@ COSH_BY_TBL:
}
;;
//
// If TBL,
//
// If TBL,
// Calculate S_hi and S_lo, and C_hi
// SC_hi_temp = sneg * Tmjhi
// S_hi = spos * Tjhi - SC_hi_temp
@ -724,12 +724,12 @@ COSH_BY_TBL:
{ .mfi
nop.m 0
(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0
(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0
nop.i 0
}
;;
// If TBL,
// If TBL,
// C_lo_temp3 = sneg * Tmjlo
// C_lo_temp4 = spos * Tjlo + C_lo_temp3
// C_lo_temp4 = spos * Tjlo + (sneg * Tmjlo)
@ -752,7 +752,7 @@ COSH_BY_TBL:
}
;;
// If EXP,
// If EXP,
// Compute 2^(N-1) * Tjhi and 2^(N-1) * Tjlo
{ .mfi
nop.m 0
@ -811,7 +811,7 @@ COSH_BY_TBL:
{ .mfi
nop.m 0
(p6) fma.s1 f_C_lo_temp2 = f_sneg, f_Tmjhi, f_C_lo_temp1
(p6) fma.s1 f_C_lo_temp2 = f_sneg, f_Tmjhi, f_C_lo_temp1
nop.i 0
}
;;
@ -836,7 +836,7 @@ COSH_BY_TBL:
;;
// If TBL,
// Y_hi = C_hi
// Y_hi = C_hi
// Y_lo = S_hi*p_odd + (C_hi*p_even + C_lo)
{ .mfi
nop.m 0
@ -883,7 +883,7 @@ COSH_BY_TBL:
// Here if 0 < |x| < 0.25
COSH_BY_POLY:
COSH_BY_POLY:
{ .mmf
ldfe f_P6 = [r_ad2e],16
ldfe f_P5 = [r_ad2o],16
@ -900,7 +900,7 @@ COSH_BY_POLY:
{ .mmi
ldfe f_P2 = [r_ad2e],16
ldfe f_P1 = [r_ad2o],16
ldfe f_P1 = [r_ad2o],16
nop.i 0
}
;;
@ -1007,7 +1007,7 @@ COSH_DENORM:
// Here if |x| >= overflow limit
COSH_HUGE:
COSH_HUGE:
// for COSH_HUGE, put 24000 in exponent; take sign from input
{ .mmi
mov r_exp_huge = 0x15dbf
@ -1018,7 +1018,7 @@ COSH_HUGE:
;;
{ .mfi
alloc r32 = ar.pfs,0,5,4,0
alloc r32 = ar.pfs,0,5,4,0
fma.s1 f_signed_hi_lo = f_huge, f1, f1
nop.i 0
}
@ -1061,7 +1061,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mib
stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfe [GR_Parameter_Y] = f_pre_result // STORE Parameter 3 on stack

View File

@ -693,7 +693,7 @@ EXP_CERTAIN_UNDERFLOW:
nop.i 0
}
;;
{ .mfb
nop.m 0
fma.d.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result

View File

@ -257,7 +257,7 @@ LOCAL_OBJECT_END(_expf_table)
.section .text
GLOBAL_IEEE754_ENTRY(expf)
{ .mlx
addl rTblAddr = @ltoff(_expf_table),gp
movl r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
@ -612,7 +612,7 @@ EXP_CERTAIN_UNDERFLOW:
nop.i 0
}
;;
{ .mfb
nop.m 0
fma.s.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result

View File

@ -366,7 +366,7 @@ loop64:
{ .mfi
nop.m 0
// Final iteration (p8): is FR_ABS_A the correct remainder
// Final iteration (p8): is FR_ABS_A the correct remainder
// (quotient was not overestimated) ?
(p8) fcmp.lt.unc.s1 p6, p10 = FR_QREM, f0
nop.i 0
@ -392,7 +392,7 @@ loop64:
nop.m 0
// add b to estimated remainder (to cover the case when the quotient was
// overestimated)
// also set correct sign by using
// also set correct sign by using
// FR_B_SGN_A = |b|*sgn(a), FR_ROUNDCONST = sgn(a)
(p6) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, FR_B_SGN_A
nop.b 0

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
//
// History:
// History:
// 02/02/00 hand-optimized
// 04/04/00 Unwind support added
// 06/20/00 new version
@ -86,7 +86,7 @@
// x2 = x * x in double-extended
// y2 = y * y in double-extended
// temp = x2 + y2 in double-extended
// sqrt(temp) rounded to double
// sqrt(temp) rounded to double
//
//*********************************************************************
@ -114,7 +114,7 @@ GLOBAL_IEEE754_ENTRY(hypot)
// Compute x*x
fma.s1 f10=f8,f8,f0
// r2=bias-1
mov r2=0xfffe
mov r2=0xfffe
}
{.mfi
// 63/8
@ -135,8 +135,8 @@ GLOBAL_IEEE754_ENTRY(hypot)
{.mfi
nop.m 0
// if possible overflow, copy f8 to f32
// set Denormal, if necessary
// (p8)
// set Denormal, if necessary
// (p8)
fma.d.s0 f32=f8,f1,f0
nop.i 0;;
}
@ -235,11 +235,11 @@ GLOBAL_IEEE754_ENTRY(hypot)
{ .mfi
nop.m 0
// Identify Natvals, Infs, NaNs, and Zeros
// Identify Natvals, Infs, NaNs, and Zeros
// and return result
fclass.m.unc p7, p0 = f12, 0x1E7
nop.i 0;;
}
}
{.mfb
// get exponent of x^2+y^2
getf.exp r3=f12
@ -260,7 +260,7 @@ GLOBAL_IEEE754_ENTRY(hypot)
// H0=0.5*z0
(p6) fma.s1 f15=f8,f7,f0
nop.i 0;;
}
}
{.mfi
@ -334,7 +334,7 @@ GLOBAL_IEEE754_ENTRY(hypot)
nop.i 0
}
{.mfi
// Is x^2 + y^2 well less than the overflow
// Is x^2 + y^2 well less than the overflow
// threshold?
(p6) cmp.lt.unc p7, p8 = r3,r2
// P=P13+d3*P47
@ -351,8 +351,8 @@ GLOBAL_IEEE754_ENTRY(hypot)
}
{ .mfi
nop.m 0
(p8) fsetc.s2 0x7F,0x42
nop.m 0
(p8) fsetc.s2 0x7F,0x42
// Possible overflow path, must detect by
// Setting widest range exponent with prevailing
// rounding mode.
@ -374,7 +374,7 @@ GLOBAL_IEEE754_ENTRY(hypot)
nop.i 0 ;;
}
{ .mfi
nop.m 0
nop.m 0
(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11
nop.i 0 ;;
}
@ -382,7 +382,7 @@ GLOBAL_IEEE754_ENTRY(hypot)
nop.m 0
mov GR_Parameter_TAG = 46
// No overflow
(p9) br.ret.sptk b0;;
(p9) br.ret.sptk b0;;
}
GLOBAL_IEEE754_END(hypot)

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
//
// History:
// History:
// 02/02/00 hand-optimized
// 04/04/00 Unwind support added
// 06/26/00 new version
@ -86,7 +86,7 @@
// x2 = x * x in double-extended
// y2 = y * y in double-extended
// temp = x2 + y2 in double-extended
// sqrt(temp) rounded to single precision
// sqrt(temp) rounded to single precision
//
//*********************************************************************
@ -113,7 +113,7 @@ GLOBAL_IEEE754_ENTRY(hypotf)
// Compute x*x
fma.s1 f10=f8,f8,f0
// r2=bias-1
mov r2=0xfffe
mov r2=0xfffe
}
{.mfi
nop.m 0
@ -132,8 +132,8 @@ GLOBAL_IEEE754_ENTRY(hypotf)
{.mfi
nop.m 0
// if possible overflow, copy f8 to f14
// set Denormal, if necessary
// (p8)
// set Denormal, if necessary
// (p8)
fma.s.s0 f14=f8,f1,f0
nop.i 0;;
}
@ -211,11 +211,11 @@ GLOBAL_IEEE754_ENTRY(hypotf)
{ .mfi
nop.m 0
// Identify Natvals, Infs, NaNs, and Zeros
// Identify Natvals, Infs, NaNs, and Zeros
// and return result
fclass.m.unc p7, p0 = f12, 0x1E7
nop.i 0
}
}
{.mfi
nop.m 0
// z0=frsqrta(a)
@ -243,7 +243,7 @@ GLOBAL_IEEE754_ENTRY(hypotf)
// H0=0.5*z0
(p6) fma.s1 f10=f8,f7,f0
nop.i 0;;
}
}
{.mfi
@ -287,7 +287,7 @@ GLOBAL_IEEE754_ENTRY(hypotf)
{.mfi
// Is x^2 + y^2 well less than the overflow
// Is x^2 + y^2 well less than the overflow
// threshold?
(p6) cmp.lt.unc p7, p8 = r3,r2
// P=P01+d2*P23
@ -304,8 +304,8 @@ GLOBAL_IEEE754_ENTRY(hypotf)
}
{ .mfi
nop.m 0
(p8) fsetc.s2 0x7F,0x42
nop.m 0
(p8) fsetc.s2 0x7F,0x42
// Possible overflow path, must detect by
// Setting widest range exponent with prevailing
// rounding mode.
@ -327,7 +327,7 @@ GLOBAL_IEEE754_ENTRY(hypotf)
nop.i 0 ;;
}
{ .mfi
nop.m 0
nop.m 0
(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11
nop.i 0 ;;
}
@ -335,7 +335,7 @@ GLOBAL_IEEE754_ENTRY(hypotf)
nop.m 0
mov GR_Parameter_TAG = 47
// No overflow
(p9) br.ret.sptk b0;;
(p9) br.ret.sptk b0;;
}
GLOBAL_IEEE754_END(hypotf)
@ -343,7 +343,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mii
add GR_Parameter_Y=-32,sp // Parameter 2 value
mov GR_Parameter_TAG = 47
mov GR_Parameter_TAG = 47
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
@ -382,10 +382,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
//
// History:
// History:
// 02/02/00 hand-optimized
// 04/04/00 Unwind support added
// 06/20/00 new version
@ -112,7 +112,7 @@ GLOBAL_IEEE754_ENTRY(hypotl)
// Compute x*x
fma.s1 f10=f8,f8,f0
// r2=bias-1
mov r2=0xfffe
mov r2=0xfffe
}
{.mfi
nop.m 0
@ -131,8 +131,8 @@ GLOBAL_IEEE754_ENTRY(hypotl)
{.mfi
nop.m 0
// if possible overflow, copy f8 to f32
// set Denormal, if necessary
// (p8)
// set Denormal, if necessary
// (p8)
fma.s0 f32=f8,f1,f0
nop.i 0;;
}
@ -233,11 +233,11 @@ GLOBAL_IEEE754_ENTRY(hypotl)
}
{ .mfi
nop.m 0
// Identify Natvals, Infs, NaNs, and Zeros
// Identify Natvals, Infs, NaNs, and Zeros
// and return result
fclass.m.unc p7, p0 = f12, 0x1E7
nop.i 0
}
}
{.mfi
// get exponent of x^2+y^2
getf.exp r3=f12
@ -271,7 +271,7 @@ GLOBAL_IEEE754_ENTRY(hypotl)
// H0=0.5*z0
(p6) fma.s1 f15=f8,f7,f0
nop.i 0;;
}
}
{.mfb
nop.m 0
@ -364,7 +364,7 @@ GLOBAL_IEEE754_ENTRY(hypotl)
nop.i 0
}
{.mfi
// Is x^2 + y^2 well less than the overflow
// Is x^2 + y^2 well less than the overflow
// threshold?
(p6) cmp.lt.unc p7, p8 = r3,r2
// c=dxy+da
@ -388,8 +388,8 @@ GLOBAL_IEEE754_ENTRY(hypotl)
}
{ .mfi
nop.m 0
(p8) fsetc.s2 0x7F,0x42
nop.m 0
(p8) fsetc.s2 0x7F,0x42
// Possible overflow path, must detect by
// Setting widest range exponent with prevailing
// rounding mode.
@ -411,7 +411,7 @@ GLOBAL_IEEE754_ENTRY(hypotl)
nop.i 0 ;;
}
{ .mfi
nop.m 0
nop.m 0
(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11
nop.i 0 ;;
}
@ -419,7 +419,7 @@ GLOBAL_IEEE754_ENTRY(hypotl)
nop.m 0
mov GR_Parameter_TAG = 45;
// No overflow
(p9) br.ret.sptk b0;;
(p9) br.ret.sptk b0;;
}
GLOBAL_IEEE754_END(hypotl)

View File

@ -1425,11 +1425,11 @@ log_log10_common:
fnorm.s1 FR_NormX = f8
mov GR_bias = 0xffff
};;
{ .mfi
setf.d FR_A3 = GR_A3 // create A3
fcmp.eq.s1 p12,p0 = f1,f8 // is x equal to 1.0?
dep.z GR_xorg = GR_xorg, 44, 19 // 0x3fefe00000000000
dep.z GR_xorg = GR_xorg, 44, 19 // 0x3fefe00000000000
// double precision memory
// representation of 255/256
}
@ -1519,7 +1519,7 @@ log_core:
{ .mfi
(p6) getf.exp GR_rexp = FR_r // Get signexp of x-1
(p7) fcvt.xf FR_N = FR_N
(p8) cmp.eq p9,p6 = r0,r0 // Also set p9 and clear p6 if log10
(p8) cmp.eq p9,p6 = r0,r0 // Also set p9 and clear p6 if log10
// and arg near 1
};;

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//=================================================================
// 09/11/00 Initial version
// 09/11/00 Initial version
// 03/19/01 Added one polynomial coefficient, to improve accuracy
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -60,19 +60,19 @@
// j=0 if f<128; j=1 if f>=128
// T is a table that stores log2(1/y) (in entries 1..255) rounded to
// double extended precision; f is used as an index; T[255]=0
//
//
// If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m),
// and 0 is used instead of T[0]
// and 0 is used instead of T[0]
// (polynomial evaluation only, for m=1+r, 0<=r<2^{-9})
// If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used
// for m=2(1-r'), 0<=r'<2^{-9})
//
// log2(x) is approximated as
// (l-j) + T[f] + (c1*r+c2*r^2+...+c7*r^7), if f>0
//
//
// Special values
// Special values
//=================================================================
// log2(0)=-inf, raises Divide by Zero
// log2(+inf)=inf
@ -90,7 +90,7 @@
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35 // This reg. can safely be used
GR_SAVE_GP = r35 // This reg. can safely be used
GR_SAVE_SP = r36
GR_Parameter_X = r37
@ -388,15 +388,15 @@ LOCAL_OBJECT_END(T_table)
GLOBAL_LIBM_ENTRY(log2)
{ .mfi
alloc r32=ar.pfs,1,4,4,0
// y=frcpa(x)
alloc r32=ar.pfs,1,4,4,0
// y=frcpa(x)
frcpa.s1 f6,p0=f1,f8
// will form significand of 1.5 (to test whether the index is 128 or above)
// will form significand of 1.5 (to test whether the index is 128 or above)
mov r24=0xc
}
{.mfi
nop.m 0
// normalize x
// normalize x
fma.s1 f7=f8,f1,f0
// r2 = pointer to C_1...C_6 followed by T_table
addl r2 = @ltoff(poly_coeffs), gp;;
@ -406,7 +406,7 @@ GLOBAL_LIBM_ENTRY(log2)
getf.sig r25=f8
// f8 denormal ?
fclass.m p8,p10=f8,0x9
// will form significand of 1.5 (to test whether the index is 128 or above)
// will form significand of 1.5 (to test whether the index is 128 or above)
shl r24=r24,60
}
{.mfi
@ -420,7 +420,7 @@ GLOBAL_LIBM_ENTRY(log2)
getf.exp r29=f8
// load start address for C_1...C_6 followed by T_table
ld8 r2=[r2]
// will continue only for positive normal/denormal numbers
// will continue only for positive normal/denormal numbers
fclass.nm.unc p12,p7 = f8, 0x19 ;;
}
@ -465,7 +465,7 @@ GLOBAL_LIBM_ENTRY(log2)
{.mmi
// load C_6, C_7
ldfpd f12,f13=[r2],16
// r27=bias-1 (if index >=128, will add exponent+1)
// r27=bias-1 (if index >=128, will add exponent+1)
(p12) mov r27=0xfffe
(p8) shr.u r28=r25,63-8;;
}
@ -513,7 +513,7 @@ GLOBAL_LIBM_ENTRY(log2)
{.mmf
// load T (unless first 9 bits after leading 1 are 0)
(p12) ldfe f33=[r2]
// f8=expon - bias
// f8=expon - bias
setf.sig f8=r29
// set T=0 (if first 9 bits after leading 1 are 0)
(p8) fma.s1 f33=f0,f0,f0;;
@ -602,7 +602,7 @@ GLOBAL_LIBM_ENTRY(log2)
SPECIAL_LOG2:
{.mfi
{.mfi
nop.m 0
// x=+Infinity ?
fclass.m p7,p0=f8,0x21
@ -627,7 +627,7 @@ SPECIAL_LOG2:
(p7) br.ret.spnt b0;;
}
{.mfi
(p8) mov GR_Parameter_TAG = 170
(p8) mov GR_Parameter_TAG = 170
// log2(+/-0)=-infinity, raises Divide by Zero
// set f8=-0
(p8) fmerge.ns f8=f0,f8
@ -639,12 +639,12 @@ SPECIAL_LOG2:
(p8) br.cond.sptk __libm_error_region;;
}
{.mfb
(p6) mov GR_Parameter_TAG = 171
(p6) mov GR_Parameter_TAG = 171
// x<0: return NaN, raise Invalid
(p6) frcpa.s0 f8,p0=f0,f0
(p6) br.cond.sptk __libm_error_region;;
}
}
{.mfb
nop.m 0
@ -662,10 +662,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@ -673,18 +673,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
@ -699,10 +699,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 09/11/00 Initial version
// 09/11/00 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
@ -58,19 +58,19 @@
// j=0 if f<128; j=1 if f>=128
// T is a table that stores log2(1/y) (in entries 1..255) rounded to
// double extended precision; f is used as an index; T[255]=0
//
//
// If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m),
// and 0 is used instead of T[0]
// and 0 is used instead of T[0]
// (polynomial evaluation only, for m=1+r, 0<=r<2^{-9})
// If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used
// for m=2(1-r'), 0<=r'<2^{-9})
//
// log2f(x) is approximated as
// (l-j) + T[f] + (c1*r+c2*r^2+...+c6*r^6), if f>0
//
//
// Special values
// Special values
//==============================================================
// log2f(0)=-inf, raises Divide by Zero
// log2f(+inf)=inf
@ -88,7 +88,7 @@
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35 // This reg. can safely be used
GR_SAVE_GP = r35 // This reg. can safely be used
GR_SAVE_SP = r36
GR_Parameter_X = r37
@ -255,15 +255,15 @@ LOCAL_OBJECT_END(T_table)
GLOBAL_LIBM_ENTRY(log2f)
{ .mfi
alloc r32=ar.pfs,1,4,4,0
// y=frcpa(x)
alloc r32=ar.pfs,1,4,4,0
// y=frcpa(x)
frcpa.s1 f6,p0=f1,f8
// will form significand of 1.5 (to test whether the index is 128 or above)
// will form significand of 1.5 (to test whether the index is 128 or above)
mov r24=0xc
}
{.mfi
nop.m 0
// normalize x
// normalize x
fma.s1 f7=f8,f1,f0
// r2 = pointer to C_1...C_6 followed by T_table
addl r2 = @ltoff(poly_coeffs), gp;;
@ -273,7 +273,7 @@ GLOBAL_LIBM_ENTRY(log2f)
getf.sig r25=f8
// f8 denormal ?
fclass.m p8,p10=f8,0x9
// will form significand of 1.5 (to test whether the index is 128 or above)
// will form significand of 1.5 (to test whether the index is 128 or above)
shl r24=r24,60
}
{.mfi
@ -287,7 +287,7 @@ GLOBAL_LIBM_ENTRY(log2f)
getf.exp r29=f8
// load start address for C_1...C_6 followed by T_table
ld8 r2=[r2]
// will continue only for positive normal/denormal numbers
// will continue only for positive normal/denormal numbers
fclass.nm.unc p12,p7 = f8, 0x19 ;;
}
@ -331,7 +331,7 @@ GLOBAL_LIBM_ENTRY(log2f)
// load C_3, C_4
ldfpd f10,f11=[r2],16
nop.f 0
// r27=bias-1 (if index >=128, will add exponent+1)
// r27=bias-1 (if index >=128, will add exponent+1)
(p12) mov r27=0xfffe;;
}
@ -360,7 +360,7 @@ GLOBAL_LIBM_ENTRY(log2f)
cmp.ltu p8,p12=r25,r26;;
}
{.mfi
// f8=expon - bias
// f8=expon - bias
setf.sig f8=r29
nop.f 0
// get T address
@ -440,7 +440,7 @@ GLOBAL_LIBM_ENTRY(log2f)
SPECIAL_log2f:
{.mfi
{.mfi
nop.m 0
// x=+Infinity ?
fclass.m p7,p0=f8,0x21
@ -465,7 +465,7 @@ SPECIAL_log2f:
(p7) br.ret.spnt b0;;
}
{.mfi
(p8) mov GR_Parameter_TAG = 172
(p8) mov GR_Parameter_TAG = 172
// log2f(+/-0)=-infinity, raises Divide by Zero
// set f8=-0
(p8) fmerge.ns f8=f0,f8
@ -477,12 +477,12 @@ SPECIAL_log2f:
(p8) br.cond.sptk __libm_error_region;;
}
{.mfb
(p6) mov GR_Parameter_TAG = 173
(p6) mov GR_Parameter_TAG = 173
// x<0: return NaN, raise Invalid
(p6) frcpa.s0 f8,p0=f0,f0
(p6) br.cond.sptk __libm_error_region;;
}
}
{.mfb
nop.m 0
@ -500,10 +500,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@ -511,18 +511,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
@ -537,10 +537,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function

View File

@ -21,27 +21,27 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 09/25/00 Initial version
// 09/25/00 Initial version
// 11/22/00 Fixed accuracy bug (for mantissas near 1, 2)
// 12/07/00 Fixed C_1l constant, eliminated rounding errors in
// 12/07/00 Fixed C_1l constant, eliminated rounding errors in
// reduced argument (x*frcpa(x)-1)
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -57,16 +57,16 @@
// Implementation
//
// Let x = 2^l * m, where m=1.b1 b2 ... b8 b9 ... b52
// y=frcpa(m), r=m*y-1, f=b1 b2 .. b8
// T_high is a table that stores the 24 most significant bits of log2(1/y)
// y=frcpa(m), r=m*y-1, f=b1 b2 .. b8
// T_high is a table that stores the 24 most significant bits of log2(1/y)
// (in entries 1..255) in single precision format
// T_low is a table that stores (log2(1/y)-T_high), rounded to double
// precision
// precision
//
// f is used as an index; T_high[255]=T_low[255]=0
//
//
// If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m),
// and 0 is used instead of T_high[0], T_low[0]
// and 0 is used instead of T_high[0], T_low[0]
// (polynomial evaluation only, for m=1+r, 0<=r<2^{-9})
// If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used
// for m=2(1-r'), 0<=r'<2^{-9})
@ -80,10 +80,10 @@
//
// log2l(x) is approximated as
// (l+T_high[f]+C1r) + (D+r*(c1+c2*r+c3*r^2...+c8*r^7)+(T_low[f]+C_1*E))
//
//
// Special values
// Special values
//==============================================================
// log2l(0)=-inf, raises Divide by Zero
// log2l(+inf)=inf
@ -101,7 +101,7 @@
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35 // This reg. can safely be used
GR_SAVE_GP = r35 // This reg. can safely be used
GR_SAVE_SP = r36
GR_Parameter_X = r37
@ -127,7 +127,7 @@ LOCAL_OBJECT_START(poly_coeffs)
data8 0xb8aa3b295c17f0bc, 0x00003fff // C_1
data8 0x3fca61762a7aded9, 0xbfc71547652b82fe // C_7, C_8
data8 0x3fd2776c50ef9bfe, 0xbfcec709dc3a03fd // C_5, C_6
data8 0x3fd2776c50ef9bfe, 0xbfcec709dc3a03fd // C_5, C_6
data8 0x3fdec709dc3a03fd, 0xbfd71547652b82fe // C_3, C_4
//data8 0xd871319ff0342580, 0x0000bfbd // C_1l (low part of C1)
data8 0x82f0025f2dc582ee, 0x0000bfbe // C_1l (low part of C1)
@ -345,9 +345,9 @@ LOCAL_OBJECT_END(T_low)
GLOBAL_IEEE754_ENTRY(log2l)
{ .mfi
alloc r32=ar.pfs,1,4,4,0
// normalize x
// y=frcpa(x)
alloc r32=ar.pfs,1,4,4,0
// normalize x
// y=frcpa(x)
frcpa.s1 f41,p0=f1,f8
// r26=bias-1
mov r26=0xfffe
@ -378,8 +378,8 @@ GLOBAL_IEEE754_ENTRY(log2l)
getf.exp r29=f8
// load start address for C_1...C_7 followed by T_table
ld8 r2=[r2]
// will continue only for positive normal/unnormal numbers
fclass.m.unc p0,p12 = f8, 0x19;;
// will continue only for positive normal/unnormal numbers
fclass.m.unc p0,p12 = f8, 0x19;;
}
@ -409,7 +409,7 @@ GLOBAL_IEEE754_ENTRY(log2l)
}
{.mfb
add r3=16,r2
add r3=16,r2
// r=x*y-1
fms.s1 f6=f41,f8,f1
(p12) br.cond.spnt SPECIAL_log2l
@ -468,10 +468,10 @@ GLOBAL_IEEE754_ENTRY(log2l)
// add 1 to the exponent additive term, and estimate log2(1-r)
(p10) add r29=1,r29
nop.f 0
(p7) br.cond.spnt LOG2_PSEUDO_ZERO
(p7) br.cond.spnt LOG2_PSEUDO_ZERO
}
{.mfi
// get T_low address
// get T_low address
shladd r3=r28,3,r3
// if first 8 bits after leading 1 are all ones, use polynomial approx. only
(p10) fms.s1 f6=f7,f36,f1
@ -514,7 +514,7 @@ GLOBAL_IEEE754_ENTRY(log2l)
.pred.rel "mutex",p8,p12
{.mfi
// f8=expon - bias
// f8=expon - bias
setf.sig f8=r29
// general case: 2^{16}+C1*r
(p12) fma.s1 f33=f6,f14,f32
@ -687,7 +687,7 @@ SPECIAL_log2l:
mov FR_X=f8
nop.i 0
}
{.mfi
{.mfi
nop.m 0
// x=+Infinity ?
fclass.m p7,p0=f8,0x21
@ -712,7 +712,7 @@ SPECIAL_log2l:
(p7) br.ret.spnt b0;;
}
{.mfi
(p8) mov GR_Parameter_TAG = 168
(p8) mov GR_Parameter_TAG = 168
// log2l(+/-0)=-infinity, raises Divide by Zero
// set f8=-0
(p8) fmerge.ns f8=f0,f8
@ -724,12 +724,12 @@ SPECIAL_log2l:
(p8) br.cond.sptk __libm_error_region;;
}
{.mfb
(p6) mov GR_Parameter_TAG = 169
(p6) mov GR_Parameter_TAG = 169
// x<0: return NaN, raise Invalid
(p6) frcpa.s0 f8,p0=f0,f0
(p6) br.cond.sptk __libm_error_region;;
}
}
{.mfb
nop.m 0
@ -746,7 +746,7 @@ LOG2_PSEUDO_ZERO:
nop.i 0
}
{.mfi
mov GR_Parameter_TAG = 168
mov GR_Parameter_TAG = 168
// log2l(+/-0)=-infinity, raises Divide by Zero
// set f8=-0
fmerge.ns f8=f0,f8
@ -768,10 +768,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@ -779,18 +779,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mmi
stfe [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfe [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
stfe [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
stfe [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
@ -805,10 +805,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function

View File

@ -1,4 +1,4 @@
.file "logl.s"
.file "logl.s"
// Copyright (c) 2000 - 2003, Intel Corporation
@ -21,26 +21,26 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
//
// History:
// 05/21/01 Extracted logl and log10l from log1pl.s file, and optimized
// History:
// 05/21/01 Extracted logl and log10l from log1pl.s file, and optimized
// all paths.
// 06/20/01 Fixed error tag for x=-inf.
// 05/20/02 Cleaned up namespace and sf0 syntax
@ -74,20 +74,20 @@
// IEEE Special Conditions:
//
// Denormal fault raised on denormal inputs
// Overflow exceptions cannot occur
// Underflow exceptions raised when appropriate for log1p
// Overflow exceptions cannot occur
// Underflow exceptions raised when appropriate for log1p
// (Error Handling Routine called for underflow)
// Inexact raised when appropriate by algorithm
//
// logl(inf) = inf
// logl(-inf) = QNaN
// logl(+/-0) = -inf
// logl(-inf) = QNaN
// logl(+/-0) = -inf
// logl(SNaN) = QNaN
// logl(QNaN) = QNaN
// logl(EM_special Values) = QNaN
// log10l(inf) = inf
// log10l(-inf) = QNaN
// log10l(+/-0) = -inf
// log10l(-inf) = QNaN
// log10l(+/-0) = -inf
// log10l(SNaN) = QNaN
// log10l(QNaN) = QNaN
// log10l(EM_special Values) = QNaN
@ -106,11 +106,11 @@
// logl( 1 + X ) can be approximated by a simple polynomial
// in W = X-1. This polynomial resembles the truncated Taylor
// series W - W^2/2 + W^3/3 - ...
//
//
// Case log_regular:
//
// Here we use a table lookup method. The basic idea is that in
// order to compute logl(Arg) for an argument Arg in [1,2), we
// order to compute logl(Arg) for an argument Arg in [1,2), we
// construct a value G such that G*Arg is close to 1 and that
// logl(1/G) is obtainable easily from a table of values calculated
// beforehand. Thus
@ -128,7 +128,7 @@
//
// X = 2^N * S_hi exactly
//
// where S_hi in [1,2)
// where S_hi in [1,2)
//
// Step 1: Argument Reduction
//
@ -137,7 +137,7 @@
// G := G_1 * G_2 * G_3
// r := (G * S_hi - 1)
//
// These G_j's have the property that the product is exactly
// These G_j's have the property that the product is exactly
// representable and that |r| < 2^(-12) as a result.
//
// Step 2: Approximation
@ -160,7 +160,7 @@
//
// Here we compute a simple polynomial. To exploit parallelism, we split
// the polynomial into two portions.
//
//
// W := X - 1
// Wsq := W * W
// W4 := Wsq*Wsq
@ -175,7 +175,7 @@
// Step 0. Initialization
// ----------------------
//
// Z := X
// Z := X
// N := unbiased exponent of Z
// S_hi := 2^(-N) * Z
//
@ -216,7 +216,7 @@
// with 1.0000 in fixed point.
//
//
// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
// truncated to lsb = 2^(-8). Similar to A_1,
// A_2 is not needed in actual implementation. It
// helps explain how some of the values are defined.
@ -245,13 +245,13 @@
// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
// floating pt. Fetch is done using index_3.
//
// Compute G := G_1 * G_2 * G_3.
// Compute G := G_1 * G_2 * G_3.
//
// This is done exactly since each of G_j only has 21 sig. bits.
//
// Compute
// Compute
//
// r := (G*S_hi - 1)
// r := (G*S_hi - 1)
//
//
// Step 2. Approximation
@ -285,7 +285,7 @@
// Finally
//
// Y_hi := N*log2_hi + SUM ( log1byGj_hi )
// Y_lo := poly_hi + [ poly_lo +
// Y_lo := poly_hi + [ poly_lo +
// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
//
@ -294,7 +294,7 @@ RODATA
// ************* DO NOT CHANGE THE ORDER OF THESE TABLES *************
// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1
// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1
LOCAL_OBJECT_START(Constants_P)
data8 0xE3936754EFD62B15,0x00003FFB
@ -307,7 +307,7 @@ data8 0xAAAAAAAAAAAAAAAA,0x00003FFD
data8 0xFFFFFFFFFFFFFFFE,0x0000BFFD
LOCAL_OBJECT_END(Constants_P)
// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
LOCAL_OBJECT_START(Constants_Q)
data8 0xB172180000000000,0x00003FFE
@ -327,7 +327,7 @@ LOCAL_OBJECT_END(Constants_1_by_LN10)
// Z1 - 16 bit fixed
LOCAL_OBJECT_START(Constants_Z_1)
data4 0x00008000
data4 0x00007879
@ -442,7 +442,7 @@ data4 0x3F71D488,0x3D693B9D
data8 0xBE049391B6B7C239
LOCAL_OBJECT_END(Constants_G_H_h2)
// G3 and H3 - IEEE single and h3 - IEEE double
// G3 and H3 - IEEE single and h3 - IEEE double
LOCAL_OBJECT_START(Constants_G_H_h3)
data4 0x3F7FFC00,0x38800100
@ -514,64 +514,64 @@ LOCAL_OBJECT_END(Constants_G_H_h3)
// Floating Point Registers
FR_Input_X = f8
FR_Input_X = f8
FR_Y_hi = f34
FR_Y_hi = f34
FR_Y_lo = f35
FR_Scale = f36
FR_X_Prime = f37
FR_S_hi = f38
FR_X_Prime = f37
FR_S_hi = f38
FR_W = f39
FR_G = f40
FR_H = f41
FR_wsq = f42
FR_wsq = f42
FR_w4 = f43
FR_h = f44
FR_w6 = f45
FR_w6 = f45
FR_G2 = f46
FR_H2 = f47
FR_poly_lo = f48
FR_P8 = f49
FR_P8 = f49
FR_poly_hi = f50
FR_P7 = f51
FR_h2 = f52
FR_rsq = f53
FR_P7 = f51
FR_h2 = f52
FR_rsq = f53
FR_P6 = f54
FR_r = f55
FR_r = f55
FR_log2_hi = f56
FR_log2_lo = f57
FR_p87 = f58
FR_p876 = f58
FR_p8765 = f58
FR_float_N = f59
FR_Q4 = f60
FR_log2_hi = f56
FR_log2_lo = f57
FR_p87 = f58
FR_p876 = f58
FR_p8765 = f58
FR_float_N = f59
FR_Q4 = f60
FR_p43 = f61
FR_p432 = f61
FR_p4321 = f61
FR_P4 = f62
FR_G3 = f63
FR_H3 = f64
FR_h3 = f65
FR_p43 = f61
FR_p432 = f61
FR_p4321 = f61
FR_P4 = f62
FR_G3 = f63
FR_H3 = f64
FR_h3 = f65
FR_Q3 = f66
FR_P3 = f67
FR_Q2 = f68
FR_P2 = f69
FR_1LN10_hi = f70
FR_Q3 = f66
FR_P3 = f67
FR_Q2 = f68
FR_P2 = f69
FR_1LN10_hi = f70
FR_Q1 = f71
FR_P1 = f72
FR_1LN10_lo = f73
FR_P5 = f74
FR_rcub = f75
FR_Q1 = f71
FR_P1 = f72
FR_1LN10_lo = f73
FR_P5 = f74
FR_rcub = f75
FR_Output_X_tmp = f76
FR_Output_X_tmp = f76
FR_X = f8
FR_Y = f0
@ -581,22 +581,22 @@ FR_RESULT = f76
// General Purpose Registers
GR_ad_p = r33
GR_Index1 = r34
GR_Index2 = r35
GR_signif = r36
GR_X_0 = r37
GR_X_1 = r38
GR_X_2 = r39
GR_Z_1 = r40
GR_Z_2 = r41
GR_N = r42
GR_Bias = r43
GR_M = r44
GR_Index3 = r45
GR_Index1 = r34
GR_Index2 = r35
GR_signif = r36
GR_X_0 = r37
GR_X_1 = r38
GR_X_2 = r39
GR_Z_1 = r40
GR_Z_2 = r41
GR_N = r42
GR_Bias = r43
GR_M = r44
GR_Index3 = r45
GR_ad_p2 = r46
GR_exp_mask = r47
GR_exp_2tom7 = r48
GR_ad_ln10 = r49
GR_exp_mask = r47
GR_exp_2tom7 = r48
GR_ad_ln10 = r49
GR_ad_tbl_1 = r50
GR_ad_tbl_2 = r51
GR_ad_tbl_3 = r52
@ -650,7 +650,7 @@ GLOBAL_IEEE754_ENTRY(log10l)
// Common code for logl and log10l
LOGL_BEGIN:
LOGL_BEGIN:
{ .mfi
ld8 GR_ad_z_1 = [GR_ad_z_1] // Get pointer to Constants_Z_1
fclass.m p10, p0 = FR_Input_X, 0x0b // Test for denormal
@ -741,7 +741,7 @@ LOGL_64_COMMON:
{ .mmi
ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo
(p14) ldfe FR_1LN10_hi = [GR_ad_ln10],16 // If log10l, load 1/ln10_hi
sub GR_N = GR_N, GR_Bias
sub GR_N = GR_N, GR_Bias
}
;;
@ -762,7 +762,7 @@ LOGL_64_COMMON:
{ .mmi
getf.exp GR_M = FR_W // Get signexp of w = x - 1
ldfe FR_Q2 = [GR_ad_q],16 // Load Q2
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
}
;;
@ -1007,7 +1007,7 @@ LOGL_64_COMMON:
{ .mfi
nop.m 999
(p9) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo = poly_hi + poly_lo
(p9) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo = poly_hi + poly_lo
nop.i 999
}
;;
@ -1041,25 +1041,25 @@ LOGL_64_COMMON:
// Here if x=+-0
LOGL_64_zero:
LOGL_64_zero:
//
// If x=+-0 raise divide by zero and return -inf
//
//
{ .mfi
(p7) mov GR_Parameter_TAG = 0
fsub.s1 FR_Output_X_tmp = f0, f1
fsub.s1 FR_Output_X_tmp = f0, f1
nop.i 999
}
;;
{ .mfb
(p14) mov GR_Parameter_TAG = 6
frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0
(p14) mov GR_Parameter_TAG = 6
frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0
br.cond.sptk __libm_error_region
}
;;
LOGL_64_special:
LOGL_64_special:
{ .mfi
nop.m 999
fclass.m.unc p8, p0 = FR_Input_X, 0x1E1 // Test for natval, nan, +inf
@ -1067,21 +1067,21 @@ LOGL_64_special:
}
;;
//
//
// For SNaN raise invalid and return QNaN.
// For QNaN raise invalid and return QNaN.
// For +Inf return +Inf.
//
//
{ .mfb
nop.m 999
(p8) fmpy.s0 f8 = FR_Input_X, f1
(p8) fmpy.s0 f8 = FR_Input_X, f1
(p8) br.ret.sptk b0 // Return for natval, nan, +inf
}
;;
//
//
// For -Inf raise invalid and return QNaN.
//
//
{ .mmi
(p7) mov GR_Parameter_TAG = 1
nop.m 999
@ -1091,7 +1091,7 @@ LOGL_64_special:
{ .mfb
(p14) mov GR_Parameter_TAG = 7
fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0
fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0
br.cond.sptk __libm_error_region
}
;;
@ -1112,23 +1112,23 @@ LOGL_64_denormal:
}
;;
LOGL_64_unsupported:
//
LOGL_64_unsupported:
//
// Return generated NaN or other value.
//
//
{ .mfb
nop.m 999
fmpy.s0 f8 = FR_Input_X, f0
fmpy.s0 f8 = FR_Input_X, f0
br.ret.sptk b0
}
;;
// Here if -inf < x < 0
LOGL_64_negative:
//
LOGL_64_negative:
//
// Deal with x < 0 in a special way - raise
// invalid and produce QNaN indefinite.
//
//
{ .mfi
(p7) mov GR_Parameter_TAG = 1
frcpa.s0 FR_Output_X_tmp, p8 = f0, f0

View File

@ -1471,7 +1471,7 @@ POW_POSSIBLE_UNDER:
// 0.1...11 2^-3ffe (biased, 1)
// largest dn smallest normal
// Form small constant (2^-170) to correct underflow result near region of
// Form small constant (2^-170) to correct underflow result near region of
// smallest denormal in round-nearest.
// Put in s2 (td set, ftz set)
@ -1482,9 +1482,9 @@ POW_POSSIBLE_UNDER:
mov pow_GR_rcs0_mask = 0x0c00 // Set mask for rc.s0
}
{ .mfi
(p12) mov pow_GR_tmp = 0x2ffff - 170
(p12) mov pow_GR_tmp = 0x2ffff - 170
nop.f 999
(p13) mov pow_GR_tmp = 0x0ffff - 170
(p13) mov pow_GR_tmp = 0x0ffff - 170
}
;;

View File

@ -51,12 +51,12 @@
//
// API
//====================================================================
// double remainder(double,double);
// double remainder(double,double);
//
// Overview of operation
//====================================================================
// remainder(a,b)=a-i*b,
// where i is an integer such that, if b!=0 and a is finite,
// where i is an integer such that, if b!=0 and a is finite,
// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
//
// Algorithm
@ -64,16 +64,16 @@
// a). eliminate special cases
// b). if |a/b|<0.25 (first quotient estimate), return a
// c). use single precision divide algorithm to get quotient q
// rounded to 24 bits of precision
// d). calculate partial remainders (using both q and q-ulp);
// select one and RZ(a/b) based on the sign of |a|-|b|*q
// rounded to 24 bits of precision
// d). calculate partial remainders (using both q and q-ulp);
// select one and RZ(a/b) based on the sign of |a|-|b|*q
// e). if the exponent difference (exponent(a)-exponent(b))
// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
// and sticky bits to round to integer; exit loop and
// calculate final remainder
// f). if exponent(a)-exponent(b)>=24, select new value of a as
// the partial remainder calculated using RZ(a/b);
// repeat from c).
// the partial remainder calculated using RZ(a/b);
// repeat from c).
//
// Special cases
//====================================================================
@ -88,7 +88,7 @@
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
GR_SAVE_GP = r35
GR_SAVE_SP = r36
GR_Parameter_X = r37
@ -128,7 +128,7 @@ GLOBAL_IEEE754_ENTRY(remainder)
// Y +-NAN, +-inf, +-0? p11
{ .mfi
setf.exp f32=r28
fclass.m.unc p11,p0 = f9, 0xe7
fclass.m.unc p11,p0 = f9, 0xe7
nop.i 999
}
// qnan snan inf norm unorm 0 -+
@ -137,8 +137,8 @@ GLOBAL_IEEE754_ENTRY(remainder)
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999;;
fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999;;
}
{.mfi
@ -153,7 +153,7 @@ GLOBAL_IEEE754_ENTRY(remainder)
// y0 = 1 / b in f10
frcpa.s1 f10,p6=f13,f14
nop.i 0;;
}
}
{.bbb
(p9) br.cond.spnt FREM_X_NAN_INF
@ -164,10 +164,10 @@ GLOBAL_IEEE754_ENTRY(remainder)
// set D flag if a (f8) is denormal
fnma.s0 f6=f8,f1,f8
nop.i 0;;
}
}
remloop24:
remloop24:
{ .mfi
nop.m 0
// Step (2)
@ -184,7 +184,7 @@ remloop24:
nop.m 0
// r2=1.25*2^{-24}
movl r2=0x33a00000;;
}
}
{.mfi
nop.m 0
@ -217,12 +217,12 @@ remloop24:
// q2 = q1 + e1 * q1 in f6
(p6) fma.s1 f6=f7,f15,f15
nop.i 0;;
}
}
{.mmi
// f15=1.25*2^{-24}
setf.s f15=r2
// q<1/4 ? (i.e. expon< -2)
// q<1/4 ? (i.e. expon< -2)
(p7) cmp.gt p7,p0=r28,r29
nop.i 0;;
}
@ -230,7 +230,7 @@ remloop24:
{.mfb
// r29= -32+bias
mov r29=0xffdf
// if |a/b|<1/4, set D flag before returning
// if |a/b|<1/4, set D flag before returning
(p7) fma.d.s0 f9=f9,f0,f8
nop.b 0;;
}
@ -248,7 +248,7 @@ remloop24:
// set f8 to current a value | sign
fmerge.s f8=f8,f13
nop.i 0;;
}
}
{.mfi
@ -273,7 +273,7 @@ remloop24:
nop.m 0
cmp.eq p11,p14=r2,r28
nop.i 0;;
}
}
.pred.rel "mutex",p11,p14
{.mfi
@ -281,7 +281,7 @@ remloop24:
// if exp_q=2^23, then r=a-b*2^{23}
(p11) fnma.s1 f13=f12,f14,f13
nop.i 0
}
}
{.mfi
nop.m 0
// r2=a-b*q'
@ -302,7 +302,7 @@ remloop24:
.pred.rel "mutex",p8,p9
{.mfi
nop.m 0
nop.m 0
// (p8) Q=q+(last iteration ? sticky bits:0)
// i.e. Q=q+q*x (x=2^{-32} or 0)
(p8) fma.s1 f11=f11,f7,f11
@ -321,7 +321,7 @@ remloop24:
// (p10) new a =r
(p10) mov f13=f6
(p12) br.cond.sptk remloop24;;
}
}
// last iteration
{.mfi
@ -341,15 +341,15 @@ remloop24:
// save sign of a
fmerge.s f7=f8,f8
nop.i 0
} {.mfi
} {.mfi
nop.m 0
// normalize
fcvt.xf f11=f11
nop.i 0;;
}
}
{.mfi
nop.m 0
// This can be removed if sign of 0 is not important
// This can be removed if sign of 0 is not important
// get remainder using sf1
fnma.d.s1 f12=f9,f11,f8
nop.i 0
@ -363,24 +363,24 @@ remloop24:
{.mfi
nop.m 0
// f12=0?
// This can be removed if sign of 0 is not important
// This can be removed if sign of 0 is not important
fcmp.eq.unc.s1 p8,p0=f12,f0
nop.i 0;;
}
{.mfb
nop.m 0
// if f8=0, set sign correctly
// This can be removed if sign of 0 is not important
// This can be removed if sign of 0 is not important
(p8) fmerge.s f8=f7,f8
// return
br.ret.sptk b0;;
}
FREM_X_NAN_INF:
FREM_X_NAN_INF:
// Y zero ?
{.mfi
{.mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
@ -394,20 +394,20 @@ FREM_X_NAN_INF:
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt FREM_Y_ZERO;;
(p11) br.cond.spnt FREM_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
fclass.m.unc p8,p0 = f8, 0x23
fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
{.mfi
@ -420,10 +420,10 @@ FREM_X_NAN_INF:
// also set Denormal flag if necessary
(p8) fma.s0 f9=f9,f1,f0
nop.i 0
}
}
{ .mfi
nop.m 999
(p8) frcpa.s0 f8,p7 = f8,f8
(p8) frcpa.s0 f8,p7 = f8,f8
nop.i 999 ;;
}
@ -434,48 +434,48 @@ FREM_X_NAN_INF:
}
{ .mfi
nop.m 999
(p8) fma.d.s0 f8=f8,f1,f0
nop.i 0 ;;
(p8) fma.d.s0 f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
frcpa.s0 f8,p7=f8,f9
(p11) br.cond.spnt EXP_ERROR_RETURN;;
frcpa.s0 f8,p7=f8,f9
(p11) br.cond.spnt EXP_ERROR_RETURN;;
}
{ .mib
nop.m 0
nop.i 0
br.ret.spnt b0 ;;
br.ret.spnt b0 ;;
}
FREM_Y_NAN_INF_ZERO:
FREM_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
fclass.m.unc p7,p0 = f9, 0x23
fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p7) fma.d.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
(p7) fma.d.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
fclass.m.unc p9,p0 = f9, 0xc3
fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p9) fma.d.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
(p9) fma.d.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
FREM_Y_ZERO:
@ -486,12 +486,12 @@ FREM_Y_ZERO:
// X NAN?
{ .mfi
nop.m 999
fclass.m.unc p9,p10 = f8, 0xc3
fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p10) fclass.nm p9,p10 = f8, 0xff
(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
@ -503,29 +503,29 @@ FREM_Y_ZERO:
{ .mfi
nop.m 999
(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
fmerge.s f10 = f8, f8
fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
fma.d.s0 f8=f11,f1,f0
fma.d.s0 f8=f11,f1,f0
nop.i 999
}
EXP_ERROR_RETURN:
EXP_ERROR_RETURN:
{ .mib
mov GR_Parameter_TAG = 124
mov GR_Parameter_TAG = 124
nop.i 999
br.sptk __libm_error_region;;
br.sptk __libm_error_region;;
}
GLOBAL_IEEE754_END(remainder)
@ -538,10 +538,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@ -549,18 +549,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
@ -575,10 +575,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)

View File

@ -40,7 +40,7 @@
// History
//====================================================================
// 02/02/00 Initial version
// 03/02/00 New algorithm
// 03/02/00 New algorithm
// 04/04/00 Unwind support added
// 07/21/00 Fixed quotient=2^{24*m+23} bug
// 08/15/00 Bundle added after call to __libm_error_support to properly
@ -51,12 +51,12 @@
//
// API
//====================================================================
// float remainderf(float,float);
// float remainderf(float,float);
//
// Overview of operation
//====================================================================
// remainder(a,b)=a-i*b,
// where i is an integer such that, if b!=0 and a is finite,
// where i is an integer such that, if b!=0 and a is finite,
// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
//
// Algorithm
@ -64,16 +64,16 @@
// a). eliminate special cases
// b). if |a/b|<0.25 (first quotient estimate), return a
// c). use single precision divide algorithm to get quotient q
// rounded to 24 bits of precision
// d). calculate partial remainders (using both q and q-ulp);
// select one and RZ(a/b) based on the sign of |a|-|b|*q
// rounded to 24 bits of precision
// d). calculate partial remainders (using both q and q-ulp);
// select one and RZ(a/b) based on the sign of |a|-|b|*q
// e). if the exponent difference (exponent(a)-exponent(b))
// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
// and sticky bits to round to integer; exit loop and
// calculate final remainder
// f). if exponent(a)-exponent(b)>=24, select new value of a as
// the partial remainder calculated using RZ(a/b);
// repeat from c).
// the partial remainder calculated using RZ(a/b);
// repeat from c).
//
// Special cases
//====================================================================
@ -89,7 +89,7 @@
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
GR_SAVE_GP = r35
GR_SAVE_SP = r36
GR_Parameter_X = r37
@ -129,7 +129,7 @@ GLOBAL_IEEE754_ENTRY(remainderf)
// Y +-NAN, +-inf, +-0? p11
{ .mfi
nop.m 999
fclass.m.unc p11,p0 = f9, 0xe7
fclass.m.unc p11,p0 = f9, 0xe7
nop.i 999
}
// qnan snan inf norm unorm 0 -+
@ -138,8 +138,8 @@ GLOBAL_IEEE754_ENTRY(remainderf)
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999;;
fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999;;
}
{.mfi
@ -154,7 +154,7 @@ GLOBAL_IEEE754_ENTRY(remainderf)
// y0 = 1 / b in f10
frcpa.s1 f10,p6=f13,f14
nop.i 0;;
}
}
{.bbb
(p9) br.cond.spnt FREM_X_NAN_INF
(p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
@ -164,10 +164,10 @@ GLOBAL_IEEE754_ENTRY(remainderf)
// set D flag if a (f8) is denormal
fnma.s0 f6=f8,f1,f8
nop.i 0;;
}
}
.align 32
remloop24:
remloop24:
{ .mfi
// f12=2^{24}-2
setf.s f12=r3
@ -175,26 +175,26 @@ remloop24:
// q0 = a * y0 in f15
(p6) fma.s1 f15=f13,f10,f0
nop.i 0
}
}
{ .mfi
nop.m 0
// Step (3)
// e0 = 1 - b * y0 in f7
(p6) fnma.s1 f7=f14,f10,f1
nop.i 0;;
}
}
{.mlx
nop.m 0
// r2=1.25*2^{-24}
movl r2=0x33a00000;;
}
}
{ .mfi
nop.m 0
// Step (4)
// q1 = q0 + e0 * q0 in f6
(p6) fma.s1 f6=f7,f15,f15
nop.i 0
}
}
{ .mfi
nop.m 0
// Step (5)
@ -215,17 +215,17 @@ remloop24:
// q2 = q1 + e1 * q1 in f6
(p6) fma.s1 f6=f7,f6,f6
nop.i 0
}
}
{ .mfi
mov r2=0x3e7
// Step (7)
// e2 = e1 * e1 in f7
(p6) fma.s1 f7=f7,f7,f0
nop.i 0;;
}
}
{.mmi
// q<1/4 ? (i.e. expon< -2)
// q<1/4 ? (i.e. expon< -2)
(p7) cmp.gt.unc p7,p0=r28,r29
nop.m 0
// r2=0x3e7000000
@ -235,7 +235,7 @@ remloop24:
{.mfb
// r2=0x3e7000001
add r2=1,r2
// if |a/b|<1/4, set D flag before returning
// if |a/b|<1/4, set D flag before returning
(p7) fma.s.s0 f9=f9,f0,f8
nop.b 0;;
}
@ -253,7 +253,7 @@ remloop24:
fmerge.s f8=f8,f13
// r2=2^{-24}+2^{-48} (double prec.)
shl r2=r2,28;;
}
}
{ .mfi
@ -263,14 +263,14 @@ remloop24:
// q3 = q2 + e2 * q2 in f6
(p6) fma.d.s1 f6=f7,f6,f6
nop.i 0;;
}
}
{ .mfi
nop.m 0
// Step (9)
// q = q3 in f11
(p6) fma.s.s1 f11=f6,f1,f0
nop.i 0;;
}
}
{.mfi
// f7=2^{-24}
setf.d f7=r2
@ -288,7 +288,7 @@ remloop24:
// r=a-b*q
fnma.s1 f6=f14,f11,f13
nop.i 0
}
}
{.mfi
nop.m 0
// q'=q-q*(1.25*2^{-24}) (q'=q-ulp)
@ -307,7 +307,7 @@ remloop24:
// r>0 iff q=RZ(a/b) and inexact
fcmp.gt.unc.s1 p8,p0=f6,f0
nop.i 0
}
}
{.mfi
nop.m 0
// r<0 iff q'=RZ(a/b) and inexact
@ -321,7 +321,7 @@ remloop24:
// i.e. Q=q+q*x (x=2^{-32} or 0)
(p8) fma.s1 f11=f11,f12,f11
nop.i 0
}
}
{.mfi
nop.m 0
// (p9) Q=q'+(last iteration ? sticky bits:0)
@ -336,7 +336,7 @@ remloop24:
// (p10) new a =r
(p10) mov f13=f6
(p12) br.cond.sptk remloop24;;
}
}
// last iteration
{.mfi
@ -356,16 +356,16 @@ remloop24:
// save sign of a
fmerge.s f7=f8,f8
nop.i 0
}
{.mfi
}
{.mfi
nop.m 0
// normalize
fcvt.xf f11=f11
nop.i 0;;
}
}
{.mfi
nop.m 0
// This can be removed if sign of 0 is not important
// This can be removed if sign of 0 is not important
// get remainder using sf1
fnma.s.s1 f12=f9,f11,f8
nop.i 0
@ -382,24 +382,24 @@ remloop24:
{.mfi
nop.m 0
// f12=0?
// This can be removed if sign of 0 is not important
// This can be removed if sign of 0 is not important
fcmp.eq.unc.s1 p8,p0=f12,f0
nop.i 0;;
}
{.mfb
nop.m 0
// if f8=0, set sign correctly
// This can be removed if sign of 0 is not important
// This can be removed if sign of 0 is not important
(p8) fmerge.s f8=f7,f8
// return
br.ret.sptk b0;;
}
FREM_X_NAN_INF:
FREM_X_NAN_INF:
// Y zero ?
{.mfi
{.mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
@ -413,20 +413,20 @@ FREM_X_NAN_INF:
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt FREM_Y_ZERO;;
(p11) br.cond.spnt FREM_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
fclass.m.unc p8,p0 = f8, 0x23
fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
{.mfi
@ -439,10 +439,10 @@ FREM_X_NAN_INF:
// also set Denormal flag if necessary
(p8) fma.s0 f9=f9,f1,f0
nop.i 0
}
}
{ .mfi
nop.m 999
(p8) frcpa.s0 f8,p7 = f8,f8
(p8) frcpa.s0 f8,p7 = f8,f8
nop.i 999 ;;
}
@ -453,48 +453,48 @@ FREM_X_NAN_INF:
}
{ .mfi
nop.m 999
(p8) fma.s.s0 f8=f8,f1,f0
nop.i 0 ;;
(p8) fma.s.s0 f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
frcpa.s0 f8,p7=f8,f9
(p11) br.cond.spnt EXP_ERROR_RETURN;;
frcpa.s0 f8,p7=f8,f9
(p11) br.cond.spnt EXP_ERROR_RETURN;;
}
{ .mib
nop.m 0
nop.i 0
br.ret.spnt b0 ;;
br.ret.spnt b0 ;;
}
FREM_Y_NAN_INF_ZERO:
FREM_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
fclass.m.unc p7,p0 = f9, 0x23
fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p7) fma.s.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
(p7) fma.s.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
fclass.m.unc p9,p0 = f9, 0xc3
fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p9) fma.s.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
(p9) fma.s.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
FREM_Y_ZERO:
@ -505,12 +505,12 @@ FREM_Y_ZERO:
// X NAN?
{ .mfi
nop.m 999
fclass.m.unc p9,p10 = f8, 0xc3
fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p10) fclass.nm p9,p10 = f8, 0xff
(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
@ -522,29 +522,29 @@ FREM_Y_ZERO:
{ .mfi
nop.m 999
(p10) frcpa.s0 f11,p7 = f0,f0
(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
fmerge.s f10 = f8, f8
fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
fma.s.s0 f8=f11,f1,f0
fma.s.s0 f8=f11,f1,f0
nop.i 999
}
EXP_ERROR_RETURN:
EXP_ERROR_RETURN:
{ .mib
mov GR_Parameter_TAG = 125
mov GR_Parameter_TAG = 125
nop.i 999
br.sptk __libm_error_region;;
br.sptk __libm_error_region;;
}
GLOBAL_IEEE754_END(remainderf)
@ -557,10 +557,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@ -568,18 +568,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support#;; // Call error handling function
}
{ .mmi
@ -594,10 +594,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)

View File

@ -40,7 +40,7 @@
// History
//====================================================================
// 02/02/00 Initial version
// 03/02/00 New algorithm
// 03/02/00 New algorithm
// 04/04/00 Unwind support added
// 07/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
// 08/15/00 Bundle added after call to __libm_error_support to properly
@ -51,12 +51,12 @@
//
// API
//====================================================================
// long double remainderl(long double,long double);
// long double remainderl(long double,long double);
//
// Overview of operation
//====================================================================
// remainder(a,b)=a-i*b,
// where i is an integer such that, if b!=0 and a is finite,
// where i is an integer such that, if b!=0 and a is finite,
// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
//
// Algorithm
@ -64,16 +64,16 @@
// a). eliminate special cases
// b). if |a/b|<0.25 (first quotient estimate), return a
// c). use single precision divide algorithm to get quotient q
// rounded to 24 bits of precision
// d). calculate partial remainders (using both q and q-ulp);
// select one and RZ(a/b) based on the sign of |a|-|b|*q
// rounded to 24 bits of precision
// d). calculate partial remainders (using both q and q-ulp);
// select one and RZ(a/b) based on the sign of |a|-|b|*q
// e). if the exponent difference (exponent(a)-exponent(b))
// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
// and sticky bits to round to integer; exit loop and
// calculate final remainder
// f). if exponent(a)-exponent(b)>=24, select new value of a as
// the partial remainder calculated using RZ(a/b);
// repeat from c).
// the partial remainder calculated using RZ(a/b);
// repeat from c).
//
// Special cases
//====================================================================
@ -89,7 +89,7 @@
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
GR_SAVE_GP = r35
GR_SAVE_SP = r36
GR_Parameter_X = r37
@ -137,7 +137,7 @@ cmp.eq p11,p10=r29,r0;;
// Y +-NAN, +-inf, +-0? p11
{ .mfi
nop.m 999
(p10) fclass.m p11,p10 = f9, 0xe7
(p10) fclass.m p11,p10 = f9, 0xe7
nop.i 999
}
// qnan snan inf norm unorm 0 -+
@ -146,8 +146,8 @@ cmp.eq p11,p10=r29,r0;;
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
fclass.m.unc p9,p8 = f8, 0xe3
nop.i 999;;
fclass.m.unc p9,p8 = f8, 0xe3
nop.i 999;;
}
{.mfi
@ -162,12 +162,12 @@ cmp.eq p11,p10=r29,r0;;
// y0 = 1 / b in f10
frcpa.s1 f10,p6=f13,f14
nop.i 0;;
}
}
// Y +-NAN, +-inf, +-0? p11
{ .mfi
nop.m 999
// pseudo-NaN ?
(p10) fclass.nm p11,p0 = f9, 0xff
(p10) fclass.nm p11,p0 = f9, 0xff
nop.i 999
}
@ -178,7 +178,7 @@ cmp.eq p11,p10=r29,r0;;
{ .mfi
nop.m 999
(p8) fclass.nm p9,p0 = f8, 0xff
(p8) fclass.nm p9,p0 = f8, 0xff
nop.i 999;;
}
@ -191,9 +191,9 @@ cmp.eq p11,p10=r29,r0;;
// set D flag if a (f8) is denormal
fnma.s0 f6=f8,f1,f8
nop.i 0;;
}
}
remloop24:
remloop24:
{ .mfi
nop.m 0
// Step (2)
@ -210,7 +210,7 @@ remloop24:
nop.m 0
// r2=1.25*2^{-24}
movl r2=0x33a00000;;
}
}
{.mfi
nop.m 0
@ -244,12 +244,12 @@ remloop24:
// q2 = q1 + e1 * q1 in f6
(p6) fma.s1 f6=f7,f15,f15
nop.i 0;;
}
}
{.mmi
// f15=1.25*2^{-24}
setf.s f15=r2
// q<1/4 ? (i.e. expon< -2)
// q<1/4 ? (i.e. expon< -2)
(p7) cmp.gt p7,p0=r28,r29
nop.i 0;;
}
@ -257,7 +257,7 @@ remloop24:
{.mfb
// r29= -32+bias
mov r29=0xffdf
// if |a/b|<1/4, set D flag before returning
// if |a/b|<1/4, set D flag before returning
(p7) fma.s0 f9=f9,f0,f8
nop.b 0;;
}
@ -275,7 +275,7 @@ remloop24:
// set f8 to current a value | sign
fmerge.s f8=f8,f13
nop.i 0;;
}
}
{.mfi
getf.exp r28=f6
// last step ? (q<2^{23})
@ -298,7 +298,7 @@ remloop24:
nop.m 0
cmp.eq p11,p14=r2,r28
nop.i 0;;
}
}
.pred.rel "mutex",p11,p14
{.mfi
@ -306,7 +306,7 @@ remloop24:
// if exp_q=2^23, then r=a-b*2^{23}
(p11) fnma.s1 f13=f12,f14,f13
nop.i 0
}
}
{.mfi
nop.m 0
// r2=a-b*q'
@ -327,7 +327,7 @@ remloop24:
.pred.rel "mutex",p8,p9
{.mfi
nop.m 0
nop.m 0
// (p8) Q=q+(last iteration ? sticky bits:0)
// i.e. Q=q+q*x (x=2^{-32} or 0)
(p8) fma.s1 f11=f11,f7,f11
@ -346,7 +346,7 @@ remloop24:
// (p10) new a =r
(p10) mov f13=f6
(p12) br.cond.sptk remloop24;;
}
}
// last iteration
{.mfi
@ -366,15 +366,15 @@ remloop24:
// save sign of a
fmerge.s f7=f8,f8
nop.i 0
} {.mfi
} {.mfi
nop.m 0
// normalize
fcvt.xf f11=f11
nop.i 0;;
}
}
{.mfi
nop.m 0
// This can be removed if sign of 0 is not important
// This can be removed if sign of 0 is not important
// get remainder using sf1
fnma.s1 f12=f9,f11,f8
nop.i 0
@ -388,14 +388,14 @@ remloop24:
{.mfi
nop.m 0
// f12=0?
// This can be removed if sign of 0 is not important
// This can be removed if sign of 0 is not important
fcmp.eq.unc.s1 p8,p0=f12,f0
nop.i 0;;
}
{.mfb
nop.m 0
// if f8=0, set sign correctly
// This can be removed if sign of 0 is not important
// This can be removed if sign of 0 is not important
(p8) fmerge.s f8=f7,f8
// return
br.ret.sptk b0;;
@ -403,10 +403,10 @@ remloop24:
FREM_X_NAN_INF:
FREM_X_NAN_INF:
// Y zero ?
{.mfi
{.mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
@ -420,20 +420,20 @@ FREM_X_NAN_INF:
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt FREM_Y_ZERO;;
(p11) br.cond.spnt FREM_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
fclass.m.unc p8,p0 = f8, 0x23
fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
{.mfi
@ -446,10 +446,10 @@ FREM_X_NAN_INF:
// also set Denormal flag if necessary
(p8) fnma.s0 f9=f9,f1,f9
nop.i 0
}
}
{ .mfi
nop.m 999
(p8) frcpa.s0 f8,p7 = f8,f8
(p8) frcpa.s0 f8,p7 = f8,f8
nop.i 999 ;;
}
@ -460,52 +460,52 @@ FREM_X_NAN_INF:
}
{ .mfi
nop.m 999
(p8) fma.s0 f8=f8,f1,f0
nop.i 0 ;;
(p8) fma.s0 f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
frcpa.s0 f8,p7=f8,f9
(p11) br.cond.spnt EXP_ERROR_RETURN;;
frcpa.s0 f8,p7=f8,f9
(p11) br.cond.spnt EXP_ERROR_RETURN;;
}
{ .mib
nop.m 0
nop.i 0
br.ret.spnt b0 ;;
br.ret.spnt b0 ;;
}
FREM_Y_NAN_INF_ZERO:
FREM_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
fclass.m.unc p7,p0 = f9, 0x23
fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p7) fma.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
(p7) fma.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
fclass.m.unc p9,p10 = f9, 0xc3
fclass.m.unc p9,p10 = f9, 0xc3
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p10) fclass.nm p9,p0 = f9, 0xff
(p10) fclass.nm p9,p0 = f9, 0xff
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p9) fma.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
(p9) fma.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
FREM_Y_ZERO:
@ -516,12 +516,12 @@ FREM_Y_ZERO:
// X NAN?
{ .mfi
nop.m 999
fclass.m.unc p9,p10 = f8, 0xc3
fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p10) fclass.nm p9,p10 = f8, 0xff
(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
@ -532,28 +532,28 @@ FREM_Y_ZERO:
}
{ .mfi
nop.m 999
(p10) frcpa.s0 f11,p7 = f0,f0
(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
fmerge.s f10 = f8, f8
fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
fma.s0 f8=f11,f1,f0
fma.s0 f8=f11,f1,f0
nop.i 999;;
}
EXP_ERROR_RETURN:
EXP_ERROR_RETURN:
{ .mib
mov GR_Parameter_TAG = 123
mov GR_Parameter_TAG = 123
nop.i 999
br.sptk __libm_error_region;;
br.sptk __libm_error_region;;
}
GLOBAL_IEEE754_END(remainderl)
@ -564,10 +564,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@ -575,18 +575,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mmi
stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
{ .mib
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
@ -601,10 +601,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)

View File

@ -378,7 +378,7 @@ SCALB_UNDERFLOW:
SCALB_NAN_INF_ZERO:
//
// Before entry, N has been converted to a fp integer in significand of
// Before entry, N has been converted to a fp integer in significand of
// FR_N_float_int
//
// Convert N_float_int to floating point value

View File

@ -378,7 +378,7 @@ SCALBF_UNDERFLOW:
SCALBF_NAN_INF_ZERO:
//
// Before entry, N has been converted to a fp integer in significand of
// Before entry, N has been converted to a fp integer in significand of
// FR_N_float_int
//
// Convert N_float_int to floating point value

View File

@ -378,7 +378,7 @@ SCALBL_UNDERFLOW:
SCALBL_NAN_INF_ZERO:
//
// Before entry, N has been converted to a fp integer in significand of
// Before entry, N has been converted to a fp integer in significand of
// FR_N_float_int
//
// Convert N_float_int to floating point value

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -44,7 +44,7 @@
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 10/12/00 Update to set denormal operand and underflow flags
// 01/22/01 Fixed to set inexact flag for small args. Fixed incorrect
// 01/22/01 Fixed to set inexact flag for small args. Fixed incorrect
// call to __libm_error_support for 710.476 < x < 11357.2166.
// 05/02/01 Reworked to improve speed of all paths
// 05/20/02 Cleaned up namespace and sf0 syntax
@ -58,12 +58,12 @@
//
// Registers used
//==============================================================
// general registers:
// general registers:
// r14 -> r40
// predicate registers used:
// p6 -> p11
// floating-point registers used:
// f9 -> f15; f32 -> f90;
// f9 -> f15; f32 -> f90;
// f8 has input, then output
//
// Overview of operation
@ -84,7 +84,7 @@
// 1. SINH_BY_POLY 0 < |x| < 0.25
// ===============
// Evaluate sinh(x) by a 13th order polynomial
// Care is taken for the order of multiplication; and P_1 is not exactly 1/3!,
// Care is taken for the order of multiplication; and P_1 is not exactly 1/3!,
// P_2 is not exactly 1/5!, etc.
// sinh(x) = sign * (series(e^x) - series(e^-x))/2
// = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11!
@ -100,18 +100,18 @@
// =============
// sinh(x) = sinh(B+R)
// = sinh(B)cosh(R) + cosh(B)sinh(R)
//
//
// ax = |x| = M*log2/64 + R
// B = M*log2/64
// M = 64*N + j
// M = 64*N + j
// We will calculate M and get N as (M-j)/64
// The division is a shift.
// exp(B) = exp(N*log2 + j*log2/64)
// = 2^N * 2^(j*log2/64)
// sinh(B) = 1/2(e^B -e^-B)
// = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64))
// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64))
// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64))
// = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64))
// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64))
// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64))
// 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
//
@ -119,7 +119,7 @@
// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
// = 1 + p_odd + p_even
// where the p_odd uses the A coefficients and the p_even uses
// where the p_odd uses the A coefficients and the p_even uses
// the B coefficients
//
// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
@ -183,7 +183,7 @@ GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
f_ABS_X = f9
f_ABS_X = f9
f_X2 = f10
f_X4 = f11
f_tmp = f14
@ -238,16 +238,16 @@ f_Tmjlo = f68
f_S_hi = f69
f_SC_hi_temp = f70
f_S_lo_temp1 = f71
f_S_lo_temp2 = f72
f_S_lo_temp3 = f73
f_S_lo_temp4 = f73
f_S_lo_temp1 = f71
f_S_lo_temp2 = f72
f_S_lo_temp3 = f73
f_S_lo_temp4 = f73
f_S_lo = f74
f_C_hi = f75
f_Y_hi = f77
f_Y_lo_temp = f78
f_Y_lo = f79
f_Y_hi = f77
f_Y_lo_temp = f78
f_Y_lo = f79
f_NORM_X = f80
f_P1 = f81
@ -452,7 +452,7 @@ GLOBAL_IEEE754_ENTRY(sinhl)
}
{ .mfi
nop.m 0
fnorm.s1 f_NORM_X = f8
fnorm.s1 f_NORM_X = f8
mov r_exp_2tom57 = 0xffff-57
}
;;
@ -460,7 +460,7 @@ GLOBAL_IEEE754_ENTRY(sinhl)
{ .mfi
setf.d f_RSHF_2TO57 = r_rshf_2to57 // Form const 1.100 * 2^120
fclass.m p10,p0 = f8, 0x0b // Test for denorm
mov r_exp_mask = 0x1ffff
mov r_exp_mask = 0x1ffff
}
{ .mlx
setf.sig f_INV_LN2_2TO63 = r_sig_inv_ln2 // Form 1/ln2 * 2^63
@ -500,7 +500,7 @@ SINH_COMMON:
add r_ad5 = 0x580, r_ad1 // Point to j_lo_table midpoint
}
{ .mib
ldfe f_log2by64_hi = [r_ad1],16
ldfe f_log2by64_hi = [r_ad1],16
and r_exp_x = r_exp_mask, r_signexp_x
(p7) br.ret.spnt b0 // Exit if x=0
}
@ -508,36 +508,36 @@ SINH_COMMON:
// Get the A coefficients for SINH_BY_TBL
{ .mfi
ldfe f_A1 = [r_ad3],16
ldfe f_A1 = [r_ad3],16
fcmp.lt.s1 p8,p9 = f8,f0 // Test for x<0
cmp.lt p7,p0 = r_exp_x, r_exp_0_25 // Test x < 0.25
}
{ .mfb
add r_ad2o = 0x30, r_ad2e // Point to p_table odd coeffs
(p6) fma.s0 f8 = f8,f1,f0 // Result for x nan, inf
(p6) fma.s0 f8 = f8,f1,f0 // Result for x nan, inf
(p6) br.ret.spnt b0 // Exit for x nan, inf
}
;;
// Calculate X2 = ax*ax for SINH_BY_POLY
{ .mfi
ldfe f_log2by64_lo = [r_ad1],16
ldfe f_log2by64_lo = [r_ad1],16
nop.f 0
nop.i 0
}
{ .mfb
ldfe f_A2 = [r_ad3],16
ldfe f_A2 = [r_ad3],16
fma.s1 f_X2 = f_NORM_X, f_NORM_X, f0
(p7) br.cond.spnt SINH_BY_POLY
}
;;
// Here if |x| >= 0.25
SINH_BY_TBL:
SINH_BY_TBL:
// ******************************************************
// STEP 1 (TBL and EXP) - Argument reduction
// ******************************************************
// Get the following constants.
// Get the following constants.
// Inv_log2by64
// log2by64_hi
// log2by64_lo
@ -592,20 +592,20 @@ SINH_BY_TBL:
// Subtract RSHF constant to get rounded M as a floating point value
// M_temp * 2^(63-6) - 2^63
{ .mfb
ldfe f_B3 = [r_ad3],16
ldfe f_B3 = [r_ad3],16
fms.s1 f_M = f_M_temp, f_2TOM57, f_RSHF
(p6) br.cond.spnt SINH_HUGE // Branch if result will overflow
}
;;
{ .mfi
getf.sig r_M = f_M_temp
getf.sig r_M = f_M_temp
nop.f 0
cmp.ge p7,p6 = r_exp_x, r_exp_32 // Test if x >= 32
}
;;
// Calculate j. j is the signed extension of the six lsb of M. It
// Calculate j. j is the signed extension of the six lsb of M. It
// has a range of -32 thru 31.
// Calculate R
@ -648,8 +648,8 @@ SINH_BY_TBL:
// N = (M-j)/64
{ .mfi
ldfe f_Tjhi = [r_ad_J_hi]
fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp
shr r_N = r_Mmj, 0x6 // N = (M-j)/64
fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp
shr r_N = r_Mmj, 0x6 // N = (M-j)/64
}
{ .mfi
shladd r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi
@ -724,8 +724,8 @@ SINH_BY_TBL:
}
;;
//
// If TBL,
//
// If TBL,
// Calculate S_hi and S_lo, and C_hi
// SC_hi_temp = sneg * Tmjhi
// S_hi = spos * Tjhi - SC_hi_temp
@ -735,12 +735,12 @@ SINH_BY_TBL:
{ .mfi
nop.m 0
(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0
(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0
nop.i 0
}
;;
// If TBL,
// If TBL,
// S_lo_temp3 = sneg * Tmjlo
// S_lo_temp4 = spos * Tjlo - S_lo_temp3
// S_lo_temp4 = spos * Tjlo -(sneg * Tmjlo)
@ -763,7 +763,7 @@ SINH_BY_TBL:
}
;;
// If EXP,
// If EXP,
// Compute sgnx * 2^(N-1) * Tjhi and sgnx * 2^(N-1) * Tjlo
{ .mfi
nop.m 0
@ -822,7 +822,7 @@ SINH_BY_TBL:
{ .mfi
nop.m 0
(p6) fnma.s1 f_S_lo_temp2 = f_sneg, f_Tmjhi, f_S_lo_temp1
(p6) fnma.s1 f_S_lo_temp2 = f_sneg, f_Tmjhi, f_S_lo_temp1
nop.i 0
}
;;
@ -847,7 +847,7 @@ SINH_BY_TBL:
;;
// If TBL,
// Y_hi = S_hi
// Y_hi = S_hi
// Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
{ .mfi
nop.m 0
@ -894,7 +894,7 @@ SINH_BY_TBL:
// Here if 0 < |x| < 0.25
SINH_BY_POLY:
SINH_BY_POLY:
{ .mmf
ldfe f_P6 = [r_ad2e],16
ldfe f_P5 = [r_ad2o],16
@ -911,7 +911,7 @@ SINH_BY_POLY:
{ .mmi
ldfe f_P2 = [r_ad2e],16
ldfe f_P1 = [r_ad2o],16
ldfe f_P1 = [r_ad2o],16
nop.i 0
}
;;
@ -1012,7 +1012,7 @@ SINH_DENORM:
{ .mfi
nop.m 0
(p6) fma.s0 f8 = f8,f8,f8 // If x +denorm, result=x+x^2
nop.i 0
nop.i 0
}
{ .mfb
nop.m 0
@ -1023,7 +1023,7 @@ SINH_DENORM:
// Here if |x| >= overflow limit
SINH_HUGE:
SINH_HUGE:
// for SINH_HUGE, put 24000 in exponent; take sign from input
{ .mmi
mov r_exp_huge = 0x15dbf
@ -1035,7 +1035,7 @@ SINH_HUGE:
.pred.rel "mutex",p8,p9
{ .mfi
alloc r32 = ar.pfs,0,5,4,0
alloc r32 = ar.pfs,0,5,4,0
(p8) fnma.s1 f_signed_hi_lo = f_huge, f1, f1
nop.i 0
}
@ -1083,7 +1083,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mib
stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfe [GR_Parameter_Y] = f_pre_result // STORE Parameter 3 on stack

View File

@ -3,9 +3,9 @@
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//********************************************************************
@ -120,7 +120,7 @@ GLOBAL_IEEE754_ENTRY(sqrt)
setf.s f10=r3
// Step (1)
// y0 = 1/sqrt(a) in f7
fclass.m.unc p7,p8 = f8,0x3A
fclass.m.unc p7,p8 = f8,0x3A
nop.i 0;;
} { .mlx
nop.m 0
@ -238,7 +238,7 @@ GLOBAL_IEEE754_ENTRY(sqrt)
// g2 = g1 + d * h1 in f7
(p6) fma.d.s0 f8=f9,f6,f7
(p6) br.ret.sptk b0 ;;
}
}
{ .mfb
nop.m 0
@ -273,7 +273,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
//
// This branch includes all those special values that are not negative,
// with the result equal to frcpa(x)
//
//
.prologue
// We are distinguishing between over(under)flow and letting
@ -307,7 +307,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mib
stfd [GR_Parameter_X] = f15 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
@ -55,7 +55,7 @@
//
//********************************************************************
//
// Accuracy: Correctly Rounded
// Accuracy: Correctly Rounded
//
//********************************************************************
//
@ -77,7 +77,7 @@
// All faults and exceptions should be raised correctly.
// sqrtf(QNaN) = QNaN
// sqrtf(SNaN) = QNaN
// sqrtf(+/-0) = +/-0
// sqrtf(+/-0) = +/-0
// sqrtf(negative) = QNaN and error handling is called
//
//********************************************************************
@ -91,7 +91,7 @@
GR_SAVE_B0 = r34
GR_SAVE_PFS = r33
GR_SAVE_GP = r35
GR_SAVE_GP = r35
GR_Parameter_X = r37
GR_Parameter_Y = r38
@ -119,12 +119,12 @@ GLOBAL_IEEE754_ENTRY(sqrtf)
setf.exp f12 = r2
// Step (1)
// y0 = 1/sqrt(a) in f7
fclass.m.unc p7,p8 = f8,0x3A
fclass.m.unc p7,p8 = f8,0x3A
nop.i 0
} { .mfi
nop.m 0
// Make a copy of x just in case
mov f13 = f8
// Make a copy of x just in case
mov f13 = f8
nop.i 0;;
} { .mfi
nop.m 0
@ -209,7 +209,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mii
add GR_Parameter_Y=-32,sp // Parameter 2 value
mov GR_Parameter_TAG = 50
mov GR_Parameter_TAG = 50
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
@ -248,10 +248,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//********************************************************************
@ -123,8 +123,8 @@ alloc r32= ar.pfs,0,5,4,0
nop.i 0;;
} { .mfi
nop.m 0
// Make copy input x
mov f13=f8
// Make copy input x
mov f13=f8
nop.i 0
} { .mfi
nop.m 0
@ -136,7 +136,7 @@ alloc r32= ar.pfs,0,5,4,0
// d0 = 1/2 - S0 * H0 in f10
(p6) fnma.s1 f10=f7,f9,f12
nop.i 0;;
}
}
{ .mfi
nop.m 0
mov f15=f8

View File

@ -1,6 +1,6 @@
/* file: libm_cpu_defs.h */
// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//

View File

@ -1,7 +1,7 @@
/* file: libm_error_codes.h */
/*
/*
// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
@ -52,7 +52,7 @@
*/
#if !defined(__LIBM_ERROR_CODES_H__)
#define __LIBM_ERROR_CODES_H__
#define __LIBM_ERROR_CODES_H__
typedef enum
{
@ -192,9 +192,9 @@ typedef enum
tgamma_overflow, tgamma_negative, tgamma_reserve, /* 258, 259, 260 */
tgammaf_overflow, tgammaf_negative, tgammaf_reserve, /* 261, 262, 263 */
exp10l_underflow, exp10_underflow, exp10f_underflow, /* 264, 265, 266 */
nextafterl_underflow, nextafter_underflow,
nextafterl_underflow, nextafter_underflow,
nextafterf_underflow, /* 267, 268, 269 */
nexttowardl_underflow, nexttoward_underflow,
nexttowardl_underflow, nexttoward_underflow,
nexttowardf_underflow /* 270, 271, 272 */
} error_types;

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -74,7 +74,7 @@
// Registers used
//==============================================================
//
// general registers:
// general registers:
// r14 exponent bias for x negative
// r15 exponent bias for x positive
// r16 signexp of x
@ -119,7 +119,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexp)
// The normalization also sets fault flags and takes faults if necessary
{ .mfi
mov r20 = 0x1003f
fnorm.s0 f9 = f8
fnorm.s0 f9 = f8
nop.i 999 ;;
}
@ -137,7 +137,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexp)
{ .mfi
setf.exp f11 = r14
(p7) fcmp.lt.s0 p7,p8 = f8,f0
(p6) cmp.eq.unc p10,p11 = r34, r0 ;;
(p6) cmp.eq.unc p10,p11 = r34, r0 ;;
}
// If x NAN, ZERO, INFINITY, set *y=0 and exit
@ -161,16 +161,16 @@ GLOBAL_LIBM_ENTRY(__libm_frexp)
{ .mfi
(p9) add r15 = 64, r15
(p9) fmpy.s0 f9 = f9, f12
cmp.eq p10,p11 = r34, r0 ;;
cmp.eq p10,p11 = r34, r0 ;;
}
// true exponent stored to int pointer
// the bias is treated as 0xfffe instead of
// the bias is treated as 0xfffe instead of
// normal 0xffff because we want the significand
// to be in the range 0.5 <= sig < 1.0
// Store the value of the exponent at the pointer in r33
// If x>0 form significand result
// If x>0 form significand result
{ .mfi
nop.m 999
(p8) fmerge.se f8 = f10,f9
@ -178,7 +178,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexp)
}
// Get signexp of normalized x
// If x<0 form significand result
// If x<0 form significand result
{ .mfi
getf.exp r16 = f9
(p7) fmerge.se f8 = f11,f9

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -74,7 +74,7 @@
// Registers used
//==============================================================
//
// general registers:
// general registers:
// r14 exponent bias for x negative
// r15 exponent bias for x positive
// r16 signexp of x
@ -119,7 +119,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpf)
// The normalization also sets fault flags and takes faults if necessary
{ .mfi
mov r20 = 0x1003f
fnorm.s0 f9 = f8
fnorm.s0 f9 = f8
nop.i 999 ;;
}
@ -137,7 +137,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpf)
{ .mfi
setf.exp f11 = r14
(p7) fcmp.lt.s0 p7,p8 = f8,f0
(p6) cmp.eq.unc p10,p11 = r34, r0 ;;
(p6) cmp.eq.unc p10,p11 = r34, r0 ;;
}
// If x NAN, ZERO, INFINITY, set *y=0 and exit
@ -161,16 +161,16 @@ GLOBAL_LIBM_ENTRY(__libm_frexpf)
{ .mfi
(p9) add r15 = 64, r15
(p9) fmpy.s0 f9 = f9, f12
cmp.eq p10,p11 = r34, r0 ;;
cmp.eq p10,p11 = r34, r0 ;;
}
// true exponent stored to int pointer
// the bias is treated as 0xfffe instead of
// the bias is treated as 0xfffe instead of
// normal 0xffff because we want the significand
// to be in the range 0.5 <= sig < 1.0
// Store the value of the exponent at the pointer in r33
// If x>0 form significand result
// If x>0 form significand result
{ .mfi
nop.m 999
(p8) fmerge.se f8 = f10,f9
@ -178,7 +178,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpf)
}
// Get signexp of normalized x
// If x<0 form significand result
// If x<0 form significand result
{ .mfi
getf.exp r16 = f9
(p7) fmerge.se f8 = f11,f9

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -74,7 +74,7 @@
// Registers used
//==============================================================
//
// general registers:
// general registers:
// r14 exponent bias for x negative
// r15 exponent bias for x positive
// r16 signexp of x
@ -119,7 +119,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpl)
// The normalization also sets fault flags and takes faults if necessary
{ .mfi
mov r20 = 0x1003f
fnorm.s0 f9 = f8
fnorm.s0 f9 = f8
nop.i 999 ;;
}
@ -137,7 +137,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpl)
{ .mfi
setf.exp f11 = r14
(p7) fcmp.lt.s0 p7,p8 = f8,f0
(p6) cmp.eq.unc p10,p11 = r35, r0 ;;
(p6) cmp.eq.unc p10,p11 = r35, r0 ;;
}
// If x NAN, ZERO, INFINITY, set *y=0 and exit
@ -161,16 +161,16 @@ GLOBAL_LIBM_ENTRY(__libm_frexpl)
{ .mfi
(p9) add r15 = 64, r15
(p9) fmpy.s0 f9 = f9, f12
cmp.eq p10,p11 = r35, r0 ;;
cmp.eq p10,p11 = r35, r0 ;;
}
// true exponent stored to int pointer
// the bias is treated as 0xfffe instead of
// the bias is treated as 0xfffe instead of
// normal 0xffff because we want the significand
// to be in the range 0.5 <= sig < 1.0
// Store the value of the exponent at the pointer in r34
// If x>0 form significand result
// If x>0 form significand result
{ .mfi
nop.m 999
(p8) fmerge.se f8 = f10,f9
@ -178,7 +178,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpl)
}
// Get signexp of normalized x
// If x<0 form significand result
// If x<0 form significand result
{ .mfi
getf.exp r16 = f9
(p7) fmerge.se f8 = f11,f9

View File

@ -48,8 +48,8 @@
//
// API
//==============================================================
// float __libm_scalblnf (float x, long int n, int long_int_type)
// input floating point f8 and long int n (r33)
// float __libm_scalblnf (float x, long int n, int long_int_type)
// input floating point f8 and long int n (r33)
// input long_int_type = 0 if long int defined as 32 bits, = 1 if 64 bits
// output floating point f8
//

View File

@ -2,7 +2,7 @@
// Copyright (C) 2000, 2001, Intel Corporation
// All rights reserved.
//
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
@ -21,26 +21,26 @@
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// *********************************************************************
//
// History:
// 02/02/00 Initial Version
// History:
// 02/02/00 Initial Version
// 4/04/00 Unwind support added
// 12/28/00 Fixed false invalid flags
//
@ -50,7 +50,7 @@
//
// *********************************************************************
//
// Accuracy: Very accurate for double-precision values
// Accuracy: Very accurate for double-precision values
//
// *********************************************************************
//
@ -72,7 +72,7 @@
//
// Denormal fault raised on denormal inputs
// Overflow exceptions do not occur
// Underflow exceptions raised when appropriate for tan
// Underflow exceptions raised when appropriate for tan
// (No specialized error handling for this routine)
// Inexact raised when appropriate by algorithm
//
@ -218,7 +218,7 @@
// tan( B + x ) = ------------------------
// 1 - tan(B)*tan(x)
//
// / \
// / \
// | tan(B) + tan(x) |
// = tan(B) + | ------------------------ - tan(B) |
@ -251,7 +251,7 @@
// cot( B + x ) = ------------------------
// tan(B) + tan(x)
//
// / \
// / \
// | 1 - tan(B)*tan(x) |
// = cot(B) + | ----------------------- - cot(B) |
@ -315,7 +315,7 @@
// / (1/[sin(B)*cos(B)]) * tan(x)
// tan(Arg) = sgn_r * | tan(B) + --------------------------------
// \ cot(B) - tan(x)
// \
// \
// + CORR |
// /
@ -335,7 +335,7 @@
// / (1/[sin(B)*cos(B)]) * tan(x)
// tan(Arg) = sgn_r * | -cot(B) + --------------------------------
// \ tan(B) + tan(x)
// \
// \
// + CORR |
// /
@ -457,7 +457,7 @@
// / (1/[sin(B)*cos(B)]) * tan(x)
// sgn_r * | tan(B) + -------------------------------- +
// \ cot(B) - tan(x)
// \
// \
// CORR |
// /
@ -562,7 +562,7 @@
// / (1/[sin(B)*cos(B)]) * tan(x)
// sgn_r * | -cot(B) + -------------------------------- +
// \ tan(B) + tan(x)
// \
// \
// CORR |
// /
@ -913,7 +913,7 @@ data4 0xAE8C11FD, 0x800960AD, 0x00004000, 0x00000000
data4 0x5FDBEC21, 0x8000E147, 0x00004000, 0x00000000
data4 0xA07791FA, 0x80186650, 0x00004000, 0x00000000
Arg = f8
Arg = f8
Result = f8
fp_tmp = f9
U_2 = f10
@ -1021,15 +1021,15 @@ delta1 = r36
table_ptr1 = r37
table_ptr2 = r38
i_0 = r39
i_1 = r40
N_fix_gr = r41
N_inc = r42
exp_Arg = r43
exp_r = r44
sig_r = r45
lookup = r46
table_offset = r47
Create_B = r48
i_1 = r40
N_fix_gr = r41
N_inc = r42
exp_Arg = r43
exp_r = r44
sig_r = r45
lookup = r46
table_offset = r47
Create_B = r48
gr_tmp = r49
GR_Parameter_X = r49
@ -1042,12 +1042,12 @@ GR_Parameter_r = r50
.proc __libm_tan
__libm_tan:
__libm_tan:
{ .mfi
alloc r32 = ar.pfs, 0,17,2,0
(p0) fclass.m.unc p6,p0 = Arg, 0x1E7
addl gr_tmp = -1,r0
addl gr_tmp = -1,r0
}
;;
@ -1073,7 +1073,7 @@ alloc r32 = ar.pfs, 0,17,2,0
;;
//
// Check for NatVals, Infs , NaNs, and Zeros
// Check for NatVals, Infs , NaNs, and Zeros
// Check for everything - if false, then must be pseudo-zero
// or pseudo-nan.
// Local table pointer
@ -1081,12 +1081,12 @@ alloc r32 = ar.pfs, 0,17,2,0
{ .mbb
(p0) add table_ptr2 = 96, table_ptr1
(p6) br.cond.spnt __libm_TAN_SPECIAL
(p6) br.cond.spnt __libm_TAN_SPECIAL
(p7) br.cond.spnt __libm_TAN_SPECIAL ;;
}
//
// Point to Inv_P_0
// Branch out to deal with unsupporteds and special values.
// Branch out to deal with unsupporteds and special values.
//
{ .mmf
@ -1110,7 +1110,7 @@ alloc r32 = ar.pfs, 0,17,2,0
{ .mmi
(p0) ldfs NEGTWO_TO_24 = [table_ptr1],12 ;;
//
// Do fcmp to generate Denormal exception
// Do fcmp to generate Denormal exception
// - can't do FNORM (will generate Underflow when U is unmasked!)
// Normalize input argument.
//
@ -1668,12 +1668,12 @@ alloc r32 = ar.pfs, 0,17,2,0
}
TAN_LARGER_ARG:
TAN_LARGER_ARG:
{ .mmf
(p0) addl table_ptr1 = @ltoff(TAN_BASE_CONSTANTS), gp
nop.m 999
(p0) fmpy.s1 N_0 = Arg, Inv_P_0
(p0) fmpy.s1 N_0 = Arg, Inv_P_0
}
;;
@ -2307,7 +2307,7 @@ TAN_LARGER_ARG:
}
TAN_SMALL_R:
TAN_SMALL_R:
{ .mii
nop.m 999
@ -2394,7 +2394,7 @@ TAN_SMALL_R:
(p11) ldfe P1_8 = [table_ptr1], -16 ;;
//
// N even: Poly1 = P1_2 + P1_3 * rsq
// N odd: poly1 = 1.0 + S_hi * r
// N odd: poly1 = 1.0 + S_hi * r
// 16 bits partial account for necessary (-1)
//
(p11) ldfe P1_7 = [table_ptr1], -16
@ -2679,7 +2679,7 @@ TAN_SMALL_R:
}
TAN_NORMAL_R:
TAN_NORMAL_R:
{ .mfi
(p0) getf.sig sig_r = r
@ -2847,7 +2847,7 @@ TAN_NORMAL_R:
// xsq = x * x
// N even: Tx = T_hi * x
// Load T_lo.
// Load C_lo - increment pointer to get SC_inv
// Load C_lo - increment pointer to get SC_inv
// - can't get all the way, do an add later.
//
(p0) add table_ptr2 = 569, table_ptr2 ;;
@ -3216,7 +3216,7 @@ ASM_SIZE_DIRECTIVE(__libm_tan)
.proc __libm_callout
__libm_callout:
TAN_ARG_TOO_LARGE:
TAN_ARG_TOO_LARGE:
.prologue
// (1)
{ .mfi
@ -3258,7 +3258,7 @@ TAN_ARG_TOO_LARGE:
// (4)
{ .mmi
mov gp = GR_SAVE_GP // Restore gp
(p0) mov N_fix_gr = r8
(p0) mov N_fix_gr = r8
nop.i 999
}
;;
@ -3304,7 +3304,7 @@ TAN_ARG_TOO_LARGE:
.restore sp
add sp = 64,sp // Restore stack pointer
(p6) br.cond.spnt TAN_SMALL_R
(p0) br.cond.sptk TAN_NORMAL_R
(p0) br.cond.sptk TAN_NORMAL_R
}
;;
.endp __libm_callout
@ -3322,7 +3322,7 @@ __libm_TAN_SPECIAL:
{ .mfb
nop.m 999
(p0) fmpy.s0 Arg = Arg, f0
(p0) br.ret.sptk b0
(p0) br.ret.sptk b0
}
.endp __libm_TAN_SPECIAL
ASM_SIZE_DIRECTIVE(__libm_TAN_SPECIAL)

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
//
// History:
// History:
// 09/04/01 Initial version
// 09/13/01 Performance improved, symmetry problems fixed
// 10/10/01 Performance improved, split issues removed
@ -56,31 +56,31 @@
//
// Overview of operation
//==============================================================
//
//
// There are 6 paths:
// 1. x = 0, [S,Q]Nan or +/-INF
// Return asinhl(x) = x + x;
//
//
// 2. x = + denormal
// Return asinhl(x) = x - x^2;
//
//
// 3. x = - denormal
// Return asinhl(x) = x + x^2;
//
//
// 4. 'Near 0': max denormal < |x| < 1/128
// Return asinhl(x) = sign(x)*(x+x^3*(c3+x^2*(c5+x^2*(c7+x^2*(c9)))));
//
// 5. 'Huges': |x| > 2^63
// Return asinhl(x) = sign(x)*(logl(2*x));
//
//
// 6. 'Main path': 1/128 < |x| < 2^63
// b_hi + b_lo = x + sqrt(x^2 + 1);
// asinhl(x) = sign(x)*(log_special(b_hi, b_lo));
//
// Algorithm description
//
// Algorithm description
//==============================================================
//
// Main path algorithm
// Main path algorithm
// ( thanks to Peter Markstein for the idea of sqrt(x^2+1) computation! )
// *************************************************************************
//
@ -89,19 +89,19 @@
// 1) p2 = (p2_hi+p2_lo) = x^2+1 obtaining
// ------------------------------------
// p2_hi = x2_hi + 1, where x2_hi = x * x;
// p2_lo = x2_lo + p1_lo, where
// x2_lo = FMS(x*x-x2_hi),
// p2_lo = x2_lo + p1_lo, where
// x2_lo = FMS(x*x-x2_hi),
// p1_lo = (1 - p2_hi) + x2_hi;
//
// 2) g = (g_hi+g_lo) = sqrt(p2) = sqrt(p2_hi+p2_lo)
// ----------------------------------------------
// r = invsqrt(p2_hi) (8-bit reciprocal square root approximation);
// g = p2_hi * r (first 8 bit-approximation of sqrt);
//
//
// h = 0.5 * r;
// e = 0.5 - g * h;
// g = g * e + g (second 16 bit-approximation of sqrt);
//
//
// h = h * e + h;
// e = 0.5 - g * h;
// g = g * e + g (third 32 bit-approximation of sqrt);
@ -109,7 +109,7 @@
// h = h * e + h;
// e = 0.5 - g * h;
// g_hi = g * e + g (fourth 64 bit-approximation of sqrt);
//
//
// Remainder computation:
// h = h * e + h;
// d = (p2_hi - g_hi * g_hi) + p2_lo;
@ -119,15 +119,15 @@
// -------------------------------------------------------------------
// b_hi = (g_hi + x) + gl;
// b_lo = (g_hi - b_hi) + x + gl;
//
//
// Now we pass b presented as sum b_hi + b_lo to special version
// of logl function which accepts a pair of arguments as
// a 'multiprecision' value.
//
// a 'multiprecision' value.
//
// Special log algorithm overview
// ================================
// Here we use a table lookup method. The basic idea is that in
// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2),
// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2),
// we construct a value G such that G*Arg is close to 1 and that
// logl(1/G) is obtainable easily from a table of values calculated
// beforehand. Thus
@ -157,7 +157,7 @@
// G := G_1 * G_2 * G_3
// r := (G * S_hi - 1) + G * S_lo
//
// These G_j's have the property that the product is exactly
// These G_j's have the property that the product is exactly
// representable and that |r| < 2^(-12) as a result.
//
// Step 2: Approximation
@ -166,7 +166,7 @@
//
// Step 3: Reconstruction
//
// Finally,
// Finally,
//
// logl( X ) = logl( 2^N * (S_hi + S_lo) )
// ~=~ N*logl(2) + logl(1/G) + logl(1 + r)
@ -176,25 +176,25 @@
//
// Registers used
//==============================================================
// Floating Point registers used:
// Floating Point registers used:
// f8, input
// f32 -> f101 (70 registers)
// General registers used:
// General registers used:
// r32 -> r57 (26 registers)
// Predicate registers used:
// p6 -> p11
// p6 for '0, NaNs, Inf' path
// p7 for '+ denormals' path
// p7 for '+ denormals' path
// p8 for 'near 0' path
// p9 for 'huges' path
// p10 for '- denormals' path
// p10 for '- denormals' path
// p11 for negative values
//
// Data tables
//==============================================================
RODATA
.align 64
@ -210,14 +210,14 @@ data8 0x999999999991D582, 0x00003FFB
data8 0xAAAAAAAAAAAAAAA9, 0x0000BFFC
LOCAL_OBJECT_END(Poly_C_near_0_35)
// Q coeffs
// Q coeffs
LOCAL_OBJECT_START(Constants_Q)
data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
LOCAL_OBJECT_END(Constants_Q)
// Z1 - 16 bit fixed
@ -332,7 +332,7 @@ data4 0x3F71D488,0x3D693B9D
data8 0xBE049391B6B7C239
LOCAL_OBJECT_END(Constants_G_H_h2)
// G3 and H3 - IEEE single and h3 - IEEE double
// G3 and H3 - IEEE single and h3 - IEEE double
LOCAL_OBJECT_START(Constants_G_H_h3)
data4 0x3F7FFC00,0x38800100
data8 0x3D355595562224CD
@ -408,62 +408,62 @@ LOCAL_OBJECT_END(Constants_G_H_h3)
FR_Arg = f8
FR_Res = f8
FR_AX = f32
FR_XLog_Hi = f33
FR_XLog_Lo = f34
FR_XLog_Hi = f33
FR_XLog_Lo = f34
// Special logl registers
FR_Y_hi = f35
FR_Y_hi = f35
FR_Y_lo = f36
FR_Scale = f37
FR_X_Prime = f38
FR_S_hi = f39
FR_X_Prime = f38
FR_S_hi = f39
FR_W = f40
FR_G = f41
FR_H = f42
FR_wsq = f43
FR_wsq = f43
FR_w4 = f44
FR_h = f45
FR_w6 = f46
FR_w6 = f46
FR_G2 = f47
FR_H2 = f48
FR_poly_lo = f49
FR_P8 = f50
FR_P8 = f50
FR_poly_hi = f51
FR_P7 = f52
FR_h2 = f53
FR_rsq = f54
FR_P7 = f52
FR_h2 = f53
FR_rsq = f54
FR_P6 = f55
FR_r = f56
FR_r = f56
FR_log2_hi = f57
FR_log2_lo = f58
FR_float_N = f59
FR_Q4 = f60
FR_log2_hi = f57
FR_log2_lo = f58
FR_G3 = f61
FR_H3 = f62
FR_h3 = f63
FR_float_N = f59
FR_Q4 = f60
FR_Q3 = f64
FR_Q2 = f65
FR_1LN10_hi = f66
FR_G3 = f61
FR_H3 = f62
FR_h3 = f63
FR_Q1 = f67
FR_1LN10_lo = f68
FR_P5 = f69
FR_rcub = f70
FR_Q3 = f64
FR_Q2 = f65
FR_1LN10_hi = f66
FR_Neg_One = f71
FR_Z = f72
FR_AA = f73
FR_BB = f74
FR_S_lo = f75
FR_2_to_minus_N = f76
FR_Q1 = f67
FR_1LN10_lo = f68
FR_P5 = f69
FR_rcub = f70
FR_Neg_One = f71
FR_Z = f72
FR_AA = f73
FR_BB = f74
FR_S_lo = f75
FR_2_to_minus_N = f76
// Huge & Main path prolog registers
@ -512,22 +512,22 @@ GR_Poly_C_35 = r45
GR_Poly_C_79 = r46
// Special logl registers
GR_Index1 = r34
GR_Index2 = r35
GR_signif = r36
GR_X_0 = r37
GR_X_1 = r38
GR_X_2 = r39
GR_Z_1 = r40
GR_Z_2 = r41
GR_N = r42
GR_Bias = r43
GR_M = r44
GR_Index3 = r45
GR_exp_2tom80 = r45
GR_exp_mask = r47
GR_exp_2tom7 = r48
GR_ad_ln10 = r49
GR_Index1 = r34
GR_Index2 = r35
GR_signif = r36
GR_X_0 = r37
GR_X_1 = r38
GR_X_2 = r39
GR_Z_1 = r40
GR_Z_2 = r41
GR_N = r42
GR_Bias = r43
GR_M = r44
GR_Index3 = r45
GR_exp_2tom80 = r45
GR_exp_mask = r47
GR_exp_2tom7 = r48
GR_ad_ln10 = r49
GR_ad_tbl_1 = r50
GR_ad_tbl_2 = r51
GR_ad_tbl_3 = r52
@ -593,7 +593,7 @@ GLOBAL_LIBM_ENTRY(asinhl)
{ .mfb
cmp.le p9, p0 = GR_TwoP63, GR_ArgExp // if arg > 2^63 ('huges')
(p6) fma.s0 FR_Res = FR_Arg,f1,FR_Arg // r = a + a
(p6) br.ret.spnt b0 // return
(p6) br.ret.spnt b0 // return
};;
// (X^2 + 1) computation
{ .mfi
@ -677,7 +677,7 @@ GLOBAL_LIBM_ENTRY(asinhl)
{ .mfi
ldfe FR_Q1 = [GR_ad_q] // Load Q1
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
// 16 bit Newton Raphson iteration
nop.i 0
}
@ -695,7 +695,7 @@ GLOBAL_LIBM_ENTRY(asinhl)
{ .mfi
nop.m 0
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
// 32 bit Newton Raphson iteration
nop.i 0
}
@ -713,7 +713,7 @@ GLOBAL_LIBM_ENTRY(asinhl)
{ .mfi
nop.m 0
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
// 64 bit Newton Raphson iteration
nop.i 0
}
@ -806,7 +806,7 @@ GLOBAL_LIBM_ENTRY(asinhl)
{ .mfi
nop.m 0
nop.f 0
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
};;
@ -839,7 +839,7 @@ GLOBAL_LIBM_ENTRY(asinhl)
pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1 * Z_2
};;
// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!)
// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!)
// BECAUSE OF POSSIBLE 10 CLOCKS STALL!
// So we can negate Q coefficients there for negative values
@ -1001,7 +1001,7 @@ GLOBAL_LIBM_ENTRY(asinhl)
{ .mfi
nop.m 0
fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo
fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo
// Y_lo=poly_hi+poly_lo
nop.i 0
}
@ -1088,7 +1088,7 @@ huges_logl:
{ .mmi
ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo
sub GR_N = GR_N, GR_Bias
sub GR_N = GR_N, GR_Bias
mov GR_exp_2tom80 = 0x0ffaf // Exponent of 2^-80
};;
@ -1107,7 +1107,7 @@ huges_logl:
{ .mmi
nop.m 0
ldfe FR_Q2 = [GR_ad_q],16 // Load Q2
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
};;
{ .mmi

View File

@ -183,351 +183,351 @@ LOCAL_OBJECT_END(atanf_coeff_2_table)
.section .text
GLOBAL_LIBM_ENTRY(atanf)
{ .mfi
{ .mfi
alloc r32 = ar.pfs,1,2,0,0
frcpa.s1 atanf_z,p0 = f1,f8
addl EXP_Addr2 = @ltoff(atanf_coeff_2_table),gp
}
{ .mfi
}
{ .mfi
addl EXP_Addr1 = @ltoff(atanf_coeff_1_table),gp
fma.s1 atanf_t = f8,f8,f0
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fmerge.s atanf_sgn_x = f8,f1
nop.i 999;;
}
{ .mfi
}
{ .mfi
ld8 EXP_Addr1 = [EXP_Addr1]
fmerge.s atanf_abs_x = f1,f8
nop.i 999
}
{ .mfi
}
{ .mfi
ld8 EXP_Addr2 = [EXP_Addr2]
nop.f 999
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fclass.m p8,p0 = f8,0x7 // @zero
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
fcmp.eq.unc.s0 p9,p10 = f8,f1
nop.i 999;;
}
{ .mfi
}
{ .mfi
ldfpd atanf_coeff_R4,atanf_coeff_R5 = [EXP_Addr1],16
fnma.s1 atanf_b = f8,atanf_z,f1
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd atanf_coeff_R1,atanf_coeff_R2 = [EXP_Addr2],16
fma.s1 atanf_zsq = atanf_z,atanf_z,f0
nop.i 999;;
}
{ .mfi
{ .mfi
ldfpd atanf_coeff_R3,atanf_coeff_P1 = [EXP_Addr1],16
fma.s1 atanf_xcub = f8,atanf_t,f0
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd atanf_coeff_Q6,atanf_coeff_Q7 = [EXP_Addr2],16
fma.s1 atanf_tsq = atanf_t,atanf_t,f0
nop.i 999;;
}
{ .mfi
{ .mfi
ldfpd atanf_coeff_Q8,atanf_coeff_Q9 = [EXP_Addr1],16
// fcmp.le.s1 atanf_pred_LE1,atanf_pred_GT1 = atanf_abs_x,f1
fcmp.le.s1 p6,p7 = atanf_abs_x,f1
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd atanf_coeff_Q4,atanf_coeff_Q5 = [EXP_Addr2],16
nop.f 999
nop.i 999;;
}
{ .mfi
{ .mfi
ldfpd atanf_coeff_Q2,atanf_coeff_Q3 = [EXP_Addr1],16
fclass.m p8,p0 = f8,0xe7 // @inf|@qnan|@snan|@zero
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd atanf_coeff_P5,atanf_coeff_P6 = [EXP_Addr2],16
nop.f 999
nop.i 999;;
}
{ .mfi
{ .mfi
ldfpd atanf_coeff_Q0,atanf_coeff_Q1 = [EXP_Addr1],16
nop.f 999
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd atanf_coeff_P7,atanf_coeff_P8 = [EXP_Addr2],16
nop.f 999
nop.i 999;;
}
{ .mfi
{ .mfi
ldfpd atanf_coeff_P3,atanf_coeff_P4 = [EXP_Addr1],16
fma.s1 atanf_bsq = atanf_b,atanf_b,f0
nop.i 999
}
{ .mfi
}
{ .mfi
ldfpd atanf_coeff_P9,atanf_coeff_P10 = [EXP_Addr2]
fma.s1 atanf_z4 = atanf_zsq,atanf_zsq,f0
nop.i 999;;
}
{ .mfi
{ .mfi
ldfpd atanf_coeff_P2,atanf_piby2 = [EXP_Addr1]
fma.s1 atanf_x6 = atanf_t,atanf_tsq,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_t4 = atanf_tsq,atanf_tsq,f0
nop.i 999;;
}
{ .mfb
{ .mfb
nop.m 999
fma.s1 atanf_x5 = atanf_t,atanf_xcub,f0
(p8) br.cond.spnt ATANF_X_INF_NAN_ZERO
}
}
;;
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_poly_r1 = atanf_b,atanf_coeff_R1,f1
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_r3 = atanf_b,atanf_coeff_R5,atanf_coeff_R4
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_poly_r2 = atanf_b,atanf_coeff_R3,atanf_coeff_R2
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_z8 = atanf_z4,atanf_z4,f0
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_poly_q2 = atanf_t,atanf_coeff_Q5,atanf_coeff_Q4
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_q3 = atanf_t,atanf_coeff_Q7,atanf_coeff_Q6
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_z5 = atanf_z,atanf_z4,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_q1 = atanf_t,atanf_coeff_Q9,atanf_coeff_Q8
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_poly_q4 = atanf_t,atanf_coeff_Q1,atanf_coeff_Q0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_q5 = atanf_t,atanf_coeff_Q3,atanf_coeff_Q2
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_poly_p4 = f8,atanf_coeff_P1,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_p5 = atanf_t,atanf_coeff_P4,atanf_coeff_P3
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_poly_r1 = atanf_z8,atanf_poly_r1,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_z8_bsq = atanf_z8,atanf_bsq,f0
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_poly_q2 = atanf_tsq,atanf_poly_q3,atanf_poly_q2
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_r2 = atanf_bsq,atanf_poly_r3,atanf_poly_r2
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_poly_p2 = atanf_t,atanf_coeff_P8,atanf_coeff_P7
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_q1 = atanf_poly_q1,f1,atanf_tsq
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_z13 = atanf_z5,atanf_z8,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_p1 = atanf_t,atanf_coeff_P10,atanf_coeff_P9
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_poly_p4 = atanf_t,atanf_poly_p4,f8
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_q4 = atanf_tsq,atanf_poly_q5,atanf_poly_q4
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_poly_p3 = atanf_t,atanf_coeff_P6,atanf_coeff_P5
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_p5 = atanf_t,atanf_poly_p5,atanf_coeff_P2
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_x11 = atanf_x5,atanf_x6,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_r = atanf_z8_bsq,atanf_poly_r2,atanf_poly_r1
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s0 atanf_sgnx_piby2 = atanf_sgn_x,atanf_piby2,f0
nop.i 999
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_q2 = atanf_t4,atanf_poly_q1,atanf_poly_q2
nop.i 999;;
}
{ .mfi
{ .mfi
nop.m 999
fma.s1 atanf_poly_p1 = atanf_tsq,atanf_poly_p1,atanf_poly_p2
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_p4 = atanf_x5,atanf_poly_p5,atanf_poly_p4
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_z21_poly_r = atanf_z13,atanf_poly_r,f0
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_q = atanf_t4,atanf_poly_q2,atanf_poly_q4
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
fma.s1 atanf_poly_p1 = atanf_tsq,atanf_poly_p1,atanf_poly_p3
nop.i 999;;
}
{ .mfi
}
{ .mfi
nop.m 999
//(atanf_pred_GT1) fnma.s atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
(p7) fnma.s.s0 atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
nop.i 999;;
}
{ .mfb
}
{ .mfb
nop.m 999
//(atanf_pred_LE1) fma.s atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
(p6) fma.s.s0 atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
br.ret.sptk b0
}
}

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//
@ -82,25 +82,25 @@
// IEEE Special Conditions:
//
// Denormal fault raised on denormal inputs
// Underflow exceptions may occur
// Underflow exceptions may occur
// Special error handling for the y=0 and x=0 case
// Inexact raised when appropriate by algorithm
//
// atanl(SNaN) = QNaN
// atanl(QNaN) = QNaN
// atanl(+/-0) = +/- 0
// atanl(+/-Inf) = +/-pi/2
// atanl(+/-Inf) = +/-pi/2
//
// atan2l(Any NaN for x or y) = QNaN
// atan2l(+/-0,x) = +/-0 for x > 0
// atan2l(+/-0,x) = +/-pi for x < 0
// atan2l(+/-0,+0) = +/-0
// atan2l(+/-0,-0) = +/-pi
// atan2l(+/-0,x) = +/-0 for x > 0
// atan2l(+/-0,x) = +/-pi for x < 0
// atan2l(+/-0,+0) = +/-0
// atan2l(+/-0,-0) = +/-pi
// atan2l(y,+/-0) = pi/2 y > 0
// atan2l(y,+/-0) = -pi/2 y < 0
// atan2l(+/-y, Inf) = +/-0 for finite y > 0
// atan2l(+/-Inf, x) = +/-pi/2 for finite x
// atan2l(+/-y, -Inf) = +/-pi for finite y > 0
// atan2l(+/-Inf, x) = +/-pi/2 for finite x
// atan2l(+/-y, -Inf) = +/-pi for finite y > 0
// atan2l(+/-Inf, Inf) = +/-pi/4
// atan2l(+/-Inf, -Inf) = +/-3pi/4
//
@ -549,20 +549,20 @@ GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
sign_X = r36
sign_Y = r37
swap = r38
table_ptr1 = r39
table_ptr2 = r40
k = r41
lookup = r42
exp_ArgX = r43
exp_ArgY = r44
exponent_Q = r45
significand_Q = r46
special = r47
sp_exp_Q = r48
sp_exp_4sig_Q = r49
table_base = r50
sign_Y = r37
swap = r38
table_ptr1 = r39
table_ptr2 = r40
k = r41
lookup = r42
exp_ArgX = r43
exp_ArgY = r44
exponent_Q = r45
significand_Q = r46
special = r47
sp_exp_Q = r48
sp_exp_4sig_Q = r49
table_base = r50
int_temp = r51
GR_Parameter_X = r49
@ -572,7 +572,7 @@ GR_Parameter_TAG = r52
GR_temp = r52
RODATA
.align 16
.align 16
LOCAL_OBJECT_START(Constants_atan)
// double pi/2
@ -597,7 +597,7 @@ data8 0xE36F716D2A5F89BD, 0x3FFB // Q_4
// Entries Tbl_lo (single precision)
// B = 1+Index/16+1/32 Index = 0
//
data8 0x3FE9A000A935BD8E
data8 0x3FE9A000A935BD8E
data4 0x23ACA08F, 0x00000000
//
// Entries Tbl_hi (double precision) Index = 0,1,...,15
@ -605,37 +605,37 @@ data4 0x23ACA08F, 0x00000000
// Entries Tbl_lo (single precision)
// Index = 0,1,...,15 B = 2^(-1)*(1+Index/16+1/32)
//
data8 0x3FDE77EB7F175A34
data8 0x3FDE77EB7F175A34
data4 0x238729EE, 0x00000000
data8 0x3FE0039C73C1A40B
data8 0x3FE0039C73C1A40B
data4 0x249334DB, 0x00000000
data8 0x3FE0C6145B5B43DA
data8 0x3FE0C6145B5B43DA
data4 0x22CBA7D1, 0x00000000
data8 0x3FE1835A88BE7C13
data8 0x3FE1835A88BE7C13
data4 0x246310E7, 0x00000000
data8 0x3FE23B71E2CC9E6A
data8 0x3FE23B71E2CC9E6A
data4 0x236210E5, 0x00000000
data8 0x3FE2EE628406CBCA
data8 0x3FE2EE628406CBCA
data4 0x2462EAF5, 0x00000000
data8 0x3FE39C391CD41719
data8 0x3FE39C391CD41719
data4 0x24B73EF3, 0x00000000
data8 0x3FE445065B795B55
data8 0x3FE445065B795B55
data4 0x24C11260, 0x00000000
data8 0x3FE4E8DE5BB6EC04
data8 0x3FE4E8DE5BB6EC04
data4 0x242519EE, 0x00000000
data8 0x3FE587D81F732FBA
data8 0x3FE587D81F732FBA
data4 0x24D4346C, 0x00000000
data8 0x3FE6220D115D7B8D
data8 0x3FE6220D115D7B8D
data4 0x24ED487B, 0x00000000
data8 0x3FE6B798920B3D98
data8 0x3FE6B798920B3D98
data4 0x2495FF1E, 0x00000000
data8 0x3FE748978FBA8E0F
data8 0x3FE748978FBA8E0F
data4 0x223D9531, 0x00000000
data8 0x3FE7D528289FA093
data8 0x3FE7D528289FA093
data4 0x242B0411, 0x00000000
data8 0x3FE85D69576CC2C5
data8 0x3FE85D69576CC2C5
data4 0x2335B374, 0x00000000
data8 0x3FE8E17AA99CC05D
data8 0x3FE8E17AA99CC05D
data4 0x24C27CFB, 0x00000000
//
// Entries Tbl_hi (double precision) Index = 0,1,...,15
@ -643,7 +643,7 @@ data4 0x24C27CFB, 0x00000000
// Entries Tbl_lo (single precision)
// Index = 0,1,...,15 B = 2^(-2)*(1+Index/16+1/32)
//
data8 0x3FD025FA510665B5
data8 0x3FD025FA510665B5
data4 0x24263482, 0x00000000
data8 0x3FD1151A362431C9
data4 0x242C8DC9, 0x00000000
@ -771,19 +771,19 @@ GLOBAL_IEEE754_ENTRY(atanl)
{ .mfi
ldfd P_hi = [table_ptr1],8 // Load double precision hi part of pi
fclass.m p8,p0 = ArgY_orig, 0x1e7 // Test y natval, nan, inf, zero
nop.i 999
nop.i 999
}
;;
{ .mfi
ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
nop.f 999
nop.i 999
nop.f 999
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 M = f1, f1, f0 // Set M = 1.0
nop.i 999
nop.i 999
}
;;
@ -803,7 +803,7 @@ GLOBAL_IEEE754_ENTRY(atanl)
{ .mfi
nop.m 999
fcmp.ge.s1 p6,p7 = Xsq, Ysq // Test for |x| >= |y| using squares
nop.i 999
nop.i 999
}
{ .mfb
nop.m 999
@ -857,19 +857,19 @@ GLOBAL_IEEE754_ENTRY(atan2l)
{ .mfi
ldfd P_hi = [table_ptr1],8 // Load double precision hi part of pi
fclass.m p8,p0 = ArgY_orig, 0x1e7 // Test y natval, nan, inf, zero
nop.i 999
nop.i 999
}
;;
{ .mfi
ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
fclass.m p9,p0 = ArgX_orig, 0x1e7 // Test x natval, nan, inf, zero
nop.i 999
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 M = f1, f1, f0 // Set M = 1.0
nop.i 999
nop.i 999
}
;;
@ -889,7 +889,7 @@ GLOBAL_IEEE754_ENTRY(atan2l)
{ .mfi
nop.m 999
fcmp.ge.s1 p6,p7 = Xsq, Ysq // Test for |x| >= |y| using squares
nop.i 999
nop.i 999
}
{ .mfb
nop.m 999
@ -995,7 +995,7 @@ ATANL_COMMON:
}
;;
// Create a single precision representation of the signexp of Q with the
// Create a single precision representation of the signexp of Q with the
// 4 most significant bits of the significand followed by a 1 and then 18 0's
{ .mfi
nop.m 999
@ -1071,7 +1071,7 @@ ATANL_COMMON:
;;
//
// Generate sign_exp_Q b_1 b_2 b_3 b_4 1 0 0 0 ... 0 in single precision
// Generate sign_exp_Q b_1 b_2 b_3 b_4 1 0 0 0 ... 0 in single precision
// representation. Note sign of Q is always 0.
//
{ .mfi
@ -1185,7 +1185,7 @@ ATANL_COMMON:
// C_hi_hold = 1 - C_hi * U_prime_hi (1)
{ .mfi
nop.m 999
fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
nop.i 999
}
;;
@ -1222,7 +1222,7 @@ ATANL_COMMON:
// C_hi_hold = 1 - C_hi * U_prime_hi (3)
{ .mfi
nop.m 999
fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
nop.i 999
}
;;
@ -1342,7 +1342,7 @@ ATANL_COMMON:
;;
ATANL_POLY:
ATANL_POLY:
// Here if 0 < V/U < 2^-3
//
// ***********************************************
@ -1523,7 +1523,7 @@ ATANL_POLY:
// Create small double in case need to raise underflow
{ .mfi
setf.d FR_temp = GR_temp
setf.d FR_temp = GR_temp
fma.s1 poly = z8, poly1, poly2 // poly = poly2 + z8 * poly1
nop.i 999
}
@ -1635,9 +1635,9 @@ ATANL_POLY:
}
;;
//
//
// If Res_lo is denormal test if Result equals zero
//
//
{ .mfi
nop.m 999
(p14) fclass.m.unc p14, p0 = Result, 0x07
@ -1657,10 +1657,10 @@ ATANL_POLY:
;;
ATANL_UNSUPPORTED:
ATANL_UNSUPPORTED:
{ .mfb
nop.m 999
fmpy.s0 Result = ArgX,ArgY
fmpy.s0 Result = ArgX,ArgY
br.ret.sptk b0
}
;;
@ -1713,7 +1713,7 @@ ATANL_X_SPECIAL:
// Here if x or y inf or zero
ATANL_SPECIAL_HANDLING:
ATANL_SPECIAL_HANDLING:
{ .mfi
nop.m 999
fclass.m p6, p7 = ArgY_orig, 0x007 // Test y zero
@ -1809,7 +1809,7 @@ ATANL_SPECIAL_HANDLING:
;;
// Here if y not zero
ATANL_ArgY_Not_ZERO:
ATANL_ArgY_Not_ZERO:
{ .mfi
nop.m 999
fclass.m p0, p10 = ArgY, 0x023 // Test y inf
@ -1841,7 +1841,7 @@ ATANL_ArgY_Not_ZERO:
;;
{ .mfi
(p6) add table_ptr1 = 16, table_ptr1 // Point to pi/2, if x finite
(p6) add table_ptr1 = 16, table_ptr1 // Point to pi/2, if x finite
fclass.m p8, p0 = ArgX, 0x022 // Test for x=-inf
nop.i 999
}
@ -1886,7 +1886,7 @@ ATANL_ArgY_Not_ZERO:
;;
// Here if y not INF, and x=0 or INF
ATANL_ArgY_Not_INF:
ATANL_ArgY_Not_INF:
//
// Return +PI/2 when ArgY NOT Inf, ArgY > 0 and ArgX = +/-0
// Return -PI/2 when ArgY NOT Inf, ArgY < 0 and ArgX = +/-0
@ -1953,7 +1953,7 @@ ATANL_ArgY_Not_INF:
;;
GLOBAL_IEEE754_END(atan2l)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -56,7 +56,7 @@
//
// The result is computed as
// cbrt(x)= cbrt(1 - (1 - x*y)) * (1/cbrt(y))
// where y = frcpa(x) = (-1)^sgn_y * 2^(3*k+j) * m_y,
// where y = frcpa(x) = (-1)^sgn_y * 2^(3*k+j) * m_y,
// m_y in [1,2), j in {0,1,2}
//
// cbrt(1 - (1 - x*y)) is approximated by a degree-6 polynomial
@ -68,13 +68,13 @@
// (double-extended precision) and D (single precision) as follows:
// T_hi (1 + D)= 1/cbrt(y) to about 80 bits of accuracy
//
// The tables are only stored for three exponent values (i.e.
// The tables are only stored for three exponent values (i.e.
// only for 2^j * m_y, where j in {0,1,2} and m_y covers the 256
// possible mantissas for an frcpa result); the index is formed
// by the 8 leading mantissa bits of x, which is the same index used
// by the hardware to get frcpa(x).
//
// The table values are multiplied by 2^k where e is the exponent of
// The table values are multiplied by 2^k where e is the exponent of
// the input number. This multiplication is carried out in parallel with
// the polynomial evaluation:
// T= 2^(k) * T_hi
@ -127,7 +127,7 @@
GR_TMP1 = r21
GR_SGNMASK = r22
GR_T_INDEX = r23
GR_IX_T = r23
GR_IX_T = r23
GR_IX_D = r24
GR_D_INDEX = r24
GR_TMP2 = r25
@ -817,7 +817,7 @@ GLOBAL_LIBM_ENTRY(cbrtl)
and GR_SIGN = GR_NORMEXPSGN, GR_SGNMASK
// eliminate leading 1 from GR_NORMSIG = 2nd table index
shl GR_INDEX2 = GR_NORMSIG, 1
// eliminate sign from exponent
// eliminate sign from exponent
andcm GR_NORMEXP = GR_NORMEXPSGN, GR_SGNMASK
}
;;
@ -829,8 +829,8 @@ GLOBAL_LIBM_ENTRY(cbrtl)
(p6) fnma.s1 FR_R = FR_RCP, FR_XNORM, f1
// Start computation of floor(exponent/3) by
// computing (2^20+2)/3*exponent = exponent*0x55556
// 1: exponent* = 5;
// (2^{16}-1)/3 = 0x5555:
// 1: exponent* = 5;
// (2^{16}-1)/3 = 0x5555:
// will form 0x5555*exponent by using shladd's
shladd GR_EXP5 = GR_NORMEXP, 2, GR_NORMEXP
}

View File

@ -52,7 +52,7 @@
// 09/05/02 Work range widened by strengthening the argument reduction (3 parts of Pi/16)
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 08/08/03 Improved performance
// 10/28/04 Saved sincos_r_sincos to avoid clobber by dynamic loader
// 10/28/04 Saved sincos_r_sincos to avoid clobber by dynamic loader
// 03/31/05 Reformatted delimiters between data tables
// API
@ -71,12 +71,12 @@
// nfloat = Round result to integer (round-to-nearest)
//
// r = x - nfloat * pi/2^k
// Do this as ((((x - nfloat * HIGH(pi/2^k))) -
// nfloat * LOW(pi/2^k)) -
// Do this as ((((x - nfloat * HIGH(pi/2^k))) -
// nfloat * LOW(pi/2^k)) -
// nfloat * LOWEST(pi/2^k) for increased accuracy.
// pi/2^k is stored as two numbers that when added make pi/2^k.
// pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k)
// HIGH and LOW parts are rounded to zero values,
// HIGH and LOW parts are rounded to zero values,
// and LOWEST is rounded to nearest one.
//
// x = (nfloat * pi/2^k) + r
@ -508,7 +508,7 @@ _SINCOS_COMMON2:
{ .mfi
ldfe sincos_Pi_by_16_3 = [sincos_AD_1],16
nop.f 999
dep.z sincos_r_exp = sincos_r_signexp, 0, 17
dep.z sincos_r_exp = sincos_r_signexp, 0, 17
};;
// Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading
@ -516,7 +516,7 @@ _SINCOS_COMMON2:
// p10 is true if f8 exp is >= 0x1001a (2^27)
{ .mmb
ldfpd sincos_P4,sincos_Q4 = [sincos_AD_1],16
cmp.ge p10,p0 = sincos_r_exp,sincos_exp_limit
cmp.ge p10,p0 = sincos_r_exp,sincos_exp_limit
(p10) br.cond.spnt _SINCOS_LARGE_ARGS // Go to "large args" routine
};;
@ -542,7 +542,7 @@ _SINCOS_COMMON2:
{ .mfi
ldfpd sincos_P1,sincos_Q1 = [sincos_AD_1],16
fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8
nop.i 999
nop.i 999
};;
// Add 2^(k-1) (which is in sincos_r_sincos) to N
@ -551,7 +551,7 @@ _SINCOS_COMMON2:
;;
// Get M (least k+1 bits of N)
and sincos_GR_m = 0x1f,sincos_GR_n
nop.i 999
nop.i 999
};;
// sincos_r = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
@ -566,14 +566,14 @@ _SINCOS_COMMON2:
{ .mfi
add sincos_AD_2 = sincos_GR_32m, sincos_AD_1
(p8) fclass.m.unc p10,p0 = f8,0x0b
nop.i 999
nop.i 999
};;
// Load Sin and Cos table value using obtained index m (sincos_AD_2)
{ .mfi
ldfe sincos_Sm = [sincos_AD_2],16
nop.f 999
nop.i 999
nop.f 999
nop.i 999
};;
// get rsq = r*r
@ -585,17 +585,17 @@ _SINCOS_COMMON2:
{ .mfi
nop.m 999
fmpy.s0 fp_tmp = fp_tmp,fp_tmp // forces inexact flag
nop.i 999
nop.i 999
};;
// sincos_r_exact = sincos_r -sincos_Nfloat * sincos_Pi_by_16_3
{ .mfi
nop.m 999
fnma.s1 sincos_r_exact = sincos_NFLOAT, sincos_Pi_by_16_3, sincos_r
nop.i 999
nop.i 999
};;
// Polynomials calculation
// Polynomials calculation
// P_1 = P4*r^2 + P3
// Q_2 = Q4*r^2 + Q3
{ .mfi
@ -606,7 +606,7 @@ _SINCOS_COMMON2:
{ .mfi
nop.m 999
fma.s1 sincos_Q_temp1 = sincos_rsq, sincos_Q4, sincos_Q3
nop.i 999
nop.i 999
};;
// get rcube = r^3 and S[m]*r^2
@ -618,10 +618,10 @@ _SINCOS_COMMON2:
{ .mfi
nop.m 999
fmpy.s1 sincos_rcub = sincos_r_exact, sincos_rsq
nop.i 999
nop.i 999
};;
// Polynomials calculation
// Polynomials calculation
// Q_2 = Q_1*r^2 + Q2
// P_1 = P_1*r^2 + P2
{ .mfi
@ -632,10 +632,10 @@ _SINCOS_COMMON2:
{ .mfi
nop.m 999
fma.s1 sincos_P_temp2 = sincos_rsq, sincos_P_temp1, sincos_P2
nop.i 999
nop.i 999
};;
// Polynomials calculation
// Polynomials calculation
// Q = Q_2*r^2 + Q1
// P = P_2*r^2 + P1
{ .mfi
@ -646,7 +646,7 @@ _SINCOS_COMMON2:
{ .mfi
nop.m 999
fma.s1 sincos_P = sincos_rsq, sincos_P_temp2, sincos_P1
nop.i 999
nop.i 999
};;
// Get final P and Q
@ -660,7 +660,7 @@ _SINCOS_COMMON2:
{ .mfi
nop.m 999
fma.s1 sincos_P = sincos_rcub,sincos_P, sincos_r_exact
nop.i 999
nop.i 999
};;
// If sin(denormal), force underflow to be set
@ -701,7 +701,7 @@ _SINCOS_SPECIAL_ARGS:
_SINCOS_UNORM:
// Here if x=unorm
{ .mfb
getf.exp sincos_r_signexp = sincos_NORM_f8 // Get signexp of x
getf.exp sincos_r_signexp = sincos_NORM_f8 // Get signexp of x
fcmp.eq.s0 p11,p0 = f8, f0 // Dummy op to set denorm flag
br.cond.sptk _SINCOS_COMMON2 // Return to main path
};;
@ -750,7 +750,7 @@ _SINCOS_LARGE_ARGS:
{ .mfi
nop.m 999
fmpy.s0 sincos_save_tmp = sincos_save_tmp, sincos_save_tmp
nop.i 999
nop.i 999
};;
{ .mib

View File

@ -69,7 +69,7 @@
// nfloat = Round result to integer (round-to-nearest)
//
// r = x - nfloat * pi/2^k
// Do this as (x - nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k)
// Do this as (x - nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k)
// for increased accuracy.
// pi/2^k is stored as two numbers that when added make pi/2^k.
@ -484,14 +484,14 @@ _SINCOSF_COMMON:
// Polynomial coefficients (Q2, Q1, P2, P1) loading
{ .mmi
ldfpd sincosf_P2,sincosf_Q2 = [sincosf_AD_1],16
nop.m 999
nop.i 999
nop.m 999
nop.i 999
};;
// Select exponent (17 lsb)
{ .mmi
ldfpd sincosf_P1,sincosf_Q1 = [sincosf_AD_1],16
nop.m 999
nop.m 999
dep.z sincosf_r_exp = sincosf_r_signexp, 0, 17
};;
@ -507,9 +507,9 @@ _SINCOSF_COMMON:
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
// rightmost bits of significand
{ .mfi
nop.m 999
nop.m 999
fma.s1 sincosf_W_2TO61_RSH = sincosf_NORM_f8, sincosf_SIG_INV_PI_BY_16_2TO61, sincosf_RSHF_2TO61
nop.i 999
nop.i 999
};;
// sincosf_NFLOAT = Round_Int_Nearest(sincosf_W)
@ -517,14 +517,14 @@ _SINCOSF_COMMON:
{ .mfi
nop.m 999
fms.s1 sincosf_NFLOAT = sincosf_W_2TO61_RSH,sincosf_2TOM61,sincosf_RSHF
nop.i 999
nop.i 999
};;
// get N = (int)sincosf_int_Nfloat
{ .mfi
getf.sig sincosf_GR_n = sincosf_W_2TO61_RSH // integer N value
nop.f 999
nop.i 999
nop.i 999
};;
// Add 2^(k-1) (which is in sincosf_r_sincos=8) to N
@ -532,12 +532,12 @@ _SINCOSF_COMMON:
{ .mfi
add sincosf_GR_n = sincosf_GR_n, sincosf_r_sincos
fnma.s1 sincosf_r = sincosf_NFLOAT, sincosf_Pi_by_16_1, sincosf_NORM_f8
nop.i 999
nop.i 999
};;
// Get M (least k+1 bits of N)
{ .mmi
and sincosf_GR_m = 0x1f,sincosf_GR_n // Put mask 0x1F -
and sincosf_GR_m = 0x1f,sincosf_GR_n // Put mask 0x1F -
nop.m 999 // - select k+1 bits
nop.i 999
};;
@ -546,7 +546,7 @@ _SINCOSF_COMMON:
{ .mfi
shladd sincosf_AD_2 = sincosf_GR_32m, 4, sincosf_AD_1
(p8) fclass.m.unc p10,p0 = f8,0x0b // If sin denormal input -
nop.i 999
nop.i 999
};;
// Load Sin and Cos table value using obtained index m (sincosf_AD_2)
@ -572,10 +572,10 @@ _SINCOSF_COMMON:
{ .mfi
nop.m 999
fmpy.s0 fp_tmp = fp_tmp, fp_tmp // forces inexact flag
nop.i 999
nop.i 999
};;
// Polynomials calculation
// Polynomials calculation
// Q = Q2*r^2 + Q1
// P = P2*r^2 + P1
{ .mfi
@ -586,7 +586,7 @@ _SINCOSF_COMMON:
{ .mfi
nop.m 999
fma.s1 sincosf_P = sincosf_rsq, sincosf_P2, sincosf_P1
nop.i 999
nop.i 999
};;
// get rcube and S[m]*r^2
@ -598,7 +598,7 @@ _SINCOSF_COMMON:
{ .mfi
nop.m 999
fmpy.s1 sincosf_rcub = sincosf_r_exact, sincosf_rsq
nop.i 999
nop.i 999
};;
// Get final P and Q
@ -612,7 +612,7 @@ _SINCOSF_COMMON:
{ .mfi
nop.m 999
fma.s1 sincosf_P = sincosf_rcub,sincosf_P,sincosf_r_exact
nop.i 999
nop.i 999
};;
// If sinf(denormal) - force underflow to be set
@ -699,8 +699,8 @@ _SINCOSF_LARGE_ARGS:
}
{ .mfi // force inexact set
nop.m 999
fmpy.s0 sincosf_save_tmp = sincosf_save_tmp, sincosf_save_tmp
nop.i 999
fmpy.s0 sincosf_save_tmp = sincosf_save_tmp, sincosf_save_tmp
nop.i 999
};;
{ .mib

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -61,7 +61,7 @@
// Return erf(x) = x *Pol9(x^2)
//
// 3. For several subranges of 0.5 <= |x| < 5.90625
// Return erf(x) = sign(x)*Pol19(y),
// Return erf(x) = sign(x)*Pol19(y),
// where y = (|x|-b)/a, Pol19(y) = A0 + A1*y^1 + A2*y^2 + ... + A19*y^19
//
// For each subrange there is particular set of coefficients.
@ -80,7 +80,7 @@
// 6. |x| = INF
// Return erf(x) = sign(x) * 1.0
//
// 7. x = [S,Q]NaN
// 7. x = [S,Q]NaN
// Return erf(x) = QNaN
//
// 8. x is positive denormal
@ -93,11 +93,11 @@
//
// Registers used
//==============================================================
// Floating Point registers used:
// Floating Point registers used:
// f8, input, output
// f32 -> f63
// General registers used:
// General registers used:
// r32 -> r48, r2, r3
// Predicate registers used:
@ -106,7 +106,7 @@
// p6 to filter out case when x = denormal
// p7 to filter out case when x = [Q,S]NaN or +/-0,
// used also to process denormals
// p8 to filter out case when 3.25 <= |x| < 4.0,
// p8 to filter out case when 3.25 <= |x| < 4.0,
// used also to process denormals
// p9 to filter out case when |x| = inf
// p10 to filter out case when |x| < 0.5
@ -169,7 +169,7 @@ fTSqr = f58
fTQuadr = f59
fTDeg3 = f60
fTDeg7 = f61
fArgAbsNormSgn = f62
fArgAbsNormSgn = f62
fTQuadrSgn = f63
// Data tables
@ -180,7 +180,7 @@ RODATA
LOCAL_OBJECT_START(erf_data)
// Coefficients ##0..15
// Polynomial coefficients for the erf(x), 0.5 <= |x| < 1.0
// Polynomial coefficients for the erf(x), 0.5 <= |x| < 1.0
data8 0xB69AC40646D1F6C1, 0x00003FD2 //A19
data8 0x90AD48C0118FA10C, 0x00003FD7 //A18
data8 0x826FBAD055EA4AB8, 0x0000BFDB //A17
@ -197,7 +197,7 @@ data8 0xB11E30BE912617D3, 0x00003FF0 //A7
data8 0xCCF89D9351CE26E3, 0x0000BFF4 //A6
data8 0xEFF75AD1F0F22809, 0x00003FF2 //A5
data8 0xBB793EF404C09A22, 0x00003FF8 //A4
// Polynomial coefficients for the erf(x), 1.0 <= |x| < 2.0
// Polynomial coefficients for the erf(x), 1.0 <= |x| < 2.0
data8 0xBAE93FF4174EA59B, 0x00003FE6 //A19
data8 0x8A0FD46092F95D44, 0x0000BFEA //A18
data8 0xA37B3242B7809E12, 0x00003FEC //A17
@ -214,7 +214,7 @@ data8 0xF84B80EFCA43895D, 0x00003FF8 //A7
data8 0x9722D22DA628A17B, 0x00003FF7 //A6
data8 0x8DB0A586F8F3381F, 0x0000BFFB //A5
data8 0x8DB0A5879F87E5BE, 0x00003FFB //A4
// Polynomial coefficients for the erf(x), 2.0 <= |x| < 3.25
// Polynomial coefficients for the erf(x), 2.0 <= |x| < 3.25
data8 0x9C4AF1F3A4B21AFC, 0x00003FF6 //A19
data8 0x8D40D5D5DB741AB8, 0x0000BFF9 //A18
data8 0xDEBE7099E0A75BA4, 0x00003FFA //A17
@ -231,7 +231,7 @@ data8 0xDD704DEFFB21B7E7, 0x0000BFFD //A7
data8 0xF0C9A6BBDE469115, 0x00003FF9 //A6
data8 0xD673A02CB5766633, 0x00003FFD //A5
data8 0x8D162CBAD8A12649, 0x0000BFFE //A4
// Polynomial coefficients for the erf(x), 4.0 <= |x| < 6.0
// Polynomial coefficients for the erf(x), 4.0 <= |x| < 6.0
data8 0xD4428B75C6FE8FD1, 0x0000BFFC //A19
data8 0xF76BE1935675D5C8, 0x00003FFE //A18
data8 0xFD6BB3B14AA7A8E6, 0x0000BFFF //A17
@ -250,12 +250,12 @@ data8 0xED3003E477A53EE7, 0x00003FF6 //A5
data8 0xA4C07E9BB3FCB0F3, 0x0000BFF4 //A4
//
// Coefficients ##16..19
// Polynomial coefficients for the erf(x), 0.5 <= |x| < 1.0
// Polynomial coefficients for the erf(x), 0.5 <= |x| < 1.0
data8 0x95FA98C337005D13, 0x0000BFF9 //A3
data8 0xE0F7E524D2808A97, 0x0000BFFB //A2
data8 0xE0F7E524D2808A98, 0x00003FFD //A1
data8 0x853F7AE0C76E915F, 0x00003FFE //A0
// Polynomial coefficients for the erf(x), 1.0 <= |x| < 2.0
// Polynomial coefficients for the erf(x), 1.0 <= |x| < 2.0
data8 0x8DB0A587A96ABCF0, 0x00003FFC //A3
data8 0xD488F84B7DE18DA8, 0x0000BFFD //A2
data8 0xD488F84B7DE12E9C, 0x00003FFD //A1
@ -264,13 +264,13 @@ data8 0xD7BB3D3A08445636, 0x00003FFE //A0
data8 0xC58571D23D5C4B3A, 0x00003FFD //A3
data8 0xA94DCF467CD6AFF3, 0x0000BFFC //A2
data8 0xA94DCF467CD10A16, 0x00003FFA //A1
data8 0xFECD70A13CAF1997, 0x00003FFE //A0
// Polynomial coefficients for the erf(x), 4.0 <= |x| < 6.0
data8 0xFECD70A13CAF1997, 0x00003FFE //A0
// Polynomial coefficients for the erf(x), 4.0 <= |x| < 6.0
data8 0xB01D2B4F0D5AB8B0, 0x00003FF1 //A3
data8 0x8858A465CE594BD1, 0x0000BFEE //A2
data8 0x8858A447456DE61D, 0x00003FEA //A1
data8 0xFFFFFFBDC88BB107, 0x00003FFE //A0
// Polynomial coefficients for the erf(x), 0.0 <= |x| < 0.5
// Polynomial coefficients for the erf(x), 0.0 <= |x| < 0.5
data8 0xBE839EDBB36C7FCE //A9
data8 0x3EBB7745A18DD242 //A8
data8 0xBF4C02DB238F2AFC //A5
@ -282,8 +282,8 @@ data8 0xBF9B82CE3127F4E4 //A3
data8 0x3FBCE2F21A042B25 //A2
data8 0x906EBA8214DB688D, 0x00003FFF //A0
// 1.0 - 2^(-63)
data8 0xFFFFFFFFFFFFFFFF, 0x00003FFE
// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4.0
data8 0xFFFFFFFFFFFFFFFF, 0x00003FFE
// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4.0
data8 0x95E91576C7A12250, 0x00003FE7 //A14
data8 0x8E5E0D0E1F5D3CB5, 0x0000BFEA //A13
data8 0xED761DAFAF814DE9, 0x00003FEB //A12
@ -300,7 +300,7 @@ data8 0xC6F1E695363BCB26, 0x0000BFF1 //A2
data8 0xF4DAF4680DA54C02, 0x00003FEF //A1
data8 0xFFFFB7CFB3F2ABBE, 0x00003FFE //A0
// A = 2.0/sqrt(Pi)
data8 0x906EBA8214DB688D, 0x00003FFF
data8 0x906EBA8214DB688D, 0x00003FFF
LOCAL_OBJECT_END(erf_data)
@ -319,7 +319,7 @@ GLOBAL_LIBM_ENTRY(erf)
}
;;
{ .mfi
getf.d rArg = f8 // x in GR
getf.d rArg = f8 // x in GR
fclass.m p6,p0 = f8, 0x0b // is x denormal ?
shl rThreeAndQ = rThreeAndQ, 44 // bits of 3.25
}
@ -350,7 +350,7 @@ GLOBAL_LIBM_ENTRY(erf)
nop.f 0
(p6) br.cond.spnt erf_denormal // branch out if x is denormal
}
;;
;;
{ .mfi
and rShiftedArgMasked = rShiftedArg, rMask // bias of x << 8
fmerge.s fArgAbs = f1, f8 // |x|
@ -361,15 +361,15 @@ GLOBAL_LIBM_ENTRY(erf)
(p7) fma.d.s0 f8 = f8,f1,f8 // NaN or +/-0
(p7) br.ret.spnt b0 // exit for x = NaN or +/-0
}
;;
;;
{ .mfi
sub rIndex = rShiftedArgMasked, rBias // index << 8
nop.f 0
cmp.lt p10, p0 = rShiftedArgMasked, rBias // p10 = 1 if |x| < 0.5
nop.f 0
cmp.lt p10, p0 = rShiftedArgMasked, rBias // p10 = 1 if |x| < 0.5
}
{ .mfb
// p8 = 1 if 3.25 <= |x| < 4.0
(p8) cmp.lt p8, p11 = rShiftedAbsArg, rBiasedExpOf4
// p8 = 1 if 3.25 <= |x| < 4.0
(p8) cmp.lt p8, p11 = rShiftedAbsArg, rBiasedExpOf4
fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1
(p10) br.cond.spnt erf_near_zero // branch out if |x| < 0.5
}
@ -387,10 +387,10 @@ GLOBAL_LIBM_ENTRY(erf)
}
;;
{ .mfi
adds rCoeffAddr2 = 16, rCoeffAddr1
adds rCoeffAddr2 = 16, rCoeffAddr1
fmerge.s fSignumX = f8, f1 // signum(x)
nop.i 0
}
}
{ .mfb
cmp.lt p12, p0 = rSaturation, rShiftedAbsArg // |x| > 5.90625?
nop.f 0
@ -436,7 +436,7 @@ GLOBAL_LIBM_ENTRY(erf)
ldfe fA13 = [rCoeffAddr1], 32
nop.f 0
// address of coefficients ##16..23
add rCoeffAddr3 = rCoeffAddr3, rIndex
add rCoeffAddr3 = rCoeffAddr3, rIndex
}
{.mfi
ldfe fA12 = [rCoeffAddr2], 32
@ -475,7 +475,7 @@ GLOBAL_LIBM_ENTRY(erf)
ldfe fA6 = [rCoeffAddr2], 32
nop.f 0
(p8) br.cond.spnt erf_3q_4 // branch out if 3.25 < |x| < 4.0
}
}
;;
{.mfi
ldfe fA5 = [rCoeffAddr1], 32
@ -565,7 +565,7 @@ GLOBAL_LIBM_ENTRY(erf)
;;
{ .mfi
nop.m 0
fma.s1 fA15 = fA15, fTSqr, fA13
fma.s1 fA15 = fA15, fTSqr, fA13
nop.i 0
}
{ .mfi
@ -587,19 +587,19 @@ GLOBAL_LIBM_ENTRY(erf)
}
;;
{ .mfi
nop.m 0
nop.m 0
fma.s1 fA7 = fA7, fTSqr, fA5
nop.i 0
}
;;
{ .mfi
nop.m 0
nop.m 0
fma.s1 fRes = fRes, fTQuadr, fA15
nop.i 0
}
;;
{ .mfi
nop.m 0
nop.m 0
fma.s1 fA4 = fA4, fTSqr, fA2
nop.i 0
}
@ -611,7 +611,7 @@ GLOBAL_LIBM_ENTRY(erf)
}
;;
{ .mfi
nop.m 0
nop.m 0
fma.s1 fA4 = fA7, fTDeg3, fA4
nop.i 0
}
@ -637,7 +637,7 @@ GLOBAL_LIBM_ENTRY(erf)
// Here if 3.25 <= |x| < 4.0
.align 32
erf_3q_4:
erf_3q_4:
.pred.rel "mutex", p14, p15
{ .mfi
ldfe fA5 = [rCoeffAddr1], 32
@ -660,7 +660,7 @@ erf_3q_4:
fma.s1 fA15 = fA15, fArgAbs, fA14
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
fma.s1 fA13 = fA13, fArgAbs, fA12
@ -671,7 +671,7 @@ erf_3q_4:
fma.s1 fA11 = fA11, fArgAbs, fA10
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
fma.s1 fA9 = fA9, fArgAbs, fA8
@ -682,7 +682,7 @@ erf_3q_4:
fma.s1 fArgAbsNormSgn = fArgAbs, fSignumX, f0
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
fma.s1 fTQuadr = fTSqr, fTSqr, f0
@ -694,24 +694,24 @@ erf_3q_4:
fma.s1 fRes = fRes, fTSqr, fA17
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
fma.s1 fA15 = fA15, fTSqr, fA13
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
fma.s1 fA11 = fA11, fTSqr, fA9
nop.i 0
}
}
{ .mfi
nop.m 0
fma.s1 fA7 = fA7, fArgAbs, fA6
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
fma.s1 fTDeg7 = fTQuadr, fTSqr, f0
@ -722,10 +722,10 @@ erf_3q_4:
fma.s1 fRes = fRes, fTQuadr, fA15
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
fma.s1 fA11 = fA11, fTSqr, fA7
fma.s1 fA11 = fA11, fTSqr, fA7
nop.i 0
}
;;
@ -734,7 +734,7 @@ erf_3q_4:
fma.s1 fRes = fRes, fTDeg7, fA11
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
// result for negative argument
@ -754,7 +754,7 @@ erf_3q_4:
erf_near_zero:
{ .mfi
adds rCoeffAddr1 = 1280, rDataPtr // address of A9
fma.s1 fTSqr = fArgSqr, fArgSqr, f0 // x^4
fma.s1 fTSqr = fArgSqr, fArgSqr, f0 // x^4
nop.i 0
}
{ .mfi
@ -887,7 +887,7 @@ erf_saturation:
br.ret.sptk b0 // Exit for 5.90625 <=|x|< +inf
}
;;
// Here if x is double precision denormal
.align 32
erf_denormal:

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -51,7 +51,7 @@
// Overview of operation
//==============================================================
// 1. 0 <= x <= 28.0
//
//
// erfc(x) = P14(z) * exp( -x^2 ), z = x - x(i).
//
// Comment:
@ -59,38 +59,38 @@
// Let x(i) = -1.0 + 2^(i/4),i=0,...19. So we have 20 unequal
// argument intervals [x(i),x(i+1)] with length ratio q = 2^(1/4).
// The values x(i) are stored in the table erfc_xb_table.
//
//
// Let x(i)<= x < x(i+1).
// We can find i as exponent of number (x + 1)^4.
//
//
// Let P14(z) be the polynomial approximation of degree 14 for the function
// erfc(z+x(i)) * exp( (z+x(i))^2) and 0 <= z <= x(i+1)-x(i).
// The polynomial coefficients are stored in the table erfc_p_table.
//
// So we can find result for erfc(x) as above.
// Algorithm description for exp function see below.
//
// Algorithm description for exp function see below.
//
// 2. -6 <= x < 0
//
// erfc(x) = 2.0 - erfc(-x)
//
// 3. x > 28.0
// erfc(x) ~=~ 0.0
// erfc(x) ~=~ 0.0
//
// 4. x < -6.0
// erfc(x) ~=~ 2.0
// 4. x < -6.0
// erfc(x) ~=~ 2.0
// Special values
// Special values
//==============================================================
// erfc(+0) = 1.0
// erfc(-0) = 1.0
// erfc(+qnan) = +qnan
// erfc(-qnan) = -qnan
// erfc(+snan) = +qnan
// erfc(-snan) = -qnan
// erfc(+qnan) = +qnan
// erfc(-qnan) = -qnan
// erfc(+snan) = +qnan
// erfc(-snan) = -qnan
// erfc(-inf) = 2.0
// erfc(-inf) = 2.0
// erfc(+inf) = +0
//==============================================================
@ -118,16 +118,16 @@
// Comment for exp for erfc:
//
// We use quad precision to calculate input argument -x^2 and add
// result low bits to value delta in exp.
// result low bits to value delta in exp.
// Registers used
//==============================================================
// Floating Point registers used:
// Floating Point registers used:
// f8, input
// f9 -> f15, f32 -> f93
// General registers used:
// r32 -> r68
// General registers used:
// r32 -> r68
// Predicate registers used:
// p6 -> p15
@ -201,15 +201,15 @@ EXP_INV_LN2_2TO63 = f7
EXP_W_2TO56_RSH = f9
EXP_RSHF_2TO56 = f10
exp_P4 = f11
exp_P3 = f12
exp_P2 = f13
exp_P1 = f14
exp_P4 = f11
exp_P3 = f12
exp_P2 = f13
exp_P1 = f14
exp_ln2_by_128_hi = f15
exp_ln2_by_128_lo = f32
exp_ln2_by_128_lo = f32
EXP_RSHF = f33
EXP_Nfloat = f34
EXP_Nfloat = f34
exp_r = f35
exp_f = f36
exp_rsq = f37
@ -222,7 +222,7 @@ exp_P_lo = f43
exp_P_hi = f44
exp_P = f45
exp_S = f46
EXP_NORM_f8 = f47
EXP_NORM_f8 = f47
exp_S2 = f48
exp_T2 = f49
@ -355,7 +355,7 @@ LOCAL_OBJECT_END(erfc_xb_table)
LOCAL_OBJECT_START(erfc_p_table)
// Pol0
// Pol0
data8 0x8000000000000000, 0x00003FFF //A0 = +1.00000000000000000000e+00L
data8 0x906EBA8214DB688D, 0x0000BFFF //A1 = -1.12837916709551257389e+00L
data8 0xFFFFFFFFFFFFFFEB, 0x00003FFE //A2 = +9.99999999999999998841e-01L
@ -371,7 +371,7 @@ data8 0xE36112A686F5165B, 0x0000BFF6 //A11 = -3.46953111013788405745e-03L
data8 0xB3DD6B2DB3307D2E, 0x00003FF5 //A12 = +1.37226041156280127011e-03L
data8 0x8018A34267FED226, 0x0000BFF4 //A13 = -4.88648380816410282971e-04L
data8 0xFBBA6A7AEBD3ABD9, 0x00003FF1 //A14 = +1.20033353451879025825e-04L
// Pol1
// Pol1
data8 0xD15A1EF03BB91E71, 0x00003FFE //A0 = +8.17781385088640600540e-01L
data8 0xD1A4ADDAC3337118, 0x0000BFFE //A1 = -8.18919053944410683867e-01L
data8 0xA9AF9FFA2AD18CB0, 0x00003FFE //A2 = +6.62836073471060947628e-01L
@ -387,7 +387,7 @@ data8 0xBA821A59FC05FBAD, 0x0000BFF5 //A11 = -1.42294475244146555952e-03L
data8 0x8D535042E11A0D89, 0x00003FF4 //A12 = +5.39113782651680545599e-04L
data8 0xBE589447DB26564E, 0x0000BFF2 //A13 = -1.81528103431449706486e-04L
data8 0xABC8C7EF636F5B0A, 0x00003FF0 //A14 = +4.09565689009869217620e-05L
// Pol2
// Pol2
data8 0xA9973ABB272898B2, 0x00003FFE //A0 = +6.62463827792779356910e-01L
data8 0x945F1A7993F7AADD, 0x0000BFFE //A1 = -5.79576162988785154930e-01L
data8 0xD84439C6609A8A62, 0x00003FFD //A2 = +4.22395520654665085222e-01L
@ -403,7 +403,7 @@ data8 0x86FAEBB4438A20FA, 0x0000BFF4 //A11 = -5.14908443679775343409e-04L
data8 0xC2503856CE48A657, 0x00003FF2 //A12 = +1.85311660448280465934e-04L
data8 0xF52642F22A26965B, 0x0000BFF0 //A13 = -5.84481856856861454591e-05L
data8 0xC98588E1A95FFDBD, 0x00003FEE //A14 = +1.20116245684500489648e-05L
// Pol3
// Pol3
data8 0x887CBA2C47B1E2B5, 0x00003FFE //A0 = +5.33153186617432643784e-01L
data8 0xCD81909CF194328E, 0x0000BFFD //A1 = -4.01379126699602646289e-01L
data8 0x84DCA15C52122372, 0x00003FFD //A2 = +2.59495775718310530164e-01L
@ -419,7 +419,7 @@ data8 0xAAE3CAAB9D117591, 0x0000BFF2 //A11 = -1.62973223928790256249e-04L
data8 0xE7704D06A3080C19, 0x00003FF0 //A12 = +5.51792801195012080688e-05L
data8 0x875A5B53E510F305, 0x0000BFEF //A13 = -1.61353297293572230995e-05L
data8 0xC8F10CDDB9CC9A42, 0x00003FEC //A14 = +2.99426321046583353559e-06L
// Pol4
// Pol4
data8 0xDAEC3C07CAB590C1, 0x00003FFD //A0 = +4.27583576155807004411e-01L
data8 0x8BE271F8BE0280AC, 0x0000BFFD //A1 = -2.73212014783898564863e-01L
data8 0x9E13941E19661429, 0x00003FFC //A2 = +1.54371561371908397882e-01L
@ -435,7 +435,7 @@ data8 0xBC17A73E9CA51313, 0x0000BFF0 //A11 = -4.48447217225392170834e-05L
data8 0xED10FE8FC0E44CAD, 0x00003FEE //A12 = +1.41302576244352578317e-05L
data8 0xFE49912328516F81, 0x0000BFEC //A13 = -3.78917710289305330220e-06L
data8 0xA8F6077E25DAFD33, 0x00003FEA //A14 = +6.29428967202166402369e-07L
// Pol5
// Pol5
data8 0xAF72220985BED710, 0x00003FFD //A0 = +3.42667640364081975844e-01L
data8 0xBC1CB559042410AB, 0x0000BFFC //A1 = -1.83703263815036934677e-01L
data8 0xB730BF62E0B63A3C, 0x00003FFB //A2 = +8.94484474229911741150e-02L
@ -451,7 +451,7 @@ data8 0xB3911863705825F6, 0x0000BFEE //A11 = -1.07030140392753204852e-05L
data8 0xD023CF5C3F915685, 0x00003FEC //A12 = +3.10152594473606007552e-06L
data8 0xCA7016FADFF584F5, 0x0000BFEA //A13 = -7.54139761055503416594e-07L
data8 0xEEBB5CC0901D2BB0, 0x00003FE7 //A14 = +1.11168196441717301549e-07L
// Pol6
// Pol6
data8 0x8CD1160326A754AF, 0x00003FFD //A0 = +2.75032699474947383325e-01L
data8 0xFB22A4C657119388, 0x0000BFFB //A1 = -1.22624671271190511269e-01L
data8 0xD02B2CA872A774E9, 0x00003FFA //A2 = +5.08224243596176920409e-02L
@ -467,7 +467,7 @@ data8 0x950CBA5D80D8125E, 0x0000BFEC //A11 = -2.22101388436550539151e-06L
data8 0x9CE72C0409A3E800, 0x00003FEA //A12 = +5.84509280984781223375e-07L
data8 0x88CCD7A000D1C213, 0x0000BFE8 //A13 = -1.27405082040077425019e-07L
data8 0x8DF4EC84F093B1C0, 0x00003FE5 //A14 = +1.65259388738830506389e-08L
// Pol7
// Pol7
data8 0xE2BF82A153B1B82E, 0x00003FFC //A0 = +2.21433678719152843912e-01L
data8 0xA72A9AE0BD7F29D5, 0x0000BFFB //A1 = -8.16242313227913578068e-02L
data8 0xE98939292289EDBE, 0x00003FF9 //A2 = +2.85078159732432477516e-02L
@ -483,7 +483,7 @@ data8 0xD8D0ED030032926D, 0x0000BFE9 //A11 = -4.03851487695924456733e-07L
data8 0xCCA1CA2AC3EB8973, 0x00003FE7 //A12 = +9.52891963880517988726e-08L
data8 0x9E26A080F9DA39DE, 0x0000BFE5 //A13 = -1.84111863600343741644e-08L
data8 0x8F3DC58F64A92C62, 0x00003FE2 //A14 = +2.08443519336792003049e-09L
// Pol8
// Pol8
data8 0xB74C13E914E9666F, 0x00003FFC //A0 = +1.79001151181389950418e-01L
data8 0xDEB57268A58B763B, 0x0000BFFA //A1 = -5.43722600071728705200e-02L
data8 0x821FF0D4C605A4CD, 0x00003FF9 //A2 = +1.58843711598712515609e-02L
@ -499,7 +499,7 @@ data8 0x8BFE95FCD7B92763, 0x0000BFE7 //A11 = -6.51900079707465044843e-08L
data8 0xE9F15C8E7F58CF90, 0x00003FE4 //A12 = +1.36172642554216769522e-08L
data8 0x9E90F22B11FAF8B5, 0x0000BFE2 //A13 = -2.30744183054978535129e-09L
data8 0xF8CF74F1A138FBBA, 0x00003FDE //A14 = +2.26291720693360003233e-10L
// Pol9
// Pol9
data8 0x94D45274A831ED57, 0x00003FFC //A0 = +1.45341194505862183128e-01L
data8 0x94D4518B699A4A68, 0x0000BFFA //A1 = -3.63352952323113355459e-02L
data8 0x90C3B59FF403A916, 0x00003FF8 //A2 = +8.83572327421709216515e-03L
@ -515,7 +515,7 @@ data8 0xA34CD3DFAC12AA45, 0x0000BFE4 //A11 = -9.50531730989412282035e-09L
data8 0xEEBB49645DE0E34C, 0x00003FE1 //A12 = +1.73700091999434388879e-09L
data8 0x8C86D8677DEACFBA, 0x0000BFDF //A13 = -2.55616650187281815453e-10L
data8 0xBDB223D0FE2A7D6B, 0x00003FDB //A14 = +2.15659223402509415592e-11L
// Pol10
// Pol10
data8 0xF2C1812715E4050A, 0x00003FFB //A0 = +1.18533143048567888157e-01L
data8 0xC7DA2C565ADAEE57, 0x0000BFF9 //A1 = -2.43960252726894623056e-02L
data8 0xA15CEFFD632F697D, 0x00003FF7 //A2 = +4.92440908672041077933e-03L
@ -531,7 +531,7 @@ data8 0xAF86504D78D35E89, 0x0000BFE1 //A11 = -1.27711000692808421573e-09L
data8 0xDE1CE78ADB6DDF04, 0x00003FDE //A12 = +2.02010513073041015283e-10L
data8 0xE124FFAA267301A5, 0x0000BFDB //A13 = -2.55959692063871343080e-11L
data8 0x81F1BEBEFBE168D2, 0x00003FD8 //A14 = +1.84661980716000872722e-12L
// Pol11
// Pol11
data8 0xC6CE5D7D18203EAA, 0x00003FFB //A0 = +9.70732978630764996752e-02L
data8 0x86E8A30A76923C88, 0x0000BFF9 //A1 = -1.64683517829920230086e-02L
data8 0xB4A1CBB7576B4183, 0x00003FF6 //A2 = +2.75622581042760461528e-03L
@ -547,7 +547,7 @@ data8 0xB16A6CC5A3AE6E01, 0x0000BFDE //A11 = -1.61358659378896671620e-10L
data8 0xC0970F2551C52F96, 0x00003FDB //A12 = +2.18949565869759698947e-11L
data8 0xA6E029ABB3BB500C, 0x0000BFD8 //A13 = -2.37144541649446501026e-12L
data8 0xA3E43F3857D1B6A5, 0x00003FD4 //A14 = +1.45564973108152568130e-13L
// Pol12
// Pol12
data8 0xA36E35FC807B3E64, 0x00003FFB //A0 = +7.98000543291529334886e-02L
data8 0xB725A29237C8F94F, 0x0000BFF8 //A1 = -1.11784064873715046550e-02L
data8 0xCB51EF23EAD5F327, 0x00003FF5 //A2 = +1.55120891755237931425e-03L
@ -563,7 +563,7 @@ data8 0xABD305A38349EAEB, 0x0000BFDB //A11 = -1.95341618552982314342e-11L
data8 0x9EDB00104DB66DD9, 0x00003FD8 //A12 = +2.25747200093121867690e-12L
data8 0xE9F80AF513F2B8AB, 0x0000BFD4 //A13 = -2.07806143133802417637e-13L
data8 0xC2B840C3859AB166, 0x00003FD0 //A14 = +1.08091168358477817812e-14L
// Pol13
// Pol13
data8 0x86CD0BF01914407A, 0x00003FFB //A0 = +6.58207829138836028568e-02L
data8 0xF9F4A17FA70807C3, 0x0000BFF7 //A1 = -7.62803922344113067603e-03L
data8 0xE63BF84EDE20EDAA, 0x00003FF4 //A2 = +8.78273993036530088653e-04L
@ -579,7 +579,7 @@ data8 0xA1FB98FA19E62A4F, 0x0000BFD8 //A11 = -2.30191407969654156362e-12L
data8 0xFDB2E0599016AD1E, 0x00003FD4 //A12 = +2.25329742249079975388e-13L
data8 0x9E179A99CDD4BF4B, 0x0000BFD1 //A13 = -1.75517603530017718494e-14L
data8 0xDE4DE992A707C7BC, 0x00003FCC //A14 = +7.71273133169032472595e-16L
// Pol14
// Pol14
data8 0xDF0639E60CF6E96C, 0x00003FFA //A0 = +5.44492971101228988138e-02L
data8 0xAB6737B6065BD1C2, 0x0000BFF7 //A1 = -5.23081035867078490333e-03L
data8 0x8322CC0765FD9C27, 0x00003FF4 //A2 = +5.00243857322493802503e-04L
@ -595,7 +595,7 @@ data8 0x967A0ECC142382D9, 0x0000BFD5 //A11 = -2.67300472044743953909e-13L
data8 0xC6D8869855133985, 0x00003FD1 //A12 = +2.20763189681614758000e-14L
data8 0xD10AC0B228ABCECC, 0x0000BFCD //A13 = -1.45052027893524847250e-15L
data8 0xF7C6DEB4522487A3, 0x00003FC8 //A14 = +5.37280367113168366711e-17L
// Pol15
// Pol15
data8 0xB8F57DECFAC3B255, 0x00003FFA //A0 = +4.51559943173131409760e-02L
data8 0xEC1B8A6C822C036F, 0x0000BFF6 //A1 = -3.60271577347565115947e-03L
data8 0x963A6DD66951B72E, 0x00003FF3 //A2 = +2.86537625289770759336e-04L
@ -611,7 +611,7 @@ data8 0x8AF8F1E3FED32CEC, 0x0000BFD2 //A11 = -3.08580807479307213059e-14L
data8 0x9A88033A08842BEA, 0x00003FCE //A12 = +2.14455258045503137285e-15L
data8 0x88BCF775B7B3A939, 0x0000BFCA //A13 = -1.18601440246395438386e-16L
data8 0x88687B63A5B7135E, 0x00003FC5 //A14 = +3.69734984736162880476e-18L
// Pol16
// Pol16
data8 0x99B8A501204BF3E7, 0x00003FFA //A0 = +3.75296063885057657456e-02L
data8 0xA33FA20D2867C79C, 0x0000BFF6 //A1 = -2.49097544033960143953e-03L
data8 0xACFD14CA6AA55829, 0x00003FF2 //A2 = +1.64974783411741182991e-04L
@ -627,7 +627,7 @@ data8 0x805C040421E7A098, 0x0000BFCF //A11 = -3.56269003968981157635e-15L
data8 0xEFCCD20DE93A138E, 0x00003FCA //A12 = +2.07993414310230172191e-16L
data8 0xB259764466732080, 0x0000BFC6 //A13 = -9.66834364652262630640e-18L
data8 0x9597C1DB6AF830E4, 0x00003FC1 //A14 = +2.53420063550355940811e-19L
// Pol17
// Pol17
data8 0xFFFCBD66BAA4368C, 0x00003FF9 //A0 = +3.12484454387527380657e-02L
data8 0xE28174723762D197, 0x0000BFF5 //A1 = -1.72810121976742793952e-03L
data8 0xC81D832836019EC4, 0x00003FF1 //A2 = +9.54224026432644399736e-05L
@ -643,7 +643,7 @@ data8 0xEE034E350C65D2D9, 0x0000BFCB //A11 = -4.12886586201102092942e-16L
data8 0xBA94473E52495304, 0x00003FC7 //A12 = +2.02289587087169937807e-17L
data8 0xE913D34CBB853CEE, 0x0000BFC2 //A13 = -7.89697093687557412061e-19L
data8 0xA44576A85E8CAB59, 0x00003FBD //A14 = +1.73929048516879172258e-20L
// Pol18
// Pol18
data8 0xD579A3FE4622DED2, 0x00003FF9 //A0 = +2.60589793198885278242e-02L
data8 0x9D97EB84E7CD89C8, 0x0000BFF5 //A1 = -1.20234251012583627659e-03L
data8 0xE86EFDC2CCA5C47B, 0x00003FF0 //A2 = +5.54164790116744315389e-05L
@ -659,7 +659,7 @@ data8 0xDDF6F1B79F50E3C4, 0x0000BFC8 //A11 = -4.81309059042573202592e-17L
data8 0x91F283C0351A9ACA, 0x00003FC4 //A12 = +1.97795505638619048412e-18L
data8 0x990BC4FAFA9C7542, 0x0000BFBF //A13 = -6.48174913943425248713e-20L
data8 0xB536865B89676892, 0x00003FB9 //A14 = +1.19916696090758913485e-21L
// Pol19
// Pol19
data8 0xB241CEB1B7C953F1, 0x00003FF9 //A0 = +2.17598950382519671244e-02L
data8 0xDBD6FBA9B11B85E1, 0x0000BFF4 //A1 = -8.38622198373701898430e-04L
data8 0x877605B1AD082441, 0x00003FF0 //A2 = +3.22964249573360786077e-05L
@ -698,7 +698,7 @@ GLOBAL_LIBM_ENTRY(erfc)
mov exp_GR_rshf_2to56 = 0x4768 // begin 1.1 2^(63+56)
}
{ .mlx
mov exp_TB1_size = 0x100
mov exp_TB1_size = 0x100
movl exp_GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc //signif. of 1/ln2
};;
@ -710,7 +710,7 @@ GLOBAL_LIBM_ENTRY(erfc)
{ .mfi
mov exp_GR_exp_2tom56 = 0xffff-56
fnma.s1 EXP_NORM_f8 = f8, f8, f0 // high bits for -x^2
nop.i 0
nop.i 0
};;
@ -718,7 +718,7 @@ GLOBAL_LIBM_ENTRY(erfc)
{ .mfi
setf.sig EXP_INV_LN2_2TO63 = exp_GR_sig_inv_ln2 // form 1/ln2 * 2^63
(p6) fma.s1 FR_AbsArg = f1, f0, f8 // |x|, if x >= 0
mov GR_POS_ARG_ASYMP = 0x403C
mov GR_POS_ARG_ASYMP = 0x403C
}
{ .mfi
mov GR_NEG_ARG_ASYMP = 0x4018
@ -729,11 +729,11 @@ GLOBAL_LIBM_ENTRY(erfc)
{ .mfi
setf.exp EXP_2TOM56 = exp_GR_exp_2tom56 // 2^-56 for scaling Nfloat
fclass.m p10,p0 = f8, 0x21 // p10: x = +inf
mov exp_GR_17ones = 0x1FFFF
mov exp_GR_17ones = 0x1FFFF
}
{ .mlx
{ .mlx
setf.d EXP_RSHF_2TO56 = exp_GR_rshf_2to56 // const 1.10*2^(63+56)
movl GR_ERFC_XB_TB = 0x1A0
movl GR_ERFC_XB_TB = 0x1A0
};;
@ -744,9 +744,9 @@ GLOBAL_LIBM_ENTRY(erfc)
shl exp_GR_rshf = exp_GR_rshf, 48 //end 1.1 2^63 for right shift
}
{ .mfi
nop.m 0
nop.m 0
(p7) fma.s1 FR_Tmp = FR_Tmp1, FR_Tmp1, f0 // (|x|+1)^2, x<0
mov GR_0x1 = 0x1
mov GR_0x1 = 0x1
};;
{ .mfi
@ -763,7 +763,7 @@ GLOBAL_LIBM_ENTRY(erfc)
{ .mfi
nop.m 0
fclass.m p11,p0 = f8, 0xc3 // p11: x = nan
nop.i 0
nop.i 0
}
{ .mfi
setf.d EXP_RSHF = exp_GR_rshf //Form right shift const 1.100 * 2^63
@ -772,8 +772,8 @@ GLOBAL_LIBM_ENTRY(erfc)
};;
{ .mfi
setf.d FR_EpsNorm = GR_EpsNorm
nop.f 0
setf.d FR_EpsNorm = GR_EpsNorm
nop.f 0
(p6) shl GR_ARG_ASYMP = GR_POS_ARG_ASYMP, 48//p6:ARG_ASYMP= 28.0,x>=0
}
{ .mfi
@ -789,18 +789,18 @@ GLOBAL_LIBM_ENTRY(erfc)
};;
{ .mfi
sub GR_mBIAS = r0, GR_BIAS
sub GR_mBIAS = r0, GR_BIAS
fma.s1 FR_Tmp = FR_Tmp, FR_Tmp, f0 // (|x|+1)^4
nop.i 0
}
{ .mfi
ldfe exp_ln2_by_128_lo = [EXP_AD_TB1], 16
nop.f 0
nop.i 0
nop.i 0
};;
{ .mfi
getf.d GR_AbsArg = FR_AbsArg
getf.d GR_AbsArg = FR_AbsArg
nop.f 0
add GR_ERFC_XB_TB = GR_ERFC_XB_TB, EXP_AD_TB1//pointer to XB_TBL
}
@ -815,7 +815,7 @@ GLOBAL_LIBM_ENTRY(erfc)
fma.s1 EXP_W_2TO56_RSH = EXP_NORM_f8,EXP_INV_LN2_2TO63,EXP_RSHF_2TO56
shladd GR_ShftPi_bias = GR_ShftPi_bias, 4, r0 // BIAS * 240
}
{ .mfb
{ .mfb
nop.m 0
(p10) fma.d.s0 f8 = f0, f1, f0 // p10: y = 0 for x = +inf
(p10) br.ret.spnt b0 // p10: quick exit for x = +inf
@ -825,7 +825,7 @@ GLOBAL_LIBM_ENTRY(erfc)
.pred.rel "mutex",p6,p7
{ .mfi
(p6) cmp.gt.unc p15,p0 = GR_AbsArg,GR_ARG_ASYMP //p15: x > 28.0,p6: x >= 0
nop.f 0
nop.f 0
(p7) cmp.gt.unc p14,p0 = GR_AbsArg, GR_ARG_ASYMP //p14: x < - 6.0,p7: x < 0
}
{ .mfb
@ -833,9 +833,9 @@ GLOBAL_LIBM_ENTRY(erfc)
(p11) fma.d.s0 f8 = f8, f1, f0 //p11: y = x for x = nan
(p11) br.ret.spnt b0 //p11: quick exit for x = nan
};;
{ .mfi
add EXP_AD_P = exp_TB2_size, EXP_AD_TB2
{ .mfi
add EXP_AD_P = exp_TB2_size, EXP_AD_TB2
fms.s1 f8_sq_lo = f1, f1, f8_sq_lo // 1 - low bits for -x^2
nop.i 0
};;
@ -844,14 +844,14 @@ GLOBAL_LIBM_ENTRY(erfc)
ldfpd exp_P4, exp_P3 = [EXP_AD_P], 16
fmerge.s FR_X = f8,f8
shladd GR_ShftXBi_bias = GR_mBIAS, 4, r0
}
}
{ .mfb
nop.m 0
(p14) fnma.d.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,FR_2 //p14:y ~=~ 2,x< -6.0
(p14) br.ret.spnt b0 //p14: quick exit for x < -6.0
};;
//p15: y ~=~ 0.0(result with underflow error), x > ARG_ASYMP = 28,
//p15: y ~=~ 0.0(result with underflow error), x > ARG_ASYMP = 28,
{ .mfi
ldfpd exp_P2, exp_P1 = [EXP_AD_P]
fma.d.s0 FR_Tmpf = f1, f1, FR_EpsNorm // flag i
@ -859,14 +859,14 @@ GLOBAL_LIBM_ENTRY(erfc)
}
{ .mfb
(p15) mov GR_Parameter_TAG = 208
(p15) fma.d.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,f0
(p15) fma.d.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,f0
(p15) br.cond.spnt __libm_error_region
};;
//p8: x < 27.0, result without underflow error
{ .mfi
getf.exp GR_IndxPlusBias = FR_Tmp // exp + bias for (|x|+1)^4
fcmp.lt.s1 p8,p0 = FR_NormX,FR_UnfBound
fcmp.lt.s1 p8,p0 = FR_NormX,FR_UnfBound
nop.i 0
}
{ .mfi
@ -878,11 +878,11 @@ GLOBAL_LIBM_ENTRY(erfc)
{ .mmi
shladd GR_ShftXBi = GR_IndxPlusBias, 4, GR_ShftXBi_bias
shladd GR_ShftPi = GR_IndxPlusBias, 4, GR_ShftPi_bias
shl GR_ShftPi_8 = GR_IndxPlusBias, 8
shl GR_ShftPi_8 = GR_IndxPlusBias, 8
};;
{ .mmi
getf.sig exp_GR_N = EXP_W_2TO56_RSH
getf.sig exp_GR_N = EXP_W_2TO56_RSH
add GR_ERFC_XB_TB = GR_ERFC_XB_TB, GR_ShftXBi// pointer to XB[i]
sub GR_ShftPi = GR_ShftPi_8, GR_ShftPi // (256-16)*i
};;
@ -890,13 +890,13 @@ GLOBAL_LIBM_ENTRY(erfc)
{ .mmi
ldfe FR_Xb = [GR_ERFC_XB_TB]
add GR_ShftA12 = 0xC0, GR_ShftPi // pointer shift for A12
add GR_ShftA13 = 0xD0, GR_ShftPi // pointer shift for A13
add GR_ShftA13 = 0xD0, GR_ShftPi // pointer shift for A13
};;
{ .mfi
add GR_P_A13 = GR_ERFC_P_TB, GR_ShftA13 // pointer to A13
nop.f 0
and exp_GR_index_1 = 0x0f, exp_GR_N
and exp_GR_index_1 = 0x0f, exp_GR_N
}
{ .mfi
add GR_P_A12 = GR_ERFC_P_TB, GR_ShftA12 // pointer to A12
@ -905,52 +905,52 @@ GLOBAL_LIBM_ENTRY(erfc)
};;
{ .mfi
ldfe FR_A12 = [GR_P_A12], -64
ldfe FR_A12 = [GR_P_A12], -64
nop.f 0
and exp_GR_index_2_16 = 0x70, exp_GR_N
and exp_GR_index_2_16 = 0x70, exp_GR_N
}
{ .mfi
ldfe FR_A13 = [GR_P_A13], -64
ldfe FR_A13 = [GR_P_A13], -64
nop.f 0
shladd EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1
};;
shladd EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1
};;
{ .mmi
{ .mmi
ldfe FR_A8 = [GR_P_A12], 32
ldfe FR_A9 = [GR_P_A13], 32
ldfe FR_A9 = [GR_P_A13], 32
add EXP_AD_T2 = EXP_AD_TB2, exp_GR_index_2_16
};;
{ .mmi
ldfe FR_A10 = [GR_P_A12], -96
ldfe FR_A11 = [GR_P_A13], -96
nop.i 0
nop.i 0
};;
{ .mmi
ldfe FR_A4 = [GR_P_A12], 32
ldfe FR_A5 = [GR_P_A13], 32
ldfe FR_A5 = [GR_P_A13], 32
shr r2 = exp_GR_N, 0x7
};;
{ .mfi
ldfe FR_A6 = [GR_P_A12], -64
{ .mfi
ldfe FR_A6 = [GR_P_A12], -64
fma.s1 exp_rP4pP3 = exp_r, exp_P4, exp_P3
nop.i 0
}
{ .mfi
ldfe FR_A7 = [GR_P_A13], -64
{ .mfi
ldfe FR_A7 = [GR_P_A13], -64
fma.s1 exp_rsq = exp_r, exp_r, f0
nop.i 0
};;
{ .mmi
ldfe FR_A2 = [GR_P_A12], -32
ldfe FR_A3 = [GR_P_A13], -32
ldfe FR_A3 = [GR_P_A13], -32
addl exp_GR_biased_M = 0xffff, r2
};;
{ .mmi
{ .mmi
ldfe FR_A0 = [GR_P_A12], 224
ldfe FR_A1 = [GR_P_A13]
nop.i 0
@ -975,12 +975,12 @@ GLOBAL_LIBM_ENTRY(erfc)
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 exp_rcube = exp_r, exp_rsq, f0
nop.i 0
}
{ .mfi
nop.m 0
}
{ .mfi
nop.m 0
fma.s1 exp_P_lo = exp_r, exp_rP4pP3, exp_P2
nop.i 0
};;
@ -988,41 +988,41 @@ GLOBAL_LIBM_ENTRY(erfc)
{ .mfi
nop.m 0
fnma.s1 exp_f = EXP_Nfloat, exp_ln2_by_128_lo, f8_sq_lo
nop.i 0
};;
{ .mfi
nop.m 0
fma.s1 FR_P14_0_1 = FR_LocArg, FR_LocArg, f0 // xloc ^2
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 FR_P14_0_2 = FR_A13, FR_LocArg, FR_A12
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
fma.s1 FR_P14_1_1 = FR_A9, FR_LocArg, FR_A8
fma.s1 FR_P14_0_1 = FR_LocArg, FR_LocArg, f0 // xloc ^2
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 FR_P14_1_2 = FR_A11, FR_LocArg, FR_A10
fma.s1 FR_P14_0_2 = FR_A13, FR_LocArg, FR_A12
nop.i 0
};;
{ .mfi
nop.m 0
fma.s1 FR_P14_1_1 = FR_A9, FR_LocArg, FR_A8
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 FR_P14_1_2 = FR_A11, FR_LocArg, FR_A10
nop.i 0
};;
{ .mfi
nop.m 0
fma.s1 FR_P14_2_1 = FR_A5, FR_LocArg, FR_A4
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 FR_P14_2_2 = FR_A7, FR_LocArg, FR_A6
nop.i 0
};;
};;
{ .mfi
nop.m 0
@ -1057,7 +1057,7 @@ GLOBAL_LIBM_ENTRY(erfc)
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 exp_S1 = EXP_2M, exp_T1, f0
nop.i 0
};;
@ -1080,9 +1080,9 @@ GLOBAL_LIBM_ENTRY(erfc)
};;
{ .mfi
nop.m 0
fma.s1 exp_S = exp_S1, exp_S2, f0
nop.i 0
nop.m 0
fma.s1 exp_S = exp_S1, exp_S2, f0
nop.i 0
}
{ .mfi
nop.m 0
@ -1105,34 +1105,34 @@ GLOBAL_LIBM_ENTRY(erfc)
{ .mfi
nop.m 0
fma.s1 FR_Exp = exp_S, exp_P, exp_S // exp(-x^2)
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 FR_Pol = FR_P14_13_2, FR_P14_12_1, FR_P14_13_1
fma.s1 FR_Pol = FR_P14_13_2, FR_P14_12_1, FR_P14_13_1
nop.i 0
};;
{ .mfi
nop.m 0
fma.d.s0 FR_Tmpf = f8, f1, f0 // flag d
nop.i 0
nop.i 0
};;
//p6: result for 0 < x <= 28.0,
//p7: result for -6.0 <= x < 0,
//p8: exit for - 6.0 <= x < UnfBound ~=~ 26.54..
.pred.rel "mutex",p6,p7
{ .mfi
nop.m 0
(p6) fma.d.s0 f8 = FR_Exp, FR_Pol, f0
nop.i 0
(p6) fma.d.s0 f8 = FR_Exp, FR_Pol, f0
nop.i 0
}
{ .mfb
mov GR_Parameter_TAG = 208
(p7) fnma.d.s0 f8 = FR_Exp, FR_Pol, FR_2
(p8) br.ret.sptk b0
(p7) fnma.d.s0 f8 = FR_Exp, FR_Pol, FR_2
(p8) br.ret.sptk b0
};;
GLOBAL_LIBM_END(erfc)
@ -1152,7 +1152,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@ -1160,18 +1160,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
@ -1189,7 +1189,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -51,23 +51,23 @@
// Overview of operation
//==============================================================
// 1. 0 <= x <= 10.06
//
//
// erfcf(x) = P15(x) * exp( -x^2 )
//
// Comment:
//
// Let x(0)=0, x(i) = 2^(i), i=1,...3, x(4)= 10.06
//
//
// Let x(i)<= x < x(i+1).
// We can find i as exponent of argument x (let i = 0 for 0<= x < 2 )
//
//
// Let P15(x) - polynomial approximation of degree 15 for function
// erfcf(x) * exp( x^2) and x(i) <= x <= x(i+1), i = 0,1,2,3
// Polynomial coefficients we have in the table erfc_p_table.
//
// So we can find result for erfcf(x) as above.
// Algorithm description for exp function see below.
//
//
// 2. -4.4 <= x < 0
//
// erfcf(x) = 2.0 - erfcf(-x)
@ -77,20 +77,20 @@
// erfcf(x) ~=~ 0.0
//
// 4. x < -4.4
//
//
// erfcf(x) ~=~ 2.0
// Special values
// Special values
//==============================================================
// erfcf(+0) = 1.0
// erfcf(-0) = 1.0
// erfcf(+qnan) = +qnan
// erfcf(-qnan) = -qnan
// erfcf(+snan) = +qnan
// erfcf(-snan) = -qnan
// erfcf(+qnan) = +qnan
// erfcf(-qnan) = -qnan
// erfcf(+snan) = +qnan
// erfcf(-snan) = -qnan
// erfcf(-inf) = 2.0
// erfcf(-inf) = 2.0
// erfcf(+inf) = +0
//==============================================================
@ -123,12 +123,12 @@
//
// Registers used
//==============================================================
// Floating Point registers used:
// Floating Point registers used:
// f8, input
// f6,f7,f9 -> f11, f32 -> f92
// General registers used:
// r14 -> r22,r32 -> r50
// General registers used:
// r14 -> r22,r32 -> r50
// Predicate registers used:
// p6 -> p15
@ -195,10 +195,10 @@ EXP_INV_LN2_2TO63 = f7
EXP_W_2TO56_RSH = f9
exp_ln2_by_128_hi = f11
EXP_RSHF_2TO56 = f32
exp_ln2_by_128_lo = f33
EXP_RSHF_2TO56 = f32
exp_ln2_by_128_lo = f33
EXP_RSHF = f34
EXP_Nfloat = f35
EXP_Nfloat = f35
exp_r = f36
exp_rsq = f37
EXP_2M = f38
@ -206,7 +206,7 @@ exp_S1 = f39
exp_T1 = f40
exp_P = f41
exp_S = f42
EXP_NORM_f8 = f43
EXP_NORM_f8 = f43
exp_S2 = f44
exp_T2 = f45
@ -281,19 +281,19 @@ RODATA
// double-extended 1/ln(2)
// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
// 3fff b8aa 3b29 5c17 f0bc
// 3fff b8aa 3b29 5c17 f0bc
// For speed the significand will be loaded directly with a movl and setf.sig
// and the exponent will be bias+63 instead of bias+0. Thus subsequent
// computations need to scale appropriately.
// The constant 128/ln(2) is needed for the computation of w. This is also
// The constant 128/ln(2) is needed for the computation of w. This is also
// obtained by scaling the computations.
//
// Two shifting constants are loaded directly with movl and setf.d.
// 1. EXP_RSHF_2TO56 = 1.1000..00 * 2^(63-7)
// Two shifting constants are loaded directly with movl and setf.d.
// 1. EXP_RSHF_2TO56 = 1.1000..00 * 2^(63-7)
// This constant is added to x*1/ln2 to shift the integer part of
// x*128/ln2 into the rightmost bits of the significand.
// The result of this fma is EXP_W_2TO56_RSH.
// 2. EXP_RSHF = 1.1000..00 * 2^(63)
// 2. EXP_RSHF = 1.1000..00 * 2^(63)
// This constant is subtracted from EXP_W_2TO56_RSH * 2^(-56) to give
// the integer part of w, n, as a floating-point number.
// The result of this fms is EXP_Nfloat.
@ -345,7 +345,7 @@ LOCAL_OBJECT_END(exp_table_2)
LOCAL_OBJECT_START(erfc_p_table)
// Pol_0
// Pol_0
data8 0xBEA3260C63CB0446 //A15 = -5.70673541831883454676e-07
data8 0x3EE63D6178077654 //A14 = +1.06047480138940182343e-05
data8 0xBF18646BC5FC70A7 //A13 = -9.30491237309283694347e-05
@ -362,7 +362,7 @@ data8 0xBFE81270C361852B //A3 = -7.52251035312075583309e-01
data8 0x3FEFFFFFC67295FC //A2 = +9.99999892800303301771e-01
data8 0xBFF20DD74F8CD2BF //A1 = -1.12837916445020868099e+00
data8 0x3FEFFFFFFFFE7C1D //A0 = +9.99999999988975570714e-01
// Pol_1
// Pol_1
data8 0xBDE8EC4BDD953B56 //A15 = -1.81338928934942767144e-10
data8 0x3E43607F269E2A1C //A14 = +9.02309090272196442358e-09
data8 0xBE8C4D9E69C10E02 //A13 = -2.10875261143659275328e-07
@ -379,7 +379,7 @@ data8 0xBFE547BFE39AE2EA //A3 = -6.65008492032112467310e-01
data8 0x3FEE7C91BDF13578 //A2 = +9.52706213932898128515e-01
data8 0xBFF1CB5B61F8C589 //A1 = -1.11214769621105541214e+00
data8 0x3FEFEA56BC81FD37 //A0 = +9.97355812243688815239e-01
// Pol_2
// Pol_2
data8 0xBD302724A12F46E0 //A15 = -5.73866382814058809406e-14
data8 0x3D98889B75D3102E //A14 = +5.57829983681360947356e-12
data8 0xBDF16EA15074A1E9 //A13 = -2.53671153922423457844e-10
@ -396,7 +396,7 @@ data8 0xBFD224DE9F62C258 //A3 = -2.83500342989133623476e-01
data8 0x3FE28A95CB8C6D3E //A2 = +5.79417131000276437708e-01
data8 0xBFEC21205D358672 //A1 = -8.79043752717008257224e-01
data8 0x3FEDAE44D5EDFE5B //A0 = +9.27523057776805771830e-01
// Pol_3
// Pol_3
data8 0xBCA3BCA734AC82F1 //A15 = -1.36952437983096410260e-16
data8 0x3D16740DC3990612 //A14 = +1.99425676175410093285e-14
data8 0xBD77F4353812C46A //A13 = -1.36162367755616790260e-12
@ -419,8 +419,8 @@ LOCAL_OBJECT_END(erfc_p_table)
.section .text
GLOBAL_LIBM_ENTRY(erfcf)
// Form index i for table erfc_p_table as exponent of x
// We use i + bias in real calculations
// Form index i for table erfc_p_table as exponent of x
// We use i + bias in real calculations
{ .mlx
getf.exp GR_IndxPlusBias = f8 // (sign + exp + bias) of x
movl exp_GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc //signif.of 1/ln2
@ -445,14 +445,14 @@ GLOBAL_LIBM_ENTRY(erfcf)
;;
// Form two constants we need
// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
// p9: x = 0,+inf,-inf,nan,unnorm.
// p10: x!= 0,+inf,-inf,nan,unnorm.
{ .mfi
setf.sig EXP_INV_LN2_2TO63 = exp_GR_sig_inv_ln2 // Form 1/ln2*2^63
fclass.m p9,p10 = f8,0xef
fclass.m p9,p10 = f8,0xef
shl GR_ShftPi_bias = GR_BIAS, 7
}
{ .mfi
@ -484,7 +484,7 @@ GLOBAL_LIBM_ENTRY(erfcf)
}
;;
// Form shift GR_ShftPi from the beginning of erfc_p_table
// Form shift GR_ShftPi from the beginning of erfc_p_table
// to the polynomial with number i
{ .mfi
ldfps FR_UnfBound, FR_EpsNorm = [EXP_AD_TB1],8
@ -494,11 +494,11 @@ GLOBAL_LIBM_ENTRY(erfcf)
{ .mfi
setf.d EXP_RSHF = exp_GR_rshf // Form right shift 1.100 * 2^63
(p7) fms.s1 FR_AbsArg = f1, f0, f8 // |x| if x < 0
mov exp_TB1_size = 0x100
mov exp_TB1_size = 0x100
}
;;
// Form pointer GR_P_POINT_3 to the beginning of erfc_p_table
// Form pointer GR_P_POINT_3 to the beginning of erfc_p_table
{ .mfi
setf.d FR_05 = GR_05
nop.f 0
@ -517,7 +517,7 @@ GLOBAL_LIBM_ENTRY(erfcf)
add GR_P_POINT_2 = GR_P_POINT_3, GR_ShftPi
}
{ .mfi
ldfe exp_ln2_by_128_hi = [EXP_AD_TB1],16
ldfe exp_ln2_by_128_hi = [EXP_AD_TB1],16
fma.s1 FR_NormX = f8,f1,f0
add GR_P_POINT_3 = GR_P_POINT_3, GR_ShftPi
}
@ -526,19 +526,19 @@ GLOBAL_LIBM_ENTRY(erfcf)
// Load coefficients for polynomial P15(x)
{ .mfi
ldfpd FR_A15, FR_A14 = [GR_P_POINT_1], 16
nop.f 0
nop.f 0
add GR_P_POINT_3 = 0x30, GR_P_POINT_3
}
{ .mfi
ldfe exp_ln2_by_128_lo = [EXP_AD_TB1], 16
nop.f 0
add GR_P_POINT_2 = 0x20, GR_P_POINT_2
nop.f 0
add GR_P_POINT_2 = 0x20, GR_P_POINT_2
}
;;
// Now EXP_AD_TB1 points to the beginning of table 1
{ .mlx
ldfpd FR_A13, FR_A12 = [GR_P_POINT_1]
ldfpd FR_A13, FR_A12 = [GR_P_POINT_1]
movl GR_1_by_6 = 0x3FC5555555555555
}
{ .mfi
@ -564,10 +564,10 @@ GLOBAL_LIBM_ENTRY(erfcf)
// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
{ .mfi
ldfpd FR_A7, FR_A6 = [GR_P_POINT_3]
ldfpd FR_A7, FR_A6 = [GR_P_POINT_3]
fma.s1 EXP_W_2TO56_RSH = EXP_NORM_f8,EXP_INV_LN2_2TO63,EXP_RSHF_2TO56
add EXP_AD_TB2 = exp_TB1_size, EXP_AD_TB1
}
{ .mfi
ldfpd FR_A5, FR_A4 = [GR_P_POINT_4], 16
@ -581,7 +581,7 @@ GLOBAL_LIBM_ENTRY(erfcf)
fmerge.s FR_X = f8,f8
nop.i 0
}
{ .mfi
{ .mfi
ldfpd FR_A1, FR_A0 = [GR_P_POINT_1]
nop.f 0
nop.i 0
@ -601,7 +601,7 @@ GLOBAL_LIBM_ENTRY(erfcf)
nop.m 0
(p6) fcmp.gt.unc.s1 p15,p0 = FR_AbsArg, FR_POS_ARG_ASYMP //p6: x > 0
nop.i 0
}
}
;;
{ .mfi
@ -616,7 +616,7 @@ GLOBAL_LIBM_ENTRY(erfcf)
}
;;
// Nfloat = round_int(W)
// Nfloat = round_int(W)
// The significand of EXP_W_2TO56_RSH contains the rounded integer part of W,
// as a twos complement number in the lower bits (that is, it may be negative).
// That twos complement number (called N) is put into exp_GR_N.
@ -629,12 +629,12 @@ GLOBAL_LIBM_ENTRY(erfcf)
nop.m 0
fms.s1 EXP_Nfloat = EXP_W_2TO56_RSH, EXP_2TOM56, EXP_RSHF
nop.i 0
}
}
{ .mfb
(p15) mov GR_Parameter_TAG = 209
(p15) fma.s.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,f0 //Result.for x>10.06
(p15) br.cond.spnt __libm_error_region
}
}
;;
// Now we can calculate polynomial P15(x)
@ -652,19 +652,19 @@ GLOBAL_LIBM_ENTRY(erfcf)
{ .mfi
nop.m 0
fma.s1 FR_P15_1_2 = FR_A13, FR_AbsArg, FR_A12
nop.i 0
fma.s1 FR_P15_1_2 = FR_A13, FR_AbsArg, FR_A12
nop.i 0
}
;;
{ .mfi
getf.sig exp_GR_N = EXP_W_2TO56_RSH
fma.s1 FR_P15_2_1 = FR_A9, FR_AbsArg, FR_A8
nop.i 0
getf.sig exp_GR_N = EXP_W_2TO56_RSH
fma.s1 FR_P15_2_1 = FR_A9, FR_AbsArg, FR_A8
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 FR_P15_2_2 = FR_A11, FR_AbsArg, FR_A10
fma.s1 FR_P15_2_2 = FR_A11, FR_AbsArg, FR_A10
nop.i 0
}
;;
@ -672,7 +672,7 @@ GLOBAL_LIBM_ENTRY(erfcf)
{ .mfi
nop.m 0
fma.s1 FR_P15_3_1 = FR_A5, FR_AbsArg, FR_A4
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
@ -691,7 +691,7 @@ GLOBAL_LIBM_ENTRY(erfcf)
and exp_GR_index_1 = 0x0f, exp_GR_N
fma.s1 FR_P15_4_1 = FR_A1, FR_AbsArg, FR_A0
shr r2 = exp_GR_N, 0x7
}
{ .mfi
and exp_GR_index_2_16 = 0x70, exp_GR_N
@ -700,12 +700,12 @@ GLOBAL_LIBM_ENTRY(erfcf)
}
;;
// EXP_AD_T1 has address of T1
// EXP_AD_T2 has address if T2
// EXP_AD_T1 has address of T1
// EXP_AD_T2 has address if T2
{ .mfi
add EXP_AD_T2 = EXP_AD_TB2, exp_GR_index_2_16
nop.f 0
nop.f 0
shladd EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1
}
{ .mfi
@ -716,15 +716,15 @@ GLOBAL_LIBM_ENTRY(erfcf)
;;
// Create Scale = 2^M
// r = x - Nfloat * ln2_by_128_hi
// r = x - Nfloat * ln2_by_128_hi
{ .mfi
setf.exp EXP_2M = exp_GR_biased_M
fma.s1 FR_P15_7_1 = FR_P15_0_1, FR_P15_1_1, FR_P15_1_2
nop.i 0
}
{ .mfi
ldfe exp_T2 = [EXP_AD_T2]
ldfe exp_T2 = [EXP_AD_T2]
nop.f 0
nop.i 0
}
@ -739,45 +739,45 @@ GLOBAL_LIBM_ENTRY(erfcf)
}
{ .mfi
nop.m 0
fma.s1 FR_P15_8_1 = FR_P15_1_1, FR_P15_2_2, FR_P15_2_1
fma.s1 FR_P15_8_1 = FR_P15_1_1, FR_P15_2_2, FR_P15_2_1
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 FR_P15_9_1 = FR_P15_1_1, FR_P15_4_2, FR_P15_4_1
fma.s1 FR_P15_9_1 = FR_P15_1_1, FR_P15_4_2, FR_P15_4_1
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 FR_P15_9_2 = FR_P15_1_1, FR_P15_3_2, FR_P15_3_1
fma.s1 FR_P15_9_2 = FR_P15_1_1, FR_P15_3_2, FR_P15_3_1
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 exp_P = FR_1_by_6, exp_r, FR_05
fma.s1 exp_P = FR_1_by_6, exp_r, FR_05
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 exp_rsq = exp_r, exp_r, f0
fma.s1 exp_rsq = exp_r, exp_r, f0
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 FR_P15_13_1 = FR_P15_7_2, FR_P15_7_1, FR_P15_8_1
fma.s1 FR_P15_13_1 = FR_P15_7_2, FR_P15_7_1, FR_P15_8_1
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 FR_P15_14_1 = FR_P15_7_2, FR_P15_9_2, FR_P15_9_1
fma.s1 FR_P15_14_1 = FR_P15_7_2, FR_P15_9_2, FR_P15_9_1
nop.i 0
}
{ .mfi
@ -794,7 +794,7 @@ GLOBAL_LIBM_ENTRY(erfcf)
}
{ .mfi
nop.m 0
fma.s1 exp_S1 = EXP_2M, exp_T2, f0
fma.s1 exp_S1 = EXP_2M, exp_T2, f0
nop.i 0
}
;;
@ -816,30 +816,30 @@ GLOBAL_LIBM_ENTRY(erfcf)
{ .mfi
nop.m 0
fma.s1 FR_Exp = exp_S, exp_P, exp_S // exp(-x^2)
nop.i 0
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
fma.s.s0 FR_Tmpf = f8, f1, f0 // Flag d
nop.i 0
nop.i 0
}
;;
//p6: result for 0 < x < = POS_ARG_ASYMP
//p6: result for 0 < x < = POS_ARG_ASYMP
//p7: result for - NEG_ARG_ASYMP <= x < 0
//p8: exit for - NEG_ARG_ASYMP <= x <= UnfBound, x!=0
.pred.rel "mutex",p6,p7
{ .mfi
nop.m 0
(p6) fma.s.s0 f8 = FR_Exp, FR_Pol, f0
nop.i 0
(p6) fma.s.s0 f8 = FR_Exp, FR_Pol, f0
nop.i 0
}
{ .mfb
mov GR_Parameter_TAG = 209
(p7) fnma.s.s0 f8 = FR_Exp, FR_Pol, FR_2
(p8) br.ret.sptk b0
(p8) br.ret.sptk b0
}
;;
@ -847,7 +847,7 @@ GLOBAL_LIBM_ENTRY(erfcf)
{ .mfb
nop.m 0
nop.f 0
(p10) br.cond.spnt __libm_error_region
(p10) br.cond.spnt __libm_error_region
}
;;
@ -921,9 +921,9 @@ GLOBAL_LIBM_END(erfcf)
// Call via (p10) br.cond.spnt __libm_error_region
// for UnfBound < x < = POS_ARG_ASYMP
// for UnfBound < x < = POS_ARG_ASYMP
// and
//
//
// call via (p15) br.cond.spnt __libm_error_region
// for x > POS_ARG_ASYMP
@ -936,7 +936,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@ -944,18 +944,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
@ -973,7 +973,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function

File diff suppressed because it is too large Load Diff

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -82,7 +82,7 @@
// 5. |x| = INF
// Return erff(x) = sign(x) * 1.0
//
// 6. x = [S,Q]NaN
// 6. x = [S,Q]NaN
// Return erff(x) = QNaN
//
// 7. x is positive denormal
@ -95,11 +95,11 @@
//
// Registers used
//==============================================================
// Floating Point registers used:
// Floating Point registers used:
// f8, input
// f32 -> f59
// General registers used:
// General registers used:
// r32 -> r45, r2, r3
// Predicate registers used:
@ -180,7 +180,7 @@ data8 0xBF468D71CF4F0918 // C3
data8 0x40312115B0932F24 // D0
data8 0xC0160D6CD0991EA3 // D1
data8 0xBFE04A567A6DBE4A // D2
data8 0xBF4207BC640D1509 // B0
data8 0xBF4207BC640D1509 // B0
// Polynomial coefficients for the erf(x), 0.25 <= |x| < 0.5
data8 0x3F90849356383F58 // C0
data8 0x3F830BD5BA240F09 // C1
@ -217,7 +217,7 @@ data8 0xC08A5C9D5FE8B9F6 // D0
data8 0x406EFF5F088CEC4B // D1
data8 0xC03A5743DF38FDE0 // D2
data8 0xBEE397A9FA5686A2 // B0
// Polynomial coefficients for the erf(x), -0.125 < x < 0.125
// Polynomial coefficients for the erf(x), -0.125 < x < 0.125
data8 0x3FF20DD7504270CB // C0
data8 0xBFD8127465AFE719 // C1
data8 0x3FBCE2D77791DD77 // C2
@ -269,10 +269,10 @@ GLOBAL_LIBM_ENTRY(erff)
;;
{ .mfi
getf.s rArg = f8 // x in GR
getf.s rArg = f8 // x in GR
fclass.m p7,p0 = f8, 0x0b // is x denormal ?
// sign bit and 2 most bits in significand
shl rMask = rMask, 20
shl rMask = rMask, 20
}
{ .mfi
ld8 rDataPtr = [rDataPtr]
@ -296,7 +296,7 @@ GLOBAL_LIBM_ENTRY(erff)
{ .mfi
andcm rOffset2 = rArg, rMask
fclass.m p6,p0 = f8, 0xc7 // is x [S,Q]NaN or +/-0 ?
shl rBound = rBound, 20 // 0.125f in GR
shl rBound = rBound, 20 // 0.125f in GR
}
{ .mfb
andcm rAbsArg = rArg, rSignBit // |x| in GR
@ -311,7 +311,7 @@ GLOBAL_LIBM_ENTRY(erff)
shr rOffset2 = rOffset2, 21
}
{ .mfi
cmp.lt p10, p8 = rAbsArg, rBound // |x| < 0.125?
cmp.lt p10, p8 = rAbsArg, rBound // |x| < 0.125?
nop.f 0
adds rCoeffAddr3 = 16, rDataPtr
}
@ -332,8 +332,8 @@ GLOBAL_LIBM_ENTRY(erff)
{ .mfi
shladd rCoeffAddr1 = rBias, 4, rDataPtr
fma.s1 fArg3Sgn = fArgSqr, f8, f0 // sign(x)*|x|^3
// is |x| < 4.0?
cmp.lt p11, p12 = rAbsArg, rSaturation
// is |x| < 4.0?
cmp.lt p11, p12 = rAbsArg, rSaturation
}
{ .mfi
shladd rCoeffAddr3 = rBias, 4, rCoeffAddr3
@ -345,7 +345,7 @@ GLOBAL_LIBM_ENTRY(erff)
{ .mfi
(p11) ldfpd fC0, fC1 = [rCoeffAddr1]
(p9) fmerge.s f8 = f8,f1 // +/- inf
(p12) adds rDataPtr = 512, rDataPtr
(p12) adds rDataPtr = 512, rDataPtr
}
{ .mfb
(p11) ldfpd fC2, fC3 = [rCoeffAddr3], 16
@ -434,7 +434,7 @@ GLOBAL_LIBM_ENTRY(erff)
{ .mfi
nop.m 0
fma.s1 fPolATmp = fA3, fAbsArg, fA2 // A3*|x| + A2
fma.s1 fPolATmp = fA3, fAbsArg, fA2 // A3*|x| + A2
nop.i 0
}
{ .mfi
@ -446,7 +446,7 @@ GLOBAL_LIBM_ENTRY(erff)
{ .mfi
nop.m 0
// C3*|x|^3 + C2*x^2 + C1*|x| + C0
fma.s1 fPolC = fPolC, fArgSqr, fPolCTmp
fma.s1 fPolC = fPolC, fArgSqr, fPolCTmp
nop.i 0
}
;;
@ -454,31 +454,31 @@ GLOBAL_LIBM_ENTRY(erff)
{ .mfi
nop.m 0
// PolD = sign(x)*(|x|^7 + D2*x^6 + D1*|x|^5 + D0*x^4)
fma.d.s1 fPolD = fPolD, fArg4Sgn, fPolDTmp
fma.d.s1 fPolD = fPolD, fArg4Sgn, fPolDTmp
nop.i 0
}
;;
{ .mfi
nop.m 0
// PolA = A3|x|^3 + A2*x^2 + A1*|x| + A0
fma.d.s1 fPolA = fPolATmp, fArgSqr, fPolA
// PolA = A3|x|^3 + A2*x^2 + A1*|x| + A0
fma.d.s1 fPolA = fPolATmp, fArgSqr, fPolA
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
// PolC = B0*x^4 + C3*|x|^3 + C2*|x|^2 + C1*|x| + C0
fma.d.s1 fPolC = fPolC, f1, fB0
// PolC = B0*x^4 + C3*|x|^3 + C2*|x|^2 + C1*|x| + C0
fma.d.s1 fPolC = fPolC, f1, fB0
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
(p14) fma.s.s0 f8 = fPolC, fPolD, fPolA // for positive x
nop.i 0
nop.i 0
}
{ .mfb
nop.m 0
@ -522,7 +522,7 @@ erff_saturation:
br.ret.sptk b0 // Exit for 4.0 <=|x|< +inf
}
;;
// Here if x is single precision denormal
erff_denormal:
{ .mfi

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -68,7 +68,7 @@
//
// 3. Main path: 1/8 <= |x| < 6.53
// For several ranges of 1/8 <= |x| < 6.53
// Return erfl(x) = sign(x)*((A0H+A0L) + y*(A1H+A1L) + y^2*(A2H+A2L) +
// Return erfl(x) = sign(x)*((A0H+A0L) + y*(A1H+A1L) + y^2*(A2H+A2L) +
// + y^3*A3 + y^4*A4 + ... + y^25*A25 )
// where y = (|x|/a) - b
//
@ -83,7 +83,7 @@
// 4.0 <= |x| < 6.53 a = 4.0, b = 1.5
// ( [3.25;4.0] subrange separated for monotonicity issues resolve )
//
// 4. Saturation path: 6.53 <= |x| < +INF
// 4. Saturation path: 6.53 <= |x| < +INF
// Return erfl(x) = sign(x)*(1.0 - tiny_value)
// (tiny_value ~ 1e-1233)
//
@ -109,10 +109,10 @@
// Multiprecision have to be performed only for first few
// polynomial iterations (up to 3-rd x degree)
// Here we use the same parallelisation way as above:
// Split whole polynomial to first, "multiprecision" part, and second,
// Split whole polynomial to first, "multiprecision" part, and second,
// so called "tail", native precision part.
//
// 1) Multiprecision part:
// 1) Multiprecision part:
// [v1=(A0H+A0L)+y*(A1H+A1L)] + [v2=y^2*((A2H+A2L)+y*A3)]
// v1 and v2 terms calculated in parallel
//
@ -120,23 +120,23 @@
// v3 = x^4 * ( A4 + x*A5 + ... + x^21*A25 )
// v3 is splitted to 2 even parts (10 coefficient in each one).
// These 2 parts are also factorized using binary tree technique.
//
//
// So Multiprecision and Tail parts cost is almost the same
// and we have both results ready before final summation.
//
// 4. Saturation path: 6.53 <= |x| < +INF
// 4. Saturation path: 6.53 <= |x| < +INF
//
// We use formula sign(x)*(1.0 - tiny_value) instead of simple sign(x)*1.0
// just to meet IEEE requirements for different rounding modes in this case.
//
// Registers used
//==============================================================
// Floating Point registers used:
// Floating Point registers used:
// f8 - input & output
// f32 -> f90
// General registers used:
// r2, r3, r32 -> r52
// General registers used:
// r2, r3, r32 -> r52
// Predicate registers used:
// p0, p6 -> p11, p14, p15
@ -201,9 +201,9 @@ fA16 = f51
fA17 = f52
fA18 = f53
fA19 = f54
fA20 = f55
fA21 = f56
fA22 = f57
fA20 = f55
fA21 = f56
fA22 = f57
fA23 = f58
fA24 = f59
fA25 = f60
@ -232,10 +232,10 @@ fRes3H = f79
fRes3L = f80
fRes4 = f81
fTT = f82
fTT = f82
fTH = f83
fTL = f84
fTT2 = f85
fTT2 = f85
fTH2 = f86
fTL2 = f87
@ -252,7 +252,7 @@ RODATA
LOCAL_OBJECT_START(erfl_data)
////////// Main tables ///////////
_0p125_to_0p25_data: // exp = 2^-3
// Polynomial coefficients for the erf(x), 1/8 <= |x| < 1/4
// Polynomial coefficients for the erf(x), 1/8 <= |x| < 1/4
data8 0xACD9ED470F0BB048, 0x0000BFF4 //A3 = -6.5937529303909561891162915809e-04
data8 0xBF6A254428DDB452 //A2H = -3.1915980570631852578089571182e-03
data8 0xBC131B3BE3AC5079 //A2L = -2.5893976889070198978842231134e-19
@ -275,7 +275,7 @@ data8 0x92E992C58B7C3847, 0x0000BFC6 //A14 = -7.9641369349930600223371163611e-18
LOCAL_OBJECT_END(erfl_data)
LOCAL_OBJECT_START(_0p25_to_0p5_data)
// Polynomial coefficients for the erf(x), 1/4 <= |x| < 1/2
// Polynomial coefficients for the erf(x), 1/4 <= |x| < 1/2
data8 0xF083628E8F7CE71D, 0x0000BFF6 //A3 = -3.6699405305266733332335619531e-03
data8 0xBF978749A434FE4E //A2H = -2.2977018973732214746075186440e-02
data8 0xBC30B3FAFBC21107 //A2L = -9.0547407100537663337591537643e-19
@ -298,7 +298,7 @@ data8 0x9CC8FFFBDDCF9853, 0x0000BFD4 //A14 = -1.3925319209173383944263942226e-13
LOCAL_OBJECT_END(_0p25_to_0p5_data)
LOCAL_OBJECT_START(_0p5_to_1_data)
// Polynomial coefficients for the erf(x), 1/2 <= |x| < 1
// Polynomial coefficients for the erf(x), 1/2 <= |x| < 1
data8 0xDB742C8FB372DBE0, 0x00003FF6 //A3 = 3.3485993187250381721535255963e-03
data8 0xBFBEDC5644353C26 //A2H = -1.2054957547410136142751468924e-01
data8 0xBC6D7215B023455F //A2L = -1.2770012232203569059818773287e-17
@ -321,7 +321,7 @@ data8 0xB989FDB3795165C7, 0x00003FE1 //A14 = 1.3499740992928183247608593000e-09
LOCAL_OBJECT_END(_0p5_to_1_data)
LOCAL_OBJECT_START(_1_to_2_data)
// Polynomial coefficients for the erf(x), 1 <= |x| < 2.0
// Polynomial coefficients for the erf(x), 1 <= |x| < 2.0
data8 0x8E15015F5B55BEAC, 0x00003FFC //A3 = 1.3875200409423426678618977531e-01
data8 0xBFC6D5A95D0A1B7E //A2H = -1.7839543383544403942764233761e-01
data8 0xBC7499F704C80E02 //A2L = -1.7868888188464394090788198634e-17
@ -344,7 +344,7 @@ data8 0xEC6E63BB4507B585, 0x0000BFEE //A14 = -1.4092398243085031882423746824e-05
LOCAL_OBJECT_END(_1_to_2_data)
LOCAL_OBJECT_START(_2_to_3p25_data)
// Polynomial coefficients for the erf(x), 2 <= |x| < 3.25
// Polynomial coefficients for the erf(x), 2 <= |x| < 3.25
data8 0xCEDBA58E8EE6F055, 0x00003FF7 //A3 = 6.3128050215859026984338771121e-03
data8 0xBF5B60D5E974CBBD //A2H = -1.6710366233609740427984435840e-03
data8 0xBC0E11E2AEC18AF6 //A2L = -2.0376133202996259839305825162e-19
@ -367,7 +367,7 @@ data8 0xF2F3D8D21E8762E0, 0x0000BFF7 //A14 = -7.4143227286535936033409745884e-03
LOCAL_OBJECT_END(_2_to_3p25_data)
LOCAL_OBJECT_START(_4_to_6p53_data)
// Polynomial coefficients for the erf(x), 4 <= |x| < 6.53
// Polynomial coefficients for the erf(x), 4 <= |x| < 6.53
data8 0xDF3151BE8652827E, 0x00003FD5 //A3 = 3.9646979666953349095427642209e-13
data8 0xBD1C4A9787DF888B //A2H = -2.5127788450714750484839908889e-14
data8 0xB99B35483E4603FD //A2L = -3.3536613901268985626466020210e-31
@ -390,7 +390,7 @@ data8 0x965DA4A80008B7BC, 0x0000BFEE //A14 = -8.9624997201558650125662820562e-06
LOCAL_OBJECT_END(_4_to_6p53_data)
LOCAL_OBJECT_START(_3p25_to_4_data)
// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4
// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4
data8 0xB01D29846286CE08, 0x00003FEE //A3 = 1.0497207328743021499800978059e-05
data8 0xBEC10B1488AEB234 //A2H = -2.0317175474986489113480084279e-06
data8 0xBB7F19701B8B74F9 //A2L = -4.1159669348226960337518214996e-22
@ -415,7 +415,7 @@ LOCAL_OBJECT_END(_3p25_to_4_data)
//////// "Tail" tables //////////
LOCAL_OBJECT_START(_0p125_to_0p25_data_tail)
// Polynomial coefficients for the erf(x), 1/8 <= |x| < 1/4
// Polynomial coefficients for the erf(x), 1/8 <= |x| < 1/4
data8 0x93086CBD21ED3962, 0x00003FCA //A13 = 1.2753071968462837024755878679e-16
data8 0x83CB5045A6D4B419, 0x00003FCF //A12 = 3.6580237062957773626379648530e-15
data8 0x8FCDB723209690EB, 0x0000BFD3 //A11 = -6.3861616307180801527566117146e-14
@ -429,7 +429,7 @@ data8 0xCC43247F4410C54A, 0x00003FEF //A4 = 2.4349960762505993017186935493e-05
LOCAL_OBJECT_END(_0p125_to_0p25_data_tail)
LOCAL_OBJECT_START(_0p25_to_0p5_data_tail)
// Polynomial coefficients for the erf(x), 1/4 <= |x| < 1/2
// Polynomial coefficients for the erf(x), 1/4 <= |x| < 1/2
data8 0x8CEAC59AF361B78A, 0x0000BFD6 //A13 = -5.0063802958258679384986669123e-13
data8 0x9BC67404F348C0CE, 0x00003FDB //A12 = 1.7709590771868743572061278273e-11
data8 0xF4B5D0348AFAAC7A, 0x00003FDB //A11 = 2.7820329729584630464848160970e-11
@ -443,7 +443,7 @@ data8 0xAA94D5E68033B764, 0x00003FF4 //A4 = 6.5071635765452563856926608000e-04
LOCAL_OBJECT_END(_0p25_to_0p5_data_tail)
LOCAL_OBJECT_START(_0p5_to_1_data_tail)
// Polynomial coefficients for the erf(x), 1/2 <= |x| < 1
// Polynomial coefficients for the erf(x), 1/2 <= |x| < 1
data8 0x9ED99EDF111CB785, 0x0000BFE4 //A13 = -9.2462916180079278241704711522e-09
data8 0xDEAF7539AE2FB062, 0x0000BFE5 //A12 = -2.5923990465973151101298441139e-08
data8 0xA392D5E5CC9DB1A7, 0x00003FE9 //A11 = 3.0467952847327075747032372101e-07
@ -457,7 +457,7 @@ data8 0x9A4DAF550A2CC29A, 0x00003FF8 //A4 = 9.4179355839141698591817907680e-03
LOCAL_OBJECT_END(_0p5_to_1_data_tail)
LOCAL_OBJECT_START(_1_to_2_data_tail)
// Polynomial coefficients for the erf(x), 1 <= |x| < 2.0
// Polynomial coefficients for the erf(x), 1 <= |x| < 2.0
data8 0x969EAC5C7B46CAB9, 0x00003FEF //A13 = 1.7955281439310148162059582795e-05
data8 0xA2ED832912E9FCD9, 0x00003FF1 //A12 = 7.7690020847111408916570845775e-05
data8 0x85677C39C48E43E7, 0x0000BFF3 //A11 = -2.5444839340796031538582511806e-04
@ -471,7 +471,7 @@ data8 0xB6AD4AE850DBF526, 0x0000BFFA //A4 = -4.4598858458861014323191919669e-02
LOCAL_OBJECT_END(_1_to_2_data_tail)
LOCAL_OBJECT_START(_2_to_3p25_data_tail)
// Polynomial coefficients for the erf(x), 2 <= |x| < 3.25
// Polynomial coefficients for the erf(x), 2 <= |x| < 3.25
data8 0x847C24DAC7C7558B, 0x00003FF5 //A13 = 1.0107798565424606512130100541e-03
data8 0xCB6340EAF02C3DF8, 0x00003FF8 //A12 = 1.2413800617425931997420375435e-02
data8 0xB5163D252DBBC107, 0x0000BFF9 //A11 = -2.2105330871844825370020459523e-02
@ -485,7 +485,7 @@ data8 0x88E42D8F47FAB60E, 0x0000BFF9 //A4 = -1.6710366233609742619461063050e-02
LOCAL_OBJECT_END(_2_to_3p25_data_tail)
LOCAL_OBJECT_START(_4_to_6p53_data_tail)
// Polynomial coefficients for the erf(x), 4 <= |x| < 6.53
// Polynomial coefficients for the erf(x), 4 <= |x| < 6.53
data8 0xD8235ABF08B8A6D1, 0x00003FEE //A13 = 1.2882834877224764938429832586e-05
data8 0xAEDF44F9C77844C2, 0x0000BFEC //A12 = -2.6057980393716019511497492890e-06
data8 0xCCD5490956A4FCFD, 0x00003FEA //A11 = 7.6306293047300300284923464089e-07
@ -499,7 +499,7 @@ data8 0xA29C398F83F8A0D1, 0x0000BFD9 //A4 = -4.6216613698438694005327544047e-12
LOCAL_OBJECT_END(_4_to_6p53_data_tail)
LOCAL_OBJECT_START(_3p25_to_4_data_tail)
// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4
// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4
data8 0x95BE1BEAD738160F, 0x00003FF2 //A13 = 1.4280568455209843005829620687e-04
data8 0x8108C8FFAC0F0B21, 0x0000BFF4 //A12 = -4.9222685622046459346377033307e-04
data8 0xD72A7FAEE7832BBE, 0x00003FF4 //A11 = 8.2079319302109644436194651098e-04
@ -514,7 +514,7 @@ LOCAL_OBJECT_END(_3p25_to_4_data_tail)
LOCAL_OBJECT_START(_0_to_1o8_data)
// Polynomial coefficients for the erf(x), 0.0 <= |x| < 0.125
// Polynomial coefficients for the erf(x), 0.0 <= |x| < 0.125
data8 0x3FF20DD750429B6D, 0x3C71AE3A8DDFFEDE //A1H, A1L
data8 0xF8B0DACE42525CC2, 0x0000BFEE //A15
data8 0xFCD02E1BF0EC2C37, 0x00003FF1 //A13
@ -536,7 +536,7 @@ LOCAL_OBJECT_END(_denorm_data)
GLOBAL_LIBM_ENTRY(erfl)
{ .mfi
alloc r32 = ar.pfs, 0, 21, 0, 0
alloc r32 = ar.pfs, 0, 21, 0, 0
fmerge.se fArgAbsNorm = f1, f8 // normalized x (1.0 <= x < 2.0)
addl rSignBit = 0x20000, r0 // Set sign bit for exponent
}
@ -547,26 +547,26 @@ GLOBAL_LIBM_ENTRY(erfl)
{ .mfi
getf.exp rArgExp = f8 // Get arg exponent
fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and specials
fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and specials
// 0xEF = @qnan|@snan|@pos|@neg|@zero|@unorm|@inf
addl rBias = 0xfffc, r0 // Value to subtract from exp
addl rBias = 0xfffc, r0 // Value to subtract from exp
// to get actual interval number
}
{ .mfi
ld8 rDataPtr = [rDataPtr] // Get real common data pointer
fma.s1 fArgSqr = f8, f8, f0 // x^2 (for [0;1/8] path)
addl r2to4 = 0x10000, r0 // unbiased exponent
addl r2to4 = 0x10000, r0 // unbiased exponent
// for [2;4] binary interval
};;
{ .mfi
getf.sig rArgSig = f8 // Get arg significand
getf.sig rArgSig = f8 // Get arg significand
fcmp.lt.s1 p15, p14 = f8, f0 // Is arg negative/positive?
addl rSaturation = 0xd0e, r0 // First 12 bits of
// saturation value signif.
}
{ .mfi
setf.d f1p5 = r1p5 // 1.5 construction
setf.d f1p5 = r1p5 // 1.5 construction
fma.s1 f2p0 = f1,f1,f1 // 2.0 construction
addl r3p25Sign = 0xd00, r0 // First 12 bits of
// 3.25 value signif.
@ -586,7 +586,7 @@ GLOBAL_LIBM_ENTRY(erfl)
{ .mfi
sub rInterval = rArgExp, rBias // Get actual interval number
nop.f 0
shr.u rArgSig = rArgSig, 52 // Leave only 12 bits of sign.
shr.u rArgSig = rArgSig, 52 // Leave only 12 bits of sign.
}
{ .mfi
adds rShiftedDataPtr = 0x10, rDataPtr // Second ptr to data
@ -595,10 +595,10 @@ GLOBAL_LIBM_ENTRY(erfl)
};;
{ .mfi
(p8) cmp.le p8, p10 = r3p25Sign, rArgSig // If sign. is greater
(p8) cmp.le p8, p10 = r3p25Sign, rArgSig // If sign. is greater
// than 1.25? (means arg is in [3.25;4] interval)
nop.f 0
shl rOffset = rInterval, 8 // Make offset from
shl rOffset = rInterval, 8 // Make offset from
// interval number
}
{ .mfi
@ -609,30 +609,30 @@ GLOBAL_LIBM_ENTRY(erfl)
};;
{ .mfi
(p8) adds rOffset = 0x200, rOffset // Add additional offset
(p8) adds rOffset = 0x200, rOffset // Add additional offset
// if arg is in [3.25;4] (another data set)
fma.s1 fArgCube = fArgSqr, f8, f0 // x^3 (for [0;1/8] path)
shl rTailOffset = rInterval, 7 // Make offset to "tail" data
// from interval number
}
{ .mib
setf.exp fTiny = rTiny // Construct "tiny" value
setf.exp fTiny = rTiny // Construct "tiny" value
// for saturation path
cmp.ltu p11, p0 = 0x5, rInterval // if arg > 8
(p9) br.cond.spnt _0_to_1o8
(p9) br.cond.spnt _0_to_1o8
};;
{ .mfi
add rAddr1 = rDataPtr, rOffset // Get address for
// interval data
add rAddr1 = rDataPtr, rOffset // Get address for
// interval data
nop.f 0
shl rTailAddOffset = rInterval, 5 // Offset to interval
// "tail" data
// "tail" data
}
{ .mib
add rAddr2 = rShiftedDataPtr, rOffset // Get second
// address for interval data
(p7) cmp.leu p11, p0 = rSaturation, rArgSig // if arg is
// address for interval data
(p7) cmp.leu p11, p0 = rSaturation, rArgSig // if arg is
// in [6.53;8] interval
(p11) br.cond.spnt _saturation // Branch to Saturation path
};;
@ -660,14 +660,14 @@ GLOBAL_LIBM_ENTRY(erfl)
.pred.rel "mutex",p8,p10
{ .mfi
ldfe fA18 = [rAddr1], 16 // Load A18
(p8) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f2p0 // Add 2.0
(p8) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f2p0 // Add 2.0
// to normalized arg (for [3.24;4] interval)
adds rTailAddr2 = 0x10, rTailAddr1 // First tail
// data address
}
{ .mfi
ldfe fA25 = [rAddr2], 16 // Load A25
(p10) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1p5 // Add 1.5
ldfe fA25 = [rAddr2], 16 // Load A25
(p10) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1p5 // Add 1.5
// to normalized arg
nop.i 0
};;
@ -775,9 +775,9 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fA23 = fA24, fArgAbsNorm, fA23 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA21 = fA22, fArgAbsNorm, fA21 // Polynomial tail
fma.s1 fA21 = fA22, fArgAbsNorm, fA21 // Polynomial tail
nop.i 0
};;
@ -793,7 +793,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fRes3L = fRes3L, f1, fTH // (A3*x+A2)*x^2
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA19 = fA20, fArgAbsNorm, fA19 // Polynomial tail
nop.i 0
@ -804,7 +804,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fRes1H = fTH2, f1, fA0H // A1*x+A0
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fms.s1 fTL2 = fA1H, fArgAbsNorm, fTH2 // A1*x+A0
nop.i 0
@ -815,7 +815,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fA8 = fA9, fArgAbsNorm, fA8 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA10 = fA11, fArgAbsNorm, fA10 // Polynomial tail
nop.i 0
@ -835,7 +835,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fms.s1 fArgAbsNorm11 = fArgAbsNorm4, fArgAbsNorm4, f0 // x^8
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA4 = fA5, fArgAbsNorm, fA4 // Polynomial tail
nop.i 0
@ -846,7 +846,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fRes3L = fRes3L, f1, fA2L // (A3*x+A2)*x^2
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA6 = fA7, fArgAbsNorm, fA6 // Polynomial tail
nop.i 0
@ -857,7 +857,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fTL2 = fTL2, f1, fTT2 // A1*x+A0
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fms.s1 fRes1L = fA0H, f1, fRes1H // A1*x+A0
nop.i 0
@ -868,7 +868,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fA23 = fA25, fArgAbsNorm2, fA23 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA12 = fA14, fArgAbsNorm2, fA12 // Polynomial tail
nop.i 0
@ -879,7 +879,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fA19 = fA21, fArgAbsNorm2, fA19 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA8 = fA10, fArgAbsNorm2, fA8 // Polynomial tail
nop.i 0
@ -890,7 +890,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fA15 = fA17, fArgAbsNorm2, fA15 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fms.s1 fArgAbsNorm11 = fArgAbsNorm11, fArgAbsNorm3, f0 // x^11
nop.i 0
@ -901,7 +901,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fTT = fRes3L, fArgAbsNorm2, f0 // (A3*x+A2)*x^2
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA4 = fA6, fArgAbsNorm2, fA4 // Polynomial tail
nop.i 0
@ -918,7 +918,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fA19 = fA23, fArgAbsNorm4, fA19 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA8 = fA12, fArgAbsNorm4, fA8 // Polynomial tail
nop.i 0
@ -961,7 +961,7 @@ GLOBAL_LIBM_ENTRY(erfl)
{ .mfi
nop.m 0
fma.s1 fRes4 = fA15, fArgAbsNorm11, fA4 // Result of
fma.s1 fRes4 = fA15, fArgAbsNorm11, fA4 // Result of
// polynomial tail
nop.i 0
};;
@ -971,7 +971,7 @@ GLOBAL_LIBM_ENTRY(erfl)
fms.s1 fRes2L = fRes3H, fArgAbsNorm2, fRes2H // (A3*x+A2)*x^2
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fResH = fRes2H, f1, fRes1H // High result
nop.i 0
@ -983,12 +983,12 @@ GLOBAL_LIBM_ENTRY(erfl)
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fRes2L = fRes2L, f1, fTT // (A3*x+A2)*x^2
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fms.s1 fResL = fRes1H, f1, fResH // Low result
nop.i 0
@ -999,13 +999,13 @@ GLOBAL_LIBM_ENTRY(erfl)
fma.s1 fRes1L = fRes1L, f1, fRes2L // Low result
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fResL = fResL, f1, fRes2H // Low result
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
(p15) fneg fResH = fResH // Invert high result if arg is neg.
nop.i 0
@ -1018,12 +1018,12 @@ GLOBAL_LIBM_ENTRY(erfl)
};;
.pred.rel "mutex",p14,p15
{ .mfi
{ .mfi
nop.m 0
(p14) fma.s0 f8 = fResH, f1, fResL // Add high and low results
nop.i 0
}
{ .mfb
{ .mfb
nop.m 0
(p15) fms.s0 f8 = fResH, f1, fResL // Add high and low results
br.ret.sptk b0 // Main path return
@ -1033,12 +1033,12 @@ GLOBAL_LIBM_ENTRY(erfl)
_saturation:
.pred.rel "mutex",p14,p15
{ .mfi
{ .mfi
nop.m 0
(p14) fms.s0 f8 = f1, f1, fTiny // Saturation result r = 1-tiny
nop.i 0
};;
{ .mfb
{ .mfb
nop.m 0
(p15) fnma.s0 f8 = f1, f1, fTiny // Saturation result r = tiny-1
br.ret.sptk b0 // Saturation path return
@ -1048,69 +1048,69 @@ _saturation:
// 0, denormals and special IEEE numbers path /////////////////////////////////
erfl_spec:
{ .mfi
{ .mfi
addl rDataPtr = 0xBE0, rDataPtr // Ptr to denormals coeffs
fclass.m p6,p0 = f8, 0x23 // To filter infinities
// 0x23 = @pos|@neg|@inf
// 0x23 = @pos|@neg|@inf
nop.i 0
};;
{ .mfi
{ .mfi
ldfpd fA1H, fA1L = [rDataPtr] // Load denormals coeffs A1H, A1L
fclass.m p7,p0 = f8, 0xC7 // To filter NaNs & Zeros
// 0xC7 = @pos|@neg|@zero|@qnan|@snan
nop.i 0
};;
{ .mfb
{ .mfb
nop.m 0
(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args
(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args
(p6) br.ret.spnt b0 // exit for x = INF
};;
{ .mfb
{ .mfb
nop.m 0
(p7) fma.s0 f8 = f8, f1, f8 // +/-0 for 0 args
(p7) fma.s0 f8 = f8, f1, f8 // +/-0 for 0 args
// and NaNs for NaNs
(p7) br.ret.spnt b0 // exit for x = NaN or +/-0
};;
{ .mfi
{ .mfi
nop.m 0
fnorm.s0 f8 = f8 // Normalize arg
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fms.s1 fRes1H = f8, fA1H, f0 // HighRes
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fms.s1 fRes1L = f8, fA1L, f0 // LowRes
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fms.s1 fRes1Hd = f8, fA1H, fRes1H // HighRes delta
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fRes = fRes1L, f1, fRes1Hd // LowRes+HighRes delta
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fRes = f8, f8, fRes // r=x^2+r
nop.i 0
};;
{ .mfb
{ .mfb
nop.m 0
fma.s0 f8 = fRes, f1, fRes1H // res = r+ResHigh
br.ret.sptk b0 // 0, denormals, specials return
@ -1120,120 +1120,120 @@ erfl_spec:
// 0 < |x| < 1/8 path /////////////////////////////////////////////////////////
_0_to_1o8:
{ .mmi
{ .mmi
adds rAddr1 = 0xB60, rDataPtr // Ptr 1 to coeffs
adds rAddr2 = 0xB80, rDataPtr // Ptr 2 to coeffs
nop.i 0
};;
{ .mmi
{ .mmi
ldfpd fA1H, fA1L = [rAddr1], 16 // Load A1High, A1Low
ldfe fA13 = [rAddr2], 16 // Load A13
nop.i 0
};;
{ .mmi
{ .mmi
ldfe fA15 = [rAddr1], 48 // Load A15
ldfe fA11 = [rAddr2], 32 // Load A11
nop.i 0
};;
{ .mmi
{ .mmi
ldfe fA9 = [rAddr1], 32 // Load A9
ldfe fA7 = [rAddr2], 32 // Load A7
nop.i 0
};;
{ .mmi
{ .mmi
ldfe fA5 = [rAddr1] // Load A5
ldfe fA3 = [rAddr2] // Load A3
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fms.s1 fRes1H = f8, fA1H, f0 // x*(A1H+A1L)
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fms.s1 fRes1L = f8, fA1L, f0 // x*(A1H+A1L)
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA11 = fA13, fArgSqr, fA11 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fArgFour = fArgSqr, fArgSqr, f0 // a^4
fma.s1 fArgFour = fArgSqr, fArgSqr, f0 // a^4
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA3 = fA5, fArgSqr, fA3 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA7 = fA9, fArgSqr, fA7 // Polynomial tail
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fms.s1 fRes1Hd = f8, fA1H, fRes1H // x*(A1H+A1L) delta
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA11 = fA15, fArgFour, fA11 // Polynomial tail
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA3 = fA7, fArgFour, fA3 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fArgEight = fArgFour, fArgFour, f0 // a^8
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 f8 = fRes1L, f1, fRes1Hd // x*(A1H+A1L)
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fRes = fA11, fArgEight, fA3 //Polynomial tail result
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 f8 = fRes, fArgCube, f8 // (Polynomial tail)*x^3
nop.i 0
};;
{ .mfb
{ .mfb
nop.m 0
fma.s0 f8 = f8, f1, fRes1H // (Polynomial tail)*x^3 +
fma.s0 f8 = f8, f1, fRes1H // (Polynomial tail)*x^3 +
// + x*(A1H+A1L)
br.ret.sptk b0 // [0;1/8] interval return
};;
GLOBAL_LIBM_END(erfl)

View File

@ -70,7 +70,7 @@
// 5. x >= 709.7827
// Result overflows. Set I, O, and call error support
//
// 6. 2^-2 <= x < 709.7827 or -48.0 <= x < -2^-2
// 6. 2^-2 <= x < 709.7827 or -48.0 <= x < -2^-2
// This is the main path. The algorithm is described below:
// Take the input x. w is "how many log2/128 in x?"

View File

@ -68,10 +68,10 @@
// Here we know result is essentially -1 + eps, where eps only affects
// rounded result. Set I.
//
// 5. x >= 88.7228
// 5. x >= 88.7228
// Result overflows. Set I, O, and call error support
//
// 6. 2^-2 <= x < 88.7228 or -24.0 <= x < -2^-2
// 6. 2^-2 <= x < 88.7228 or -24.0 <= x < -2^-2
// This is the main path. The algorithm is described below:
// Take the input x. w is "how many log2/128 in x?"
@ -491,7 +491,7 @@ EXPM1_COMMON:
{ .mfb
nop.m 0
(p7) fma.s.s0 f8 = fA8765432, fXsq, fNormX // Small path,
(p7) fma.s.s0 f8 = fA8765432, fXsq, fNormX // Small path,
// result=xsq*A8765432+x
(p7) br.ret.spnt b0 // Exit if 2^-40 <= |x| < 2^-2
}

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -51,36 +51,36 @@
// 04/17/03 Eliminated misplaced and unused data label
// 12/15/03 Eliminated call to error support on expm1l underflow
//
//*********************************************************************
//*********************************************************************
//
// Function: Combined expl(x) and expm1l(x), where
// x
// x
// expl(x) = e , for double-extended precision x values
// x
// expm1l(x) = e - 1 for double-extended precision x values
//
//*********************************************************************
//*********************************************************************
//
// Resources Used:
//
// Floating-Point Registers: f8 (Input and Return Value)
// f9-f15,f32-f77
// Floating-Point Registers: f8 (Input and Return Value)
// f9-f15,f32-f77
//
// General Purpose Registers:
// General Purpose Registers:
// r14-r38
// r35-r38 (Used to pass arguments to error handling routine)
//
//
// Predicate Registers: p6-p15
//
//*********************************************************************
//*********************************************************************
//
// IEEE Special Conditions:
//
// Denormal fault raised on denormal inputs
// Denormal fault raised on denormal inputs
// Overflow exceptions raised when appropriate for exp and expm1
// Underflow exceptions raised when appropriate for exp and expm1
// (Error Handling Routine called for overflow and Underflow)
// Inexact raised when appropriate by algorithm
// Inexact raised when appropriate by algorithm
//
// exp(inf) = inf
// exp(-inf) = +0
@ -89,13 +89,13 @@
// exp(0) = 1
// exp(EM_special Values) = QNaN
// expm1(inf) = inf
// expm1(-inf) = -1
// expm1(-inf) = -1
// expm1(SNaN) = QNaN
// expm1(QNaN) = QNaN
// expm1(0) = 0
// expm1(EM_special Values) = QNaN
//
//*********************************************************************
//
//*********************************************************************
//
// Implementation and Algorithm Notes:
//
@ -109,36 +109,36 @@
// p6 for exp,
// p7 for expm1,
//
// On output,
// On output,
//
// scale*(Y_hi + Y_lo) approximates exp(X) if exp
// scale*(Y_hi + Y_lo) approximates exp(X)-1 if expm1
//
// The accuracy is sufficient for a highly accurate 64 sig.
// bit implementation. Safe is set if there is no danger of
// overflow/underflow when the result is composed from scale,
// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set.
// Otherwise, one must prepare to handle the possible exception
// appropriately. Note that SAFE not set (false) does not mean
// bit implementation. Safe is set if there is no danger of
// overflow/underflow when the result is composed from scale,
// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set.
// Otherwise, one must prepare to handle the possible exception
// appropriately. Note that SAFE not set (false) does not mean
// that overflow/underflow will occur; only the setting of SAFE
// guarantees the opposite.
//
// **** High Level Overview ****
// **** High Level Overview ****
//
// The method consists of three cases.
//
//
// If |X| < Tiny use case exp_tiny;
// else if |X| < 2^(-m) use case exp_small; m=12 for exp, m=7 for expm1
// else use case exp_regular;
//
// Case exp_tiny:
//
// 1 + X can be used to approximate exp(X)
// 1 + X can be used to approximate exp(X)
// X + X^2/2 can be used to approximate exp(X) - 1
//
// Case exp_small:
//
// Here, exp(X) and exp(X) - 1 can all be
// Here, exp(X) and exp(X) - 1 can all be
// approximated by a relatively simple polynomial.
//
// This polynomial resembles the truncated Taylor series
@ -175,9 +175,9 @@
// r := (X - N*L_hi) - N*L_lo
//
// We pick L_hi such that N*L_hi is representable in 64 sig. bits
// and thus the FMA X - N*L_hi is error free. So r is the
// 1 rounding error from an exact reduction with respect to
//
// and thus the FMA X - N*L_hi is error free. So r is the
// 1 rounding error from an exact reduction with respect to
//
// L_hi + L_lo.
//
// In particular, L_hi has 30 significant bits and can be stored
@ -187,10 +187,10 @@
// Step 2: Approximation
//
// exp(r) - 1 is approximated by a short polynomial of the form
//
//
// r + A_1 r^2 + A_2 r^3 + A_3 r^4 .
//
// Step 3: Composition from Table Values
// Step 3: Composition from Table Values
//
// The value 2^( N / 2^12 ) can be composed from a couple of tables
// of precalculated values. First, express N as three integers
@ -203,8 +203,8 @@
// lsb's, M_1 is the next 6, and K is simply N shifted right
// arithmetically (sign extended) by 12 bits.
//
// Now, 2^( N / 2^12 ) is simply
//
// Now, 2^( N / 2^12 ) is simply
//
// 2^K * 2^( M_1 / 2^6 ) * 2^( M_2 / 2^12 )
//
// Clearly, 2^K needs no tabulation. The other two values are less
@ -215,14 +215,14 @@
// Define two mathematical values, delta_1 and delta_2, implicitly
// such that
//
// T_1 = exp( [M_1 log(2)/2^6] - delta_1 )
// T_1 = exp( [M_1 log(2)/2^6] - delta_1 )
// T_2 = exp( [M_2 log(2)/2^12] - delta_2 )
//
// are representable as 24 significant bits. To illustrate the idea,
// we show how we define delta_1:
// we show how we define delta_1:
//
// T_1 := round_to_24_bits( exp( M_1 log(2)/2^6 ) )
// delta_1 = (M_1 log(2)/2^6) - log( T_1 )
// delta_1 = (M_1 log(2)/2^6) - log( T_1 )
//
// The last equality means mathematical equality. We then tabulate
//
@ -235,7 +235,7 @@
// T and W via
//
// T := T_1 * T_2 ...exactly
// W := W_1 + (1 + W_1)*W_2
// W := W_1 + (1 + W_1)*W_2
//
// W approximates exp( delta ) - 1 where delta = delta_1 + delta_2.
// The mathematical product of T and (W+1) is an accurate representation
@ -243,17 +243,17 @@
//
// Step 4. Reconstruction
//
// Finally, we can reconstruct exp(X), exp(X) - 1.
// Finally, we can reconstruct exp(X), exp(X) - 1.
// Because
//
// X = K * log(2) + (M_1*log(2)/2^6 - delta_1)
// X = K * log(2) + (M_1*log(2)/2^6 - delta_1)
// + (M_2*log(2)/2^12 - delta_2)
// + delta_1 + delta_2 + r ...accurately
// We have
//
// exp(X) ~=~ 2^K * ( T + T*[exp(delta_1+delta_2+r) - 1] )
// ~=~ 2^K * ( T + T*[exp(delta + r) - 1] )
// ~=~ 2^K * ( T + T*[(exp(delta)-1)
// ~=~ 2^K * ( T + T*[(exp(delta)-1)
// + exp(delta)*(exp(r)-1)] )
// ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) )
// ~=~ 2^K * ( Y_hi + Y_lo )
@ -265,7 +265,7 @@
// exp(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
// ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) )
//
// and we combine Y_hi + Y_lo - 2^(-K) into the form of two
// and we combine Y_hi + Y_lo - 2^(-K) into the form of two
// numbers Y_hi + Y_lo carefully.
//
// **** Algorithm Details ****
@ -276,8 +276,8 @@
//
// Case exp_tiny:
//
// The important points are to ensure an accurate result under
// different rounding directions and a correct setting of the SAFE
// The important points are to ensure an accurate result under
// different rounding directions and a correct setting of the SAFE
// flag.
//
// If expm1 is 1, then
@ -296,11 +296,11 @@
// Here we compute a simple polynomial. To exploit parallelism, we split
// the polynomial into several portions.
//
// Let r = X
// Let r = X
//
// If exp ...i.e. exp( argument )
//
// rsq := r * r;
// rsq := r * r;
// r4 := rsq*rsq
// poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6))
// poly_hi := r + rsq*(P_1 + r*P_2)
@ -390,7 +390,7 @@ GR_SAVE_GP = r34
GR_Parameter_X = r35
GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Parameter_TAG = r38
GR_Parameter_TAG = r38
// Floating Point Registers
//
@ -480,25 +480,25 @@ FR_RESULT = f15
// double-extended 1/ln(2)
// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
// 3fff b8aa 3b29 5c17 f0bc
// 3fff b8aa 3b29 5c17 f0bc
// For speed the significand will be loaded directly with a movl and setf.sig
// and the exponent will be bias+63 instead of bias+0. Thus subsequent
// computations need to scale appropriately.
// The constant 2^12/ln(2) is needed for the computation of N. This is also
// The constant 2^12/ln(2) is needed for the computation of N. This is also
// obtained by scaling the computations.
//
// Two shifting constants are loaded directly with movl and setf.d.
// 1. RSHF_2TO51 = 1.1000..00 * 2^(63-12)
// Two shifting constants are loaded directly with movl and setf.d.
// 1. RSHF_2TO51 = 1.1000..00 * 2^(63-12)
// This constant is added to x*1/ln2 to shift the integer part of
// x*2^12/ln2 into the rightmost bits of the significand.
// The result of this fma is N_signif.
// 2. RSHF = 1.1000..00 * 2^(63)
// 2. RSHF = 1.1000..00 * 2^(63)
// This constant is subtracted from N_signif * 2^(-51) to give
// the integer part of N, N_fix, as a floating-point number.
// The result of this fms is float_N.
RODATA
.align 64
.align 64
LOCAL_OBJECT_START(Constants_exp_64_Arg)
//data8 0xB8AA3B295C17F0BC,0x0000400B // Inv_L = 2^12/log(2)
data8 0xB17217F400000000,0x00003FF2 // L_hi = hi part log(2)/2^12
@ -538,8 +538,8 @@ data8 0x8000000000000000, 0x00003FFE // Q1
LOCAL_OBJECT_END(Constants_exp_64_Q)
LOCAL_OBJECT_START(Constants_exp_64_T1)
data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
@ -557,21 +557,21 @@ data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
LOCAL_OBJECT_END(Constants_exp_64_T1)
LOCAL_OBJECT_START(Constants_exp_64_T2)
data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
LOCAL_OBJECT_END(Constants_exp_64_T2)
@ -652,14 +652,14 @@ GLOBAL_IEEE754_ENTRY(expm1l)
//
// Set p7 true for expm1, p6 false
//
//
{ .mlx
getf.exp GR_signexp_x = f8 // Get sign and exponent of x, redo if unorm
movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
{ .mlx
addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp
addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp
movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
}
;;
@ -667,12 +667,12 @@ GLOBAL_IEEE754_ENTRY(expm1l)
{ .mfi
ld8 GR_ad_Arg = [GR_ad_Arg] // Point to Arg table
fclass.m p8, p0 = f8, 0x1E7 // Test x for natval, nan, inf, zero
cmp.eq p7, p6 = r0, r0
cmp.eq p7, p6 = r0, r0
}
{ .mfb
mov GR_exp_half = 0x0FFFE // Exponent of 0.5, for very small path
fnorm.s1 FR_norm_x = f8 // Normalize x
br.cond.sptk exp_continue
br.cond.sptk exp_continue
}
;;
@ -682,13 +682,13 @@ GLOBAL_IEEE754_END(expm1l)
GLOBAL_IEEE754_ENTRY(expl)
//
// Set p7 false for exp, p6 true
//
//
{ .mlx
getf.exp GR_signexp_x = f8 // Get sign and exponent of x, redo if unorm
movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
{ .mlx
addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp
addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp
movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
}
;;
@ -705,9 +705,9 @@ GLOBAL_IEEE754_ENTRY(expl)
}
;;
exp_continue:
exp_continue:
// Form two constants we need
// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
// 1.1000..000 * 2^(63+63-12) to right shift int(N) into the significand
{ .mfi
@ -832,7 +832,7 @@ exp_continue:
// Now we are on the main path for |x| >= 2^-m, m=12 for exp, m=7 for expm1
//
// float_N = round_int(N_signif)
// float_N = round_int(N_signif)
// The significand of N_signif contains the rounded integer part of X * 2^12/ln2,
// as a twos complement number in the lower bits (that is, it may be negative).
// That twos complement number (called N) is put into GR_N.
@ -934,7 +934,7 @@ exp_continue:
;;
{ .mfi
(p7) cmp.lt.unc p8, p9 = 10, GR_K // If expm1, set p8 if K > 10
(p7) cmp.lt.unc p8, p9 = 10, GR_K // If expm1, set p8 if K > 10
fma.s1 FR_poly = FR_r, FR_poly, FR_A1 // poly = r * poly + A1
nop.i 999
}
@ -1033,8 +1033,8 @@ exp_continue:
}
;;
EXP_SMALL:
EXP_SMALL:
// Here if 2^-60 < |x| < 2^-m, m=12 for exp, m=7 for expm1
{ .mfi
(p7) ldfe FR_Q3 = [GR_ad_Q],16 // Get Q3 for small path, if expm1
@ -1146,7 +1146,7 @@ EXP_SMALL:
;;
EXP_VERY_SMALL:
EXP_VERY_SMALL:
//
// Here if 0 < |x| < 2^-60
// If exp, result = 1.0 + x
@ -1224,8 +1224,8 @@ EXP_CERTAIN_UNDERFLOW_ZERO:
(p7) br.ret.sptk b0 // If expm1, exit
}
;;
EXP_OVERFLOW:
// Here if x >= min_oflow_x
{ .mmi
@ -1309,7 +1309,7 @@ EXP_POSSIBLE_UNDERFLOW:
;;
EXP_64_SPECIAL:
EXP_64_SPECIAL:
// Here if x natval, nan, inf, zero
// If x natval, +inf, or if expm1 and x zero, just return x.
// The other cases must be tested for, and results set.
@ -1367,7 +1367,7 @@ EXP_64_SPECIAL:
;;
EXP_64_UNSUPPORTED:
EXP_64_UNSUPPORTED:
// Here if x unsupported type
{ .mfb
nop.m 999

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 02/02/00 Initial version
// 02/07/02 Added __libm_fabs entry point to test in case compiler inlines
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -50,7 +50,7 @@
//
// Overview of operation
//==============================================================
// returns absolute value of x
// returns absolute value of x
// floating-point registers used: 1
// f8, input
@ -69,14 +69,14 @@ GLOBAL_IEEE754_ENTRY(fabs)
{ .mfi
nop.m 999
fcmp.eq.unc.s0 p6,p7 = f8,f1
fcmp.eq.unc.s0 p6,p7 = f8,f1
nop.i 999 ;;
}
{ .mfb
nop.m 999
fmerge.s f8 = f0,f8
br.ret.sptk b0 ;;
fmerge.s f8 = f0,f8
br.ret.sptk b0 ;;
}
GLOBAL_IEEE754_END(fabs)

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 02/02/00 Initial version
// 02/07/02 Added __libm_fabsf entry point to test in case compiler inlines
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -50,7 +50,7 @@
//
// Overview of operation
//==============================================================
// returns absolute value of x
// returns absolute value of x
// floating-point registers used: 1
// f8, input
@ -69,14 +69,14 @@ GLOBAL_IEEE754_ENTRY(fabsf)
{ .mfi
nop.m 999
fcmp.eq.unc.s0 p6,p7 = f8,f1
fcmp.eq.unc.s0 p6,p7 = f8,f1
nop.i 999 ;;
}
{ .mfb
nop.m 999
fmerge.s f8 = f0,f8
br.ret.sptk b0 ;;
fmerge.s f8 = f0,f8
br.ret.sptk b0 ;;
}
GLOBAL_IEEE754_END(fabsf)

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 02/02/00 Initial version
// 02/07/02 Added __libm_fabsl entry point to test in case compiler inlines
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -50,7 +50,7 @@
//
// Overview of operation
//==============================================================
// returns absolute value of x
// returns absolute value of x
// floating-point registers used: 1
// f8, input
@ -69,14 +69,14 @@ GLOBAL_IEEE754_ENTRY(fabsl)
{ .mfi
nop.m 999
fcmp.eq.unc.s0 p6,p7 = f8,f1
fcmp.eq.unc.s0 p6,p7 = f8,f1
nop.i 999 ;;
}
{ .mfb
nop.m 999
fmerge.s f8 = f0,f8
br.ret.sptk b0 ;;
fmerge.s f8 = f0,f8
br.ret.sptk b0 ;;
}
GLOBAL_IEEE754_END(fabsl)

View File

@ -30,7 +30,7 @@ ENTRY (__finite)
(p6) mov ret0 = 0
(p7) mov ret0 = 1
br.ret.sptk.many rp
}
}
END (__finite)
strong_alias (__finite, __finitef)

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 06/07/01 Initial version
// 06/07/01 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
@ -64,7 +64,7 @@ GLOBAL_LIBM_ENTRY(fma)
{ .mfb
nop.m 999
fma.d.s0 f8 = f8, f9, f10 // Result = x * y + z
br.ret.sptk b0
br.ret.sptk b0
}
;;

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 06/07/01 Initial version
// 06/07/01 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
@ -64,7 +64,7 @@ GLOBAL_LIBM_ENTRY(fmaf)
{ .mfb
nop.m 999
fma.s.s0 f8 = f8, f9, f10 // Result = x * y + z
br.ret.sptk b0
br.ret.sptk b0
}
;;

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 06/07/01 Initial version
// 06/07/01 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
@ -64,7 +64,7 @@ GLOBAL_LIBM_ENTRY(fmal)
{ .mfb
nop.m 999
fma.s0 f8 = f8, f9, f10 // Result = x * y + z
br.ret.sptk b0
br.ret.sptk b0
}
;;

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 05/31/01 Initial version
// 05/31/01 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
@ -50,7 +50,7 @@
// Overview of operation
//==============================================================
// returns the algebraic maximum of 2 input values
//
//
// Special cases:
// fmax(x, nan) returns x if x is numeric // Must special case this one
// fmax(nan, y) returns y if y is numeric
@ -59,7 +59,7 @@
// fmax(-0,+0) returns +0
// fmax(-0,-0) returns -0
// fmax(+0,-0) returns +0 // Must special case this one
//
//
// SNaN causes invalid to be set
// floating-point registers used: 2
@ -107,7 +107,7 @@ GLOBAL_LIBM_ENTRY(fmax)
{ .mfb
nop.m 999
(p10) fmerge.s f8 = f9, f9 // If x nan, return y, else do nothing (returns x)
br.ret.sptk b0
br.ret.sptk b0
}
;;

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 05/31/01 Initial version
// 05/31/01 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
@ -50,7 +50,7 @@
// Overview of operation
//==============================================================
// returns the algebraic maximum of 2 input values
//
//
// Special cases:
// fmaxf(x, nan) returns x if x is numeric // Must special case this one
// fmaxf(nan, y) returns y if y is numeric
@ -59,7 +59,7 @@
// fmaxf(-0,+0) returns +0
// fmaxf(-0,-0) returns -0
// fmaxf(+0,-0) returns +0 // Must special case this one
//
//
// SNaN causes invalid to be set
// floating-point registers used: 2
@ -107,7 +107,7 @@ GLOBAL_LIBM_ENTRY(fmaxf)
{ .mfb
nop.m 999
(p10) fmerge.s f8 = f9, f9 // If x nan, return y, else do nothing (returns x)
br.ret.sptk b0
br.ret.sptk b0
}
;;

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 05/31/01 Initial version
// 05/31/01 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
@ -50,7 +50,7 @@
// Overview of operation
//==============================================================
// returns the algebraic maximum of 2 input values
//
//
// Special cases:
// fmaxl(x, nan) returns x if x is numeric // Must special case this one
// fmaxl(nan, y) returns y if y is numeric
@ -59,7 +59,7 @@
// fmaxl(-0,+0) returns +0
// fmaxl(-0,-0) returns -0
// fmaxl(+0,-0) returns +0 // Must special case this one
//
//
// SNaN causes invalid to be set
// floating-point registers used: 2
@ -107,7 +107,7 @@ GLOBAL_LIBM_ENTRY(fmaxl)
{ .mfb
nop.m 999
(p10) fmerge.s f8 = f9, f9 // If x nan, return y, else do nothing (returns x)
br.ret.sptk b0
br.ret.sptk b0
}
;;

View File

@ -32,7 +32,7 @@ ENTRY (__fpclassify)
(p7) fclass.m p7, p8 = farg0, @inf
(p6) br.ret.sptk.many rp
;;
}
}
{
.mfb
(p7) mov ret0 = 1

View File

@ -23,12 +23,12 @@
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ -53,12 +53,12 @@ double __libm_frexp(double, int*, int);
double frexp(double x, int *y)
{
#ifdef SIZE_INT_64
#ifdef SIZE_INT_64
return( __libm_frexp(x, y, 1) );
#else
#ifdef SIZE_INT_32
#ifdef SIZE_INT_32
return( __libm_frexp(x, y, 0) );
#endif

View File

@ -23,12 +23,12 @@
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ -53,12 +53,12 @@ float __libm_frexpf(float, int*, int);
float frexpf(float x, int *y)
{
#ifdef SIZE_INT_64
#ifdef SIZE_INT_64
return( __libm_frexpf(x, y, 1) );
#else
#ifdef SIZE_INT_32
#ifdef SIZE_INT_32
return( __libm_frexpf(x, y, 0) );
#endif

View File

@ -23,12 +23,12 @@
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ -53,12 +53,12 @@ long double __libm_frexpl(long double, int*, int);
long double frexpl(long double x, int *y)
{
#ifdef SIZE_INT_64
#ifdef SIZE_INT_64
return( __libm_frexpl(x, y, 1) );
#else
#ifdef SIZE_INT_32
#ifdef SIZE_INT_32
return( __libm_frexpl(x, y, 0) );
#endif

View File

@ -23,12 +23,12 @@
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ -48,11 +48,11 @@ double __libm_ldexp(double, int, int);
double ldexp(double x, int n)
{
#ifdef SIZE_INT_64
return __libm_ldexp(x,n,1);
#ifdef SIZE_INT_64
return __libm_ldexp(x,n,1);
#else
#ifdef SIZE_INT_32
#ifdef SIZE_INT_32
return __libm_ldexp(x,n,0);
#endif

View File

@ -23,12 +23,12 @@
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ -48,11 +48,11 @@ float __libm_ldexpf(float, int, int);
float ldexpf(float x, int n)
{
#ifdef SIZE_INT_64
return __libm_ldexpf(x,n,1);
#ifdef SIZE_INT_64
return __libm_ldexpf(x,n,1);
#else
#ifdef SIZE_INT_32
#ifdef SIZE_INT_32
return __libm_ldexpf(x,n,0);
#endif

View File

@ -23,12 +23,12 @@
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ -48,11 +48,11 @@ long double __libm_ldexpl(long double, int, int);
long double ldexpl(long double x, int n)
{
#ifdef SIZE_INT_64
return __libm_ldexpl(x,n,1);
#ifdef SIZE_INT_64
return __libm_ldexpl(x,n,1);
#else
#ifdef SIZE_INT_32
#ifdef SIZE_INT_32
return __libm_ldexpl(x,n,0);
#endif

View File

@ -1,4 +1,4 @@
.file "log1pl.s"
.file "log1pl.s"
// Copyright (c) 2000 - 2003, Intel Corporation
@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
//
// History:
// History:
// 02/02/00 Initial version
// 04/04/00 Unwind support added
// 08/15/00 Bundle added after call to __libm_error_support to properly
@ -74,14 +74,14 @@
// IEEE Special Conditions:
//
// Denormal fault raised on denormal inputs
// Overflow exceptions cannot occur
// Underflow exceptions raised when appropriate for log1p
// Overflow exceptions cannot occur
// Underflow exceptions raised when appropriate for log1p
// Inexact raised when appropriate by algorithm
//
// log1pl(inf) = inf
// log1pl(-inf) = QNaN
// log1pl(+/-0) = +/-0
// log1pl(-1) = -inf
// log1pl(-inf) = QNaN
// log1pl(+/-0) = +/-0
// log1pl(-1) = -inf
// log1pl(SNaN) = QNaN
// log1pl(QNaN) = QNaN
// log1pl(EM_special Values) = QNaN
@ -105,11 +105,11 @@
// log1pl( X ) = log( X+1 ) can be approximated by a simple polynomial
// in W = X. This polynomial resembles the truncated Taylor
// series W - W^2/2 + W^3/3 - ...
//
//
// Case log_regular:
//
// Here we use a table lookup method. The basic idea is that in
// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2),
// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2),
// we construct a value G such that G*Arg is close to 1 and that
// logl(1/G) is obtainable easily from a table of values calculated
// beforehand. Thus
@ -137,7 +137,7 @@
// G := G_1 * G_2 * G_3
// r := (G * S_hi - 1) + G * S_lo
//
// These G_j's have the property that the product is exactly
// These G_j's have the property that the product is exactly
// representable and that |r| < 2^(-12) as a result.
//
// Step 2: Approximation
@ -160,8 +160,8 @@
//
// Although log1pl(X) is basically X, we would like to preserve the inexactness
// nature as well as consistent behavior under different rounding modes.
// We can do this by computing the result as
//
// We can do this by computing the result as
//
// log1pl(X) = X - X*X
//
//
@ -169,7 +169,7 @@
//
// Here we compute a simple polynomial. To exploit parallelism, we split
// the polynomial into two portions.
//
//
// W := X
// Wsq := W * W
// W4 := Wsq*Wsq
@ -226,7 +226,7 @@
// with 1.0000 in fixed point.
//
//
// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
// truncated to lsb = 2^(-8). Similar to A_1,
// A_2 is not needed in actual implementation. It
// helps explain how some of the values are defined.
@ -255,11 +255,11 @@
// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
// floating pt. Fetch is done using index_3.
//
// Compute G := G_1 * G_2 * G_3.
// Compute G := G_1 * G_2 * G_3.
//
// This is done exactly since each of G_j only has 21 sig. bits.
//
// Compute
// Compute
//
// r := (G*S_hi - 1) + G*S_lo using 2 FMA operations.
//
@ -298,7 +298,7 @@
// Finally
//
// Y_hi := N*log2_hi + SUM ( log1byGj_hi )
// Y_lo := poly_hi + [ poly_lo +
// Y_lo := poly_hi + [ poly_lo +
// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
//
@ -307,7 +307,7 @@ RODATA
// ************* DO NOT CHANGE THE ORDER OF THESE TABLES *************
// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1
// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1
LOCAL_OBJECT_START(Constants_P)
//data4 0xEFD62B15,0xE3936754,0x00003FFB,0x00000000
@ -328,15 +328,15 @@ data8 0xAAAAAAAAAAAAAAAA,0x00003FFD
data8 0xFFFFFFFFFFFFFFFE,0x0000BFFD
LOCAL_OBJECT_END(Constants_P)
// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
LOCAL_OBJECT_START(Constants_Q)
//data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
//data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
//data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
//data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
//data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
//data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
//data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
//data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
data8 0xB172180000000000,0x00003FFE
data8 0x82E308654361C4C6,0x0000BFE2
data8 0xCCCCCAF2328833CB,0x00003FFC
@ -356,7 +356,7 @@ LOCAL_OBJECT_END(Constants_1_by_LN10)
// Z1 - 16 bit fixed
LOCAL_OBJECT_START(Constants_Z_1)
data4 0x00008000
data4 0x00007879
@ -471,7 +471,7 @@ data4 0x3F71D488,0x3D693B9D
data8 0xBE049391B6B7C239
LOCAL_OBJECT_END(Constants_G_H_h2)
// G3 and H3 - IEEE single and h3 - IEEE double
// G3 and H3 - IEEE single and h3 - IEEE double
LOCAL_OBJECT_START(Constants_G_H_h3)
data4 0x3F7FFC00,0x38800100
@ -543,70 +543,70 @@ LOCAL_OBJECT_END(Constants_G_H_h3)
// Floating Point Registers
FR_Input_X = f8
FR_Input_X = f8
FR_Y_hi = f34
FR_Y_hi = f34
FR_Y_lo = f35
FR_Scale = f36
FR_X_Prime = f37
FR_S_hi = f38
FR_X_Prime = f37
FR_S_hi = f38
FR_W = f39
FR_G = f40
FR_H = f41
FR_wsq = f42
FR_wsq = f42
FR_w4 = f43
FR_h = f44
FR_w6 = f45
FR_w6 = f45
FR_G2 = f46
FR_H2 = f47
FR_poly_lo = f48
FR_P8 = f49
FR_P8 = f49
FR_poly_hi = f50
FR_P7 = f51
FR_h2 = f52
FR_rsq = f53
FR_P7 = f51
FR_h2 = f52
FR_rsq = f53
FR_P6 = f54
FR_r = f55
FR_r = f55
FR_log2_hi = f56
FR_log2_lo = f57
FR_p87 = f58
FR_p876 = f58
FR_p8765 = f58
FR_float_N = f59
FR_Q4 = f60
FR_log2_hi = f56
FR_log2_lo = f57
FR_p87 = f58
FR_p876 = f58
FR_p8765 = f58
FR_float_N = f59
FR_Q4 = f60
FR_p43 = f61
FR_p432 = f61
FR_p4321 = f61
FR_P4 = f62
FR_G3 = f63
FR_H3 = f64
FR_h3 = f65
FR_p43 = f61
FR_p432 = f61
FR_p4321 = f61
FR_P4 = f62
FR_G3 = f63
FR_H3 = f64
FR_h3 = f65
FR_Q3 = f66
FR_P3 = f67
FR_Q2 = f68
FR_P2 = f69
FR_1LN10_hi = f70
FR_Q3 = f66
FR_P3 = f67
FR_Q2 = f68
FR_P2 = f69
FR_1LN10_hi = f70
FR_Q1 = f71
FR_P1 = f72
FR_1LN10_lo = f73
FR_P5 = f74
FR_rcub = f75
FR_Q1 = f71
FR_P1 = f72
FR_1LN10_lo = f73
FR_P5 = f74
FR_rcub = f75
FR_Output_X_tmp = f76
FR_Neg_One = f77
FR_Z = f78
FR_AA = f79
FR_BB = f80
FR_S_lo = f81
FR_2_to_minus_N = f82
FR_Output_X_tmp = f76
FR_Neg_One = f77
FR_Z = f78
FR_AA = f79
FR_BB = f80
FR_S_lo = f81
FR_2_to_minus_N = f82
FR_X = f8
FR_Y = f0
@ -616,24 +616,24 @@ FR_RESULT = f76
// General Purpose Registers
GR_ad_p = r33
GR_Index1 = r34
GR_Index2 = r35
GR_signif = r36
GR_X_0 = r37
GR_X_1 = r38
GR_X_2 = r39
GR_Index1 = r34
GR_Index2 = r35
GR_signif = r36
GR_X_0 = r37
GR_X_1 = r38
GR_X_2 = r39
GR_minus_N = r39
GR_Z_1 = r40
GR_Z_2 = r41
GR_N = r42
GR_Bias = r43
GR_M = r44
GR_Index3 = r45
GR_exp_2tom80 = r45
GR_Z_1 = r40
GR_Z_2 = r41
GR_N = r42
GR_Bias = r43
GR_M = r44
GR_Index3 = r45
GR_exp_2tom80 = r45
GR_ad_p2 = r46
GR_exp_mask = r47
GR_exp_2tom7 = r48
GR_ad_ln10 = r49
GR_exp_mask = r47
GR_exp_2tom7 = r48
GR_ad_ln10 = r49
GR_ad_tbl_1 = r50
GR_ad_tbl_2 = r51
GR_ad_tbl_3 = r52
@ -769,14 +769,14 @@ GLOBAL_IEEE754_ENTRY(log1pl)
//
{ .mmi
ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo
sub GR_N = GR_N, GR_Bias
sub GR_N = GR_N, GR_Bias
mov GR_exp_2tom80 = 0x0ffaf // Exponent of 2^-80
}
;;
{ .mfi
ldfe FR_Q4 = [GR_ad_q],16 // Load Q4
fms.s1 FR_S_lo = FR_AA, f1, FR_Z // Form S_lo = AA - Z
fms.s1 FR_S_lo = FR_AA, f1, FR_Z // Form S_lo = AA - Z
sub GR_minus_N = GR_Bias, GR_N // Form exponent of 2^(-N)
}
;;
@ -791,7 +791,7 @@ GLOBAL_IEEE754_ENTRY(log1pl)
{ .mmi
getf.exp GR_M = FR_W // Get signexp of w = x
ldfe FR_Q2 = [GR_ad_q],16 // Load Q2
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
}
;;
@ -1055,7 +1055,7 @@ GLOBAL_IEEE754_ENTRY(log1pl)
{ .mfi
nop.m 999
(p9) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo = poly_hi + poly_lo
(p9) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo = poly_hi + poly_lo
nop.i 999
}
;;
@ -1070,25 +1070,25 @@ GLOBAL_IEEE754_ENTRY(log1pl)
// Here if x=-1
LOG1P_EQ_Minus_1:
LOG1P_EQ_Minus_1:
//
// If x=-1 raise divide by zero and return -inf
//
//
{ .mfi
mov GR_Parameter_TAG = 138
fsub.s1 FR_Output_X_tmp = f0, f1
fsub.s1 FR_Output_X_tmp = f0, f1
nop.i 999
}
;;
{ .mfb
nop.m 999
frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0
frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0
br.cond.sptk __libm_error_region
}
;;
LOG1P_special:
LOG1P_special:
{ .mfi
nop.m 999
fclass.m.unc p8, p0 = FR_Input_X, 0x1E1 // Test for natval, nan, +inf
@ -1096,46 +1096,46 @@ LOG1P_special:
}
;;
//
//
// For SNaN raise invalid and return QNaN.
// For QNaN raise invalid and return QNaN.
// For +Inf return +Inf.
//
//
{ .mfb
nop.m 999
(p8) fmpy.s0 f8 = FR_Input_X, f1
(p8) fmpy.s0 f8 = FR_Input_X, f1
(p8) br.ret.sptk b0 // Return for natval, nan, +inf
}
;;
//
//
// For -Inf raise invalid and return QNaN.
//
//
{ .mfb
mov GR_Parameter_TAG = 139
fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0
fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0
br.cond.sptk __libm_error_region
}
;;
LOG1P_unsupported:
//
LOG1P_unsupported:
//
// Return generated NaN or other value.
//
//
{ .mfb
nop.m 999
fmpy.s0 f8 = FR_Input_X, f0
fmpy.s0 f8 = FR_Input_X, f0
br.ret.sptk b0
}
;;
// Here if -inf < x < -1
LOG1P_LT_Minus_1:
//
LOG1P_LT_Minus_1:
//
// Deal with x < -1 in a special way - raise
// invalid and produce QNaN indefinite.
//
//
{ .mfb
mov GR_Parameter_TAG = 139
frcpa.s0 FR_Output_X_tmp, p8 = f0, f0

View File

@ -21,27 +21,27 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 04/04/00 Improved speed, corrected result for NaN input
// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
// qnans nor for inputs larger than 2^63.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -78,17 +78,17 @@
// CALCULATION: NOT HUGE, NOT SMALL
// To get the integer part
// Take the floating-point input and truncate
// Take the floating-point input and truncate
// then convert this integer to fp Call it MODF_INTEGER_PART
// Subtract MODF_INTEGER_PART from MODF_NORM_F8 to get fraction part
// Then put fraction part in f8
// Then put fraction part in f8
// put integer part MODF_INTEGER_PART into *iptr
// Registers used
//==============================================================
// predicate registers used:
// predicate registers used:
// p6 - p13
// 0xFFFF 0x10033
@ -99,21 +99,21 @@
// p13 --------------------------------------------------->|
//
// floating-point registers used:
// floating-point registers used:
MODF_NORM_F8 = f9
MODF_FRACTION_PART = f10
MODF_INTEGER_PART = f11
MODF_INT_INTEGER_PART = f12
// general registers used
// general registers used
modf_signexp = r14
modf_GR_no_frac = r15
modf_GR_FFFF = r16
modf_17_ones = r17
modf_17_ones = r17
modf_exp = r18
// r33 = iptr
.section .text
GLOBAL_LIBM_ENTRY(modf)
@ -122,7 +122,7 @@ GLOBAL_LIBM_ENTRY(modf)
// Assume input is normalized and get signexp
// Normalize input just in case
// Form exponent bias
// Form exponent bias
{ .mfi
getf.exp modf_signexp = f8
fnorm.s0 MODF_NORM_F8 = f8
@ -150,9 +150,9 @@ GLOBAL_LIBM_ENTRY(modf)
// Is x unnorm?
// qnan snan inf norm unorm 0 -+
// 0 0 0 0 1 0 11 = 0x0b UNORM
// Set p13 to indicate calculation path, else p6 if nan or inf
// Set p13 to indicate calculation path, else p6 if nan or inf
{ .mfi
and modf_exp = modf_17_ones, modf_signexp
and modf_exp = modf_17_ones, modf_signexp
fclass.m.unc p8,p0 = f8, 0x0b
nop.i 999 ;;
}

View File

@ -21,27 +21,27 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 04/04/00 Improved speed, corrected result for NaN input
// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
// qnans nor for inputs larger than 2^63.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -78,17 +78,17 @@
// CALCULATION: NOT HUGE, NOT SMALL
// To get the integer part
// Take the floating-point input and truncate
// Take the floating-point input and truncate
// then convert this integer to fp Call it MODF_INTEGER_PART
// Subtract MODF_INTEGER_PART from MODF_NORM_F8 to get fraction part
// Then put fraction part in f8
// Then put fraction part in f8
// put integer part MODF_INTEGER_PART into *iptr
// Registers used
//==============================================================
// predicate registers used:
// predicate registers used:
// p6 - p13
// 0xFFFF 0x10016
@ -99,21 +99,21 @@
// p13 --------------------------------------------------->|
//
// floating-point registers used:
// floating-point registers used:
MODF_NORM_F8 = f9
MODF_FRACTION_PART = f10
MODF_INTEGER_PART = f11
MODF_INT_INTEGER_PART = f12
// general registers used
// general registers used
modf_signexp = r14
modf_GR_no_frac = r15
modf_GR_FFFF = r16
modf_17_ones = r17
modf_17_ones = r17
modf_exp = r18
// r33 = iptr
.section .text
GLOBAL_LIBM_ENTRY(modff)
@ -122,7 +122,7 @@ GLOBAL_LIBM_ENTRY(modff)
// Assume input is normalized and get signexp
// Normalize input just in case
// Form exponent bias
// Form exponent bias
{ .mfi
getf.exp modf_signexp = f8
fnorm.s0 MODF_NORM_F8 = f8
@ -150,9 +150,9 @@ GLOBAL_LIBM_ENTRY(modff)
// Is x unnorm?
// qnan snan inf norm unorm 0 -+
// 0 0 0 0 1 0 11 = 0x0b UNORM
// Set p13 to indicate calculation path, else p6 if nan or inf
// Set p13 to indicate calculation path, else p6 if nan or inf
{ .mfi
and modf_exp = modf_17_ones, modf_signexp
and modf_exp = modf_17_ones, modf_signexp
fclass.m.unc p8,p0 = f8, 0x0b
nop.i 999 ;;
}

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -42,7 +42,7 @@
// 02/02/00 Initial version
// 04/04/00 Improved speed, corrected result for NaN input
// 05/30/00 Fixed bug for exponent 0x1003e
// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
// qnans nor for inputs larger than 2^63.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -73,17 +73,17 @@
// CALCULATION: NOT HUGE, NOT SMALL
// To get the integer part
// Take the floating-point input and truncate
// Take the floating-point input and truncate
// then convert this integer to fp Call it MODF_INTEGER_PART
// Subtract MODF_INTEGER_PART from MODF_NORM_F8 to get fraction part
// Then put fraction part in f8
// Then put fraction part in f8
// put integer part MODF_INTEGER_PART into *iptr
// Registers used
//==============================================================
// predicate registers used:
// predicate registers used:
// p6 - p13
// 0xFFFF 0x1003e
@ -94,21 +94,21 @@
// p13 --------------------------------------------------->|
//
// floating-point registers used:
// floating-point registers used:
MODF_NORM_F8 = f9
MODF_FRACTION_PART = f10
MODF_INTEGER_PART = f11
MODF_INT_INTEGER_PART = f12
// general registers used
// general registers used
modf_signexp = r14
modf_GR_no_frac = r15
modf_GR_FFFF = r16
modf_17_ones = r17
modf_17_ones = r17
modf_exp = r18
// r34 = iptr
.section .text
GLOBAL_LIBM_ENTRY(modfl)
@ -117,7 +117,7 @@ GLOBAL_LIBM_ENTRY(modfl)
// Assume input is normalized and get signexp
// Normalize input just in case
// Form exponent bias
// Form exponent bias
{ .mfi
getf.exp modf_signexp = f8
fnorm.s0 MODF_NORM_F8 = f8
@ -145,9 +145,9 @@ GLOBAL_LIBM_ENTRY(modfl)
// Is x unnorm?
// qnan snan inf norm unorm 0 -+
// 0 0 0 0 1 0 11 = 0x0b UNORM
// Set p13 to indicate calculation path, else p6 if nan or inf
// Set p13 to indicate calculation path, else p6 if nan or inf
{ .mfi
and modf_exp = modf_17_ones, modf_signexp
and modf_exp = modf_17_ones, modf_signexp
fclass.m.unc p8,p0 = f8, 0x0b
nop.i 999 ;;
}

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 02/02/00 Initial version
// 03/03/00 Modified to conform to C9X, and improve speed of main path
// 03/14/00 Fixed case where x is a power of 2, and x > y, improved speed
// 04/04/00 Unwind support added
@ -104,8 +104,8 @@ FR_tmp = f39
//
// Overview of operation
//==============================================================
// nextafter determines the next representable value
// after x in the direction of y.
// nextafter determines the next representable value
// after x in the direction of y.
.section .text
@ -116,7 +116,7 @@ GLOBAL_LIBM_ENTRY(nextafter)
// Form smallest denormal significand = ulp size
{ .mfi
getf.exp GR_exp = f8
fcmp.lt.s1 p10,p11 = f8, f9
fcmp.lt.s1 p10,p11 = f8, f9
addl GR_sden_sig = 0x800, r0
}
// Form largest normal significand 0xfffffffffffff800
@ -131,7 +131,7 @@ GLOBAL_LIBM_ENTRY(nextafter)
// Form largest normal exponent
{ .mfi
getf.sig GR_sig = f8
fcmp.eq.s0 p6,p0 = f8, f9
fcmp.eq.s0 p6,p0 = f8, f9
addl GR_max_pexp = 0x103fe, r0
}
// Move largest normal significand to fp reg for special cases
@ -148,7 +148,7 @@ GLOBAL_LIBM_ENTRY(nextafter)
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
fclass.m p8,p0 = f8, 0xc3
(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
{ .mfi
@ -176,7 +176,7 @@ GLOBAL_LIBM_ENTRY(nextafter)
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
fclass.m p9,p0 = f9, 0xc3
(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
@ -194,7 +194,7 @@ GLOBAL_LIBM_ENTRY(nextafter)
}
{ .mfb
nop.m 999
(p8) fma.s0 f8 = f8,f1,f9
(p8) fma.s0 f8 = f8,f1,f9
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@ -202,12 +202,12 @@ GLOBAL_LIBM_ENTRY(nextafter)
// Is x=inf?
{ .mfi
setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
fclass.m p6,p0 = f8, 0x23
addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@ -216,14 +216,14 @@ GLOBAL_LIBM_ENTRY(nextafter)
{ .mfb
setf.sig FR_lden_sig = GR_lden_sig
mov FR_save_f8 = f8
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 6 special cases when significand rolls over:
@ -237,7 +237,7 @@ GLOBAL_LIBM_ENTRY(nextafter)
// Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact
// 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp
// Set p10, result is zero, sign of x, signal underflow and inexact
// 6 sig size decr, x_sig=min_sig, x_exp < min_exp
// 6 sig size decr, x_sig=min_sig, x_exp < min_exp
// Set p14, result is zero, sign of x, signal underflow and inexact
//
// Form exponent of smallest double denormal (if normalized register format)
@ -343,7 +343,7 @@ NEXT_UNDERFLOW_TO_ZERO:
br.cond.sptk NEXT_COMMON_FINISH ;;
}
NEXT_INF:
NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest double
@ -357,16 +357,16 @@ NEXT_INF:
{ .mfb
nop.m 999
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
NEXT_ZERO:
NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
// if f8 is zero and y is +, return + smallest double denormal
// if f8 is zero and y is -, return - smallest double denormal
// if f8 is zero and y is +, return + smallest double denormal
// if f8 is zero and y is -, return - smallest double denormal
{ .mfi
nop.m 999
@ -384,7 +384,7 @@ NEXT_ZERO:
// Add correct sign from direction arg
{ .mfi
nop.m 999
fmerge.s f8 = f9,FR_sden
fmerge.s f8 = f9,FR_sden
nop.i 999 ;;
}
@ -395,7 +395,7 @@ NEXT_ZERO:
br.cond.sptk NEXT_UNDERFLOW ;;
}
NEXT_UNDERFLOW:
NEXT_UNDERFLOW:
// Here if result is a denorm, or input is finite and result is zero
// Call error support to report possible range error
{ .mib
@ -405,7 +405,7 @@ NEXT_UNDERFLOW:
}
;;
NEXT_OVERFLOW:
NEXT_OVERFLOW:
// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
@ -464,7 +464,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mib
stfd [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 02/02/00 Initial version
// 03/03/00 Modified to conform to C9X, and improve speed of main path
// 03/14/00 Fixed case where x is a power of 2, and x > y, improved speed
// 04/04/00 Unwind support added
@ -104,8 +104,8 @@ FR_tmp = f39
//
// Overview of operation
//==============================================================
// nextafterf determines the next representable value
// after x in the direction of y.
// nextafterf determines the next representable value
// after x in the direction of y.
.section .text
@ -135,14 +135,14 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// Extract significand from x
// Form largest normal significand
{ .mlx
nop.m 0
nop.m 0
movl GR_lnorm_sig = 0xffffff0000000000 ;;
}
// Move largest normal significand to fp reg for special cases
{ .mfi
setf.sig FR_lnorm_sig = GR_lnorm_sig
nop.f 0
nop.f 0
addl GR_sign_mask = 0x20000, r0 ;;
}
@ -153,7 +153,7 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
fclass.m p8,p0 = f8, 0xc3
(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
{ .mfi
@ -182,7 +182,7 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
fclass.m p9,p0 = f9, 0xc3
(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
@ -200,7 +200,7 @@ GLOBAL_LIBM_ENTRY(nextafterf)
}
{ .mfb
nop.m 999
(p8) fma.s0 f8 = f8,f1,f9
(p8) fma.s0 f8 = f8,f1,f9
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@ -208,12 +208,12 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// Is x=inf?
{ .mfi
setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
fclass.m p6,p0 = f8, 0x23
addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@ -222,14 +222,14 @@ GLOBAL_LIBM_ENTRY(nextafterf)
{ .mfb
setf.sig FR_lden_sig = GR_lden_sig
mov FR_save_f8 = f8
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 6 special cases when significand rolls over:
@ -243,7 +243,7 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact
// 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp
// Set p10, result is zero, sign of x, signal underflow and inexact
// 6 sig size decr, x_sig=min_sig, x_exp < min_exp
// 6 sig size decr, x_sig=min_sig, x_exp < min_exp
// Set p14, result is zero, sign of x, signal underflow and inexact
//
// Form exponent of smallest float denormal (if normalized register format)
@ -349,7 +349,7 @@ NEXT_UNDERFLOW_TO_ZERO:
br.cond.sptk NEXT_COMMON_FINISH ;;
}
NEXT_INF:
NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest float
@ -363,16 +363,16 @@ NEXT_INF:
{ .mfb
nop.m 999
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
NEXT_ZERO:
NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
// if f8 is zero and y is +, return + smallest float denormal
// if f8 is zero and y is -, return - smallest float denormal
// if f8 is zero and y is +, return + smallest float denormal
// if f8 is zero and y is -, return - smallest float denormal
{ .mfi
nop.m 999
@ -390,7 +390,7 @@ NEXT_ZERO:
// Add correct sign from direction arg
{ .mfi
nop.m 999
fmerge.s f8 = f9,FR_sden
fmerge.s f8 = f9,FR_sden
nop.i 999 ;;
}
@ -401,7 +401,7 @@ NEXT_ZERO:
br.cond.sptk NEXT_UNDERFLOW ;;
}
NEXT_UNDERFLOW:
NEXT_UNDERFLOW:
// Here if result is a denorm, or input is finite and result is zero
// Call error support to report possible range error
{ .mib
@ -411,7 +411,7 @@ NEXT_UNDERFLOW:
}
;;
NEXT_OVERFLOW:
NEXT_OVERFLOW:
// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
@ -470,7 +470,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mib
stfs [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 02/02/00 Initial version
// 03/03/00 Modified to conform to C9X, and improve speed of main path
// 03/14/00 Fixed case where x is a power of 2, and x > y, improved speed
// 04/04/00 Unwind support added
@ -48,7 +48,7 @@
// set [the previously overwritten] GR_Parameter_RESULT.
// 09/09/00 Updated fcmp so that qnans do not raise invalid.
// 12/15/00 Fixed case of smallest long double normal to largest denormal,
// now adhere to C99 for two zero args, and fixed flag settings
// now adhere to C99 for two zero args, and fixed flag settings
// for several cases
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -105,8 +105,8 @@ FR_tmp = f39
//
// Overview of operation
//==============================================================
// nextafterl determines the next representable value
// after x in the direction of y.
// nextafterl determines the next representable value
// after x in the direction of y.
.section .text
@ -117,7 +117,7 @@ GLOBAL_LIBM_ENTRY(nextafterl)
// Form smallest denormal significand = ulp size
{ .mfi
getf.exp GR_exp = f8
fcmp.lt.s1 p10,p11 = f8, f9
fcmp.lt.s1 p10,p11 = f8, f9
addl GR_sden_sig = 0x1, r0
}
// Form largest normal significand 0xffffffffffffffff
@ -150,7 +150,7 @@ GLOBAL_LIBM_ENTRY(nextafterl)
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
fclass.m p8,p0 = f8, 0xc3
(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
// Move smallest normal exp to fp regs
@ -180,7 +180,7 @@ GLOBAL_LIBM_ENTRY(nextafterl)
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
fclass.m p9,p0 = f9, 0xc3
(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
@ -198,7 +198,7 @@ GLOBAL_LIBM_ENTRY(nextafterl)
}
{ .mfb
setf.exp FR_den_exp = GR_min_pexp
(p8) fma.s0 f8 = f8,f1,f9
(p8) fma.s0 f8 = f8,f1,f9
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@ -206,12 +206,12 @@ GLOBAL_LIBM_ENTRY(nextafterl)
// Is x=inf?
{ .mfi
setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
fclass.m p6,p0 = f8, 0x23
addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@ -220,14 +220,14 @@ GLOBAL_LIBM_ENTRY(nextafterl)
{ .mfb
setf.sig FR_lden_sig = GR_lden_sig
mov FR_save_f8 = f8
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 5 special cases when significand rolls over:
@ -252,7 +252,7 @@ GLOBAL_LIBM_ENTRY(nextafterl)
{ .mmi
(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp
(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0
(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp
(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp
;;
}
@ -347,7 +347,7 @@ NEXT_UNDERFLOW_TO_ZERO:
br.cond.sptk NEXT_COMMON_FINISH ;;
}
NEXT_INF:
NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest long double
@ -362,16 +362,16 @@ NEXT_INF:
{ .mfb
nop.m 999
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
NEXT_ZERO:
NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
// if f8 is zero and y is +, return + smallest long double denormal
// if f8 is zero and y is -, return - smallest long double denormal
// if f8 is zero and y is +, return + smallest long double denormal
// if f8 is zero and y is -, return - smallest long double denormal
{ .mfi
nop.m 999
@ -389,7 +389,7 @@ NEXT_ZERO:
// Add correct sign from direction arg
{ .mfi
nop.m 999
fmerge.s f8 = f9,FR_sden
fmerge.s f8 = f9,FR_sden
nop.i 999 ;;
}
@ -400,7 +400,7 @@ NEXT_ZERO:
br.cond.sptk NEXT_UNDERFLOW ;;
}
NEXT_UNDERFLOW:
NEXT_UNDERFLOW:
// Here if result is a denorm, or input is finite and result is zero
// Call error support to report possible range error
{ .mib
@ -410,7 +410,7 @@ NEXT_UNDERFLOW:
}
;;
NEXT_OVERFLOW:
NEXT_OVERFLOW:
// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
@ -469,7 +469,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mib
stfe [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfe [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 08/15/01 Initial version
// 08/15/01 Initial version
// 08/23/01 Corrected error tag number
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -96,8 +96,8 @@ FR_tmp = f39
//
// Overview of operation
//==============================================================
// nexttoward determines the next representable value
// after x in the direction of y.
// nexttoward determines the next representable value
// after x in the direction of y.
.section .text
@ -108,7 +108,7 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// Form smallest denormal significand = ulp size
{ .mfi
getf.exp GR_exp = f8
fcmp.lt.s1 p10,p11 = f8, f9
fcmp.lt.s1 p10,p11 = f8, f9
addl GR_sden_sig = 0x800, r0
}
// Form largest normal significand 0xfffffffffffff800
@ -123,7 +123,7 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// Form largest normal exponent
{ .mfi
getf.sig GR_sig = f8
fcmp.eq.s0 p6,p0 = f8, f9
fcmp.eq.s0 p6,p0 = f8, f9
addl GR_max_pexp = 0x103fe, r0
}
// Move largest normal significand to fp reg for special cases
@ -140,7 +140,7 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
fclass.m p8,p0 = f8, 0xc3
(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
{ .mfi
@ -168,7 +168,7 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
fclass.m p9,p0 = f9, 0xc3
(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
@ -186,7 +186,7 @@ GLOBAL_LIBM_ENTRY(nexttoward)
}
{ .mfb
nop.m 999
(p8) fma.s0 f8 = f8,f1,f9
(p8) fma.s0 f8 = f8,f1,f9
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@ -194,12 +194,12 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// Is x=inf?
{ .mfi
setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
fclass.m p6,p0 = f8, 0x23
addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@ -208,14 +208,14 @@ GLOBAL_LIBM_ENTRY(nexttoward)
{ .mfb
setf.sig FR_lden_sig = GR_lden_sig
mov FR_save_f8 = f8
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 6 special cases when significand rolls over:
@ -229,7 +229,7 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact
// 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp
// Set p10, result is zero, sign of x, signal underflow and inexact
// 6 sig size decr, x_sig=min_sig, x_exp < min_exp
// 6 sig size decr, x_sig=min_sig, x_exp < min_exp
// Set p14, result is zero, sign of x, signal underflow and inexact
//
// Form exponent of smallest double denormal (if normalized register format)
@ -335,7 +335,7 @@ NEXT_UNDERFLOW_TO_ZERO:
br.cond.sptk NEXT_COMMON_FINISH ;;
}
NEXT_INF:
NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest double
@ -349,16 +349,16 @@ NEXT_INF:
{ .mfb
nop.m 999
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
NEXT_ZERO:
NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
// if f8 is zero and y is +, return + smallest double denormal
// if f8 is zero and y is -, return - smallest double denormal
// if f8 is zero and y is +, return + smallest double denormal
// if f8 is zero and y is -, return - smallest double denormal
{ .mfi
nop.m 999
@ -376,7 +376,7 @@ NEXT_ZERO:
// Add correct sign from direction arg
{ .mfi
nop.m 999
fmerge.s f8 = f9,FR_sden
fmerge.s f8 = f9,FR_sden
nop.i 999 ;;
}
@ -387,7 +387,7 @@ NEXT_ZERO:
br.cond.sptk NEXT_UNDERFLOW ;;
}
NEXT_UNDERFLOW:
NEXT_UNDERFLOW:
// Here if result is a denorm, or input is finite and result is zero
// Call error support to report possible range error
{ .mib
@ -397,7 +397,7 @@ NEXT_UNDERFLOW:
}
;;
NEXT_OVERFLOW:
NEXT_OVERFLOW:
// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
@ -456,7 +456,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mib
stfd [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 08/15/01 Initial version
// 08/15/01 Initial version
// 08/23/01 Corrected error tag number
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -96,8 +96,8 @@ FR_tmp = f39
//
// Overview of operation
//==============================================================
// nexttowardf determines the next representable value
// after x in the direction of y.
// nexttowardf determines the next representable value
// after x in the direction of y.
.section .text
@ -127,14 +127,14 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// Extract significand from x
// Form largest normal significand
{ .mlx
nop.m 0
nop.m 0
movl GR_lnorm_sig = 0xffffff0000000000 ;;
}
// Move largest normal significand to fp reg for special cases
{ .mfi
setf.sig FR_lnorm_sig = GR_lnorm_sig
nop.f 0
nop.f 0
addl GR_sign_mask = 0x20000, r0 ;;
}
@ -145,7 +145,7 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
fclass.m p8,p0 = f8, 0xc3
(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
{ .mfi
@ -174,7 +174,7 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
fclass.m p9,p0 = f9, 0xc3
(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
@ -192,7 +192,7 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
}
{ .mfb
nop.m 999
(p8) fma.s0 f8 = f8,f1,f9
(p8) fma.s0 f8 = f8,f1,f9
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@ -200,12 +200,12 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// Is x=inf?
{ .mfi
setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
fclass.m p6,p0 = f8, 0x23
addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@ -214,14 +214,14 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
{ .mfb
setf.sig FR_lden_sig = GR_lden_sig
mov FR_save_f8 = f8
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 6 special cases when significand rolls over:
@ -235,7 +235,7 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact
// 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp
// Set p10, result is zero, sign of x, signal underflow and inexact
// 6 sig size decr, x_sig=min_sig, x_exp < min_exp
// 6 sig size decr, x_sig=min_sig, x_exp < min_exp
// Set p14, result is zero, sign of x, signal underflow and inexact
//
// Form exponent of smallest float denormal (if normalized register format)
@ -341,7 +341,7 @@ NEXT_UNDERFLOW_TO_ZERO:
br.cond.sptk NEXT_COMMON_FINISH ;;
}
NEXT_INF:
NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest float
@ -355,16 +355,16 @@ NEXT_INF:
{ .mfb
nop.m 999
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
NEXT_ZERO:
NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
// if f8 is zero and y is +, return + smallest float denormal
// if f8 is zero and y is -, return - smallest float denormal
// if f8 is zero and y is +, return + smallest float denormal
// if f8 is zero and y is -, return - smallest float denormal
{ .mfi
nop.m 999
@ -382,7 +382,7 @@ NEXT_ZERO:
// Add correct sign from direction arg
{ .mfi
nop.m 999
fmerge.s f8 = f9,FR_sden
fmerge.s f8 = f9,FR_sden
nop.i 999 ;;
}
@ -393,7 +393,7 @@ NEXT_ZERO:
br.cond.sptk NEXT_UNDERFLOW ;;
}
NEXT_UNDERFLOW:
NEXT_UNDERFLOW:
// Here if result is a denorm, or input is finite and result is zero
// Call error support to report possible range error
{ .mib
@ -403,7 +403,7 @@ NEXT_UNDERFLOW:
}
;;
NEXT_OVERFLOW:
NEXT_OVERFLOW:
// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
@ -462,7 +462,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mib
stfs [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 08/15/01 Initial version
// 08/15/01 Initial version
// 08/23/01 Corrected error tag number
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -96,8 +96,8 @@ FR_tmp = f39
//
// Overview of operation
//==============================================================
// nexttowardl determines the next representable value
// after x in the direction of y.
// nexttowardl determines the next representable value
// after x in the direction of y.
.section .text
@ -108,7 +108,7 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
// Form smallest denormal significand = ulp size
{ .mfi
getf.exp GR_exp = f8
fcmp.lt.s1 p10,p11 = f8, f9
fcmp.lt.s1 p10,p11 = f8, f9
addl GR_sden_sig = 0x1, r0
}
// Form largest normal significand 0xffffffffffffffff
@ -141,7 +141,7 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
fclass.m p8,p0 = f8, 0xc3
(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
// Move smallest normal exp to fp regs
@ -171,7 +171,7 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
fclass.m p9,p0 = f9, 0xc3
(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
@ -189,7 +189,7 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
}
{ .mfb
setf.exp FR_den_exp = GR_min_pexp
(p8) fma.s0 f8 = f8,f1,f9
(p8) fma.s0 f8 = f8,f1,f9
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@ -197,12 +197,12 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
// Is x=inf?
{ .mfi
setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
fclass.m p6,p0 = f8, 0x23
addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@ -211,14 +211,14 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
{ .mfb
setf.sig FR_lden_sig = GR_lden_sig
mov FR_save_f8 = f8
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 5 special cases when significand rolls over:
@ -243,7 +243,7 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
{ .mmi
(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp
(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0
(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp
(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp
;;
}
@ -338,7 +338,7 @@ NEXT_UNDERFLOW_TO_ZERO:
br.cond.sptk NEXT_COMMON_FINISH ;;
}
NEXT_INF:
NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest long double
@ -353,16 +353,16 @@ NEXT_INF:
{ .mfb
nop.m 999
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
NEXT_ZERO:
NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
// if f8 is zero and y is +, return + smallest long double denormal
// if f8 is zero and y is -, return - smallest long double denormal
// if f8 is zero and y is +, return + smallest long double denormal
// if f8 is zero and y is -, return - smallest long double denormal
{ .mfi
nop.m 999
@ -380,7 +380,7 @@ NEXT_ZERO:
// Add correct sign from direction arg
{ .mfi
nop.m 999
fmerge.s f8 = f9,FR_sden
fmerge.s f8 = f9,FR_sden
nop.i 999 ;;
}
@ -391,7 +391,7 @@ NEXT_ZERO:
br.cond.sptk NEXT_UNDERFLOW ;;
}
NEXT_UNDERFLOW:
NEXT_UNDERFLOW:
// Here if result is a denorm, or input is finite and result is zero
// Call error support to report possible range error
{ .mib
@ -401,7 +401,7 @@ NEXT_UNDERFLOW:
}
;;
NEXT_OVERFLOW:
NEXT_OVERFLOW:
// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
@ -460,7 +460,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mib
stfe [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
nop.b 0
}
{ .mib
stfe [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack

View File

@ -159,7 +159,7 @@ ROUND_COMMON:
{ .mmi
cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^52?
cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
}
;;

View File

@ -159,7 +159,7 @@ ROUND_COMMON:
{ .mmi
cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^23?
cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
}
;;

View File

@ -159,7 +159,7 @@ ROUND_COMMON:
{ .mmi
cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^63?
cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
}
;;

View File

@ -23,12 +23,12 @@
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ -48,11 +48,11 @@ float __libm_scalblnf(float, long int, int);
float scalblnf(float x, long int n)
{
#ifdef SIZE_LONG_INT_64
return __libm_scalblnf(x,n,1);
#ifdef SIZE_LONG_INT_64
return __libm_scalblnf(x,n,1);
#else
#ifdef SIZE_LONG_INT_32
#ifdef SIZE_LONG_INT_32
return __libm_scalblnf(x,n,0);
#endif

View File

@ -23,12 +23,12 @@
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ -48,11 +48,11 @@ double __libm_scalbn(double, int, int);
double scalbn(double x, int n)
{
#ifdef SIZE_INT_64
return __libm_scalbn(x,n,1);
#ifdef SIZE_INT_64
return __libm_scalbn(x,n,1);
#else
#ifdef SIZE_INT_32
#ifdef SIZE_INT_32
return __libm_scalbn(x,n,0);
#endif

View File

@ -23,12 +23,12 @@
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ -48,11 +48,11 @@ float __libm_scalbnf(float, int, int);
float scalbnf(float x, int n)
{
#ifdef SIZE_INT_64
return __libm_scalbnf(x,n,1);
#ifdef SIZE_INT_64
return __libm_scalbnf(x,n,1);
#else
#ifdef SIZE_INT_32
#ifdef SIZE_INT_32
return __libm_scalbnf(x,n,0);
#endif

View File

@ -23,12 +23,12 @@
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ -48,11 +48,11 @@ long double __libm_scalbnl(long double, int, int);
long double scalbnl(long double x, int n)
{
#ifdef SIZE_INT_64
return __libm_scalbnl(x,n,1);
#ifdef SIZE_INT_64
return __libm_scalbnl(x,n,1);
#else
#ifdef SIZE_INT_32
#ifdef SIZE_INT_32
return __libm_scalbnl(x,n,0);
#endif

View File

@ -30,7 +30,7 @@ ENTRY (__signbit)
(p6) mov ret0 = 1
(p7) mov ret0 = 0
br.ret.sptk.many rp
}
}
END (__signbit)
strong_alias (__signbit, __signbitf)

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -54,11 +54,11 @@
// If x = sig * 2**n with 1 <= sig < 2
// significand returns sig
//
// predicate registers used:
// predicate registers used:
// p6, p7
//
// floating-point registers used:
// f8, f9, f10
// floating-point registers used:
// f8, f9, f10
.section .text
GLOBAL_LIBM_ENTRY(significand)
@ -69,12 +69,12 @@ GLOBAL_LIBM_ENTRY(significand)
// f10 gets f8(sign) with f1(exp,significand)
{ .mfi
nop.m 999
fmerge.s f10 = f8,f1
fmerge.s f10 = f8,f1
nop.i 999
}
{ .mfi
nop.m 999
fnorm.s0 f9 = f8
fnorm.s0 f9 = f8
nop.i 999 ;;
}
@ -91,7 +91,7 @@ GLOBAL_LIBM_ENTRY(significand)
// return sign(f8) exp(f8) significand(f8), normalized.
{ .mfi
nop.m 999
fclass.m.unc p0,p6 = f8, 0xe7
fclass.m.unc p0,p6 = f8, 0xe7
nop.i 999 ;;
}
@ -109,7 +109,7 @@ GLOBAL_LIBM_ENTRY(significand)
{ .mfb
nop.m 999
fnorm.d.s0 f8 = f8
fnorm.d.s0 f8 = f8
br.ret.sptk b0 ;;
}

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -53,11 +53,11 @@
// If x = sig * 2**n with 1 <= sig < 2
// significandf returns sig
//
// predicate registers used:
// predicate registers used:
// p6, p7
//
// floating-point registers used:
// f8, f9, f10
// floating-point registers used:
// f8, f9, f10
.section .text
GLOBAL_LIBM_ENTRY(significandf)
@ -68,12 +68,12 @@ GLOBAL_LIBM_ENTRY(significandf)
// f10 gets f8(sign) with f1(exp,significand)
{ .mfi
nop.m 999
fmerge.s f10 = f8,f1
fmerge.s f10 = f8,f1
nop.i 999
}
{ .mfi
nop.m 999
fnorm.s0 f9 = f8
fnorm.s0 f9 = f8
nop.i 999 ;;
}
@ -90,7 +90,7 @@ GLOBAL_LIBM_ENTRY(significandf)
// return sign(f8) exp(f8) significand(f8), normalized.
{ .mfi
nop.m 999
fclass.m.unc p0,p6 = f8, 0xe7
fclass.m.unc p0,p6 = f8, 0xe7
nop.i 999 ;;
}

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -54,11 +54,11 @@
// If x = sig * 2**n with 1 <= sig < 2
// significandl returns sig
//
// predicate registers used:
// predicate registers used:
// p6, p7
//
// floating-point registers used:
// f8, f9, f10
// floating-point registers used:
// f8, f9, f10
.section .text
GLOBAL_LIBM_ENTRY(significandl)
@ -69,12 +69,12 @@ GLOBAL_LIBM_ENTRY(significandl)
// f10 gets f8(sign) with f1(exp,significand)
{ .mfi
nop.m 999
fmerge.s f10 = f8,f1
fmerge.s f10 = f8,f1
nop.i 999
}
{ .mfi
nop.m 999
fnorm.s0 f9 = f8
fnorm.s0 f9 = f8
nop.i 999 ;;
}
@ -84,14 +84,14 @@ GLOBAL_LIBM_ENTRY(significandl)
fclass.m.unc p7,p0 = f8, 0x0b
nop.i 999 ;;
}
// p6 = TRUE ==> x is not (nan,inf,0)
// return sign(f8) exp(f1) significand(f8)
// else x is (nan,inf,0)
// return sign(f8) exp(f8) significand(f8), normalized.
{ .mfi
nop.m 999
fclass.m.unc p0,p6 = f8, 0xe7
fclass.m.unc p0,p6 = f8, 0xe7
nop.i 999 ;;
}
@ -131,7 +131,7 @@ SIGNIFICAND_DENORM:
// This will be the final result unless x double-extended denormal
{ .mfi
nop.m 999
fnorm.s0 f8 = f8
fnorm.s0 f8 = f8
nop.i 999 ;;
}
@ -146,7 +146,7 @@ SIGNIFICAND_DENORM:
// Final normalization if x double-extended denorm
{ .mfb
nop.m 999
(p7) fnorm.s0 f8 = f8
(p7) fnorm.s0 f8 = f8
br.ret.sptk b0 ;;
}

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -71,10 +71,10 @@
//
// 3. Main path: 0.25 <= |x| < 19.0625
// For several ranges of 0.25 <= |x| < 19.0625
// Return tanh(x) = sign(x)*(A0 + y*A1 + y^2*A2 +
// Return tanh(x) = sign(x)*(A0 + y*A1 + y^2*A2 +
// + y^3*A3 + ... + y^19*A19)
// where y = (|x|/a) - b
//
//
// For each range there is particular set of coefficients.
// Below is the list of ranges:
// 1/4 <= |x| < 1/2 a = 0.25, b = 1.0
@ -87,28 +87,28 @@
// 8.0 <= |x| < 13.0 a = 8.0, b = 1.0
// 13.0 <= |x| < 16.0 a = 8.0, b = 2.0
// 16.0 <= |x| < 19.0625 a = 16.0, b = 1.0
// ( [3.25;4.0], [6.5;8.0], [13.0;16.0] subranges separated
// ( [3.25;4.0], [6.5;8.0], [13.0;16.0] subranges separated
// for monotonicity issues resolve )
//
// 4. Saturation path: 19.0625 <= |x| < +INF
// 4. Saturation path: 19.0625 <= |x| < +INF
// Return tanh(x) = sign(x)*(1.0 - tiny_value)
// (tiny_value ~ 2^(-63))
//
// Registers used
//==============================================================================
// Floating Point registers used:
// Floating Point registers used:
// f8 = input, output
// f32 -> f64
//
// General registers used:
// General registers used:
// r32 -> r51, r2, r3
//
// Predicate registers used:
// p6, p8, p10, p11, p12, p14, p15
// p6 arg is zero, denormal or special IEEE
// p8 to filter out case when signd(x) > 1.625
// p8 to filter out case when signd(x) > 1.625
// p10 to filter out case when |x| < 0.25
// p11 to filter out case when signd(x) <= 1.625
// p11 to filter out case when signd(x) <= 1.625
// p12 to filter out case when |x| >= 19.0625
// p14 set to 1 for positive x
// p15 set to 1 for negative x
@ -169,7 +169,7 @@ fTSqr = f58
fTQuadr = f59
fTDeg3 = f60
fTDeg7 = f61
fArgAbsNormSgn = f62
fArgAbsNormSgn = f62
fTQuadrSgn = f63
fTwo = f64
@ -184,7 +184,7 @@ LOCAL_OBJECT_START(tanh_data)
// Main path coefficients:
// Coefficients ##0..15 ("main" coefficient tables)
// Polynomial coefficients for the tanh(x), 0.25 <= |x| < 0.5
// Polynomial coefficients for the tanh(x), 0.25 <= |x| < 0.5
data8 0xE9D218BC9A3FB55A, 0x00003FC7 //A19
data8 0xC8C0D38687F36EBA, 0x00003FCE //A18
data8 0xA2663E519FAC8A43, 0x0000BFD2 //A17
@ -202,7 +202,7 @@ data8 0x8E1C15876AA589AD, 0x0000BFEF //A6
data8 0x942226246A8C2A86, 0x00003FF1 //A5
data8 0x8F06D9FF7DB47261, 0x00003FF4 //A4
//
// Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0
// Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0
data8 0xC4A7B8FB672A8520, 0x00003FDC //A19
data8 0xA20724B847E13499, 0x0000BFE0 //A18
data8 0xE17DB53F02E4D340, 0x00003FE2 //A17
@ -220,7 +220,7 @@ data8 0xCE63E8FA6B96480B, 0x0000BFF4 //A6
data8 0xDF017BE0D4FE45D8, 0x0000BFF4 //A5
data8 0xA8A0C6E2226DF3CD, 0x00003FF8 //A4
//
// Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0
// Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0
data8 0x8E89D2EBFDAA160B, 0x00003FE9 //A19
data8 0xDD9226310A272046, 0x0000BFEC //A18
data8 0xA038042D28B0D665, 0x00003FEF //A17
@ -238,7 +238,7 @@ data8 0x9B02FE0DAF42C08F, 0x00003FF9 //A6
data8 0xBDACE06F531D9491, 0x0000BFFA //A5
data8 0xE3048AD1DB2F648C, 0x00003FF9 //A4
//
// Polynomial coefficients for the tanh(x), 2.0 <= |x| < 3.25
// Polynomial coefficients for the tanh(x), 2.0 <= |x| < 3.25
data8 0x856EC3B0330A385A, 0x00003FEB //A19
data8 0xC641D69DAE2D429C, 0x0000BFF2 //A18
data8 0xC683EB0BE1343FFF, 0x00003FF5 //A17
@ -256,7 +256,7 @@ data8 0xA6CAAD4A3E31A7D5, 0x0000BFF8 //A6
data8 0x9CABD76D1D5C3878, 0x00003FFC //A5
data8 0x92906D077941CAA9, 0x0000BFFD //A4
//
// Polynomial coefficients for the tanh(x), 4.0 <= |x| < 6.5
// Polynomial coefficients for the tanh(x), 4.0 <= |x| < 6.5
data8 0x9232D19F71709AC9, 0x0000BFF5 //A19
data8 0x819E31323F5DD3F8, 0x00003FF8 //A18
data8 0xDA8E1CDB8D23DC29, 0x0000BFF9 //A17
@ -274,7 +274,7 @@ data8 0xF4CA0B968AF2DDE2, 0x0000BFFC //A6
data8 0xB99874B482BD17EE, 0x00003FFC //A5
data8 0xE93FB2F99431DC1D, 0x0000BFFB //A4
//
// Polynomial coefficients for the tanh(x), 8.0 <= |x| < 13.0
// Polynomial coefficients for the tanh(x), 8.0 <= |x| < 13.0
data8 0xAAA9EB7EADA85CEC, 0x00003FF5 //A19
data8 0x980C80EE05A6BE78, 0x0000BFF8 //A18
data8 0x818DA9F5396390A5, 0x00003FFA //A17
@ -292,7 +292,7 @@ data8 0xABD91DCE40D5EECB, 0x0000BFF7 //A6
data8 0x80E375C1B847B72F, 0x00003FF6 //A5
data8 0xA11C7DD978CF700A, 0x0000BFF4 //A4
//
// Polynomial coefficients for the tanh(x), 16.0 <= |x| < 19.0625
// Polynomial coefficients for the tanh(x), 16.0 <= |x| < 19.0625
data8 0xE29D17C510F86F6B, 0x00003FF3 //A19
data8 0x88FE52EB39A3A98C, 0x0000BFF5 //A18
data8 0xA406547E50360693, 0x00003FF5 //A17
@ -311,7 +311,7 @@ data8 0xF358B2C46F10CBAF, 0x00003FE3 //A5
data8 0x98176FD06229A385, 0x0000BFE1 //A4
//
// Binary subranges
// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 4.0
// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 4.0
data8 0xEF2EE841288F6706, 0x00003FE9 //A19
data8 0xE65D5B74B85F82A6, 0x00003FEB //A18
data8 0xE495FC21E42A79FF, 0x00003FEA //A17
@ -329,7 +329,7 @@ data8 0xF4CA0661307243C7, 0x0000BFF6 //A6
data8 0xB998746D57061F74, 0x00003FF7 //A5
data8 0xE93FB2F482327C19, 0x0000BFF7 //A4
//
// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0
// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0
data8 0xEB189B71ADC40BE2, 0x00003FEA //A19
data8 0xA60B46F9FF6DC2DF, 0x00003FEA //A18
data8 0xBB061CDD9F368B9D, 0x00003FEC //A17
@ -347,7 +347,7 @@ data8 0xABD9E63CA575B950, 0x0000BFF1 //A6
data8 0x80E38B18E8D0F460, 0x00003FF1 //A5
data8 0xA11C80E20AAFDD3C, 0x0000BFF0 //A4
//
// Polynomial coefficients for the tanh(x), 13.0 <= |x| < 16.0
// Polynomial coefficients for the tanh(x), 13.0 <= |x| < 16.0
data8 0xBECD0AF7E22E5594, 0x00003FE9 //A19
data8 0xE2834E2D68C1128C, 0x00003FEA //A18
data8 0x97B117611B317379, 0x00003FEB //A17
@ -366,19 +366,19 @@ data8 0xF358D8A7FC012D5D, 0x00003FDE //A5
data8 0x98176E2309B7C73A, 0x0000BFDD //A4
//
// Coefficients ##16..19 ("tail" coefficient tables)
// Polynomial coefficients for the tanh(x), 0.25 <= |x| < 0.5
// Polynomial coefficients for the tanh(x), 0.25 <= |x| < 0.5
data8 0x838F209ABB9BA7B3, 0x0000BFF7 //A3
data8 0xEBC0AC78DA4FC500, 0x0000BFF8 //A2
data8 0xF0A4D02960B60E69, 0x00003FFC //A1
data8 0xFACBF534D0E42F8A, 0x00003FFC //A0
//
// Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0
// Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0
data8 0xC0ECBDC0A0D133A6, 0x0000BFF8 //A3
data8 0xBA13A076BF8E812F, 0x0000BFFB //A2
data8 0xC954A37D1A1CA070, 0x00003FFD //A1
data8 0xEC9A9EBAB4579B29, 0x00003FFD //A0
//
// Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0
// Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0
data8 0xD42E9175A6EA1397, 0x00003FFB //A3
data8 0xA3C361378A55CF56, 0x0000BFFD //A2
data8 0xD706E07CC8622983, 0x00003FFD //A1
@ -427,7 +427,7 @@ data8 0xE42327B9B0D7202F, 0x0000BFD8 //A2
data8 0xE42327BB13076BD6, 0x00003FD5 //A1
data8 0xFFFFFFFFFFF8DEE7, 0x00003FFE //A0
//
// Polynomial coefficients for the tanh(x), 0.0 <= |x| < 0.25
// Polynomial coefficients for the tanh(x), 0.0 <= |x| < 0.25
// ('tanh_near_zero' path)
data8 0xBF2BA5D26E479D0C //A9
data8 0x3F4336D96F81EE26 //A8
@ -441,7 +441,7 @@ data8 0x3FC1111111111108 //A2
//
// 1.0 - 2^(-63)
// ('tanh_saturation' path)
data8 0xFFFFFFFFFFFFFFFF, 0x00003FFE
data8 0xFFFFFFFFFFFFFFFF, 0x00003FFE
LOCAL_OBJECT_END(tanh_data)
// CAUTION: The order of table coefficients shouldn't be changed!
@ -461,8 +461,8 @@ GLOBAL_LIBM_ENTRY(tanh)
};;
{ .mfi
getf.d rArg = f8 // x in GR
fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and specials
getf.d rArg = f8 // x in GR
fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and specials
// 0xEF = @qnan|@snan|@pos|@neg|@zero|@unorm|@inf
shl rArgSgnd = rArgSgnd, 52 // mask for exponent
}
@ -493,11 +493,11 @@ GLOBAL_LIBM_ENTRY(tanh)
nop.f 0
(p6) br.cond.spnt _tanh_spec // Branch to zero, denorm & specs
};;
{ .mfi
and rShiftedArgMasked = rShiftedArg, rMask // bias of x << 8
fmerge.s fArgAbs = f1, f8 // |x|
shr rShiftedAbsArg = rAbsArg, 44 // Select only necessary
shr rShiftedAbsArg = rAbsArg, 44 // Select only necessary
// bits of absolute arg
}
{ .mfi
@ -509,28 +509,28 @@ GLOBAL_LIBM_ENTRY(tanh)
{ .mfi
sub rIndex = rShiftedArgMasked, rBias // index << 8
nop.f 0
nop.f 0
cmp.lt p10, p0 = rShiftedArgMasked, rBias // p10=1 if |x|<0.25
}
{ .mfb
(p8) cmp.gt p8, p11 = rAbsArg, rTwo // If arg is greater than 2.0?
// (then we should use binary subranges)
nop.f 0
nop.f 0
(p10) br.cond.spnt tanh_near_zero // branch out if |x| < 0.25
};;
.pred.rel "mutex",p8,p11
{ .mfi
(p8) add rIndex = 0x400, rIndex // Make pointer to binary
(p8) add rIndex = 0x400, rIndex // Make pointer to binary
// subranges
(p11) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1 // |x|/b - 1.0
addl rSaturation = 0x40331, r0 // shifted bits of 19.0625
}
{ .mfi
nop.m 0
nop.m 0
(p8) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, fTwo // |x|/b - 2.0
// this is only for binary subranges [3.25;4], [6.5;8], [13.0;16]
nop.i 0
nop.i 0
}
;;
@ -544,7 +544,7 @@ GLOBAL_LIBM_ENTRY(tanh)
adds rCoeffAddr2 = 16, rCoeffAddr1 // Shifted pointer to coeffs
fmerge.s fSignumX = f8, f1 // signum(x)
nop.i 0
}
}
{ .mfb
cmp.le p12, p0 = rSaturation, rShiftedAbsArg // |x|>=19.0625?
nop.f 0
@ -595,15 +595,15 @@ GLOBAL_LIBM_ENTRY(tanh)
{.mfi
ldfe fA12 = [rCoeffAddr2], 32 // Load A12
nop.f 0
cmp.lt p15, p14 = rArg, r0 // Arg positive (p14)
cmp.lt p15, p14 = rArg, r0 // Arg positive (p14)
// or negative (p15)?
};;
{.mfi
ldfe fA11 = [rCoeffAddr1], 32 // Load A11
nop.f 0
add rCoeffAddr4 = rCoeffAddr3, rCoeffAddr4 // shifted "tail"
// coeffs to load
add rCoeffAddr4 = rCoeffAddr3, rCoeffAddr4 // shifted "tail"
// coeffs to load
}
{.mfi
ldfe fA10 = [rCoeffAddr2], 32 // Load A10
@ -721,7 +721,7 @@ GLOBAL_LIBM_ENTRY(tanh)
{ .mfi
nop.m 0
fma.s1 fA15 = fA15, fTSqr, fA13 // Polynomial
fma.s1 fA15 = fA15, fTSqr, fA13 // Polynomial
nop.i 0
}
{ .mfi
@ -743,19 +743,19 @@ GLOBAL_LIBM_ENTRY(tanh)
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 fA7 = fA7, fTSqr, fA5 // Polynomial
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 fRes = fRes, fTQuadr, fA15 // Polynomial
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 fA4 = fA4, fTSqr, fA2 // Polynomial
nop.i 0
};;
@ -767,7 +767,7 @@ GLOBAL_LIBM_ENTRY(tanh)
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 fA4 = fA7, fTDeg3, fA4 // Polynomial
nop.i 0
};;
@ -797,7 +797,7 @@ GLOBAL_LIBM_ENTRY(tanh)
tanh_near_zero:
{ .mfi
adds rCoeffAddr1 = 0xC80, rDataPtr // address of A9
fma.s0 fTSqr = fArgSqr, fArgSqr, f0 // x^4
fma.s0 fTSqr = fArgSqr, fArgSqr, f0 // x^4
nop.i 0
}
{ .mfi
@ -931,51 +931,51 @@ tanh_saturation:
// 0, denormals and special IEEE numbers path /////////////////////////////////
_tanh_spec:
{ .mfi
cmp.lt p15, p14 = rArg, r0 // Is arg negative (p15)
{ .mfi
cmp.lt p15, p14 = rArg, r0 // Is arg negative (p15)
// or positive (p14)
fclass.m p6,p0 = f8, 0x23 // To filter infinities
// 0x23 = @pos|@neg|@inf
// 0x23 = @pos|@neg|@inf
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fclass.m p7,p0 = f8, 0xC7 // To filter NaNs & Zeros
// 0xC7 = @pos|@neg|@zero|@qnan|@snan
nop.i 0
};;
{ .mfb
{ .mfb
nop.m 0
(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args
(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args
(p6) br.ret.spnt b0 // exit for x = INF
};;
{ .mfb
{ .mfb
nop.m 0
(p7) fma.d.s0 f8 = f8, f1, f8 // +/-0 for 0 args
(p7) fma.d.s0 f8 = f8, f1, f8 // +/-0 for 0 args
// and NaNs for NaNs
(p7) br.ret.spnt b0 // exit for x = NaN or +/-0
};;
{ .mfi
{ .mfi
nop.m 0
fnorm.s0 f8 = f8 // Normalize arg
nop.i 0
};;
.pred.rel "mutex",p14,p15
{ .mfi
{ .mfi
nop.m 0
(p14) fnma.d.s0 f8 = f8, f8, f8 // res = r-r^2
nop.i 0
}
{ .mfb
{ .mfb
nop.m 0
(p15) fma.d.s0 f8 = f8, f8, f8 // res = r+r^2
br.ret.sptk b0 // 0, denormals, specials return

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -85,7 +85,7 @@
// 6. |x| = INF
// Return tanhf(x) = sign(x) * 1.0
//
// 7. x = [S,Q]NaN
// 7. x = [S,Q]NaN
// Return tanhf(x) = QNaN
//
// 8. x is positive denormal
@ -96,11 +96,11 @@
//
// Registers used
//==============================================================
// Floating Point registers used:
// Floating Point registers used:
// f8, input
// f32 -> f59
// General registers used:
// General registers used:
// r32 -> r46, r2, r3
// Predicate registers used:
@ -220,7 +220,7 @@ data8 0xC0BE48CFADE2431E // D0
data8 0x4090E74249760FDD // D1
data8 0xC04B6F537FCF2F1E // D2
data8 0x3E0DCD879C91ADEA // B0
// Polynomial coefficients for the tanh(x), -0.3125 < x < 0.3125
// Polynomial coefficients for the tanh(x), -0.3125 < x < 0.3125
data8 0xBFD555551E8245B7 // A0
data8 0x3FC110E63F52E689 // A1
data8 0xBFAB8CD6A5B7BAFA // A2
@ -250,7 +250,7 @@ data8 0xBFB1DEA49A831CBC // A0
data8 0x3FFA729FC7085674 // A1
data8 0xBFF2F44D923A8FA4 // A2
data8 0x3FE092FC5712227E // A3
// Polynomial coefficients for the tanh(x), 8.0 <= |x| <= 9.125
// Polynomial coefficients for the tanh(x), 8.0 <= |x| <= 9.125
data8 0x3FEFFF5769EE3041 // A0
data8 0x3EFBBF148D850891 // A1
data8 0xBEC86BCEF0F5C2FE // A2
@ -275,10 +275,10 @@ GLOBAL_LIBM_ENTRY(tanhf)
;;
{ .mfi
getf.s rArg = f8 // x in GR
getf.s rArg = f8 // x in GR
fclass.m p7,p0 = f8, 0x0b // is x denormal ?
// sign bit and 2 most bits in significand
shl rMask = rMask, 20
shl rMask = rMask, 20
}
{ .mfi
ld8 rDataPtr = [rDataPtr]
@ -317,7 +317,7 @@ GLOBAL_LIBM_ENTRY(tanhf)
shr rOffset2 = rOffset2, 21
}
{ .mfi
cmp.lt p10, p8 = rAbsArg, rBound // |x| < 0.3125?
cmp.lt p10, p8 = rAbsArg, rBound // |x| < 0.3125?
nop.f 0
adds rCoeffAddr3 = 16, rDataPtr
}
@ -338,8 +338,8 @@ GLOBAL_LIBM_ENTRY(tanhf)
{ .mfi
shladd rCoeffAddr1 = rBias, 4, rDataPtr
fma.s1 fArg3Sgn = fArgSqr, f8, f0 // sign(x)*|x|^3
// is |x| < 9.125?
cmp.lt p11, p12 = rAbsArg, rSaturation
// is |x| < 9.125?
cmp.lt p11, p12 = rAbsArg, rSaturation
}
{ .mfi
shladd rCoeffAddr3 = rBias, 4, rCoeffAddr3
@ -351,7 +351,7 @@ GLOBAL_LIBM_ENTRY(tanhf)
{ .mfi
(p11) ldfpd fC0, fC1 = [rCoeffAddr1]
(p9) fmerge.s f8 = f8,f1 // +/- inf
(p12) adds rDataPtr = 544, rDataPtr
(p12) adds rDataPtr = 544, rDataPtr
}
{ .mfb
(p11) ldfpd fC2, fC3 = [rCoeffAddr3], 16
@ -404,7 +404,7 @@ GLOBAL_LIBM_ENTRY(tanhf)
{ .mfb
nop.m 0
fma.s1 fArg6Sgn = fArg3, fArg3Sgn, f0 // sign(x)*|x|^6
(p13) br.cond.spnt tanhf_close_to_saturation
(p13) br.cond.spnt tanhf_close_to_saturation
}
;;
@ -440,7 +440,7 @@ GLOBAL_LIBM_ENTRY(tanhf)
{ .mfi
nop.m 0
fma.s1 fPolATmp = fA3, fAbsArg, fA2 // A3*|x| + A2
fma.s1 fPolATmp = fA3, fAbsArg, fA2 // A3*|x| + A2
nop.i 0
}
{ .mfi
@ -452,7 +452,7 @@ GLOBAL_LIBM_ENTRY(tanhf)
{ .mfi
nop.m 0
// C3*|x|^3 + C2*x^2 + C1*|x| + C0
fma.s1 fPolC = fPolC, fArgSqr, fPolCTmp
fma.s1 fPolC = fPolC, fArgSqr, fPolCTmp
nop.i 0
}
;;
@ -460,31 +460,31 @@ GLOBAL_LIBM_ENTRY(tanhf)
{ .mfi
nop.m 0
// PolD = sign(x)*(|x|^7 + D2*x^6 + D1*|x|^5 + D0*x^4)
fma.d.s1 fPolD = fPolD, fArg4Sgn, fPolDTmp
fma.d.s1 fPolD = fPolD, fArg4Sgn, fPolDTmp
nop.i 0
}
;;
{ .mfi
nop.m 0
// PolA = A3|x|^3 + A2*x^2 + A1*|x| + A0
fma.d.s1 fPolA = fPolATmp, fArgSqr, fPolA
// PolA = A3|x|^3 + A2*x^2 + A1*|x| + A0
fma.d.s1 fPolA = fPolATmp, fArgSqr, fPolA
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
// PolC = B0*x^4 + C3*|x|^3 + C2*|x|^2 + C1*|x| + C0
fma.d.s1 fPolC = fPolC, f1, fB0
// PolC = B0*x^4 + C3*|x|^3 + C2*|x|^2 + C1*|x| + C0
fma.d.s1 fPolC = fPolC, f1, fB0
nop.i 0
}
;;
;;
{ .mfi
nop.m 0
(p14) fma.s.s0 f8 = fPolC, fPolD, fPolA // for positive x
nop.i 0
nop.i 0
}
{ .mfb
nop.m 0
@ -528,7 +528,7 @@ tanhf_saturation:
br.ret.sptk b0 // Exit for 9.125 <=|x|< +inf
}
;;
// Here if 8.0 <= |x| < 9.125
tanhf_close_to_saturation:
{ .mfi
@ -540,7 +540,7 @@ tanhf_close_to_saturation:
nop.m 0
fma.s1 fPolA = fA3, fAbsArg, fA2 // A3*|x| + A2
nop.i 0
}
}
;;
.pred.rel "mutex", p14, p15
@ -548,7 +548,7 @@ tanhf_close_to_saturation:
nop.m 0
// for positive x
(p14) fma.s.s0 f8 = fPolA, fArgSqr, fPolATmp
nop.i 0
nop.i 0
}
{ .mfb
nop.m 0

View File

@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@ -68,7 +68,7 @@
//
// 3. Main path: 1/8 <= |x| < 22.8
// For several ranges of 1/8 <= |x| < 22.8
// Return tanhl(x) = sign(x)*((A0H+A0L) + y*(A1H+A1L) + y^2*(A2H+A2L) +
// Return tanhl(x) = sign(x)*((A0H+A0L) + y*(A1H+A1L) + y^2*(A2H+A2L) +
// + y^3*A3 + y^4*A4 + ... + y^25*A25 )
// where y = (|x|/a) - b
//
@ -85,10 +85,10 @@
// 8.0 <= |x| < 13.0 a = 8.0, b = 1.5
// 13.0 <= |x| < 16.0 a = 8.0, b = 2.0
// 16.0 <= |x| < 22.8 a = 16.0, b = 1.5
// ( [3.25;4.0], [6.5;8.0], [13.0;16.0] subranges separated
// ( [3.25;4.0], [6.5;8.0], [13.0;16.0] subranges separated
// to resolve monotonicity issues )
//
// 4. Saturation path: 22.8 <= |x| < +INF
// 4. Saturation path: 22.8 <= |x| < +INF
// Return tanhl(x) = sign(x)*(1.0 - tiny_value)
// (tiny_value ~ 1e-1233)
//
@ -112,10 +112,10 @@
// Multiprecision have to be performed only for first few
// polynomial iterations (up to 3-rd x degree)
// Here we use the same parallelisation way as above:
// Split whole polynomial to first, "multiprecision" part, and second,
// Split whole polynomial to first, "multiprecision" part, and second,
// so called "tail", native precision part.
//
// 1) Multiprecision part:
// 1) Multiprecision part:
// [v1=(A0H+A0L)+y*(A1H+A1L)] + [v2=y^2*((A2H+A2L)+y*A3)]
// v1 and v2 terms calculated in parallel
//
@ -123,7 +123,7 @@
// v3 = y^4 * ( A4 + y*A5 + ... + y^21*A25 )
// v3 is split into 2 even parts (10 coefficients in each one).
// These 2 parts are also factorized using binary tree technique.
//
//
// So Multiprecision and Tail parts cost is almost the same
// and we have both results ready before final summation.
//
@ -132,29 +132,29 @@
// not at the last operation but much more earlier and at
// several places.
//
// 4. Saturation path: 22.8 <= |x| < +INF
// 4. Saturation path: 22.8 <= |x| < +INF
//
// We use formula sign(x)*(1.0 - tiny_value) instead of simple sign(x)*1.0
// just to meet IEEE requirements for different rounding modes in this case.
//
// Registers used
//==============================================================
// Floating Point registers used:
// Floating Point registers used:
// f8 - input & output
// f32 -> f92
// General registers used:
// r2, r3, r32 -> r52
// General registers used:
// r2, r3, r32 -> r52
// Predicate registers used:
// p0, p6 -> p11, p14, p15
// p6 - arg is zero, denormal or special IEEE
// p7 - arg is in [16;32] binary interval
// p8 - arg is in one of subranges
// p8 - arg is in one of subranges
// [3.25;4.0], [6.5;8.0], [13.0;16.0]
// p9 - arg < 1/8
// p10 - arg is NOT in one of subranges
// p10 - arg is NOT in one of subranges
// [3.25;4.0], [6.5;8.0], [13.0;16.0]
// p11 - arg in saturation domain
// p14 - arg is positive
@ -211,9 +211,9 @@ fA16 = f51
fA17 = f52
fA18 = f53
fA19 = f54
fA20 = f55
fA21 = f56
fA22 = f57
fA20 = f55
fA21 = f56
fA22 = f57
fA23 = f58
fA24 = f59
fA25 = f60
@ -242,10 +242,10 @@ fRes3H = f79
fRes3L = f80
fRes4 = f81
fTT = f82
fTT = f82
fTH = f83
fTL = f84
fTT2 = f85
fTT2 = f85
fTH2 = f86
fTL2 = f87
@ -264,7 +264,7 @@ LOCAL_OBJECT_START(tanhl_data)
////////// Main tables ///////////
_0p125_to_0p25_data: // exp = 2^-3
// Polynomial coefficients for the tanh(x), 1/8 <= |x| < 1/4
// Polynomial coefficients for the tanh(x), 1/8 <= |x| < 1/4
data8 0x93D27D6AE7E835F8, 0x0000BFF4 //A3 = -5.6389704216278164626050408239e-04
data8 0xBF66E8668A78A8BC //A2H = -2.7963640930198357253955165902e-03
data8 0xBBD5384EFD0E7A54 //A2L = -1.7974001252014762983581666453e-20
@ -287,7 +287,7 @@ data8 0x83C8DDF213711381, 0x0000BFCC //A14 = -4.5721980583985311263109531319e-16
LOCAL_OBJECT_END(tanhl_data)
LOCAL_OBJECT_START(_0p25_to_0p5_data)
// Polynomial coefficients for the tanh(x), 1/4 <= |x| < 1/2
// Polynomial coefficients for the tanh(x), 1/4 <= |x| < 1/2
data8 0xB6E27B747C47C8AD, 0x0000BFF6 //A3 = -2.7905990032063258105302045572e-03
data8 0xBF93FD54E226F8F7 //A2H = -1.9521070769536099515084615064e-02
data8 0xBC491BC884F6F18A //A2L = -2.7222721075104525371410300625e-18
@ -310,7 +310,7 @@ data8 0x905F6F124AF956B1, 0x00003FD8 //A14 = 2.0516607231389483452611375485e-12
LOCAL_OBJECT_END(_0p25_to_0p5_data)
LOCAL_OBJECT_START(_0p5_to_1_data)
// Polynomial coefficients for the tanh(x), 1/2 <= |x| < 1
// Polynomial coefficients for the tanh(x), 1/2 <= |x| < 1
data8 0xAB402BE491EE72A7, 0x00003FF7 //A3 = 5.2261556931080934657023772945e-03
data8 0xBFB8403D3DDA87BE //A2H = -9.4730212784752659826992271519e-02
data8 0xBC6FF7BC2AB71A8B //A2L = -1.3863786398568460929625760740e-17
@ -333,7 +333,7 @@ data8 0xC78363FF929EFF62, 0x0000BFE4 //A14 = -1.1613199289622686725595739572e-08
LOCAL_OBJECT_END(_0p5_to_1_data)
LOCAL_OBJECT_START(_1_to_2_data)
// Polynomial coefficients for the tanh(x), 1 <= |x| < 2.0
// Polynomial coefficients for the tanh(x), 1 <= |x| < 2.0
data8 0xB3D8FB48A548D99A, 0x00003FFB //A3 = 8.7816203264683800892441646129e-02
data8 0xBFC4EFBD8FB38E3B //A2H = -1.6356629864377389416141284073e-01
data8 0xBC77687FD8087B23 //A2L = -2.0303377679446772162287121190e-17
@ -356,7 +356,7 @@ data8 0x8672AF27EB0823B7, 0x00003FEF //A14 = 1.6027448793338500004496520337e-05
LOCAL_OBJECT_END(_1_to_2_data)
LOCAL_OBJECT_START(_2_to_3p25_data)
// Polynomial coefficients for the tanh(x), 2 <= |x| < 3.25
// Polynomial coefficients for the tanh(x), 2 <= |x| < 3.25
data8 0xD45657BEC559E366, 0x00003FFA //A3 = 5.1840155367548909799883161889e-02
data8 0xBFA41B109CA6AB81 //A2H = -3.9268988726084870510835145296e-02
data8 0xBC2C3D708A4E56C5 //A2L = -7.6544669252238280132415018518e-19
@ -379,7 +379,7 @@ data8 0xE1851A2D00737A5D, 0x00003FF2 //A14 = 2.1507256570895163202182573369e-04
LOCAL_OBJECT_END(_2_to_3p25_data)
LOCAL_OBJECT_START(_4_to_6p5_data)
// Polynomial coefficients for the tanh(x), 4 <= |x| < 6.5
// Polynomial coefficients for the tanh(x), 4 <= |x| < 6.5
data8 0x896FDBD321A0BE58, 0x00003FF5 //A3 = 1.0485606995331904734870550114e-03
data8 0xBF39C522B95A37D6 //A2H = -3.9321992640217512306882730044e-04
data8 0xBBA9B3EC39A45338 //A2L = -2.7213922673282819034134988241e-21
@ -402,7 +402,7 @@ data8 0x922EC6F3CFE0496E, 0x0000BFF4 //A14 = -5.5764283474946207558456581668e-04
LOCAL_OBJECT_END(_4_to_6p5_data)
LOCAL_OBJECT_START(_8_to_13_data)
// Polynomial coefficients for the tanh(x), 8 <= |x| < 13
// Polynomial coefficients for the tanh(x), 8 <= |x| < 13
data8 0xDD6050A898303460, 0x00003FE6 //A3 = 5.1543170295688189081352133793e-08
data8 0xBE44C1078FDBADC0 //A2H = -9.6643444318955652627581125180e-09
data8 0xBAF95FCAA6DBBA6F //A2L = -1.3118146684038113473094275420e-24
@ -425,7 +425,7 @@ data8 0x82DEDAA539A3A3F1, 0x0000BFF1 //A14 = -6.2403928644276709411156885292e-05
LOCAL_OBJECT_END(_8_to_13_data)
LOCAL_OBJECT_START(_16_to_22p8_data)
// Polynomial coefficients for the tanh(x), 16 <= |x| < 22.88
// Polynomial coefficients for the tanh(x), 16 <= |x| < 22.88
data8 0x992C00F33DDE804D, 0x00003FCE //A3 = 2.1256869805798788337547274131e-15
data8 0x3C8D42EA28102760 //A2H = 5.0760412270332007485198379096e-17
data8 0x391A747B43B072DD //A2L = 1.2737621993898125881520341053e-33
@ -448,7 +448,7 @@ data8 0xDA2470DE110B293E, 0x00003FF1 //A14 = 1.0401837693241806604296821650e-04
LOCAL_OBJECT_END(_16_to_22p8_data)
LOCAL_OBJECT_START(_3p25_to_4_data)
// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 4
// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 4
data8 0xE9E07240432926E6, 0x00003FF7 //A3 = 7.1373517862636557382403555215e-03
data8 0xBF75F495227AF306 //A2H = -5.3602052282115727338540622782e-03
data8 0xBBBE92D355A6B716 //A2L = -6.4741983326810209847018826624e-21
@ -471,7 +471,7 @@ data8 0x8987DF26A6789CCF, 0x00003FEE //A14 = 8.1974714257536543772040700977e-06
LOCAL_OBJECT_END(_3p25_to_4_data)
LOCAL_OBJECT_START(_6p5_to_8_data)
// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0
// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0
data8 0xA11C8A63815E5657, 0x00003FEF //A3 = 1.9205985861286093001394561449e-05
data8 0xBEDE355AD6CB61D8 //A2H = -7.2022479400070228499307345427e-06
data8 0xBB8E6B50B8468A63 //A2L = -8.0518953122203408718779840543e-22
@ -494,7 +494,7 @@ data8 0xBE25D0FD069D0A93, 0x0000BFEE //A14 = -1.1333687314965721384777951065e-05
LOCAL_OBJECT_END(_6p5_to_8_data)
LOCAL_OBJECT_START(_13_to_16_data)
// Polynomial coefficients for the tanh(x), 13 <= |x| < 16
// Polynomial coefficients for the tanh(x), 13 <= |x| < 16
data8 0x98176FD2075BDBD5, 0x00003FDB //A3 = 1.7290807363028159200235264756e-11
data8 0xBD8C8464F76162D1 //A2H = -3.2420263805679445515400340441e-12
data8 0xBA2D56B508E0F1FD //A2L = -1.8515322669984580704502445180e-28
@ -519,7 +519,7 @@ LOCAL_OBJECT_END(_13_to_16_data)
//////// "Tail" tables //////////
LOCAL_OBJECT_START(_0p125_to_0p25_data_tail)
// Polynomial coefficients for the tanh(x), 1/8 <= |x| < 1/4
// Polynomial coefficients for the tanh(x), 1/8 <= |x| < 1/4
data8 0x9D7D206E97ADC83A, 0x0000BFCC //A13 = -5.4639895428711257047470806445e-16
data8 0xA8972B666A845810, 0x00003FD3 //A12 = 7.4869224589947988668562043110e-14
data8 0x9A5B31511C9F4698, 0x0000BFD4 //A11 = -1.3709586467430093373657009487e-13
@ -533,7 +533,7 @@ data8 0xE7C2AE92CB36769B, 0x00003FEF //A4 = 2.7628001723157068127646694830e-05
LOCAL_OBJECT_END(_0p125_to_0p25_data_tail)
LOCAL_OBJECT_START(_0p25_to_0p5_data_tail)
// Polynomial coefficients for the tanh(x), 1/4 <= |x| < 1/2
// Polynomial coefficients for the tanh(x), 1/4 <= |x| < 1/2
data8 0x9E2972C008B9965E, 0x0000BFDC //A13 = -3.5961854154738002253192260213e-11
data8 0xC3EABA3D219BEA8A, 0x00003FDB //A12 = 2.2273173303628274478819473067e-11
data8 0xC50FB68D960D5CD9, 0x00003FE1 //A11 = 1.4338102430978399800743148719e-09
@ -547,7 +547,7 @@ data8 0xAC262F3F8CF49C02, 0x00003FF4 //A4 = 6.5669692402266433496312492412e-04
LOCAL_OBJECT_END(_0p25_to_0p5_data_tail)
LOCAL_OBJECT_START(_0p5_to_1_data_tail)
// Polynomial coefficients for the tanh(x), 1/2 <= |x| < 1
// Polynomial coefficients for the tanh(x), 1/2 <= |x| < 1
data8 0xDF67FB36FFA2A538, 0x00003FE7 //A13 = 1.0403160796697495720021114635e-07
data8 0xB7FB80FB5AFA63A4, 0x0000BFE8 //A12 = -1.7134699677764282023124981753e-07
data8 0xC87625A0BA7D6C5F, 0x0000BFEA //A11 = -7.4677732458471897291461679095e-07
@ -561,7 +561,7 @@ data8 0xCC4AB2EC0965499B, 0x00003FF7 //A4 = 6.2344907419841579664122448353e-03
LOCAL_OBJECT_END(_0p5_to_1_data_tail)
LOCAL_OBJECT_START(_1_to_2_data_tail)
// Polynomial coefficients for the tanh(x), 1 <= |x| < 2.0
// Polynomial coefficients for the tanh(x), 1 <= |x| < 2.0
data8 0xCCAEE174EAC17F78, 0x0000BFEE //A13 = -1.2200065117856038355953618829e-05
data8 0xA39DD0981D1A2776, 0x0000BFF0 //A12 = -3.9009204899026604074167603200e-05
data8 0xB7104FA27FAF80D0, 0x00003FF2 //A11 = 1.7458316338540792661905876072e-04
@ -575,7 +575,7 @@ data8 0xCC8286B331BD8AAA, 0x0000BFF9 //A4 = -2.4964583478826523250880337777e-02
LOCAL_OBJECT_END(_1_to_2_data_tail)
LOCAL_OBJECT_START(_2_to_3p25_data_tail)
// Polynomial coefficients for the tanh(x), 2 <= |x| < 3.25
// Polynomial coefficients for the tanh(x), 2 <= |x| < 3.25
data8 0x92E1711A3BD6408B, 0x0000BFF4 //A13 = -5.6030514548041036913731470443e-04
data8 0x8B9BD885FF3E98C5, 0x00003FF5 //A12 = 1.0651304064581604055612602669e-03
data8 0xD041356C7FA26A22, 0x0000BFF5 //A11 = -1.5888574328066952147023520244e-03
@ -589,7 +589,7 @@ data8 0xD01D077B42E7ED76, 0x0000BFFA //A4 = -5.0808934425896607486919526567e-02
LOCAL_OBJECT_END(_2_to_3p25_data_tail)
LOCAL_OBJECT_START(_4_to_6p5_data_tail)
// Polynomial coefficients for the tanh(x), 4 <= |x| < 6.5
// Polynomial coefficients for the tanh(x), 4 <= |x| < 6.5
data8 0x870CCE8C76C52C7E, 0x00003FF5 //A13 = 1.0303499350193060915603525934e-03
data8 0xE1431E54AD2A738B, 0x0000BFF5 //A12 = -1.7186140560972621669872002486e-03
data8 0xAB20056533E28734, 0x00003FF6 //A11 = 2.6111615345168277554841545330e-03
@ -603,7 +603,7 @@ data8 0x896E211733AD9D40, 0x0000BFF6 //A4 = -2.0970183170010094667442967500e-03
LOCAL_OBJECT_END(_4_to_6p5_data_tail)
LOCAL_OBJECT_START(_8_to_13_data_tail)
// Polynomial coefficients for the tanh(x), 8 <= |x| < 13
// Polynomial coefficients for the tanh(x), 8 <= |x| < 13
data8 0xE50C3476BED020AA, 0x00003FF0 //A13 = 5.4609221347524272615754239857e-05
data8 0xBA16F5F4EDC0EABC, 0x0000BFF0 //A12 = -4.4367239594986428539386662937e-05
data8 0x8B916C2F002C3D91, 0x00003FF0 //A11 = 3.3275617838067362533536610680e-05
@ -617,7 +617,7 @@ data8 0xDD6050A7761D67BB, 0x0000BFE8 //A4 = -2.0617268111985310661707082242e-07
LOCAL_OBJECT_END(_8_to_13_data_tail)
LOCAL_OBJECT_START(_16_to_22p8_data_tail)
// Polynomial coefficients for the tanh(x), 16 <= |x| < 22.88
// Polynomial coefficients for the tanh(x), 16 <= |x| < 22.88
data8 0xEAF4AF87336E81B1, 0x00003FEF //A13 = 2.8008914392791730186582989654e-05
data8 0xD5B309EA768E2711, 0x00003FED //A12 = 6.3687375204024238267961143128e-06
data8 0xA4048CA537113538, 0x00003FEB //A11 = 1.2220276227448617951538196845e-06
@ -631,7 +631,7 @@ data8 0x86BC347939478174, 0x00003FD3 //A4 = 5.9834437707863962671883176163e-14
LOCAL_OBJECT_END(_16_to_22p8_data_tail)
LOCAL_OBJECT_START(_3p25_to_4_data_tail)
// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 4
// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 4
data8 0xBE9A2BE19F21BA1C, 0x0000BFEE //A13 = -1.1360778336288065244475976873e-05
data8 0xF84910F515BDB014, 0x00003FED //A12 = 7.3994819819577018481862729782e-06
data8 0xC4C84FB788AA4007, 0x00003FEF //A11 = 2.3458298013663976251972482656e-05
@ -645,7 +645,7 @@ data8 0xE93FB2F48233275B, 0x0000BFF7 //A4 = -7.1181892208343798194003322900e-03
LOCAL_OBJECT_END(_3p25_to_4_data_tail)
LOCAL_OBJECT_START(_6p5_to_8_data_tail)
// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0
// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0
data8 0xA6881D7D21774BFD, 0x00003FEF //A13 = 1.9852125640303530752913966680e-05
data8 0x875E983AA042E605, 0x0000BFF0 //A12 = -3.2274606306629334402383651599e-05
data8 0xCB19E01E94FC133C, 0x00003FF0 //A11 = 4.8423069963831314927026982707e-05
@ -659,7 +659,7 @@ data8 0xA11C80E20ADA5A64, 0x0000BFF0 //A4 = -3.8411937140983728563216440713e-05
LOCAL_OBJECT_END(_6p5_to_8_data_tail)
LOCAL_OBJECT_START(_13_to_16_data_tail)
// Polynomial coefficients for the tanh(x), 13 <= |x| < 16
// Polynomial coefficients for the tanh(x), 13 <= |x| < 16
data8 0x9D6CCDA4767CA6D9, 0x00003FE5 //A13 = 1.8326683535066775712253572575e-08
data8 0xFFAF154F334BF403, 0x0000BFE4 //A12 = -1.4882762852665077172347508377e-08
data8 0xBFC68FA7C61B6C17, 0x00003FE4 //A11 = 1.1162810813806544919835662888e-08
@ -673,7 +673,7 @@ data8 0x98176FD2074C1D77, 0x0000BFDD //A4 = -6.9163229452106125388824134881e-11
LOCAL_OBJECT_END(_13_to_16_data_tail)
LOCAL_OBJECT_START(_0_to_1o8_data)
// Polynomial coefficients for the tanh(x), 0.0 <= |x| < 0.125
// Polynomial coefficients for the tanh(x), 0.0 <= |x| < 0.125
data8 0xBA0EC1879495150B, 0x0000BFF5 // A15 = -1.4195071451378679802688367813e-03
data8 0xEB5A82898D1BCBA4, 0x00003FF6 // A13 = 3.5912102408030526706365632879e-03
data8 0x91370DAFE0B64438, 0x0000BFF8 // A11 = -8.8632234251336964576640807982e-03
@ -688,7 +688,7 @@ LOCAL_OBJECT_END(_0_to_1o8_data)
GLOBAL_LIBM_ENTRY(tanhl)
{ .mfi
alloc r32 = ar.pfs, 0, 21, 0, 0
alloc r32 = ar.pfs, 0, 21, 0, 0
fmerge.se fArgAbsNorm = f1, f8 // normalized x (1.0 <= x < 2.0)
addl rSignBit = 0x20000, r0 // Set sign bit for exponent
}
@ -699,26 +699,26 @@ GLOBAL_LIBM_ENTRY(tanhl)
{ .mfi
getf.exp rArgExp = f8 // Get arg exponent
fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and specials
fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and specials
// 0xEF = @qnan|@snan|@pos|@neg|@zero|@unorm|@inf
addl rBias = 0xfffc, r0 // Value to subtract from exp
addl rBias = 0xfffc, r0 // Value to subtract from exp
// to get actual interval number
}
{ .mfi
ld8 rDataPtr = [rDataPtr] // Get real common data pointer
fma.s1 fArgSqr = f8, f8, f0 // x^2 (for [0;1/8] path)
addl r2to4 = 0x10000, r0 // unbiased exponent
addl r2to4 = 0x10000, r0 // unbiased exponent
// for [2;4] binary interval
};;
{ .mfi
getf.sig rArgSig = f8 // Get arg significand
getf.sig rArgSig = f8 // Get arg significand
fcmp.lt.s1 p15, p14 = f8, f0 // Is arg negative/positive?
addl rSaturation = 0xb70, r0 // First 12 bits of
// saturation value signif.
}
{ .mfi
setf.d f1p5 = r1p5 // 1.5 construction
setf.d f1p5 = r1p5 // 1.5 construction
fma.s1 f2p0 = f1,f1,f1 // 2.0 construction
addl r1625Sign = 0xd01, r0 // First 12 bits of
// 1.625 value signif.
@ -733,13 +733,13 @@ GLOBAL_LIBM_ENTRY(tanhl)
{ .mfb
addl rTiny = 0xf000, r0 // Tiny value for saturation path
nop.f 0
(p6) br.cond.spnt tanhl_spec // Branch to zero, denorm & specs
(p6) br.cond.spnt tanhl_spec // Branch to zero, denorm & specs
};;
{ .mfi
sub rInterval = rArgExp, rBias // Get actual interval number
nop.f 0
shr.u rArgSig = rArgSig, 52 // Leave only 12 bits of sign.
shr.u rArgSig = rArgSig, 52 // Leave only 12 bits of sign.
}
{ .mfi
adds rShiftedDataPtr = 0x10, rDataPtr // Second ptr to data
@ -748,10 +748,10 @@ GLOBAL_LIBM_ENTRY(tanhl)
};;
{ .mfi
(p8) cmp.le p8, p10 = r1625Sign, rArgSig // If signd is greater
(p8) cmp.le p8, p10 = r1625Sign, rArgSig // If signd is greater
// than 1.625? (arg is at one of binary subranges)
nop.f 0
shl rOffset = rInterval, 8 // Make offset from
shl rOffset = rInterval, 8 // Make offset from
// interval number
}
{ .mfi
@ -762,30 +762,30 @@ GLOBAL_LIBM_ENTRY(tanhl)
};;
{ .mfi
(p8) adds rOffset = 0x400, rOffset // Add additional offset
(p8) adds rOffset = 0x400, rOffset // Add additional offset
// (arg is at one of binary subranges)
fma.s1 fArgCube = fArgSqr, f8, f0 // x^3 (for [0;1/8] path)
shl rTailOffset = rInterval, 7 // Make offset to "tail" data
// from interval number
}
{ .mib
setf.exp fTiny = rTiny // Construct "tiny" value
setf.exp fTiny = rTiny // Construct "tiny" value
// for saturation path
cmp.ltu p11, p0 = 0x7, rInterval // if arg > 32
(p9) br.cond.spnt _0_to_1o8
(p9) br.cond.spnt _0_to_1o8
};;
{ .mfi
add rAddr1 = rDataPtr, rOffset // Get address for
// interval data
add rAddr1 = rDataPtr, rOffset // Get address for
// interval data
nop.f 0
shl rTailAddOffset = rInterval, 5 // Offset to interval
// "tail" data
// "tail" data
}
{ .mib
add rAddr2 = rShiftedDataPtr, rOffset // Get second
// address for interval data
(p7) cmp.leu p11, p0 = rSaturation, rArgSig // if arg is
// address for interval data
(p7) cmp.leu p11, p0 = rSaturation, rArgSig // if arg is
// in [22.8;32] interval
(p11) br.cond.spnt _saturation // Branch to Saturation path
};;
@ -813,14 +813,14 @@ GLOBAL_LIBM_ENTRY(tanhl)
.pred.rel "mutex",p8,p10
{ .mfi
ldfe fA18 = [rAddr1], 16 // Load A18
(p8) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f2p0 // Add 2.0
(p8) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f2p0 // Add 2.0
// (arg is at one of binary subranges)
adds rTailAddr2 = 0x10, rTailAddr1 // First tail
// data address
}
{ .mfi
ldfe fA25 = [rAddr2], 16 // Load A25
(p10) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1p5 // Add 1.5
ldfe fA25 = [rAddr2], 16 // Load A25
(p10) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1p5 // Add 1.5
// to normalized arg
nop.i 0
};;
@ -928,9 +928,9 @@ GLOBAL_LIBM_ENTRY(tanhl)
fma.s1 fA23 = fA24, fArgAbsNorm, fA23 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA21 = fA22, fArgAbsNorm, fA21 // Polynomial tail
fma.s1 fA21 = fA22, fArgAbsNorm, fA21 // Polynomial tail
nop.i 0
};;
@ -946,7 +946,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fma.s1 fRes3L = fRes3L, f1, fTH // (A3*x+A2)*x^2
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA19 = fA20, fArgAbsNorm, fA19 // Polynomial tail
nop.i 0
@ -957,7 +957,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fma.s1 fRes1H = fTH2, f1, fA0H // A1*x+A0
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fms.s1 fTL2 = fA1H, fArgAbsNorm, fTH2 // A1*x+A0
nop.i 0
@ -968,7 +968,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fma.s1 fA8 = fA9, fArgAbsNorm, fA8 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA10 = fA11, fArgAbsNorm, fA10 // Polynomial tail
nop.i 0
@ -990,7 +990,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fms.s1 fArgAbsNorm11 = fArgAbsNorm4, fArgAbsNorm4, f0 // x^8
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA4 = fA5, fArgAbsNorm, fA4 // Polynomial tail
nop.i 0
@ -1001,7 +1001,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fma.s1 fRes3L = fRes3L, f1, fA2L // (A3*x+A2)*x^2
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA6 = fA7, fArgAbsNorm, fA6 // Polynomial tail
nop.i 0
@ -1012,7 +1012,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fma.s1 fTL2 = fTL2, f1, fTT2 // A1*x+A0
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fms.s1 fRes1L = fA0H, f1, fRes1H // A1*x+A0
nop.i 0
@ -1023,7 +1023,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fma.s1 fA23 = fA25, fArgAbsNorm2, fA23 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA12 = fA14, fArgAbsNorm2, fA12 // Polynomial tail
nop.i 0
@ -1034,7 +1034,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fma.s1 fA19 = fA21, fArgAbsNorm2, fA19 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA8 = fA10, fArgAbsNorm2, fA8 // Polynomial tail
nop.i 0
@ -1045,7 +1045,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fma.s1 fA15 = fA17, fArgAbsNorm2, fA15 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fms.s1 fArgAbsNorm11 = fArgAbsNorm11, fArgAbsNorm3, f0 // x^11
nop.i 0
@ -1056,7 +1056,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fma.s1 fTT = fRes3L, fArgAbsNorm2, f0 // (A3*x+A2)*x^2
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA4 = fA6, fArgAbsNorm2, fA4 // Polynomial tail
nop.i 0
@ -1078,7 +1078,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fma.s1 fA19 = fA23, fArgAbsNorm4, fA19 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA8 = fA12, fArgAbsNorm4, fA8 // Polynomial tail
nop.i 0
@ -1131,7 +1131,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
fms.s1 fRes2L = fRes3H, fArgAbsNorm2, fRes2H // (A3*x+A2)*x^2
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fResH = fRes2H, f1, fRes1H // High result
nop.i 0
@ -1148,12 +1148,12 @@ GLOBAL_LIBM_ENTRY(tanhl)
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fRes2L = fRes2L, f1, fTT // (A3*x+A2)*x^2
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fms.s1 fResL = fRes1H, f1, fResH // Low result
nop.i 0
@ -1165,7 +1165,7 @@ GLOBAL_LIBM_ENTRY(tanhl)
// .s0 - for symmetry issue resolving at +/-inf rounding mode
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fResL = fResL, f1, fRes2H // Low result
nop.i 0
@ -1185,12 +1185,12 @@ GLOBAL_LIBM_ENTRY(tanhl)
};;
.pred.rel "mutex",p14,p15
{ .mfi
{ .mfi
nop.m 0
(p14) fma.s0 f8 = fResL, f1, fResH// Add high and low results
nop.i 0
}
{ .mfb
{ .mfb
nop.m 0
(p15) fms.s0 f8 = fResL, f1, fResH // Add high and low results
br.ret.sptk b0 // Main path return
@ -1200,12 +1200,12 @@ GLOBAL_LIBM_ENTRY(tanhl)
_saturation:
.pred.rel "mutex",p14,p15
{ .mfi
{ .mfi
nop.m 0
(p14) fms.s0 f8 = f1, f1, fTiny // Saturation result r = 1-tiny
nop.i 0
};;
{ .mfb
{ .mfb
nop.m 0
(p15) fnma.s0 f8 = f1, f1, fTiny // Saturation result r = tiny-1
br.ret.sptk b0 // Saturation path return
@ -1215,46 +1215,46 @@ _saturation:
// 0, denormals and special IEEE numbers path /////////////////////////////////
tanhl_spec:
{ .mfi
{ .mfi
nop.m 0
fclass.m p6,p0 = f8, 0x23 // To filter infinities
// 0x23 = @pos|@neg|@inf
// 0x23 = @pos|@neg|@inf
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fclass.m p7,p0 = f8, 0xC7 // To filter NaNs & Zeros
// 0xC7 = @pos|@neg|@zero|@qnan|@snan
nop.i 0
};;
{ .mfb
{ .mfb
nop.m 0
(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args
(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args
(p6) br.ret.spnt b0 // exit for x = INF
};;
{ .mfb
{ .mfb
nop.m 0
(p7) fma.s0 f8 = f8, f1, f8 // +/-0 for 0 args
(p7) fma.s0 f8 = f8, f1, f8 // +/-0 for 0 args
// and NaNs for NaNs
(p7) br.ret.spnt b0 // exit for x = NaN or +/-0
};;
{ .mfi
{ .mfi
nop.m 0
fnorm.s0 f8 = f8 // Normalize arg
nop.i 0
};;
.pred.rel "mutex",p14,p15
{ .mfi
{ .mfi
nop.m 0
(p14) fnma.s0 f8 = f8, f8, f8 // res = r-r^2
nop.i 0
}
{ .mfb
{ .mfb
nop.m 0
(p15) fma.s0 f8 = f8, f8, f8 // res = r+r^2
br.ret.sptk b0 // 0, denormals, IEEE specials return
@ -1264,83 +1264,83 @@ tanhl_spec:
// 0 < |x| < 1/8 path /////////////////////////////////////////////////////////
_0_to_1o8:
{ .mmi
{ .mmi
adds rAddr1 = 0x11e0, rDataPtr // Ptr 1 to coeffs
adds rAddr2 = 0x11f0, rDataPtr // Ptr 2 to coeffs
nop.i 0
};;
{ .mmi
{ .mmi
ldfe fA15 = [rAddr1], 32 // Load A15
ldfe fA13 = [rAddr2], 32 // Load A13
nop.i 0
};;
{ .mmi
{ .mmi
ldfe fA11 = [rAddr1], 32 // Load A11
ldfe fA9 = [rAddr2], 32 // Load A9
nop.i 0
};;
{ .mmi
{ .mmi
ldfe fA7 = [rAddr1], 32 // Load A7
ldfe fA5 = [rAddr2] // Load A5
nop.i 0
};;
{ .mfi
{ .mfi
ldfe fA3 = [rAddr1] // Load A3
fma.s1 fA11 = fA13, fArgSqr, fA11 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fArgFour = fArgSqr, fArgSqr, f0 // a^4
fma.s1 fArgFour = fArgSqr, fArgSqr, f0 // a^4
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA3 = fA5, fArgSqr, fA3 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA7 = fA9, fArgSqr, fA7 // Polynomial tail
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA11 = fA15, fArgFour, fA11 // Polynomial tail
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fA3 = fA7, fArgFour, fA3 // Polynomial tail
nop.i 0
}
{ .mfi
{ .mfi
nop.m 0
fma.s1 fArgEight = fArgFour, fArgFour, f0 // a^8
nop.i 0
};;
{ .mfi
{ .mfi
nop.m 0
fma.s1 fRes = fA11, fArgEight, fA3 //Polynomial tail result
nop.i 0
};;
{ .mfb
{ .mfb
nop.m 0
fma.s0 f8 = fRes, fArgCube, f8 // (Polynomial tail)*x^3
br.ret.sptk b0 // [0;1/8] interval return
};;
GLOBAL_LIBM_END(tanhl)

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
//
// History:
// History:
//
// 02/02/00 (hand-optimized)
// 04/04/00 Unwind support added

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code,and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
//
// History:
// History:
// 10/12/01 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -74,8 +74,8 @@
// IEEE Special Conditions:
//
// tgamma(+inf) = +inf
// tgamma(-inf) = QNaN
// tgamma(+/-0) = +/-inf
// tgamma(-inf) = QNaN
// tgamma(+/-0) = +/-inf
// tgamma(x<0, x - integer) = QNaN
// tgamma(SNaN) = QNaN
// tgamma(QNaN) = QNaN
@ -85,7 +85,7 @@
// Overview
//
// The method consists of three cases.
//
//
// If 2 <= x < OVERFLOW_BOUNDARY use case tgamma_regular;
// else if 0 < x < 2 use case tgamma_from_0_to_2;
// else if -(i+1) < x < -i, i = 0...184 use case tgamma_negatives;
@ -110,9 +110,9 @@
// r = x - N, note 0 <= r < 1
//
// n = N & ~0xF - index of table that contains coefficient of
// polynomial approximation
// polynomial approximation
// i = N & 0xF - is used in recursive formula
//
//
//
// Step 2: Approximation
// ---------------------
@ -124,7 +124,7 @@
// -----------------
// In case when i > 0 we need to multiply P22n(r) by product
// R(i)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions
// we can calculate R as follow:
// we can calculate R as follow:
// R(i) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is
// even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))*
// *(i-1) if i is odd. In both cases we need to calculate
@ -145,7 +145,7 @@
// if 1.25 <= x < 1.5 then GAMMA(x) = P15(x-x_min) where
// x_min is the point of local minimum on the [1; 2] interval.
// if 1.5 <= x < 2.0 then GAMMA(x) = P15(x-1.5)
// and
// and
// if 0 < x < 1 then GAMMA(x) = GAMMA(x+1)/x
//
// Case -(i+1) < x < -i, i = 0...184
@ -156,14 +156,14 @@
//
// Step 1: Reduction
// -----------------
// Note that period of sin(PI*x) is 2 and range reduction for
// sin(PI*x) is like to range reduction for GAMMA(x)
// Note that period of sin(PI*x) is 2 and range reduction for
// sin(PI*x) is like to range reduction for GAMMA(x)
// i.e r = x - [x] with exception of cases
// when r > 0.5 (in such cases r = 1 - (x - [x])).
//
// Step 2: Approximation
// ---------------------
// To approximate sin(PI*x)/PI = sin(PI*(2*n+r))/PI =
// To approximate sin(PI*x)/PI = sin(PI*(2*n+r))/PI =
// = (-1)^n*sin(PI*r)/PI Taylor series is used.
// sin(PI*r)/PI ~ S21(r).
//
@ -171,7 +171,7 @@
// ----------------
// To calculate 1/(x*GAMMA(x)*S21(r)) we use frcpa instruction
// with subsequent Newton-Raphson iterations.
//
//
//
//*********************************************************************
@ -807,7 +807,7 @@ GLOBAL_LIBM_ENTRY(tgamma)
}
{ .mfb
ldfe FR_C01 = [GR_ad_Co],32
(p7) fms.s1 FR_r02 = FR_r02,f1,f1
(p7) fms.s1 FR_r02 = FR_r02,f1,f1
// jump if x is NaTVal, NaN, +/-0, +/-INF
(p10) br.cond.spnt tgamma_spec
};;
@ -882,7 +882,7 @@ GLOBAL_LIBM_ENTRY(tgamma)
{ .mfi
ldfe FR_C30 = [GR_ad_Co],32
fma.s1 FR_Rq3 = FR_Rq3,FR_6,FR_X2pX // (x-5)*(x-6)
nop.i 0
nop.i 0
};;
{ .mfi
ldfe FR_C40 = [GR_ad_Ce],32
@ -902,7 +902,7 @@ GLOBAL_LIBM_ENTRY(tgamma)
}
{ .mfi
ldfe FR_C70 = [GR_ad_Co7],32
fma.s1 FR_rs = f0,f0,FR_r // reduced arg for sin(pi*x)
fma.s1 FR_rs = f0,f0,FR_r // reduced arg for sin(pi*x)
add GR_ad_Co = 0x550,GR_ad_Data
};;
{ .mfi
@ -1036,12 +1036,12 @@ GLOBAL_LIBM_ENTRY(tgamma)
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_C01 = FR_C01,FR_C11,f0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_C21 = FR_C21,FR_C31,f0
nop.i 0
}
@ -1051,9 +1051,9 @@ GLOBAL_LIBM_ENTRY(tgamma)
(p12) cmp.lt.unc p7,p0 = 2,GR_Sig2 // should mul by FR_Rq2?
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_C41 = FR_C41,FR_C51,f0
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
@ -1061,7 +1061,7 @@ GLOBAL_LIBM_ENTRY(tgamma)
(p12) cmp.lt.unc p9,p0 = 6,GR_Sig2 // should mul by FR_Rq4?
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_C61 = FR_C61,FR_C71,f0
(p15) cmp.eq p11,p0 = r0,r0
}
@ -1071,7 +1071,7 @@ GLOBAL_LIBM_ENTRY(tgamma)
(p12) cmp.lt.unc p8,p0 = 10,GR_Sig2 // should mul by FR_Rq6?
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_C81 = FR_C81,FR_C91,f0
nop.i 0
}
@ -1081,8 +1081,8 @@ GLOBAL_LIBM_ENTRY(tgamma)
(p14) cmp.ltu p0,p11 = 0x9,GR_Tbl_Ind
};;
{ .mfi
nop.m 0
fcvt.xf FR_RqLin = FR_Xt
nop.m 0
fcvt.xf FR_RqLin = FR_Xt
nop.i 0
}
{ .mfi
@ -1101,12 +1101,12 @@ GLOBAL_LIBM_ENTRY(tgamma)
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_C01 = FR_C01,FR_C21,f0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_rs4 = FR_rs2,FR_rs2,f0
(p12) cmp.lt.unc p8,p0 = 4,GR_Sig2 // should mul by FR_Rq3?
};;
@ -1121,19 +1121,19 @@ GLOBAL_LIBM_ENTRY(tgamma)
(p12) cmp.lt.unc p9,p0 = 12,GR_Sig2 // should mul by FR_Rq7?
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_C41 = FR_C41,FR_C61,f0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
(p9) fma.s1 FR_Rq5 = FR_Rq5,FR_Rq7,f0
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_C81 = FR_C81,FR_CA1,f0
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
@ -1161,12 +1161,12 @@ GLOBAL_LIBM_ENTRY(tgamma)
mov GR_ExpOf1 = 0x2FFFF
}
{ .mfi
nop.m 0
nop.m 0
(p6) fms.s1 FR_RqLin = FR_AbsX,f1,FR_RqLin
(p12) cmp.lt.unc p8,p0 = 8,GR_Sig2 // should mul by FR_Rq5?
};;
{ .mfi
nop.m 0
nop.m 0
fma.s1 FR_C01 = FR_C01,FR_C41,f0
nop.i 0
}
@ -1192,7 +1192,7 @@ GLOBAL_LIBM_ENTRY(tgamma)
}
{ .mfi
nop.m 0
(p15) fcmp.lt.unc.s1 p0,p10 = FR_AbsX,FR_OvfBound // x >= overflow_boundary
(p15) fcmp.lt.unc.s1 p0,p10 = FR_AbsX,FR_OvfBound // x >= overflow_boundary
nop.i 0
};;
{ .mfi
@ -1217,7 +1217,7 @@ GLOBAL_LIBM_ENTRY(tgamma)
(p15) cmp.eq.unc p0,p11 = r0,GR_SigRqLin
}
{ .mfb
nop.m 0
nop.m 0
fma.s1 FR_GAMMA = FR_C01,FR_C81,f0
(p11) br.cond.spnt tgamma_positives
};;
@ -1233,12 +1233,12 @@ GLOBAL_LIBM_ENTRY(tgamma)
};;
.pred.rel "mutex",p8,p9
{ .mfi
nop.m 0
nop.m 0
(p9) fma.s1 FR_GAMMA = FR_GAMMA,FR_Rq1,f0
tbit.z p6,p7 = GR_Sig,0 // p6 if sin<0, p7 if sin>0
}
{ .mfi
nop.m 0
nop.m 0
(p8) fma.s1 FR_GAMMA = FR_GAMMA,FR_RqLin,f0
nop.i 0
};;
@ -1249,12 +1249,12 @@ GLOBAL_LIBM_ENTRY(tgamma)
};;
.pred.rel "mutex",p6,p7
{ .mfi
nop.m 0
(p6) fnma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0
nop.i 0
nop.m 0
(p6) fnma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
(p7) fma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0
mov GR_Sig2 = 1
};;
@ -1292,7 +1292,7 @@ GLOBAL_LIBM_ENTRY(tgamma)
{ .mfi
nop.m 0
fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
nop.i 0
nop.i 0
};;
// NR method: iteration #2
{ .mfi
@ -1340,12 +1340,12 @@ GLOBAL_LIBM_ENTRY(tgamma)
tgamma_positives:
.pred.rel "mutex",p8,p9
{ .mfi
nop.m 0
nop.m 0
(p9) fma.d.s0 f8 = FR_GAMMA,FR_Rq1,f0
nop.i 0
}
{ .mfb
nop.m 0
nop.m 0
(p8) fma.d.s0 f8 = FR_GAMMA,FR_RqLin,f0
br.ret.sptk b0
};;
@ -1366,7 +1366,7 @@ tgamma_from_0_to_2:
};;
{ .mfi
(p6) getf.sig GR_Sig = FR_NormX
nop.f 0
nop.f 0
(p6) shl GR_Sig2 = GR_Sig2,63
}
{ .mfi
@ -1375,9 +1375,9 @@ tgamma_from_0_to_2:
(p6) mov GR_NzOvfBound = 0xFBFF
};;
{ .mfi
cmp.eq p8,p0 = GR_Sign_Exp,GR_ExpOf05 // r02 >= 1/2
cmp.eq p8,p0 = GR_Sign_Exp,GR_ExpOf05 // r02 >= 1/2
nop.f 0
cmp.eq p9,p10 = GR_Sign_Exp,GR_ExpOf025 // r02 >= 1/4
cmp.eq p9,p10 = GR_Sign_Exp,GR_ExpOf025 // r02 >= 1/4
}
{ .mfi
(p6) cmp.ltu.unc p11,p0 = GR_Sign_Exp,GR_NzOvfBound // p11 <- overflow
@ -1396,83 +1396,83 @@ tgamma_from_0_to_2:
(p11) br.cond.spnt tgamma_ovf_near_0 //tgamma_spec_res
};;
{ .mfi
ldfe FR_A15 = [GR_ad_Co],32
ldfe FR_A15 = [GR_ad_Co],32
nop.f 0
(p12) cmp.eq.unc p13,p0 = GR_Sig,GR_Sig2
}
{ .mfb
ldfe FR_A14 = [GR_ad_Ce],32
ldfe FR_A14 = [GR_ad_Ce],32
nop.f 0
(p13) br.cond.spnt tgamma_ovf_near_0_boundary //tgamma_spec_res
};;
{ .mfi
ldfe FR_A13 = [GR_ad_Co],32
ldfe FR_A13 = [GR_ad_Co],32
nop.f 0
nop.i 0
}
{ .mfi
ldfe FR_A12 = [GR_ad_Ce],32
ldfe FR_A12 = [GR_ad_Ce],32
nop.f 0
nop.i 0
};;
.pred.rel "mutex",p9,p10
{ .mfi
ldfe FR_A11 = [GR_ad_Co],32
(p10) fma.s1 FR_r2 = FR_r02,FR_r02,f0
ldfe FR_A11 = [GR_ad_Co],32
(p10) fma.s1 FR_r2 = FR_r02,FR_r02,f0
nop.i 0
}
{ .mfi
ldfe FR_A10 = [GR_ad_Ce],32
(p9) fma.s1 FR_r2 = FR_r,FR_r,f0
ldfe FR_A10 = [GR_ad_Ce],32
(p9) fma.s1 FR_r2 = FR_r,FR_r,f0
nop.i 0
};;
{ .mfi
ldfe FR_A9 = [GR_ad_Co],32
ldfe FR_A9 = [GR_ad_Co],32
(p6) fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
nop.i 0
}
{ .mfi
ldfe FR_A8 = [GR_ad_Ce],32
ldfe FR_A8 = [GR_ad_Ce],32
(p10) fma.s1 FR_r = f0,f0,FR_r02
nop.i 0
};;
{ .mfi
ldfe FR_A7 = [GR_ad_Co],32
ldfe FR_A7 = [GR_ad_Co],32
nop.f 0
nop.i 0
}
{ .mfi
ldfe FR_A6 = [GR_ad_Ce],32
nop.f 0
nop.i 0
};;
{ .mfi
ldfe FR_A5 = [GR_ad_Co],32
nop.f 0
nop.i 0
}
{ .mfi
ldfe FR_A4 = [GR_ad_Ce],32
ldfe FR_A6 = [GR_ad_Ce],32
nop.f 0
nop.i 0
};;
{ .mfi
ldfe FR_A3 = [GR_ad_Co],32
ldfe FR_A5 = [GR_ad_Co],32
nop.f 0
nop.i 0
}
{ .mfi
ldfe FR_A2 = [GR_ad_Ce],32
ldfe FR_A4 = [GR_ad_Ce],32
nop.f 0
nop.i 0
};;
{ .mfi
ldfe FR_A1 = [GR_ad_Co],32
fma.s1 FR_r4 = FR_r2,FR_r2,f0
ldfe FR_A3 = [GR_ad_Co],32
nop.f 0
nop.i 0
}
{ .mfi
ldfe FR_A0 = [GR_ad_Ce],32
ldfe FR_A2 = [GR_ad_Ce],32
nop.f 0
nop.i 0
};;
{ .mfi
ldfe FR_A1 = [GR_ad_Co],32
fma.s1 FR_r4 = FR_r2,FR_r2,f0
nop.i 0
}
{ .mfi
ldfe FR_A0 = [GR_ad_Ce],32
nop.f 0
nop.i 0
};;
@ -1493,7 +1493,7 @@ tgamma_from_0_to_2:
};;
{ .mfi
nop.m 0
fma.s1 FR_r8 = FR_r4,FR_r4,f0
fma.s1 FR_r8 = FR_r4,FR_r4,f0
nop.i 0
};;
{ .mfi
@ -1573,17 +1573,17 @@ tgamma_from_0_to_2:
};;
.pred.rel "mutex",p6,p7
{ .mfi
nop.m 0
nop.m 0
(p6) fma.s1 FR_A15 = FR_A15,FR_r8,FR_A7
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
(p7) fma.d.s0 f8 = FR_A15,FR_r8,FR_A7
nop.i 0
};;
{ .mfb
nop.m 0
nop.m 0
(p6) fma.d.s0 f8 = FR_A15,FR_Rcp3,f0
br.ret.sptk b0
};;
@ -1606,7 +1606,7 @@ tgamma_ovf_near_0_boundary:
{ .mfi
nop.m 0
nop.f 0
shl r8 = r8,52
shl r8 = r8,52
};;
{ .mfi
sub r8 = r8,r0,1
@ -1616,12 +1616,12 @@ tgamma_ovf_near_0_boundary:
.pred.rel "mutex",p14,p15
{ .mfi
// set p8 to 0 in case of overflow and to 1 otherwise
// for negative arg:
// for negative arg:
// no overflow if rounding mode either Z or +Inf, i.e.
// GR_fpsr > 1
(p14) cmp.lt p8,p0 = 1,GR_fpsr
nop.f 0
// for positive arg:
// for positive arg:
// no overflow if rounding mode either Z or -Inf, i.e.
// (GR_fpsr & 1) == 0
(p15) tbit.z p0,p8 = GR_fpsr,0
@ -1636,7 +1636,7 @@ tgamma_ovf_near_0_boundary:
tgamma_ovf_near_0:
{ .mfi
mov r8 = 0x1FFFE
nop.f 0
nop.f 0
nop.i 0
};;
{ .mfi
@ -1646,12 +1646,12 @@ tgamma_ovf_near_0:
};;
.pred.rel "mutex",p14,p15
{ .mfi
nop.m 0
nop.m 0
(p15) fma.d.s0 f8 = f9,f9,f0 // Set I,O and +INF result
nop.i 0
nop.i 0
}
{ .mfb
nop.m 0
nop.m 0
(p14) fnma.d.s0 f8 = f9,f9,f0 // Set I,O and -INF result
br.cond.sptk tgamma_libm_err
};;
@ -1671,7 +1671,7 @@ tgamma_spec_res:
};;
{ .mfb
(p11) cmp.ltu.unc p7,p8 = GR_0x30033,GR_Sign_Exp
nop.f 0
nop.f 0
(p10) br.cond.spnt tgamma_singularity
};;
.pred.rel "mutex",p7,p8
@ -1686,7 +1686,7 @@ tgamma_spec_res:
mov GR_TAG = 258 // overflow
}
{ .mfb
nop.m 0
nop.m 0
(p15) fma.d.s0 f8 = f9,f9,f0 // Set I,O and +INF result
br.cond.sptk tgamma_libm_err
};;
@ -1764,7 +1764,7 @@ tgamma_spec:
{ .mfi
(p7) mov GR_TAG = 259 // negative
(p7) frcpa.s0 f8,p0 = f1,f8
nop.i 0
nop.i 0
}
{ .mib
nop.m 0
@ -1788,10 +1788,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@ -1799,18 +1799,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
@ -1825,10 +1825,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function

View File

@ -21,25 +21,25 @@
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code,and requests that all
// problem reports or change requests be submitted to it directly at
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//*********************************************************************
//
// History:
// History:
// 11/30/01 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
@ -75,8 +75,8 @@
// IEEE Special Conditions:
//
// tgammaf(+inf) = +inf
// tgammaf(-inf) = QNaN
// tgammaf(+/-0) = +/-inf
// tgammaf(-inf) = QNaN
// tgammaf(+/-0) = +/-inf
// tgammaf(x<0, x - integer) = QNaN
// tgammaf(SNaN) = QNaN
// tgammaf(QNaN) = QNaN
@ -86,7 +86,7 @@
// Overview
//
// The method consists of three cases.
//
//
// If 2 <= x < OVERFLOW_BOUNDARY use case tgamma_regular;
// else if 0 < x < 2 use case tgamma_from_0_to_2;
// else if -(i+1) < x < -i, i = 0...43 use case tgamma_negatives;
@ -111,9 +111,9 @@
// r = x - N, note 0 <= r < 1
//
// n = N & ~0xF - index of table that contains coefficient of
// polynomial approximation
// polynomial approximation
// i = N & 0xF - is used in recursive formula
//
//
//
// Step 2: Approximation
// ---------------------
@ -125,7 +125,7 @@
// -----------------
// In case when i > 0 we need to multiply P12n(r) by product
// R(i,x)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions
// we can calculate R as follow:
// we can calculate R as follow:
// R(i,x) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is
// even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))*
// *(i-1) if i is odd. In both cases we need to calculate
@ -147,7 +147,7 @@
// x_min is point of local minimum on [1; 2] interval.
// if 1.5 <= x < 1.75 then GAMMA(x) = P7(x-1.5)
// if 1.75 <= x < 2.0 then GAMMA(x) = P7(x-1.5)
// and
// and
// if 0 < x < 1 then GAMMA(x) = GAMMA(x+1)/x
//
// Case -(i+1) < x < -i, i = 0...43
@ -158,13 +158,13 @@
//
// Step 1: Reduction
// -----------------
// Note that period of sin(PI*x) is 2 and range reduction for
// sin(PI*x) is like to range reduction for GAMMA(x)
// Note that period of sin(PI*x) is 2 and range reduction for
// sin(PI*x) is like to range reduction for GAMMA(x)
// i.e rs = x - round(x) and |rs| <= 0.5.
//
// Step 2: Approximation
// ---------------------
// To approximate sin(PI*x)/PI = sin(PI*(2*n+rs))/PI =
// To approximate sin(PI*x)/PI = sin(PI*(2*n+rs))/PI =
// = (-1)^n*sin(PI*rs)/PI Taylor series is used.
// sin(PI*rs)/PI ~ S17(rs).
//
@ -172,7 +172,7 @@
// ----------------
// To calculate 1/x and 1/(GAMMA(x)*S12(rs)) we use frcpa
// instruction with subsequent Newton-Raphson iterations.
//
//
//
//*********************************************************************
@ -218,7 +218,7 @@ FR_X = f10
FR_Y = f1
FR_RESULT = f8
FR_iXt = f11
FR_iXt = f11
FR_Xt = f12
FR_r = f13
FR_r2 = f14
@ -671,7 +671,7 @@ GLOBAL_LIBM_ENTRY(tgammaf)
{ .mfi
nop.m 0
(p14) fma.s1 FR_rs2 = FR_rs,FR_rs,f0
nop.i 0
nop.i 0
}
{ .mfb
nop.m 0
@ -680,12 +680,12 @@ GLOBAL_LIBM_ENTRY(tgammaf)
(p7) br.cond.spnt tgammaf_overflow_near0_bound
};;
{ .mfi
nop.m 0
nop.m 0
(p6) fnma.s1 FR_Rq1 = FR_Rq1,FR_Rq0,f0
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
(p10) fma.s1 FR_Rq2 = FR_Rq2,FR_Rq3,f0
and GR_Sig = 0x7,GR_Sig
};;
@ -730,24 +730,24 @@ GLOBAL_LIBM_ENTRY(tgammaf)
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
(p9) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq2,f0
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 FR_C51 = FR_C51,FR_r,FR_C50
nop.i 0
nop.i 0
};;
{ .mfi
(p14) getf.exp GR_SignExp = FR_rs
fma.s1 FR_C01 = FR_C01,FR_C11,f0
nop.i 0
nop.i 0
}
{ .mfi
nop.m 0
(p14) fma.s1 FR_S01 = FR_S01,FR_rs2,FR_S00
nop.i 0
nop.i 0
};;
{ .mfi
nop.m 0
@ -763,7 +763,7 @@ GLOBAL_LIBM_ENTRY(tgammaf)
{ .mfi
nop.m 0
(p14) fma.s1 FR_S11 = FR_S11,FR_rs2,FR_S10
(p14) tbit.z.unc p11,p12 = GR_SignExp,17
(p14) tbit.z.unc p11,p12 = GR_SignExp,17
}
{ .mfi
nop.m 0
@ -788,7 +788,7 @@ GLOBAL_LIBM_ENTRY(tgammaf)
{ .mfi
nop.m 0
(p7) fma.s1 FR_An = FR_Rq1,FR_An,f0
nop.i 0
nop.i 0
};;
{ .mfb
nop.m 0
@ -841,7 +841,7 @@ GLOBAL_LIBM_ENTRY(tgammaf)
};;
{ .mfi
nop.m 0
nop.m 0
(p14) fma.s1 FR_GAMMA = FR_C01,FR_C41,f0
(p14) tbit.z.unc p6,p7 = GR_Sig,0
}
@ -954,7 +954,7 @@ tgammaf_from_0_to_1:
{ .mfi
cmp.gt p9,p0 = GR_Arg,GR_ExpOf05
fma.s1 FR_r = f0,f0,FR_NormX // reduced arg for (0;1)
mov GR_ExpOf025 = 0x7FA
mov GR_ExpOf025 = 0x7FA
};;
{ .mfi
getf.s GR_ArgNz = f8
@ -973,7 +973,7 @@ tgammaf_from_0_to_1:
(p6) mov GR_Tbl12Offs = 0x40 // 0.25 <= x < 0.5
}
{ .mfi
add GR_ad_Ce = 0x2C0,GR_ad_Data
add GR_ad_Ce = 0x2C0,GR_ad_Data
nop.f 0
add GR_ad_Co = 0x2A0,GR_ad_Data
};;
@ -992,7 +992,7 @@ tgammaf_from_0_to_1:
ldfpd FR_A7,FR_A6 = [GR_ad_Co],16
ldfpd FR_A5,FR_A4 = [GR_ad_Ce],16
// jump if argument is close to 0 positive
(p12) br.cond.spnt tgammaf_overflow
(p12) br.cond.spnt tgammaf_overflow
};;
{ .mfi
ldfpd FR_A3,FR_A2 = [GR_ad_Co],16
@ -1003,7 +1003,7 @@ tgammaf_from_0_to_1:
{ .mfb
ldfpd FR_A1,FR_A0 = [GR_ad_Ce],16
nop.f 0
br.cond.sptk tgamma_from_0_to_2
br.cond.sptk tgamma_from_0_to_2
};;
// here if 1 < x < 2
@ -1023,7 +1023,7 @@ tgammaf_from_1_to_2:
{ .mfi
nop.m 0
nop.f 0
and GR_TblOffs = GR_TblOffs,GR_TblOffsMask
and GR_TblOffs = GR_TblOffs,GR_TblOffsMask
};;
{ .mfi
shladd GR_ad_Co = GR_TblOffs,3,GR_ad_Co
@ -1106,17 +1106,17 @@ tgamma_from_0_to_2:
nop.i 0
};;
{ .mfi
nop.m 0
nop.m 0
(p10) fma.s1 FR_GAMMA = FR_A7,FR_r4,FR_A3
nop.i 0
}
{ .mfi
nop.m 0
nop.m 0
(p11) fma.s.s0 f8 = FR_A7,FR_r4,FR_A3
nop.i 0
};;
{ .mfb
nop.m 0
nop.m 0
(p10) fma.s.s0 f8 = FR_GAMMA,FR_Rcp2,f0
br.ret.sptk b0
};;
@ -1140,7 +1140,7 @@ tgammaf_overflow_near0_bound:
{ .mfi
nop.m 0
nop.f 0
shl r8 = r8,20
shl r8 = r8,20
};;
{ .mfi
sub r8 = r8,r0,1
@ -1150,12 +1150,12 @@ tgammaf_overflow_near0_bound:
.pred.rel "mutex",p14,p15
{ .mfi
// set p8 to 0 in case of overflow and to 1 otherwise
// for negative arg:
// for negative arg:
// no overflow if rounding mode either Z or +Inf, i.e.
// GR_fpsr > 1
(p14) cmp.lt p8,p0 = 1,GR_fpsr
nop.f 0
// for positive arg:
// for positive arg:
// no overflow if rounding mode either Z or -Inf, i.e.
// (GR_fpsr & 1) == 0
(p15) tbit.z p0,p8 = GR_fpsr,0
@ -1186,7 +1186,7 @@ tgammaf_overflow:
mov GR_TAG = 261 // overflow
}
{ .mfb
nop.m 0
nop.m 0
(p15) fma.s.s0 f8 = f9,f9,f0 // set I,O and +INF result
br.cond.sptk tgammaf_libm_err
};;
@ -1259,7 +1259,7 @@ tgammaf_spec_args:
{ .mfi
(p7) mov GR_TAG = 262 // negative
(p7) frcpa.s0 f8,p0 = f1,f8
nop.i 0
nop.i 0
}
{ .mib
nop.m 0
@ -1283,10 +1283,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@ -1294,18 +1294,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
@ -1320,10 +1320,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
};;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More