791 lines
19 KiB
ArmAsm
791 lines
19 KiB
ArmAsm
.file "asinl.s"
|
|
|
|
// Copyright (C) 2000, 2001, Intel Corporation
|
|
// All rights reserved.
|
|
//
|
|
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
|
|
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
//
|
|
// * Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
//
|
|
// * The name of Intel Corporation may not be used to endorse or promote
|
|
// products derived from this software without specific prior written
|
|
// permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Intel Corporation is the author of this code, and requests that all
|
|
// problem reports or change requests be submitted to it directly at
|
|
// http://developer.intel.com/opensource.
|
|
//
|
|
// History
|
|
//==============================================================
|
|
// 2/02/00 Initial version
|
|
// 4/04/00 Unwind support added
|
|
// 8/15/00 Bundle added after call to __libm_error_support to properly
|
|
// set [the previously overwritten] GR_Parameter_RESULT.
|
|
//
|
|
// API
|
|
//==============================================================
|
|
// long double = asinl(long double)
|
|
// input floating point f8
|
|
// output floating point f8
|
|
//
|
|
// Registers used
|
|
//==============================================================
|
|
//
|
|
// predicate registers used:
|
|
// p6 -> p12
|
|
//
|
|
// floating-point registers used:
|
|
// f8 has input, then output
|
|
// f8 -> f13, f32 -> f88
|
|
//
|
|
// general registers used:
|
|
// r32 -> r47
|
|
//
|
|
// Overview of operation
|
|
//==============================================================
|
|
// There are three computational paths (|x| >= 1 is handled separately as a special/error case)
|
|
// 1. |x| < 2^-40 ASIN_TINY
|
|
// 2. 2^-40 <= |x| < 1/4 ASIN_POLY
|
|
// 3. 1/4 <= |x| < 1 ASIN_ATAN
|
|
|
|
#include "libm_support.h"
|
|
|
|
// Assembly macros
|
|
//==============================================================
|
|
// Symbolic names for the floating-point, general and parameter registers
// used by asinl and its helper regions.  These are plain assembler symbol
// assignments; no code is emitted here.
//
// Floating-point registers stored to the stack by __libm_error_region.
FR_RESULT = f10
|
|
FR_X = f8
|
|
FR_Y = f1
|
|
// Partial sums of the polynomial chain built from A9, A7, A5, A3, A1.
asin_P79 = f32
|
|
asin_P59 = f33
|
|
asin_P39 = f34
|
|
asin_P19 = f35
|
|


// Partial sums of the polynomial chain built from A10, A8, A6, A4, A2.
asin_P810 = f36
|
|
asin_P610 = f37
|
|
asin_P410 = f38
|
|
asin_P210 = f39
|
|


// Polynomial coefficients loaded from asin_coefficients.
asin_A1 = f41
|
|
asin_A2 = f42
|
|
asin_A3 = f43
|
|
asin_A4 = f44
|
|
asin_A5 = f45
|
|
asin_A6 = f46
|
|
asin_A7 = f47
|
|
asin_A8 = f48
|
|
asin_A9 = f49
|
|
asin_A10 = f50
|
|


// x^2 and x^4 for the polynomial path.
asin_X2 = f51
|
|
asin_X4 = f52
|
|


// Working values used to form 1 - x*x as B + Bb.
asin_B = f53
|
|
asin_Bb = f54
|
|
asin_C = f55
|
|
asin_Cc = f56
|
|
asin_D = f57
|
|


// sqrt(B + Bb) is formed as W + Ww.
asin_W = f58
|
|
asin_Ww = f59
|
|


// Successive approximations of 1/sqrt(B).
asin_y0 = f60
|
|
asin_y1 = f61
|
|
asin_y2 = f62
|
|


// H = y2 * 0.5, Hh = 0.5 * B.
asin_H = f63
|
|
asin_Hh = f64
|
|


// Temporaries of the square-root refinement.
asin_t1 = f65
|
|
asin_t2 = f66
|
|
asin_t3 = f67
|
|
asin_t4 = f68
|
|
asin_t5 = f69
|
|


// asin_Pseries is not referenced by the code visible in this file.
asin_Pseries = f70
|
|
// Normalized argument and its absolute value.
asin_NORM_f8 = f71
|
|
asin_ABS_NORM_f8 = f72
|
|


// Constants (2^-100, 0.5) and intermediate results.
asin_2m100 = f73
|
|
asin_P1P2 = f74
|
|
asin_HALF = f75
|
|
asin_1mD = f76
|
|


asin_1mB = f77
|
|
asin_1mBmC = f78
|
|
asin_S = f79
|
|


asin_BmWW = f80
|
|
asin_BmWWpb = f81
|
|
asin_2W = f82
|
|
asin_1d2W = f83
|
|
asin_Dd = f84
|
|


// Correction term X*w and the low part added to the atan2 result.
asin_XWw = f85
|
|
asin_low = f86
|
|


// High and low parts of pi/2, used when |x| == 1.
asin_pi_by_2 = f87
|
|
asin_pi_by_2_lo = f88
|
|


// General registers used while classifying the exponent of x.
asin_GR_17_ones = r33
|
|
asin_GR_16_ones = r34
|
|
asin_GR_signexp_f8 = r35
|
|
asin_GR_exp = r36
|
|
asin_GR_true_exp = r37
|
|
asin_GR_ff9b = r38
|
|


// Save slots used around calls.
GR_SAVE_B0 = r39
|
|
GR_SAVE_SP = r40
|
|
GR_SAVE_PFS = r33
|
|
// r33 can be used safely: its other alias, asin_GR_17_ones, is only read
// while masking the exponent, before any code writes GR_SAVE_PFS.
|
|
// r40 is address of table of coefficients
|
|
// Later it is used to save sp across calls
// NOTE(review): GR_SAVE_SP is not referenced by the code visible in this
// file, so the sentence above could not be confirmed -- verify.
|
|
GR_SAVE_GP = r41
|
|
asin_GR_fffe = r42
|
|
// asin_GR_retval is not referenced by the code visible in this file.
asin_GR_retval = r43
|
|


// Output registers (r44-r47 under the alloc in asinl) set up by
// __libm_error_region before calling __libm_error_support.
GR_Parameter_X = r44
|
|
GR_Parameter_Y = r45
|
|
GR_Parameter_RESULT = r46
|
|
GR_Parameter_TAG = r47
|
|
|
|
|
|
// 2^-40:
|
|
// A true exponent of -40 is
|
|
// : -40 + register_bias
|
|
// : -0x28 + 0xffff = 0xffd7
|
|
|
|
// A true exponent of -100 is
|
|
// : -100 + register_bias
|
|
// : -0x64 + 0xffff = 0xff9b
|
|
|
|
// Data tables
|
|
//==============================================================
|
|
|
|
// The table goes into .rodata when _LIBC is defined and into .data otherwise.
#ifdef _LIBC
|
|
.rodata
|
|
#else
|
|
.data
|
|
#endif
|
|


.align 16
|
|


// asin_coefficients: constants read by asinl with ldfe.
// Each entry occupies 16 bytes: the first data8 is the 64-bit significand,
// the second data8 carries the sign/exponent word (0x3FFF is 2^0).
// asinl walks the table from the top with a 16-byte post-increment, so the
// entries are stored in load order: A10 first, A1 last, then pi/2 hi and lo.
asin_coefficients:
|
|
ASM_TYPE_DIRECTIVE(asin_coefficients,@object)
|
|
data8 0xBB08911F2013961E, 0x00003FF8 // A10
|
|
data8 0x981F1095A23A87D3, 0x00003FF8 // A9
|
|
data8 0xBDF09C6C4177BCC6, 0x00003FF8 // A8
|
|
data8 0xE4C3A60B049ACCEA, 0x00003FF8 // A7
|
|
data8 0x8E2789F4E8A8F1AD, 0x00003FF9 // A6
|
|
data8 0xB745D09B2B0E850B, 0x00003FF9 // A5
|
|
data8 0xF8E38E3BC4C50920, 0x00003FF9 // A4
|
|
data8 0xB6DB6DB6D89FCD81, 0x00003FFA // A3
|
|
data8 0x99999999999AF376, 0x00003FFB // A2
|
|
data8 0xAAAAAAAAAAAAAA71, 0x00003FFC // A1
|
|


// pi/2 split into a high part and a much smaller low part (exponent 0x3FBF).
data8 0xc90fdaa22168c234, 0x00003FFF // pi_by_2_hi
|
|
data8 0xc4c6628b80dc1cd1, 0x00003FBF // pi_by_2_lo
|
|
ASM_SIZE_DIRECTIVE(asin_coefficients)
|
|
|
|
.align 32
|
|
.global asinl#
|
|


.section .text
|
|
.proc asinl#
|
|
.align 32
|
|




// asinl: long double asinl(long double x)
//   In : f8 = x
//   Out: f8 = asin(x)
// The argument is classified by its unbiased exponent:
//   |x| <  2^-40          result is x + 2^-100 * x, returned directly
//   2^-40 <= |x| < 2^-2   polynomial x + x*(A1*x^2 + ... + A10*x^20)
//   2^-2 <= |x| < 1       W + Ww = sqrt(1 - x*x) is prepared here, then
//                         execution falls through into __libm_callout
//   |x| >= 1 (and NaN)    branch to L(ASIN_ERROR_RETURN)
asinl:
|
|


// Register frame: 1 input, 11 locals, 4 outputs, 0 rotating; the previous
// ar.pfs is written to r32.  Also start normalizing x and build the mask
// used below to strip the sign bit from the getf.exp result.
{ .mfi
|
|
alloc r32 = ar.pfs,1,11,4,0
|
|
(p0) fnorm asin_NORM_f8 = f8
|
|
(p0) mov asin_GR_17_ones = 0x1ffff
|
|
}
|
|


// 0xffff is the register exponent bias; 0xff9b is the biased exponent of 2^-100.
{ .mii
|
|
(p0) mov asin_GR_16_ones = 0xffff
|
|
(p0) mov asin_GR_ff9b = 0xff9b ;;
|
|
nop.i 999
|
|
}
|
|




// Build the constant 2^-100 and fetch the linkage-table slot that holds the
// address of asin_coefficients.
{ .mmi
|
|
(p0) setf.exp asin_2m100 = asin_GR_ff9b
|
|
(p0) addl r40 = @ltoff(asin_coefficients), gp
|
|
nop.i 999
|
|
}
|
|
;;
|
|


// r40 = address of asin_coefficients.
{ .mmi
|
|
ld8 r40 = [r40]
|
|
nop.m 999
|
|
nop.i 999
|
|
}
|
|
;;
|
|






// Load the constants
// Each ldfe post-increments r40 by 16, walking the table from A10 to A1.
|
|


{ .mmi
|
|
(p0) ldfe asin_A10 = [r40],16 ;;
|
|
(p0) ldfe asin_A9 = [r40],16
|
|
nop.i 999 ;;
|
|
}
|
|


{ .mmi
|
|
(p0) ldfe asin_A8 = [r40],16 ;;
|
|
(p0) ldfe asin_A7 = [r40],16
|
|
nop.i 999 ;;
|
|
}
|
|


// Alongside the loads, fetch the sign/exponent field of the normalized x.
{ .mmi
|
|
(p0) ldfe asin_A6 = [r40],16 ;;
|
|
(p0) getf.exp asin_GR_signexp_f8 = asin_NORM_f8
|
|
nop.i 999
|
|
}
|
|


{ .mmi
|
|
(p0) ldfe asin_A5 = [r40],16 ;;
|
|
(p0) ldfe asin_A4 = [r40],16
|
|
nop.i 999 ;;
|
|
}
|
|


// asin_ABS_NORM_f8 = |x| (sign taken from f0); asin_GR_exp = biased exponent
// of x with the sign bit masked off.
{ .mfi
|
|
nop.m 999
|
|
(p0) fmerge.s asin_ABS_NORM_f8 = f0, asin_NORM_f8
|
|
(p0) and asin_GR_exp = asin_GR_signexp_f8, asin_GR_17_ones ;;
|
|
}
|
|


// case 1: |x| < 2^-40 ==> p6 (includes x = +-0)
|
|
// case 2: 2^-40 <= |x| < 2^-2 ==> p8
|
|
// case 3: 2^-2 <= |x| < 1 ==> p10 (p9 covers cases 3 and 4; case 3 runs by fall-through)
|
|
// case 4: 1 <= |x| ==> p11
|
|
// In case 4, we pick up the special case x = +-1 and return +-pi/2
// The compares are chained: each later compare is predicated on the "not
// taken" result of the previous one, and .unc clears both targets when the
// qualifying predicate is false, so at most one of p6, p8, p10, p11 is set.
|
|


// asin_GR_true_exp = unbiased exponent; p6 = (true_exp <= -41).
{ .mii
|
|
(p0) ldfe asin_A3 = [r40],16
|
|
(p0) sub asin_GR_true_exp = asin_GR_exp, asin_GR_16_ones ;;
|
|
(p0) cmp.ge.unc p6, p7 = -41, asin_GR_true_exp ;;
|
|
}
|
|


// p8 = !p6 && (true_exp <= -3); then, under p9, p10 = (true_exp <= -1) and
// p11 = (true_exp >= 0).
{ .mii
|
|
(p0) ldfe asin_A2 = [r40],16
|
|
(p7) cmp.ge.unc p8, p9 = -3, asin_GR_true_exp ;;
|
|
(p9) cmp.ge.unc p10, p11 = -1, asin_GR_true_exp
|
|
}
|
|


// After these two loads r40 points at pi_by_2_lo, which the |x| == 1 path
// reads later.
{ .mmi
|
|
(p0) ldfe asin_A1 = [r40],16 ;;
|
|
(p0) ldfe asin_pi_by_2 = [r40],16
|
|
nop.i 999
|
|
}
|
|


// case 4: |x| >= 1
|
|
{ .mib
|
|
nop.m 999
|
|
nop.i 999
|
|
(p11) br.spnt L(ASIN_ERROR_RETURN) ;;
|
|
}
|
|


// case 1: |x| < 2^-40
// Return x + 2^-100 * x, computed in status field 0.
|
|
{ .mfb
|
|
nop.m 999
|
|
(p6) fma.s0 f8 = asin_2m100,f8,f8
|
|
(p6) br.ret.spnt b0 ;;
|
|
}
|
|




// case 2: 2^-40 <= |x| < 2^-2 ==> p8
// Two independent Horner chains in x^4 are evaluated side by side:
//   P210 = A2 + A4*x^4 + A6*x^8 + A8*x^12 + A10*x^16
//   P19  = A1 + A3*x^4 + A5*x^8 + A7*x^12 + A9*x^16
// and combined as P1P2 = x^2 * (x^2 * P210 + P19); the result is x + x * P1P2.
|
|
// X2 = x^2
{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_X2 = f8,f8, f0
|
|
nop.i 999 ;;
|
|
}
|
|


// X4 = x^4
{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_X4 = asin_X2,asin_X2, f0
|
|
nop.i 999 ;;
|
|
}
|
|


{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_P810 = asin_X4, asin_A10, asin_A8
|
|
nop.i 999
|
|
}
|
|


{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_P79 = asin_X4, asin_A9, asin_A7
|
|
nop.i 999 ;;
|
|
}
|
|


{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_P610 = asin_X4, asin_P810, asin_A6
|
|
nop.i 999
|
|
}
|
|


{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_P59 = asin_X4, asin_P79, asin_A5
|
|
nop.i 999 ;;
|
|
}
|
|


{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_P410 = asin_X4, asin_P610, asin_A4
|
|
nop.i 999
|
|
}
|
|


{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_P39 = asin_X4, asin_P59, asin_A3
|
|
nop.i 999 ;;
|
|
}
|
|


{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_P210 = asin_X4, asin_P410, asin_A2
|
|
nop.i 999
|
|
}
|
|


{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_P19 = asin_X4, asin_P39, asin_A1
|
|
nop.i 999 ;;
|
|
}
|
|


// P1P2 = x^2 * P210 + P19
{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_P1P2 = asin_X2, asin_P210, asin_P19
|
|
nop.i 999 ;;
|
|
}
|
|


// P1P2 = x^2 * P1P2
{ .mfi
|
|
nop.m 999
|
|
(p8) fma.s1 asin_P1P2 = asin_X2, asin_P1P2, f0
|
|
nop.i 999 ;;
|
|
}
|
|


// Result = x + x * P1P2, computed in status field 0, then return.
{ .mfb
|
|
nop.m 999
|
|
(p8) fma.s0 f8 = asin_NORM_f8, asin_P1P2, asin_NORM_f8
|
|
(p8) br.ret.spnt b0 ;;
|
|
}
|
|


// case 3: 2^-2 <= |x| < 1
|
|
// 1- X*X is computed as B + b
|
|
// Step 1.1: Get B and b
|
|


// atan2 will return
|
|
// f8 = Z_hi
|
|
// f10 = Z_lo
|
|
// f11 = s_lo
|
|




// f8 = |x| for the atan2 call; also load the biased exponent of 2^-1.
{ .mfi
|
|
(p0) mov asin_GR_fffe = 0xfffe
|
|
(p0) fmerge.se f8 = asin_ABS_NORM_f8, asin_ABS_NORM_f8
|
|
nop.i 0
|
|
};;
|
|


// asin_HALF = 0.5; f12 keeps the signed, normalized x so its sign can be
// applied to the final result.
{ .mmf
|
|
nop.m 0
|
|
(p0) setf.exp asin_HALF = asin_GR_fffe
|
|
(p0) fmerge.se f12 = asin_NORM_f8, asin_NORM_f8 ;;
|
|
}
|
|




// p6 = (|x| < 0.5), p7 = otherwise.  p6 is reused here; its earlier meaning
// (case 1) is no longer needed on this path.
{ .mfi
|
|
nop.m 999
|
|
(p0) fcmp.lt.unc.s1 p6,p7 = asin_ABS_NORM_f8, asin_HALF
|
|
nop.i 999 ;;
|
|
}
|
|


// |x| >= 0.5 (p7): use 1 - x*x = (1 - |x|) * (1 + |x|).
// D = 1 + |x|
{ .mfi
|
|
nop.m 999
|
|
(p7) fma.s1 asin_D = f1,f1,asin_ABS_NORM_f8
|
|
nop.i 999
|
|
}
|
|


// C = 1 - |x|
{ .mfi
|
|
nop.m 999
|
|
(p7) fms.s1 asin_C = f1,f1,asin_ABS_NORM_f8
|
|
nop.i 999 ;;
|
|
}
|
|


// B = C * D
{ .mfi
|
|
nop.m 999
|
|
(p7) fma.s1 asin_B = asin_C, asin_D, f0
|
|
nop.i 999
|
|
}
|
|


// 1mD = 1 - D
{ .mfi
|
|
nop.m 999
|
|
(p7) fms.s1 asin_1mD = f1,f1,asin_D
|
|
nop.i 999 ;;
|
|
}
|
|


// Dd = (1 - D) + |x|, i.e. the part of 1 + |x| that D does not hold.
{ .mfi
|
|
nop.m 999
|
|
(p7) fma.s1 asin_Dd = asin_1mD,f1, asin_ABS_NORM_f8
|
|
nop.i 999
|
|
}
|
|


// Bb = C * D - B (residual of the product held in B)
{ .mfi
|
|
nop.m 999
|
|
(p7) fms.s1 asin_Bb = asin_C, asin_D, asin_B
|
|
nop.i 999 ;;
|
|
}
|
|


// Bb = C * Dd + Bb
{ .mfi
|
|
nop.m 999
|
|
(p7) fma.s1 asin_Bb = asin_C, asin_Dd, asin_Bb
|
|
nop.i 999
|
|
}
|
|


// |x| < 0.5 (p6): use 1 - x*x directly.
// C = x*x
{ .mfi
|
|
nop.m 999
|
|
(p6) fma.s1 asin_C = asin_ABS_NORM_f8, asin_ABS_NORM_f8, f0
|
|
nop.i 999 ;;
|
|
}
|
|


// B = 1 - C
{ .mfi
|
|
nop.m 999
|
|
(p6) fms.s1 asin_B = f1, f1, asin_C
|
|
nop.i 999
|
|
}
|
|


// Cc = x*x - C (residual of the square held in C)
{ .mfi
|
|
nop.m 999
|
|
(p6) fms.s1 asin_Cc = asin_ABS_NORM_f8, asin_ABS_NORM_f8, asin_C
|
|
nop.i 999 ;;
|
|
}
|
|


// Both sub-cases: Hh = 0.5 * B
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_Hh = asin_HALF, asin_B, f0
|
|
nop.i 999
|
|
}
|
|


// 1mB = 1 - B (p6 only)
{ .mfi
|
|
nop.m 999
|
|
(p6) fms.s1 asin_1mB = f1, f1, asin_B
|
|
nop.i 999 ;;
|
|
}
|
|


// Step 1.2:
|
|
// sqrt(B + b) is computed as W + w
|
|
// Get W
// y0 is the hardware approximation of 1/sqrt(B); it is refined twice
// (y1, y2) and then turned into S = B * y2 and W = S + (B - S*S) * (y2/2).
// The remaining p6-only steps that finish Bb are interleaved with this chain.
|
|


// y0 ~ 1/sqrt(B); frsqrta also writes p8.
{ .mfi
|
|
nop.m 999
|
|
(p0) frsqrta.s1 asin_y0,p8 = asin_B
|
|
nop.i 999 ;;
|
|
}
|
|


// 1mBmC = (1 - B) - C (p6 only)
{ .mfi
|
|
nop.m 999
|
|
(p6) fms.s1 asin_1mBmC = asin_1mB, f1, asin_C
|
|
nop.i 999 ;;
|
|
}
|
|


// t1 = y0 * y0
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_t1 = asin_y0, asin_y0, f0
|
|
nop.i 999 ;;
|
|
}
|
|


// Bb = 1mBmC - Cc (p6 only), so that B + Bb approximates 1 - x*x.
{ .mfi
|
|
nop.m 999
|
|
(p6) fms.s1 asin_Bb = asin_1mBmC, f1, asin_Cc
|
|
nop.i 999 ;;
|
|
}
|
|


// t2 = 0.5 - t1 * Hh
{ .mfi
|
|
nop.m 999
|
|
(p0) fnma.s1 asin_t2 = asin_t1, asin_Hh, asin_HALF
|
|
nop.i 999 ;;
|
|
}
|
|


// y1 = y0 + t2 * y0
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_y1 = asin_t2, asin_y0, asin_y0
|
|
nop.i 999 ;;
|
|
}
|
|


// t3 = y1 * Hh
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_t3 = asin_y1, asin_Hh, f0
|
|
nop.i 999 ;;
|
|
}
|
|


// t4 = 0.5 - t3 * y1
{ .mfi
|
|
nop.m 999
|
|
(p0) fnma.s1 asin_t4 = asin_t3, asin_y1, asin_HALF
|
|
nop.i 999 ;;
|
|
}
|
|


// y2 = y1 + t4 * y1
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_y2 = asin_t4, asin_y1, asin_y1
|
|
nop.i 999 ;;
|
|
}
|
|


// S = B * y2
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_S = asin_B, asin_y2, f0
|
|
nop.i 999
|
|
}
|
|


// H = y2 * 0.5
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_H = asin_y2, asin_HALF, f0
|
|
nop.i 999 ;;
|
|
}
|
|


// t5 = Hh * y2
// NOTE(review): asin_t5 is not read by any later instruction visible in this file.
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_t5 = asin_Hh, asin_y2, f0
|
|
nop.i 999 ;;
|
|
}
|
|


// Dd = B - S * S (asin_Dd is reused here as the square-root residual)
{ .mfi
|
|
nop.m 999
|
|
(p0) fnma.s1 asin_Dd = asin_S, asin_S, asin_B
|
|
nop.i 999 ;;
|
|
}
|
|


// W = S + Dd * H
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_W = asin_Dd, asin_H, asin_S
|
|
nop.i 999 ;;
|
|
}
|
|


// 2W = W + W
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_2W = asin_W, f1, asin_W
|
|
nop.i 999
|
|
}
|
|


// Step 1.3
|
|
// Get w
// w = ((B - W*W) + Bb) / (2W), using the frcpa approximation of 1/(2W).
|
|
// BmWW = B - W * W
{ .mfi
|
|
nop.m 999
|
|
(p0) fnma.s1 asin_BmWW = asin_W, asin_W, asin_B
|
|
nop.i 999 ;;
|
|
}
|
|


// Step 2
|
|
// asin(x) = atan2(X,sqrt(1-X*X))
|
|
// = atan2(X, W) -Xw
|
|
// corr = Xw
|
|
// asin(x) = Z_hi + (s_lo*Z_lo - corr)
|
|
// Call atan2(X, W)
|
|
// Save W in f9
|
|
// Save X in f12
|
|
// Save w in f13
|
|


// f9 = W
{ .mfi
|
|
nop.m 999
|
|
(p0) fmerge.se f9 = asin_W, asin_W
|
|
nop.i 999 ;;
|
|
}
|
|


// BmWWpb = BmWW + Bb
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_BmWWpb = asin_BmWW, f1, asin_Bb
|
|
nop.i 999 ;;
|
|
}
|
|


// 1d2W ~ 1/(2W); frcpa also writes p9.
{ .mfi
|
|
nop.m 999
|
|
(p0) frcpa.s1 asin_1d2W,p9 = f1, asin_2W
|
|
nop.i 999 ;;
|
|
}
|
|


// Ww = BmWWpb * 1d2W
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s1 asin_Ww = asin_BmWWpb, asin_1d2W, f0
|
|
nop.i 999 ;;
|
|
}
|
|
// No branch here: execution continues into __libm_callout, which follows.
.endp asinl
|
|
ASM_SIZE_DIRECTIVE(asinl)
|
|
|
|
// __libm_callout: tail of the 2^-2 <= |x| < 1 path of asinl.
// Reached by fall-through from asinl with f8 = |x|, f9 = W, f12 = x and
// asin_Ww = w.  It saves ar.pfs, gp and b0, calls __libm_atan2_reg, and
// combines the returned pieces into asin(x) in f8.
// NOTE(review): asin_ABS_NORM_f8, f12 and f13 are read after the call, so
// this code depends on __libm_atan2_reg leaving them intact; that routine
// is not visible here -- confirm its register contract.
.proc __libm_callout
|
|
__libm_callout:
|
|
.prologue
|
|
{ .mfi
|
|
nop.m 0
|
|
nop.f 0
|
|
.save ar.pfs,GR_SAVE_PFS
|
|
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
|
|
};;
|
|
{ .mfi
|
|
mov GR_SAVE_GP=gp // Save gp
|
|
nop.f 0
|
|
.save b0, GR_SAVE_B0
|
|
mov GR_SAVE_B0=b0 // Save b0
|
|
}
|
|
.body
|
|
// f13 = w, then call the atan2 helper.
{.mfb
|
|
nop.m 0
|
|
(p0) fmerge.se f13 = asin_Ww, asin_Ww
|
|
(p0) br.call.sptk.many b0=__libm_atan2_reg#
|
|
};;
|
|
// asin_XWw = |x| * w, computed while gp and b0 are restored.
{ .mfi
|
|
mov gp = GR_SAVE_GP // Restore gp
|
|
(p0) fma.s1 asin_XWw = asin_ABS_NORM_f8,f13,f0
|
|
mov b0 = GR_SAVE_B0 // Restore return address
|
|
};;
|
|
// asin_XWw = Xw = corr
|
|
// asin_low = (s_lo * Z_lo - corr)
|
|
// f8 = Z_hi + (s_lo * Z_lo - corr)
|
|


// asin_low = f11 * f10 - asin_XWw
{ .mfi
|
|
nop.m 999
|
|
(p0) fms.s1 asin_low = f11, f10, asin_XWw
|
|
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
|
|
};;
|
|


// f8 = f8 + asin_low, computed in status field 0.
{ .mfi
|
|
nop.m 999
|
|
(p0) fma.s0 f8 = f8, f1, asin_low
|
|
nop.i 999 ;;
|
|
}
|
|


// Give the result the sign of the original argument (held in f12) and return.
{ .mfb
|
|
nop.m 999
|
|
(p0) fmerge.s f8 = f12,f8
|
|
(p0) br.ret.sptk b0 ;;
|
|
}
|
|
.endp __libm_callout
|
|
ASM_SIZE_DIRECTIVE(__libm_callout)
|
|
|
|
// SPECIAL / L(ASIN_ERROR_RETURN): handles arguments whose unbiased exponent
// is >= 0 (asinl branches here under p11).
//   |x| == 1      return (sign of x) * pi/2
//   x is a NaN    return x * 1 + 0
//   otherwise     set the error tag and FR_RESULT, then fall through into
//                 __libm_error_region
// On entry f8 still holds the original argument and r40 points at the
// pi_by_2_lo table entry.
.proc SPECIAL
|
|
SPECIAL:
|
|
L(ASIN_ERROR_RETURN):
|
|


// If X is 1, return (sign of X)pi/2
|
|


// p6 = (|x| == 1)
{ .mfi
|
|
nop.m 999
|
|
(p0) fcmp.eq.unc p6,p7 = asin_ABS_NORM_f8,f1
|
|
nop.i 999 ;;
|
|
}
|
|


// Load the low part of pi/2 and give the high part the sign of x.
{ .mfb
|
|
(p6) ldfe asin_pi_by_2_lo = [r40]
|
|
(p6) fmerge.s asin_pi_by_2 = f8,asin_pi_by_2
|
|
nop.b 0;;
|
|
}
|
|


// |x| == 1: f8 = x * pi_by_2_lo + (signed pi_by_2_hi), then return.
|
|
|
|
|
|
{ .mfb
|
|
nop.m 999
|
|
(p6) fma.s0 f8 = f8,asin_pi_by_2_lo,asin_pi_by_2
|
|
(p6) br.ret.spnt b0
|
|
}
|
|
// If X is a NAN, leave
// fclass mask 0xc3:
// qnan snan inf norm unorm 0 -+
// 1 1 0 0 0 0 11
{ .mfi
|
|
nop.m 999
|
|
(p0) fclass.m.unc p12,p0 = f8, 0xc3
|
|
nop.i 999 ;;
|
|
}
|
|


// NaN: return f8 * 1 + 0.
{ .mfb
|
|
nop.m 999
|
|
(p12) fma.s0 f8 = f8,f1,f0
|
|
(p12) br.ret.spnt b0 ;;
|
|
}
|
|
// Remaining inputs: tag 60 is passed to the error handler and f10
// (FR_RESULT) is set from frcpa of f0 / f0.
// NOTE(review): presumably this yields a NaN and raises the invalid flag -- confirm.
// No branch follows: execution continues into __libm_error_region.
{ .mfi
|
|
(p0) mov GR_Parameter_TAG = 60
|
|
(p0) frcpa f10, p6 = f0, f0
|
|
nop.i 0
|
|
};;
|
|
.endp SPECIAL
|
|
ASM_SIZE_DIRECTIVE(SPECIAL)
|
|
|
|
// __libm_error_region: calls __libm_error_support for an out-of-range input.
// Reached by fall-through from SPECIAL with GR_Parameter_TAG and FR_RESULT set.
// A 64-byte frame is created and three values are stored in it
// (offsets relative to the new sp):
//   sp+16  FR_X       address passed in GR_Parameter_X
//   sp+32  FR_Y       address passed in GR_Parameter_Y
//   sp+48  FR_RESULT  address passed in GR_Parameter_RESULT
// After the call the value at sp+48 is loaded into f8 and returned.
.proc __libm_error_region
|
|
__libm_error_region:
|
|
.prologue
|
|
{ .mfi
|
|
add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32 = new sp + 32)
|
|
nop.f 0
|
|
.save ar.pfs,GR_SAVE_PFS
|
|
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
|
|
}
|
|
{ .mfi
|
|
.fframe 64
|
|
add sp=-64,sp // Create new stack
|
|
nop.f 0
|
|
mov GR_SAVE_GP=gp // Save gp
|
|
};;
|
|
{ .mmi
|
|
stfe [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
|
|
add GR_Parameter_X = 16,sp // Parameter 1 address
|
|
.save b0, GR_SAVE_B0
|
|
mov GR_SAVE_B0=b0 // Save b0
|
|
};;
|
|
.body
|
|
{ .mib
|
|
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
|
|
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
|
|
nop.b 0
|
|
}
|
|
{ .mib
|
|
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
|
|
add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2
|
|
br.call.sptk b0=__libm_error_support# // Call error handling function
|
|
};;
|
|
// Recompute the result address after the call (see the 8/15/00 history note).
{ .mmi
|
|
nop.m 0
|
|
nop.m 0
|
|
add GR_Parameter_RESULT = 48,sp
|
|
};;
|
|
{ .mmi
|
|
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
|
|
.restore sp
|
|
add sp = 64,sp // Restore stack pointer
|
|
mov b0 = GR_SAVE_B0 // Restore return address
|
|
};;
|
|
{ .mib
|
|
mov gp = GR_SAVE_GP // Restore gp
|
|
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
|
|
br.ret.sptk b0 // Return
|
|
};;
|
|


.endp __libm_error_region
|
|
ASM_SIZE_DIRECTIVE(__libm_error_region)
|
|
|
|
.type __libm_error_support#,@function
|
|
.global __libm_error_support#
|
|
|
|
.type __libm_atan2_reg#,@function
|
|
.global __libm_atan2_reg#
|