* sysdeps/unix/sysv/linux/x86_64/getcontext.S: Use functionally
equivalent, but shorter instructions.
* sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise.
* sysdeps/unix/sysv/linux/x86_64/setcontext.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/clone.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/swapcontext.S: Likewise.
* sysdeps/unix/x86_64/sysdep.S: Likewise.
* sysdeps/x86_64/strchr.S: Likewise.
* sysdeps/x86_64/memset.S: Likewise.
* sysdeps/x86_64/strcspn.S: Likewise.
* sysdeps/x86_64/strcmp.S: Likewise.
* sysdeps/x86_64/elf/start.S: Likewise.
* sysdeps/x86_64/strspn.S: Likewise.
* sysdeps/x86_64/dl-machine.h: Likewise.
* sysdeps/x86_64/bsd-_setjmp.S: Likewise.
* sysdeps/x86_64/bsd-setjmp.S: Likewise.
* sysdeps/x86_64/strtok.S: Likewise.
This commit is contained in:
parent
4d6302cf51
commit
ee6189855a
20
ChangeLog
20
ChangeLog
@ -1,3 +1,23 @@
|
||||
2005-03-31 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* sysdeps/unix/sysv/linux/x86_64/getcontext.S: Use functionally
|
||||
equivalent, but shorter instructions.
|
||||
* sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/setcontext.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/clone.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/swapcontext.S: Likewise.
|
||||
* sysdeps/unix/x86_64/sysdep.S: Likewise.
|
||||
* sysdeps/x86_64/strchr.S: Likewise.
|
||||
* sysdeps/x86_64/memset.S: Likewise.
|
||||
* sysdeps/x86_64/strcspn.S: Likewise.
|
||||
* sysdeps/x86_64/strcmp.S: Likewise.
|
||||
* sysdeps/x86_64/elf/start.S: Likewise.
|
||||
* sysdeps/x86_64/strspn.S: Likewise.
|
||||
* sysdeps/x86_64/dl-machine.h: Likewise.
|
||||
* sysdeps/x86_64/bsd-_setjmp.S: Likewise.
|
||||
* sysdeps/x86_64/bsd-setjmp.S: Likewise.
|
||||
* sysdeps/x86_64/strtok.S: Likewise.
|
||||
|
||||
2005-03-30 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* sysdeps/ia64/fpu/e_acosf.S: Update from Intel libm 2005-03-21.
|
||||
|
@ -1,3 +1,8 @@
|
||||
2005-03-31 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h: Use
|
||||
functionally equivalent, but shorter instructions.
|
||||
|
||||
2005-03-28 Daniel Jacobowitz <dan@codesourcery.com>
|
||||
|
||||
* sysdeps/mips/tls.h: New file.
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.
|
||||
|
||||
@ -45,7 +45,7 @@
|
||||
POPARGS_##args \
|
||||
/* The return value from CENABLE is the argument for CDISABLE. */ \
|
||||
movq %rax, (%rsp); \
|
||||
movq $SYS_ify (syscall_name), %rax; \
|
||||
movl $SYS_ify (syscall_name), %eax; \
|
||||
syscall; \
|
||||
movq (%rsp), %rdi; \
|
||||
/* Save %rax since it's the error code from the syscall. */ \
|
||||
|
@ -1,3 +1,25 @@
|
||||
2005-03-31 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_unlock.S: Use
|
||||
functionally equivalent, but shorter instructions.
|
||||
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedrdlock.S:
|
||||
Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_rdlock.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/sem_wait.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_wrlock.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/pthread_once.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedwrlock.S:
|
||||
Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/lowlevellock.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/sem_post.S: Likewise.
|
||||
* sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S: Likewise.
|
||||
|
||||
2005-03-28 Daniel Jacobowitz <dan@codesourcery.com>
|
||||
|
||||
* sysdeps/mips/Makefile: New file.
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -48,12 +48,16 @@ __lll_mutex_lock_wait:
|
||||
|
||||
xorq %r10, %r10 /* No timeout. */
|
||||
movl $2, %edx
|
||||
movq %r10, %rsi /* movq $FUTEX_WAIT, %rsi */
|
||||
#if FUTEX_WAIT == 0
|
||||
xorl %esi, %esi
|
||||
#else
|
||||
movl $FUTEX_WAIT, %esi
|
||||
#endif
|
||||
|
||||
cmpl %edx, %eax /* NB: %edx == 2 */
|
||||
jne 2f
|
||||
|
||||
1: movq $SYS_futex, %rax
|
||||
1: movl $SYS_futex, %eax
|
||||
syscall
|
||||
|
||||
2: movl %edx, %eax
|
||||
@ -93,7 +97,7 @@ __lll_mutex_timedlock_wait:
|
||||
1:
|
||||
/* Get current time. */
|
||||
movq %rsp, %rdi
|
||||
xorq %rsi, %rsi
|
||||
xorl %esi, %esi
|
||||
movq $VSYSCALL_ADDR_vgettimeofday, %rax
|
||||
/* This is a regular function call, all caller-save registers
|
||||
might be clobbered. */
|
||||
@ -101,7 +105,7 @@ __lll_mutex_timedlock_wait:
|
||||
|
||||
/* Compute relative timeout. */
|
||||
movq 8(%rsp), %rax
|
||||
movq $1000, %rdi
|
||||
movl $1000, %edi
|
||||
mul %rdi /* Microseconds to nanoseconds. */
|
||||
movq (%r13), %rdi
|
||||
movq 8(%r13), %rsi
|
||||
@ -126,9 +130,13 @@ __lll_mutex_timedlock_wait:
|
||||
je 8f
|
||||
|
||||
movq %rsp, %r10
|
||||
xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
|
||||
#if FUTEX_WAIT == 0
|
||||
xorl %esi, %esi
|
||||
#else
|
||||
movl $FUTEX_WAIT, %esi
|
||||
#endif
|
||||
movq %r12, %rdi
|
||||
movq $SYS_futex, %rax
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
movq %rax, %rcx
|
||||
|
||||
@ -195,9 +203,9 @@ __lll_mutex_unlock_wake:
|
||||
pushq %rdx
|
||||
|
||||
movl $0, (%rdi)
|
||||
movq $FUTEX_WAKE, %rsi
|
||||
movl $FUTEX_WAKE, %esi
|
||||
movl $1, %edx /* Wake one thread. */
|
||||
movq $SYS_futex, %rax
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
|
||||
popq %rdx
|
||||
@ -222,13 +230,13 @@ __lll_timedwait_tid:
|
||||
|
||||
/* Get current time. */
|
||||
2: movq %rsp, %rdi
|
||||
xorq %rsi, %rsi
|
||||
xorl %esi, %esi
|
||||
movq $VSYSCALL_ADDR_vgettimeofday, %rax
|
||||
callq *%rax
|
||||
|
||||
/* Compute relative timeout. */
|
||||
movq 8(%rsp), %rax
|
||||
movq $1000, %rdi
|
||||
movl $1000, %edi
|
||||
mul %rdi /* Microseconds to nanoseconds. */
|
||||
movq (%r13), %rdi
|
||||
movq 8(%r13), %rsi
|
||||
@ -248,9 +256,13 @@ __lll_timedwait_tid:
|
||||
jz 4f
|
||||
|
||||
movq %rsp, %r10
|
||||
xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
|
||||
#if FUTEX_WAIT == 0
|
||||
xorl %esi, %esi
|
||||
#else
|
||||
movl $FUTEX_WAIT, %esi
|
||||
#endif
|
||||
movq %r12, %rdi
|
||||
movq $SYS_futex, %rax
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
|
||||
cmpl $0, (%rdi)
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -63,9 +63,14 @@ pthread_barrier_wait:
|
||||
|
||||
/* Wait for the remaining threads. The call will return immediately
|
||||
if the CURR_EVENT memory has meanwhile been changed. */
|
||||
7: xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
|
||||
7:
|
||||
#if FUTEX_WAIT == 0
|
||||
xorl %esi, %esi
|
||||
#else
|
||||
movl $FUTEX_WAIT, %esi
|
||||
#endif
|
||||
xorq %r10, %r10
|
||||
8: movq $SYS_futex, %rax
|
||||
8: movl $SYS_futex, %eax
|
||||
syscall
|
||||
|
||||
/* Don't return on spurious wakeups. The syscall does not change
|
||||
@ -110,8 +115,8 @@ pthread_barrier_wait:
|
||||
/* Wake up all waiters. The count is a signed number in the kernel
|
||||
so 0x7fffffff is the highest value. */
|
||||
movl $0x7fffffff, %edx
|
||||
movq $FUTEX_WAKE, %rsi
|
||||
movq $SYS_futex, %rax
|
||||
movl $FUTEX_WAKE, %esi
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
|
||||
/* Increment LEFT. If this brings the count back to the
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -81,10 +81,10 @@ __pthread_cond_broadcast:
|
||||
je 9f
|
||||
|
||||
/* Wake up all threads. */
|
||||
movq $FUTEX_CMP_REQUEUE, %rsi
|
||||
movq $SYS_futex, %rax
|
||||
movl $FUTEX_CMP_REQUEUE, %esi
|
||||
movl $SYS_futex, %eax
|
||||
movl $1, %edx
|
||||
movq $0x7fffffff, %r10
|
||||
movl $0x7fffffff, %r10d
|
||||
syscall
|
||||
|
||||
/* For any kind of error, which mainly is EAGAIN, we try again
|
||||
@ -128,9 +128,9 @@ __pthread_cond_broadcast:
|
||||
jmp 8b
|
||||
|
||||
9: /* The futex requeue functionality is not available. */
|
||||
movq $0x7fffffff, %rdx
|
||||
movq $FUTEX_WAKE, %rsi
|
||||
movq $SYS_futex, %rax
|
||||
movl $0x7fffffff, %edx
|
||||
movl $FUTEX_WAKE, %esi
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
jmp 10b
|
||||
.size __pthread_cond_broadcast, .-__pthread_cond_broadcast
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -66,9 +66,9 @@ __pthread_cond_signal:
|
||||
addl $1, (%rdi)
|
||||
|
||||
/* Wake up one thread. */
|
||||
movq $FUTEX_WAKE, %rsi
|
||||
movq $SYS_futex, %rax
|
||||
movq $1, %rdx
|
||||
movl $FUTEX_WAKE, %esi
|
||||
movl $SYS_futex, %eax
|
||||
movl $1, %edx
|
||||
syscall
|
||||
|
||||
/* Unlock. */
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -56,7 +56,7 @@ __pthread_cond_timedwait:
|
||||
.Lsubq:
|
||||
|
||||
cmpq $1000000000, 8(%rdx)
|
||||
movq $EINVAL, %rax
|
||||
movl $EINVAL, %eax
|
||||
jae 18f
|
||||
|
||||
/* Stack frame:
|
||||
@ -102,7 +102,7 @@ __pthread_cond_timedwait:
|
||||
|
||||
/* Unlock the mutex. */
|
||||
2: movq 16(%rsp), %rdi
|
||||
xorq %rsi, %rsi
|
||||
xorl %esi, %esi
|
||||
callq __pthread_mutex_unlock_usercnt
|
||||
|
||||
testl %eax, %eax
|
||||
@ -141,7 +141,7 @@ __pthread_cond_timedwait:
|
||||
/* Only clocks 0 and 1 are allowed so far. Both are handled in the
|
||||
kernel. */
|
||||
leaq 24(%rsp), %rsi
|
||||
movq $__NR_clock_gettime, %rax
|
||||
movl $__NR_clock_gettime, %eax
|
||||
syscall
|
||||
# ifndef __ASSUME_POSIX_TIMERS
|
||||
cmpq $-ENOSYS, %rax
|
||||
@ -155,13 +155,13 @@ __pthread_cond_timedwait:
|
||||
subq 32(%rsp), %rdx
|
||||
#else
|
||||
leaq 24(%rsp), %rdi
|
||||
xorq %rsi, %rsi
|
||||
xorl %esi, %esi
|
||||
movq $VSYSCALL_ADDR_vgettimeofday, %rax
|
||||
callq *%rax
|
||||
|
||||
/* Compute relative timeout. */
|
||||
movq 32(%rsp), %rax
|
||||
movq $1000, %rdx
|
||||
movl $1000, %edx
|
||||
mul %rdx /* Microseconds to nanoseconds. */
|
||||
movq (%r13), %rcx
|
||||
movq 8(%r13), %rdx
|
||||
@ -195,10 +195,14 @@ __pthread_cond_timedwait:
|
||||
movl %eax, (%rsp)
|
||||
|
||||
leaq 24(%rsp), %r10
|
||||
xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
|
||||
#if FUTEX_WAIT == 0
|
||||
xorl %esi, %esi
|
||||
#else
|
||||
movl $FUTEX_WAIT, %esi
|
||||
#endif
|
||||
movq %r12, %rdx
|
||||
addq $cond_futex, %rdi
|
||||
movq $SYS_futex, %rax
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
movq %rax, %r14
|
||||
|
||||
@ -237,7 +241,7 @@ __pthread_cond_timedwait:
|
||||
|
||||
13: incq wakeup_seq(%rdi)
|
||||
incl cond_futex(%rdi)
|
||||
movq $ETIMEDOUT, %r14
|
||||
movl $ETIMEDOUT, %r14d
|
||||
jmp 14f
|
||||
|
||||
23: xorq %r14, %r14
|
||||
@ -256,8 +260,8 @@ __pthread_cond_timedwait:
|
||||
jne 25f
|
||||
|
||||
addq $cond_nwaiters, %rdi
|
||||
movq $SYS_futex, %rax
|
||||
movq $FUTEX_WAKE, %rsi
|
||||
movl $SYS_futex, %eax
|
||||
movl $FUTEX_WAKE, %esi
|
||||
movl $1, %edx
|
||||
syscall
|
||||
subq $cond_nwaiters, %rdi
|
||||
@ -349,13 +353,13 @@ __pthread_cond_timedwait:
|
||||
#if defined __NR_clock_gettime && !defined __ASSUME_POSIX_TIMERS
|
||||
/* clock_gettime not available. */
|
||||
19: leaq 24(%rsp), %rdi
|
||||
xorq %rsi, %rsi
|
||||
xorl %esi, %esi
|
||||
movq $VSYSCALL_ADDR_vgettimeofday, %rax
|
||||
callq *%rax
|
||||
|
||||
/* Compute relative timeout. */
|
||||
movq 32(%rsp), %rax
|
||||
movq $1000, %rdx
|
||||
movl $1000, %edx
|
||||
mul %rdx /* Microseconds to nanoseconds. */
|
||||
movq (%r13), %rcx
|
||||
movq 8(%r13), %rdx
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -82,12 +82,12 @@ __condvar_cleanup:
|
||||
jne 4f
|
||||
|
||||
addq $cond_nwaiters, %rdi
|
||||
movq $SYS_futex, %rax
|
||||
movq $FUTEX_WAKE, %rsi
|
||||
movl $SYS_futex, %eax
|
||||
movl $FUTEX_WAKE, %esi
|
||||
movl $1, %edx
|
||||
syscall
|
||||
subq $cond_nwaiters, %rdi
|
||||
movq $1, %r12
|
||||
movl $1, %r12d
|
||||
|
||||
4: LOCK
|
||||
#if cond_lock == 0
|
||||
@ -105,9 +105,9 @@ __condvar_cleanup:
|
||||
2: testq %r12, %r12
|
||||
jnz 5f
|
||||
addq $cond_futex, %rdi
|
||||
movq $FUTEX_WAKE, %rsi
|
||||
movl $FUTEX_WAKE, %esi
|
||||
movl $0x7fffffff, %edx
|
||||
movq $SYS_futex, %rax
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
|
||||
5: movq 16(%r8), %rdi
|
||||
@ -170,7 +170,7 @@ __pthread_cond_wait:
|
||||
|
||||
/* Unlock the mutex. */
|
||||
2: movq 16(%rsp), %rdi
|
||||
xorq %rsi, %rsi
|
||||
xorl %esi, %esi
|
||||
callq __pthread_mutex_unlock_usercnt
|
||||
|
||||
testl %eax, %eax
|
||||
@ -215,8 +215,12 @@ __pthread_cond_wait:
|
||||
xorq %r10, %r10
|
||||
movq %r12, %rdx
|
||||
addq $cond_futex-cond_lock, %rdi
|
||||
movq $SYS_futex, %rax
|
||||
movq %r10, %rsi /* movq $FUTEX_WAIT, %rsi */
|
||||
movl $SYS_futex, %eax
|
||||
#if FUTEX_WAIT == 0
|
||||
xorl %esi, %esi
|
||||
#else
|
||||
movl $FUTEX_WAIT, %esi
|
||||
#endif
|
||||
syscall
|
||||
|
||||
movl (%rsp), %edi
|
||||
@ -262,8 +266,8 @@ __pthread_cond_wait:
|
||||
jne 17f
|
||||
|
||||
addq $cond_nwaiters, %rdi
|
||||
movq $SYS_futex, %rax
|
||||
movq $FUTEX_WAKE, %rsi
|
||||
movl $SYS_futex, %eax
|
||||
movl $FUTEX_WAKE, %esi
|
||||
movl $1, %edx
|
||||
syscall
|
||||
subq $cond_nwaiters, %rdi
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -74,8 +74,12 @@ __pthread_once:
|
||||
jnz 3f /* Different for generation -> run initializer. */
|
||||
|
||||
/* Somebody else got here first. Wait. */
|
||||
movq %r10, %rsi /* movq $FUTEX_WAIT, %rsi */
|
||||
movq $SYS_futex, %rax
|
||||
#if FUTEX_WAIT == 0
|
||||
xorl %esi, %esi
|
||||
#else
|
||||
movl $FUTEX_WAIT, %esi
|
||||
#endif
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
jmp 6b
|
||||
|
||||
@ -98,12 +102,12 @@ __pthread_once:
|
||||
/* Wake up all other threads. */
|
||||
movl $0x7fffffff, %edx
|
||||
movl $FUTEX_WAKE, %esi
|
||||
movq $SYS_futex, %rax
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
|
||||
4: addq $8, %rsp
|
||||
.Ladd:
|
||||
xorq %rax, %rax
|
||||
xorl %eax, %eax
|
||||
retq
|
||||
|
||||
.size __pthread_once,.-__pthread_once
|
||||
@ -124,8 +128,8 @@ clear_once_control:
|
||||
movl $0, (%rdi)
|
||||
|
||||
movl $0x7fffffff, %edx
|
||||
movq $FUTEX_WAKE, %rsi
|
||||
movq $SYS_futex, %rax
|
||||
movl $FUTEX_WAKE, %esi
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
|
||||
movq %r8, %rdi
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -74,8 +74,12 @@ __pthread_rwlock_rdlock:
|
||||
jne 10f
|
||||
|
||||
11: addq $READERS_WAKEUP, %rdi
|
||||
movq %r10, %rsi /* movq $FUTEX_WAIT, %rsi */
|
||||
movq $SYS_futex, %rax
|
||||
#if FUTEX_WAIT == 0
|
||||
xorl %esi, %esi
|
||||
#else
|
||||
movl $FUTEX_WAIT, %esi
|
||||
#endif
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
|
||||
subq $READERS_WAKEUP, %rdi
|
||||
@ -94,7 +98,7 @@ __pthread_rwlock_rdlock:
|
||||
13: decl READERS_QUEUED(%rdi)
|
||||
jmp 2b
|
||||
|
||||
5: xorq %rdx, %rdx
|
||||
5: xorl %edx, %edx
|
||||
incl NR_READERS(%rdi)
|
||||
je 8f
|
||||
9: LOCK
|
||||
@ -122,7 +126,7 @@ __pthread_rwlock_rdlock:
|
||||
14: cmpl %fs:TID, %eax
|
||||
jne 3b
|
||||
/* Deadlock detected. */
|
||||
movq $EDEADLK, %rdx
|
||||
movl $EDEADLK, %edx
|
||||
jmp 9b
|
||||
|
||||
6:
|
||||
@ -137,12 +141,12 @@ __pthread_rwlock_rdlock:
|
||||
|
||||
/* Overflow. */
|
||||
8: decl NR_READERS(%rdi)
|
||||
movq $EAGAIN, %rdx
|
||||
movl $EAGAIN, %edx
|
||||
jmp 9b
|
||||
|
||||
/* Overflow. */
|
||||
4: decl READERS_QUEUED(%rdi)
|
||||
movq $EAGAIN, %rdx
|
||||
movl $EAGAIN, %edx
|
||||
jmp 9b
|
||||
|
||||
10:
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -90,13 +90,13 @@ pthread_rwlock_timedrdlock:
|
||||
|
||||
/* Get current time. */
|
||||
11: movq %rsp, %rdi
|
||||
xorq %rsi, %rsi
|
||||
xorl %esi, %esi
|
||||
movq $VSYSCALL_ADDR_vgettimeofday, %rax
|
||||
callq *%rax
|
||||
|
||||
/* Compute relative timeout. */
|
||||
movq 8(%rsp), %rax
|
||||
movq $1000, %rdi
|
||||
movl $1000, %edi
|
||||
mul %rdi /* Microseconds to nanoseconds. */
|
||||
movq (%r13), %rcx
|
||||
movq 8(%r13), %rdi
|
||||
@ -112,11 +112,15 @@ pthread_rwlock_timedrdlock:
|
||||
movq %rcx, (%rsp) /* Store relative timeout. */
|
||||
movq %rdi, 8(%rsp)
|
||||
|
||||
xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
|
||||
#if FUTEX_WAIT == 0
|
||||
xorl %esi, %esi
|
||||
#else
|
||||
movl $FUTEX_WAIT, %esi
|
||||
#endif
|
||||
movq %rsp, %r10
|
||||
movl %r14d, %edx
|
||||
leaq READERS_WAKEUP(%r12), %rdi
|
||||
movq $SYS_futex, %rax
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
movq %rax, %rdx
|
||||
17:
|
||||
@ -136,11 +140,11 @@ pthread_rwlock_timedrdlock:
|
||||
cmpq $-ETIMEDOUT, %rdx
|
||||
jne 2b
|
||||
|
||||
18: movq $ETIMEDOUT, %rdx
|
||||
18: movl $ETIMEDOUT, %edx
|
||||
jmp 9f
|
||||
|
||||
|
||||
5: xorq %rdx, %rdx
|
||||
5: xorl %edx, %edx
|
||||
incl NR_READERS(%r12)
|
||||
je 8f
|
||||
9: LOCK
|
||||
@ -168,7 +172,7 @@ pthread_rwlock_timedrdlock:
|
||||
|
||||
14: cmpl %fs:TID, %eax
|
||||
jne 3b
|
||||
movq $EDEADLK, %rdx
|
||||
movl $EDEADLK, %edx
|
||||
jmp 9b
|
||||
|
||||
6:
|
||||
@ -182,12 +186,12 @@ pthread_rwlock_timedrdlock:
|
||||
|
||||
/* Overflow. */
|
||||
8: decl NR_READERS(%r12)
|
||||
movq $EAGAIN, %rdx
|
||||
movl $EAGAIN, %edx
|
||||
jmp 9b
|
||||
|
||||
/* Overflow. */
|
||||
4: decl READERS_QUEUED(%r12)
|
||||
movq $EAGAIN, %rdx
|
||||
movl $EAGAIN, %edx
|
||||
jmp 9b
|
||||
|
||||
10:
|
||||
@ -211,6 +215,6 @@ pthread_rwlock_timedrdlock:
|
||||
16: movq $-ETIMEDOUT, %rdx
|
||||
jmp 17b
|
||||
|
||||
19: movq $EINVAL, %rdx
|
||||
19: movl $EINVAL, %edx
|
||||
jmp 9b
|
||||
.size pthread_rwlock_timedrdlock,.-pthread_rwlock_timedrdlock
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -86,13 +86,13 @@ pthread_rwlock_timedwrlock:
|
||||
|
||||
/* Get current time. */
|
||||
11: movq %rsp, %rdi
|
||||
xorq %rsi, %rsi
|
||||
xorl %esi, %esi
|
||||
movq $VSYSCALL_ADDR_vgettimeofday, %rax
|
||||
callq *%rax
|
||||
|
||||
/* Compute relative timeout. */
|
||||
movq 8(%rsp), %rax
|
||||
movq $1000, %rdi
|
||||
movl $1000, %edi
|
||||
mul %rdi /* Microseconds to nanoseconds. */
|
||||
movq (%r13), %rcx
|
||||
movq 8(%r13), %rdi
|
||||
@ -108,11 +108,15 @@ pthread_rwlock_timedwrlock:
|
||||
movq %rcx, (%rsp) /* Store relative timeout. */
|
||||
movq %rdi, 8(%rsp)
|
||||
|
||||
xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
|
||||
#if FUTEX_WAIT == 0
|
||||
xorl %esi, %esi
|
||||
#else
|
||||
movl $FUTEX_WAIT, %esi
|
||||
#endif
|
||||
movq %rsp, %r10
|
||||
movl %r14d, %edx
|
||||
leaq WRITERS_WAKEUP(%r12), %rdi
|
||||
movq $SYS_futex, %rax
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
movq %rax, %rdx
|
||||
17:
|
||||
@ -132,11 +136,11 @@ pthread_rwlock_timedwrlock:
|
||||
cmpq $-ETIMEDOUT, %rdx
|
||||
jne 2b
|
||||
|
||||
18: movq $ETIMEDOUT, %rdx
|
||||
18: movl $ETIMEDOUT, %edx
|
||||
jmp 9f
|
||||
|
||||
|
||||
5: xorq %rdx, %rdx
|
||||
5: xorl %edx, %edx
|
||||
movl %fs:TID, %eax
|
||||
movl %eax, WRITER(%r12)
|
||||
9: LOCK
|
||||
@ -164,7 +168,7 @@ pthread_rwlock_timedwrlock:
|
||||
|
||||
14: cmpl %fs:TID, %eax
|
||||
jne 3b
|
||||
20: movq $EDEADLK, %rdx
|
||||
20: movl $EDEADLK, %edx
|
||||
jmp 9b
|
||||
|
||||
6:
|
||||
@ -178,7 +182,7 @@ pthread_rwlock_timedwrlock:
|
||||
|
||||
/* Overflow. */
|
||||
4: decl WRITERS_QUEUED(%r12)
|
||||
movq $EAGAIN, %rdx
|
||||
movl $EAGAIN, %edx
|
||||
jmp 9b
|
||||
|
||||
10:
|
||||
@ -202,6 +206,6 @@ pthread_rwlock_timedwrlock:
|
||||
16: movq $-ETIMEDOUT, %rdx
|
||||
jmp 17b
|
||||
|
||||
19: movq $EINVAL, %rdx
|
||||
19: movl $EINVAL, %edx
|
||||
jmp 9b
|
||||
.size pthread_rwlock_timedwrlock,.-pthread_rwlock_timedwrlock
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -56,7 +56,7 @@ __pthread_rwlock_unlock:
|
||||
|
||||
5: movl $0, WRITER(%rdi)
|
||||
|
||||
movq $1, %rsi
|
||||
movl $1, %esi
|
||||
leaq WRITERS_WAKEUP(%rdi), %r10
|
||||
movq %rsi, %rdx
|
||||
cmpl $0, WRITERS_QUEUED(%rdi)
|
||||
@ -78,11 +78,11 @@ __pthread_rwlock_unlock:
|
||||
#endif
|
||||
jne 7f
|
||||
|
||||
8: movq $SYS_futex, %rax
|
||||
8: movl $SYS_futex, %eax
|
||||
movq %r10, %rdi
|
||||
syscall
|
||||
|
||||
xorq %rax, %rax
|
||||
xorl %eax, %eax
|
||||
retq
|
||||
|
||||
.align 16
|
||||
@ -94,7 +94,7 @@ __pthread_rwlock_unlock:
|
||||
#endif
|
||||
jne 3f
|
||||
|
||||
4: xorq %rax, %rax
|
||||
4: xorl %eax, %eax
|
||||
retq
|
||||
|
||||
1:
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -72,8 +72,12 @@ __pthread_rwlock_wrlock:
|
||||
jne 10f
|
||||
|
||||
11: addq $WRITERS_WAKEUP, %rdi
|
||||
movq %r10, %rsi /* movq $FUTEX_WAIT, %rsi */
|
||||
movq $SYS_futex, %rax
|
||||
#if FUTEX_WAIT == 0
|
||||
xorl %esi, %esi
|
||||
#else
|
||||
movl $FUTEX_WAIT, %esi
|
||||
#endif
|
||||
movl $SYS_futex, %eax
|
||||
syscall
|
||||
|
||||
subq $WRITERS_WAKEUP, %rdi
|
||||
@ -92,7 +96,7 @@ __pthread_rwlock_wrlock:
|
||||
13: decl WRITERS_QUEUED(%rdi)
|
||||
jmp 2b
|
||||
|
||||
5: xorq %rdx, %rdx
|
||||
5: xorl %edx, %edx
|
||||
movl %fs:TID, %eax
|
||||
movl %eax, WRITER(%rdi)
|
||||
9: LOCK
|
||||
@ -119,7 +123,7 @@ __pthread_rwlock_wrlock:
|
||||
|
||||
14: cmpl %fs:TID, %eax
|
||||
jne 3b
|
||||
movq $EDEADLK, %rdx
|
||||
movl $EDEADLK, %edx
|
||||
jmp 9b
|
||||
|
||||
6:
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -41,8 +41,8 @@ sem_post:
|
||||
LOCK
|
||||
xaddl %edx, (%rdi)
|
||||
|
||||
movq $SYS_futex, %rax
|
||||
movq $FUTEX_WAKE, %rsi
|
||||
movl $SYS_futex, %eax
|
||||
movl $FUTEX_WAKE, %esi
|
||||
incl %edx
|
||||
syscall
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -82,14 +82,14 @@ sem_timedwait:
|
||||
7: call __pthread_enable_asynccancel
|
||||
movl %eax, 16(%rsp)
|
||||
|
||||
xorq %rsi, %rsi
|
||||
xorl %esi, %esi
|
||||
movq %rsp, %rdi
|
||||
movq $VSYSCALL_ADDR_vgettimeofday, %rax
|
||||
callq *%rax
|
||||
|
||||
/* Compute relative timeout. */
|
||||
movq 8(%rsp), %rax
|
||||
movq $1000, %rdi
|
||||
movl $1000, %edi
|
||||
mul %rdi /* Microseconds to nanoseconds. */
|
||||
movq (%r13), %rdi
|
||||
movq 8(%r13), %rsi
|
||||
@ -107,8 +107,8 @@ sem_timedwait:
|
||||
|
||||
movq %rsp, %r10
|
||||
movq %r12, %rdi
|
||||
xorq %rsi, %rsi
|
||||
movq $SYS_futex, %rax
|
||||
xorl %esi, %esi
|
||||
movl $SYS_futex, %eax
|
||||
xorl %edx, %edx
|
||||
syscall
|
||||
movq %rax, %r14
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
||||
|
||||
@ -77,7 +77,7 @@ sem_wait:
|
||||
movl %eax, %r8d
|
||||
|
||||
xorq %r10, %r10
|
||||
movq $SYS_futex, %rax
|
||||
movl $SYS_futex, %eax
|
||||
movq %r13, %rdi
|
||||
movq %r10, %rsi
|
||||
movq %r10, %rdx
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.
|
||||
|
||||
@ -48,7 +48,7 @@
|
||||
POPARGS_##args \
|
||||
/* The return value from CENABLE is the argument for CDISABLE. */ \
|
||||
movq %rax, (%rsp); \
|
||||
movq $SYS_ify (syscall_name), %rax; \
|
||||
movl $SYS_ify (syscall_name), %eax; \
|
||||
syscall; \
|
||||
movq (%rsp), %rdi; \
|
||||
/* Save %rax since it's the error code from the syscall. */ \
|
||||
|
@ -27,7 +27,8 @@ sysdep_routines += libc_libm_error libm_frexp libm_frexpf libm_frexpl \
|
||||
$(duplicated-routines)
|
||||
|
||||
sysdep-CPPFLAGS += -include libm-symbols.h \
|
||||
-D__POSIX__ \
|
||||
-D__POSIX__ -Dopensource \
|
||||
-D_LIB_VERSIONIMF=_LIB_VERSION \
|
||||
-DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64
|
||||
-DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64 \
|
||||
-DSIZE_LONG_64 -DIA64
|
||||
endif
|
||||
|
@ -824,6 +824,7 @@ acos_abs_gt_1:
|
||||
GLOBAL_LIBM_END(acos)
|
||||
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -601,6 +601,7 @@ ACOSF_ABS_ONE:
|
||||
|
||||
GLOBAL_LIBM_END(acosf)
|
||||
|
||||
|
||||
// Stack operations when calling error support.
|
||||
// (1) (2)
|
||||
// sp -> + psp -> +
|
||||
|
@ -1139,6 +1139,7 @@ ACOSH_LESS_ONE:
|
||||
|
||||
GLOBAL_LIBM_END(acosh)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
|
||||
|
@ -968,6 +968,7 @@ ACOSH_LESS_ONE:
|
||||
|
||||
GLOBAL_LIBM_END(acoshf)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
|
||||
|
@ -1650,6 +1650,7 @@ acoshl_lt_pone:
|
||||
GLOBAL_LIBM_END(acoshl)
|
||||
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -35,7 +35,7 @@
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http: //www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
//==============================================================
|
||||
@ -2482,6 +2482,7 @@ acosl_SPECIAL_CASES:
|
||||
|
||||
GLOBAL_LIBM_END(acosl)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
// (1)
|
||||
|
@ -800,6 +800,7 @@ asin_abs_gt_1:
|
||||
GLOBAL_LIBM_END(asin)
|
||||
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -583,6 +583,7 @@ ASINF_ABS_ONE:
|
||||
;;
|
||||
|
||||
GLOBAL_LIBM_END(asinf)
|
||||
|
||||
// Stack operations when calling error support.
|
||||
// (1) (2)
|
||||
// sp -> + psp -> +
|
||||
|
@ -35,7 +35,7 @@
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http: //www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
//==============================================================
|
||||
@ -2459,6 +2459,7 @@ SMALL_S:
|
||||
GLOBAL_LIBM_END(asinl)
|
||||
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
// (1)
|
||||
|
@ -52,6 +52,7 @@
|
||||
// 08/20/02 Corrected inexact flag and directed rounding symmetry bugs
|
||||
// 02/06/03 Reordered header: .section, .global, .proc, .align
|
||||
// 04/17/03 Added missing mutex directive
|
||||
// 12/23/03 atan2(NaN1,NaN2) now QNaN1, for consistency with atan2f, atan2l
|
||||
//
|
||||
// API
|
||||
//==============================================================
|
||||
@ -142,7 +143,7 @@
|
||||
// -0 -0 -pi
|
||||
//
|
||||
// Nan anything quiet Y
|
||||
// anything NaN quiet X
|
||||
// Not NaN NaN quiet X
|
||||
|
||||
// atan2(+-0/+-0) sets double error tag to 37
|
||||
|
||||
@ -388,7 +389,7 @@ GLOBAL_IEEE754_ENTRY(atan2)
|
||||
}
|
||||
{ .mfb
|
||||
ldfe atan2_P21 = [EXP_AD_P2],16
|
||||
(p10) fma.d.s0 f8 = atan2_Y,atan2_X,f0 // If y=nan, result quietized y
|
||||
(p10) fma.d.s0 f8 = atan2_X,atan2_Y,f0 // If y=nan, result quietized y
|
||||
(p10) br.ret.spnt b0 // Exit if y=nan
|
||||
;;
|
||||
}
|
||||
@ -985,6 +986,7 @@ ATAN2_ERROR:
|
||||
}
|
||||
GLOBAL_IEEE754_END(atan2)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
// (1)
|
||||
|
@ -827,6 +827,7 @@ ATAN2F_XY_INF_NAN_ZERO:
|
||||
|
||||
GLOBAL_IEEE754_END(atan2f)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
mov GR_Parameter_TAG = 38
|
||||
|
@ -1008,6 +1008,7 @@ atanh_ge_one:
|
||||
|
||||
GLOBAL_LIBM_END(atanh)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
|
||||
|
@ -782,6 +782,7 @@ atanhf_ge_one:
|
||||
|
||||
GLOBAL_LIBM_END(atanhf)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
|
||||
|
@ -1101,6 +1101,7 @@ atanhl_gt_one:
|
||||
};;
|
||||
|
||||
GLOBAL_LIBM_END(atanhl)
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -811,6 +811,7 @@ COSH_UNORM:
|
||||
|
||||
GLOBAL_IEEE754_END(cosh)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -652,6 +652,7 @@ COSH_UNORM:
|
||||
|
||||
GLOBAL_IEEE754_END(coshf)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -1033,6 +1033,7 @@ COSH_HUGE:
|
||||
|
||||
GLOBAL_IEEE754_END(coshl)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
.file "exp.s"
|
||||
|
||||
|
||||
// Copyright (c) 2000 - 2002, Intel Corporation
|
||||
// Copyright (c) 2000 - 2003, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
|
||||
@ -52,6 +52,7 @@
|
||||
// 05/20/02 Cleaned up namespace and sf0 syntax
|
||||
// 09/07/02 Force inexact flag
|
||||
// 11/15/02 Split underflow path into zero/nonzero; eliminated fma in main path
|
||||
// 05/30/03 Set inexact flag on unmasked overflow/underflow
|
||||
|
||||
// API
|
||||
//==============================================================
|
||||
@ -602,7 +603,7 @@ EXP_CERTAIN_OVERFLOW:
|
||||
}
|
||||
{ .mfb
|
||||
mov GR_Parameter_TAG = 14
|
||||
fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
|
||||
fma.d.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result
|
||||
br.cond.sptk __libm_error_region
|
||||
}
|
||||
;;
|
||||
@ -685,6 +686,13 @@ EXP_CERTAIN_UNDERFLOW:
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fmerge.se fTmp = fTmp, fLn2_by_128_lo // Small with signif lsb 1
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fma.d.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
|
||||
@ -730,6 +738,7 @@ EXP_UNDERFLOW_ZERO:
|
||||
|
||||
GLOBAL_IEEE754_END(exp)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -1,7 +1,7 @@
|
||||
.file "exp10.s"
|
||||
|
||||
|
||||
// Copyright (c) 2000 - 2003, Intel Corporation
|
||||
// Copyright (c) 2000 - 2004, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
|
||||
@ -35,7 +35,7 @@
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http: //www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
//==============================================================
|
||||
@ -43,6 +43,7 @@
|
||||
// 05/20/02 Cleaned up namespace and sf0 syntax
|
||||
// 09/06/02 Improved performance; no inexact flags on exact cases
|
||||
// 01/29/03 Added missing } to bundle templates
|
||||
// 12/16/04 Call error handling on underflow.
|
||||
//
|
||||
// API
|
||||
//==============================================================
|
||||
@ -81,8 +82,8 @@
|
||||
// Registers used
|
||||
//==============================================================
|
||||
// r2-r3, r14-r40
|
||||
// f6-f15, f32-f51
|
||||
// p6-p9, p12
|
||||
// f6-f15, f32-f52
|
||||
// p6-p12
|
||||
//
|
||||
|
||||
|
||||
@ -104,6 +105,7 @@ GR_EXPMAX = r24
|
||||
GR_BIAS53 = r25
|
||||
|
||||
GR_ROUNDVAL = r26
|
||||
GR_SNORM_LIMIT = r26
|
||||
GR_MASK = r27
|
||||
GR_KF0 = r28
|
||||
GR_MASK_low = r29
|
||||
@ -161,6 +163,7 @@ FR_E = f49
|
||||
FR_exact_limit = f50
|
||||
|
||||
FR_int_x = f51
|
||||
FR_SNORM_LIMIT = f52
|
||||
|
||||
|
||||
// Data tables
|
||||
@ -256,8 +259,12 @@ GLOBAL_IEEE754_ENTRY(exp10)
|
||||
}
|
||||
;;
|
||||
|
||||
{.mib
|
||||
{.mlx
|
||||
ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63)
|
||||
movl GR_SNORM_LIMIT= 0xc0733a7146f72a41 // Smallest normal threshold
|
||||
}
|
||||
{.mib
|
||||
nop.m 0
|
||||
nop.i 0
|
||||
(p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero
|
||||
}
|
||||
@ -284,7 +291,7 @@ GLOBAL_IEEE754_ENTRY(exp10)
|
||||
;;
|
||||
|
||||
{.mfi
|
||||
nop.m 0
|
||||
setf.d FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit
|
||||
fma.s1 FR_L2_10_high= FR_LOG2_10, FR_2P53, f0 // FR_LOG2_10= log2(10)_hi
|
||||
nop.i 0
|
||||
}
|
||||
@ -388,6 +395,13 @@ GLOBAL_IEEE754_ENTRY(exp10)
|
||||
}
|
||||
;;
|
||||
|
||||
{.mfi
|
||||
nop.m 0
|
||||
fcmp.ge.s1 p11,p0= f8, FR_SNORM_LIMIT // Test x for normal range
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{.mfi
|
||||
nop.m 0
|
||||
fma.s1 FR_E= FR_E0, FR_COEFF1, f0 // E= C_1*e
|
||||
@ -431,10 +445,17 @@ GLOBAL_IEEE754_ENTRY(exp10)
|
||||
{.mfb
|
||||
nop.m 0
|
||||
(p9) fma.d.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1
|
||||
br.ret.sptk b0 // return
|
||||
(p11) br.ret.sptk b0 // return, if result normal
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if result in denormal range (and not zero)
|
||||
{.mib
|
||||
nop.m 0
|
||||
mov GR_Parameter_TAG= 265
|
||||
br.cond.sptk __libm_error_region // Branch to error handling
|
||||
}
|
||||
;;
|
||||
|
||||
SPECIAL_exp10:
|
||||
{.mfi
|
||||
@ -487,53 +508,35 @@ SPECIAL_exp10:
|
||||
|
||||
OUT_RANGE_exp10:
|
||||
|
||||
// underflow: p6= 1
|
||||
// overflow: p8= 1
|
||||
|
||||
{.mii
|
||||
.pred.rel "mutex",p6,p8
|
||||
{.mmi
|
||||
(p8) mov GR_EXPMAX= 0x1fffe
|
||||
nop.i 0
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
|
||||
{.mmb
|
||||
(p8) mov GR_Parameter_TAG= 166
|
||||
(p8) setf.exp FR_R= GR_EXPMAX
|
||||
nop.b 999
|
||||
}
|
||||
;;
|
||||
|
||||
{.mfi
|
||||
nop.m 999
|
||||
(p8) fma.d.s0 f8= FR_R, FR_R, f0 // Create overflow
|
||||
nop.i 999
|
||||
}
|
||||
// underflow: p6= 1
|
||||
{.mii
|
||||
nop.m 0
|
||||
(p6) mov GR_EXPMAX= 1
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{.mmb
|
||||
nop.m 0
|
||||
(p6) setf.exp FR_R= GR_EXPMAX
|
||||
nop.b 999
|
||||
{.mii
|
||||
setf.exp FR_R= GR_EXPMAX
|
||||
(p8) mov GR_Parameter_TAG= 166
|
||||
(p6) mov GR_Parameter_TAG= 265
|
||||
}
|
||||
;;
|
||||
|
||||
{.mfb
|
||||
nop.m 999
|
||||
(p6) fma.d.s0 f8= FR_R, FR_R, f0 // Create underflow
|
||||
(p6) br.ret.sptk b0 // will not call libm_error for underflow
|
||||
nop.m 0
|
||||
fma.d.s0 f8= FR_R, FR_R, f0 // Create overflow/underflow
|
||||
br.cond.sptk __libm_error_region // Branch to error handling
|
||||
}
|
||||
;;
|
||||
|
||||
GLOBAL_IEEE754_END(exp10)
|
||||
weak_alias (exp10, pow10)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
|
||||
.prologue
|
||||
|
@ -1,7 +1,7 @@
|
||||
.file "exp10f.s"
|
||||
|
||||
|
||||
// Copyright (c) 2000 - 2003, Intel Corporation
|
||||
// Copyright (c) 2000 - 2004, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
|
||||
@ -35,7 +35,7 @@
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http: //www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
//==============================================================
|
||||
@ -43,6 +43,7 @@
|
||||
// 05/20/02 Cleaned up namespace and sf0 syntax
|
||||
// 09/06/02 Improved performance and accuracy; no inexact flags on exact cases
|
||||
// 01/29/03 Added missing } to bundle templates
|
||||
// 12/16/04 Call error handling on underflow.
|
||||
//
|
||||
// API
|
||||
//==============================================================
|
||||
@ -80,8 +81,8 @@
|
||||
// Registers used
|
||||
//==============================================================
|
||||
// r2-r3, r14-r40
|
||||
// f6-f15, f32-f51
|
||||
// p6-p9, p12
|
||||
// f6-f15, f32-f52
|
||||
// p6-p12
|
||||
//
|
||||
|
||||
|
||||
@ -102,6 +103,7 @@ GR_Fh_ADDR = r23
|
||||
GR_EXPMAX = r24
|
||||
|
||||
GR_ROUNDVAL = r26
|
||||
GR_SNORM_LIMIT = r26
|
||||
GR_MASK = r27
|
||||
GR_KF0 = r28
|
||||
GR_MASK_low = r29
|
||||
@ -153,6 +155,7 @@ FR_E = f49
|
||||
FR_exact_limit = f50
|
||||
|
||||
FR_int_x = f51
|
||||
FR_SNORM_LIMIT = f52
|
||||
|
||||
|
||||
// Data tables
|
||||
@ -246,8 +249,12 @@ GLOBAL_IEEE754_ENTRY(exp10f)
|
||||
}
|
||||
;;
|
||||
|
||||
{.mib
|
||||
{.mlx
|
||||
ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63)
|
||||
movl GR_SNORM_LIMIT= 0xc217b818 // Smallest normal threshold
|
||||
}
|
||||
{.mib
|
||||
nop.m 0
|
||||
nop.i 0
|
||||
(p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero
|
||||
}
|
||||
@ -261,7 +268,7 @@ GLOBAL_IEEE754_ENTRY(exp10f)
|
||||
;;
|
||||
|
||||
{.mfi
|
||||
nop.m 0
|
||||
setf.s FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit
|
||||
(p8) fcvt.fx.s1 FR_int_x = f8 // Convert x to integer
|
||||
nop.i 0
|
||||
}
|
||||
@ -335,7 +342,7 @@ GLOBAL_IEEE754_ENTRY(exp10f)
|
||||
|
||||
{.mfb
|
||||
ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high}
|
||||
nop.f 0
|
||||
fcmp.ge.s1 p11, p0= f8, FR_SNORM_LIMIT // Test x for normal range
|
||||
(p12) br.cond.spnt OUT_RANGE_exp10
|
||||
}
|
||||
;;
|
||||
@ -390,10 +397,17 @@ GLOBAL_IEEE754_ENTRY(exp10f)
|
||||
{.mfb
|
||||
nop.m 0
|
||||
(p9) fma.s.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1
|
||||
br.ret.sptk b0 // return
|
||||
(p11) br.ret.sptk b0 // return, if result normal
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if result in denormal range (and not zero)
|
||||
{.mib
|
||||
nop.m 0
|
||||
mov GR_Parameter_TAG= 266
|
||||
br.cond.sptk __libm_error_region // Branch to error handling
|
||||
}
|
||||
;;
|
||||
|
||||
SPECIAL_exp10:
|
||||
{.mfi
|
||||
@ -446,53 +460,35 @@ SPECIAL_exp10:
|
||||
|
||||
OUT_RANGE_exp10:
|
||||
|
||||
// underflow: p6= 1
|
||||
// overflow: p8= 1
|
||||
|
||||
{.mii
|
||||
.pred.rel "mutex",p6,p8
|
||||
{.mmi
|
||||
(p8) mov GR_EXPMAX= 0x1fffe
|
||||
nop.i 0
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
|
||||
{.mmb
|
||||
(p8) mov GR_Parameter_TAG= 167
|
||||
(p8) setf.exp FR_R= GR_EXPMAX
|
||||
nop.b 999
|
||||
}
|
||||
;;
|
||||
|
||||
{.mfi
|
||||
nop.m 999
|
||||
(p8) fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow
|
||||
nop.i 999
|
||||
}
|
||||
// underflow: p6= 1
|
||||
{.mii
|
||||
nop.m 0
|
||||
(p6) mov GR_EXPMAX= 1
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{.mmb
|
||||
nop.m 0
|
||||
(p6) setf.exp FR_R= GR_EXPMAX
|
||||
nop.b 999
|
||||
{.mii
|
||||
setf.exp FR_R= GR_EXPMAX
|
||||
(p8) mov GR_Parameter_TAG= 167
|
||||
(p6) mov GR_Parameter_TAG= 266
|
||||
}
|
||||
;;
|
||||
|
||||
{.mfb
|
||||
nop.m 999
|
||||
(p6) fma.s.s0 f8= FR_R, FR_R, f0 // Create underflow
|
||||
(p6) br.ret.sptk b0 // will not call libm_error for underflow
|
||||
nop.m 0
|
||||
fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow/underflow
|
||||
br.cond.sptk __libm_error_region // Branch to error handling
|
||||
}
|
||||
;;
|
||||
|
||||
GLOBAL_IEEE754_END(exp10f)
|
||||
weak_alias (exp10f, pow10f)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
|
||||
.prologue
|
||||
|
@ -1,7 +1,7 @@
|
||||
.file "exp10l.s"
|
||||
|
||||
|
||||
// Copyright (c) 2000 - 2003, Intel Corporation
|
||||
// Copyright (c) 2000 - 2004, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
|
||||
@ -44,6 +44,7 @@
|
||||
// 02/06/03 Reordered header: .section, .global, .proc, .align
|
||||
// 05/08/03 Reformatted assembly source; corrected overflow result for round to
|
||||
// -inf and round to zero; exact results now don't set inexact flag
|
||||
// 12/16/04 Call error handling on underflow.
|
||||
//
|
||||
// API
|
||||
//==============================================================
|
||||
@ -79,9 +80,9 @@
|
||||
|
||||
// Registers used
|
||||
//==============================================================
|
||||
// f6-f15, f32-f62
|
||||
// f6-f15, f32-f63
|
||||
// r14-r30, r32-r40
|
||||
// p6-p8, p12-p14
|
||||
// p6-p8, p11-p14
|
||||
//
|
||||
|
||||
|
||||
@ -129,6 +130,7 @@
|
||||
FR_4 = f60
|
||||
FR_28 = f61
|
||||
FR_32 = f62
|
||||
FR_SNORM_LIMIT = f63
|
||||
|
||||
|
||||
GR_ADDR0 = r14
|
||||
@ -178,6 +180,7 @@ LOCAL_OBJECT_START(poly_coeffs)
|
||||
data8 0x3f55d87fe78a6731 // C_5
|
||||
data8 0x3f2430912f86c787 // C_6
|
||||
data8 0x9257edfe9b5fb698, 0x00003fbf // log2(10)_low (bits 64...127)
|
||||
data8 0x9a1bc98027a81918, 0x0000c00b // Smallest normal threshold
|
||||
LOCAL_OBJECT_END(poly_coeffs)
|
||||
|
||||
|
||||
@ -435,7 +438,7 @@ GLOBAL_IEEE754_ENTRY(exp10l)
|
||||
|
||||
{.mmf
|
||||
// GR_D_ADDR = pointer to D table
|
||||
add GR_D_ADDR = 2048-64+96+16, GR_ADDR0
|
||||
add GR_D_ADDR = 2048-64+96+32, GR_ADDR0
|
||||
// load C_3, C_4
|
||||
ldfpd FR_COEFF3, FR_COEFF4 = [ GR_ADDR0 ], 16
|
||||
// y = x*log2(10)*2^8
|
||||
@ -471,7 +474,8 @@ GLOBAL_IEEE754_ENTRY(exp10l)
|
||||
}
|
||||
|
||||
{.mfi
|
||||
nop.m 0
|
||||
// load smallest normal limit
|
||||
ldfe FR_SNORM_LIMIT = [ GR_ADDR0 ], 16
|
||||
// x>overflow threshold ?
|
||||
fcmp.gt.s1 p12, p7 = f8, FR_OF_TEST
|
||||
nop.i 0 ;;
|
||||
@ -596,6 +600,13 @@ GLOBAL_IEEE754_ENTRY(exp10l)
|
||||
nop.i 0 ;;
|
||||
}
|
||||
|
||||
{.mfi
|
||||
nop.m 0
|
||||
// test if x >= smallest normal limit
|
||||
fcmp.ge.s1 p11, p0 = f8, FR_SNORM_LIMIT
|
||||
nop.i 0 ;;
|
||||
}
|
||||
|
||||
{.mfi
|
||||
nop.m 0
|
||||
// P36 = P34+r2*P56
|
||||
@ -646,9 +657,16 @@ GLOBAL_IEEE754_ENTRY(exp10l)
|
||||
// result = T+T*P
|
||||
(p14) fma.s0 f8 = FR_COEFF3, FR_UF_TEST, FR_UF_TEST
|
||||
// return
|
||||
br.ret.sptk b0 ;;
|
||||
(p11) br.ret.sptk b0 ;; // return, if result normal
|
||||
}
|
||||
|
||||
// Here if result in denormal range (and not zero)
|
||||
{.mib
|
||||
nop.m 0
|
||||
mov GR_Parameter_TAG= 264
|
||||
br.cond.sptk __libm_error_region // Branch to error handling
|
||||
}
|
||||
;;
|
||||
|
||||
SPECIAL_EXP10:
|
||||
|
||||
@ -703,47 +721,35 @@ SPECIAL_EXP10:
|
||||
|
||||
OUT_RANGE_EXP10:
|
||||
|
||||
{.mii
|
||||
// overflow: p8 = 1
|
||||
// underflow: p6 = 1
|
||||
// overflow: p8 = 1
|
||||
|
||||
.pred.rel "mutex",p6,p8
|
||||
{.mmi
|
||||
(p8) mov GR_CONST1 = 0x1fffe
|
||||
nop.i 0
|
||||
nop.i 0 ;;
|
||||
}
|
||||
|
||||
{.mmb
|
||||
(p8) mov GR_Parameter_TAG = 165
|
||||
(p8) setf.exp FR_KF0 = GR_CONST1
|
||||
nop.b 999 ;;
|
||||
}
|
||||
|
||||
{.mfi
|
||||
nop.m 999
|
||||
(p8) fma.s0 f8 = FR_KF0, FR_KF0, f0
|
||||
nop.i 999
|
||||
}
|
||||
{.mii
|
||||
nop.m 0
|
||||
// underflow: p6 = 1
|
||||
(p6) mov GR_CONST1 = 1
|
||||
nop.i 0 ;;
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{.mmb
|
||||
nop.m 0
|
||||
(p6) setf.exp FR_KF0 = GR_CONST1
|
||||
nop.b 999 ;;
|
||||
{.mii
|
||||
setf.exp FR_KF0 = GR_CONST1
|
||||
(p8) mov GR_Parameter_TAG = 165
|
||||
(p6) mov GR_Parameter_TAG = 264
|
||||
}
|
||||
;;
|
||||
|
||||
{.mfb
|
||||
nop.m 999
|
||||
(p6) fma.s0 f8 = FR_KF0, FR_KF0, f0
|
||||
// will not call libm_error for underflow
|
||||
(p6) br.ret.sptk b0 ;;
|
||||
fma.s0 f8 = FR_KF0, FR_KF0, f0 // Create overflow/underflow
|
||||
br.cond.sptk __libm_error_region // Branch to error handling
|
||||
}
|
||||
;;
|
||||
|
||||
GLOBAL_IEEE754_END(exp10l)
|
||||
weak_alias (exp10l, pow10l)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{.mfi
|
||||
|
@ -35,7 +35,7 @@
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http: //www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
//==============================================================
|
||||
@ -495,6 +495,7 @@ OUT_RANGE_exp2:
|
||||
|
||||
GLOBAL_LIBM_END(exp2)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
|
||||
.prologue
|
||||
|
@ -35,7 +35,7 @@
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http: //www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
//==============================================================
|
||||
@ -470,6 +470,7 @@ OUT_RANGE_exp2:
|
||||
|
||||
GLOBAL_LIBM_END(exp2f)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
|
||||
.prologue
|
||||
|
@ -747,6 +747,7 @@ OUT_RANGE_exp2l:
|
||||
|
||||
GLOBAL_LIBM_END(exp2l)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{.mfi
|
||||
|
@ -1,7 +1,7 @@
|
||||
.file "expf.s"
|
||||
|
||||
|
||||
// Copyright (c) 2000 - 2002, Intel Corporation
|
||||
// Copyright (c) 2000 - 2003, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
|
||||
@ -52,6 +52,7 @@
|
||||
// 09/26/02 support of higher precision inputs added, underflow threshold
|
||||
// corrected
|
||||
// 11/15/02 Improved performance on Itanium 2, added possible over/under paths
|
||||
// 05/30/03 Set inexact flag on unmasked overflow/underflow
|
||||
//
|
||||
//
|
||||
// API
|
||||
@ -521,7 +522,7 @@ EXP_CERTAIN_OVERFLOW:
|
||||
}
|
||||
{ .mfb
|
||||
mov GR_Parameter_TAG = 16
|
||||
fma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
|
||||
fma.s.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result
|
||||
br.cond.sptk __libm_error_region
|
||||
}
|
||||
;;
|
||||
@ -604,6 +605,13 @@ EXP_CERTAIN_UNDERFLOW:
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fmerge.se fTmp = fTmp, f64DivLn2 // Small with non-trivial signif
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fma.s.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
|
||||
@ -649,6 +657,7 @@ EXP_UNDERFLOW_ZERO:
|
||||
|
||||
GLOBAL_IEEE754_END(expf)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -499,6 +499,7 @@ FMOD_Y_ZERO:
|
||||
}
|
||||
|
||||
GLOBAL_IEEE754_END(fmod)
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -514,6 +514,7 @@ EXP_ERROR_RETURN:
|
||||
}
|
||||
|
||||
GLOBAL_IEEE754_END(fmodf)
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -106,6 +106,7 @@ FR_RESULT = f8
|
||||
|
||||
LOCAL_LIBM_ENTRY(cabs)
|
||||
LOCAL_LIBM_END(cabs)
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(hypot)
|
||||
|
||||
{.mfi
|
||||
@ -384,6 +385,7 @@ GLOBAL_IEEE754_ENTRY(hypot)
|
||||
(p9) br.ret.sptk b0;;
|
||||
}
|
||||
GLOBAL_IEEE754_END(hypot)
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -106,6 +106,7 @@ FR_RESULT = f8
|
||||
|
||||
LOCAL_LIBM_ENTRY(cabsf)
|
||||
LOCAL_LIBM_END(cabsf)
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(hypotf)
|
||||
{.mfi
|
||||
alloc r32= ar.pfs,0,4,4,0
|
||||
@ -337,6 +338,7 @@ GLOBAL_IEEE754_ENTRY(hypotf)
|
||||
(p9) br.ret.sptk b0;;
|
||||
}
|
||||
GLOBAL_IEEE754_END(hypotf)
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mii
|
||||
|
@ -105,6 +105,7 @@ FR_RESULT = f8
|
||||
|
||||
LOCAL_LIBM_ENTRY(cabsl)
|
||||
LOCAL_LIBM_END(cabsl)
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(hypotl)
|
||||
{.mfi
|
||||
alloc r32= ar.pfs,0,4,4,0
|
||||
@ -421,6 +422,7 @@ GLOBAL_IEEE754_ENTRY(hypotl)
|
||||
(p9) br.ret.sptk b0;;
|
||||
}
|
||||
GLOBAL_IEEE754_END(hypotl)
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -1,5 +1,6 @@
|
||||
/* file: lgamma_r.c */
|
||||
|
||||
|
||||
// Copyright (c) 2002 Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
@ -20,7 +21,6 @@
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
|
||||
// WARRANTY DISCLAIMER
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
|
@ -1,5 +1,6 @@
|
||||
/* file: lgammaf_r.c */
|
||||
|
||||
|
||||
// Copyright (c) 2002 Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
@ -20,7 +21,6 @@
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
|
||||
// WARRANTY DISCLAIMER
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
|
@ -1,5 +1,6 @@
|
||||
/* file: lgammal_r.c */
|
||||
|
||||
|
||||
// Copyright (c) 2002 Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
@ -20,7 +21,6 @@
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
|
||||
// WARRANTY DISCLAIMER
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
|
@ -1386,6 +1386,7 @@ GLOBAL_IEEE754_ENTRY(log10)
|
||||
};;
|
||||
GLOBAL_IEEE754_END(log10)
|
||||
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(log)
|
||||
{ .mfi
|
||||
getf.exp GR_Exp = f8 // if x is unorm then must recompute
|
||||
@ -1667,6 +1668,7 @@ log_libm_err:
|
||||
};;
|
||||
GLOBAL_IEEE754_END(log)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -655,6 +655,7 @@ SPECIAL_LOG2:
|
||||
|
||||
GLOBAL_LIBM_END(log2)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -493,6 +493,7 @@ SPECIAL_log2f:
|
||||
|
||||
GLOBAL_LIBM_END(log2f)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -761,6 +761,7 @@ LOG2_PSEUDO_ZERO:
|
||||
|
||||
GLOBAL_IEEE754_END(log2l)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -841,6 +841,7 @@ GLOBAL_IEEE754_ENTRY(log10f)
|
||||
br.cond.sptk logf_log10f_common
|
||||
};;
|
||||
GLOBAL_IEEE754_END(log10f)
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(logf)
|
||||
{ .mfi
|
||||
getf.exp GR_Exp = f8 // if x is unorm then must recompute
|
||||
@ -1087,6 +1088,7 @@ logf_libm_err:
|
||||
};;
|
||||
GLOBAL_IEEE754_END(logf)
|
||||
|
||||
|
||||
// Stack operations when calling error support.
|
||||
// (1) (2) (3) (call) (4)
|
||||
// sp -> + psp -> + psp -> + sp -> +
|
||||
|
@ -634,6 +634,7 @@ GLOBAL_IEEE754_ENTRY(logl)
|
||||
|
||||
GLOBAL_IEEE754_END(logl)
|
||||
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(log10l)
|
||||
{ .mfi
|
||||
alloc r32 = ar.pfs,0,21,4,0
|
||||
@ -1144,6 +1145,7 @@ LOGL_64_negative:
|
||||
|
||||
|
||||
GLOBAL_IEEE754_END(log10l)
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -2234,6 +2234,7 @@ POW_OVER_UNDER_ERROR:
|
||||
|
||||
GLOBAL_LIBM_END(pow)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
|
||||
.prologue
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -60,6 +60,7 @@
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align;
|
||||
// used data8 for long double table values
|
||||
// 04/17/03 Added missing mutex directive
|
||||
// 10/13/03 Corrected .endp names to match .proc names
|
||||
//
|
||||
//*********************************************************************
|
||||
//
|
||||
@ -2755,6 +2756,7 @@ POWL_64_SQRT:
|
||||
|
||||
GLOBAL_LIBM_END(powl)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
@ -2803,6 +2805,6 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
br.ret.sptk b0 // Return
|
||||
};;
|
||||
|
||||
.endp
|
||||
LOCAL_LIBM_END(__libm_error_region#)
|
||||
.type __libm_error_support#,@function
|
||||
.global __libm_error_support#
|
||||
|
@ -531,6 +531,7 @@ EXP_ERROR_RETURN:
|
||||
GLOBAL_IEEE754_END(remainder)
|
||||
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -550,6 +550,7 @@ EXP_ERROR_RETURN:
|
||||
GLOBAL_IEEE754_END(remainderf)
|
||||
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -557,6 +557,7 @@ EXP_ERROR_RETURN:
|
||||
}
|
||||
|
||||
GLOBAL_IEEE754_END(remainderl)
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -21,60 +21,82 @@
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
//==============================================================
|
||||
// 02/02/00 Initial version
|
||||
// 01/26/01 Scalb completely reworked and now standalone version
|
||||
// 01/26/01 Scalb completely reworked and now standalone version
|
||||
// 05/20/02 Cleaned up namespace and sf0 syntax
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align
|
||||
// 08/06/03 Improved performance
|
||||
//
|
||||
// API
|
||||
//==============================================================
|
||||
// double = scalb (double x, double n)
|
||||
// double = scalb (double x, double n)
|
||||
// input floating point f8 and floating point f9
|
||||
// output floating point f8
|
||||
//
|
||||
// int_type = 0 if int is 32 bits
|
||||
// int_type = 1 if int is 64 bits
|
||||
//
|
||||
// Returns x* 2**n using an fma and detects overflow
|
||||
// and underflow.
|
||||
// and underflow.
|
||||
//
|
||||
//
|
||||
// Strategy:
|
||||
// Compute biased exponent of result exp_Result = N + exp_X
|
||||
// Break into ranges:
|
||||
// exp_Result > 0x103fe -> Certain overflow
|
||||
// exp_Result = 0x103fe -> Possible overflow
|
||||
// 0x0fc01 <= exp_Result < 0x103fe -> No over/underflow (main path)
|
||||
// 0x0fc01 - 52 <= exp_Result < 0x0fc01 -> Possible underflow
|
||||
// exp_Result < 0x0fc01 - 52 -> Certain underflow
|
||||
|
||||
FR_Big = f6
|
||||
FR_NBig = f7
|
||||
FR_Floating_X = f8
|
||||
FR_Result = f8
|
||||
FR_Floating_N = f9
|
||||
FR_Result2 = f9
|
||||
FR_Norm_N = f10
|
||||
FR_Result3 = f11
|
||||
FR_Norm_X = f12
|
||||
FR_Result3 = f10
|
||||
FR_Norm_X = f11
|
||||
FR_Two_N = f12
|
||||
FR_N_float_int = f13
|
||||
FR_Two_N = f14
|
||||
FR_Two_to_Big = f15
|
||||
FR_Big = f6
|
||||
FR_NBig = f7
|
||||
FR_Norm_N = f14
|
||||
|
||||
GR_neg_ov_limit= r14
|
||||
GR_big_exp = r14
|
||||
GR_N_Biased = r15
|
||||
GR_Big = r16
|
||||
GR_NBig = r17
|
||||
GR_Scratch = r18
|
||||
GR_Scratch1 = r19
|
||||
GR_exp_Result = r18
|
||||
GR_pos_ov_limit= r19
|
||||
GR_exp_sure_ou = r19
|
||||
GR_Bias = r20
|
||||
GR_N_as_int = r21
|
||||
GR_signexp_X = r22
|
||||
GR_exp_X = r23
|
||||
GR_exp_mask = r24
|
||||
GR_max_exp = r25
|
||||
GR_min_exp = r26
|
||||
GR_min_den_exp = r27
|
||||
GR_Scratch = r28
|
||||
GR_signexp_N = r29
|
||||
GR_exp_N = r30
|
||||
|
||||
GR_SAVE_B0 = r32
|
||||
GR_SAVE_GP = r33
|
||||
@ -89,412 +111,442 @@ GLOBAL_IEEE754_ENTRY(scalb)
|
||||
|
||||
//
|
||||
// Is x NAN, INF, ZERO, +-?
|
||||
//
|
||||
{ .mfi
|
||||
alloc r32=ar.pfs,0,3,4,0
|
||||
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
|
||||
addl GR_Scratch = 0x019C3F,r0
|
||||
}
|
||||
//
|
||||
// Is y a NAN, INF, ZERO, +-?
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
|
||||
addl GR_Scratch1 = 0x063BF,r0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Convert N to a fp integer
|
||||
// Normalize x
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fnorm.s1 FR_Norm_N = FR_Floating_N
|
||||
nop.i 999
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fnorm.s1 FR_Norm_X = FR_Floating_X
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
//
|
||||
// Create 2**big
|
||||
// Create 2**-big
|
||||
// Normalize x
|
||||
// Branch on special values.
|
||||
//
|
||||
{ .mib
|
||||
setf.exp FR_Big = GR_Scratch
|
||||
nop.i 0
|
||||
(p6) br.cond.spnt SCALB_NAN_INF_ZERO
|
||||
}
|
||||
{ .mib
|
||||
setf.exp FR_NBig = GR_Scratch1
|
||||
nop.i 0
|
||||
(p7) br.cond.spnt SCALB_NAN_INF_ZERO
|
||||
};;
|
||||
|
||||
//
|
||||
// Convert N to a fp integer
|
||||
// Create -35000
|
||||
//
|
||||
{ .mfi
|
||||
addl GR_Scratch = 1,r0
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
|
||||
addl GR_NBig = -35000,r0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Put N in a GP register
|
||||
// Convert N_float_int to floating point value
|
||||
// Create 35000
|
||||
// Build the exponent Bias
|
||||
//
|
||||
{ .mii
|
||||
getf.sig GR_N_as_int = FR_N_float_int
|
||||
shl GR_Scratch = GR_Scratch,63
|
||||
addl GR_Big = 35000,r0
|
||||
{ .mfi
|
||||
getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
|
||||
fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
|
||||
mov GR_Bias = 0x0ffff
|
||||
}
|
||||
{ .mfi
|
||||
addl GR_Bias = 0x0FFFF,r0
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
//
|
||||
// Catch those fp values that are beyond 2**64-1
|
||||
// Is N > 35000
|
||||
// Is N < -35000
|
||||
//
|
||||
{ .mfi
|
||||
cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
|
||||
nop.f 0
|
||||
nop.i 0
|
||||
mov GR_Big = 35000 // If N this big then certain overflow
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
|
||||
nop.i 0
|
||||
}
|
||||
{ .mmi
|
||||
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
|
||||
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
|
||||
nop.i 0
|
||||
};;
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
|
||||
fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
|
||||
nop.i 0
|
||||
}
|
||||
//
|
||||
// Is N really an int, only for those non-int indefinites?
|
||||
// Create exp bias.
|
||||
// Normalize n
|
||||
//
|
||||
{ .mfi
|
||||
add GR_N_Biased = GR_Bias,GR_N_as_int
|
||||
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
mov GR_exp_mask = 0x1ffff // Exponent mask
|
||||
fnorm.s1 FR_Norm_N = FR_Floating_N
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Branch and return if N is not an int.
|
||||
// Main path, create 2**N
|
||||
// Is n NAN, INF, ZERO, +-?
|
||||
//
|
||||
{ .mfi
|
||||
setf.exp FR_Two_N = GR_N_Biased
|
||||
nop.i 999
|
||||
mov GR_big_exp = 0x1003e // Exponent at which n is integer
|
||||
fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
|
||||
mov GR_max_exp = 0x103fe // Exponent of maximum double
|
||||
}
|
||||
//
|
||||
// Normalize x
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fnorm.s1 FR_Norm_X = FR_Floating_X
|
||||
(p7) br.cond.spnt SCALB_N_UNORM // Branch if n=unorm
|
||||
}
|
||||
;;
|
||||
|
||||
SCALB_COMMON1:
|
||||
// Main path continues. Also return here from u=unorm path.
|
||||
// Handle special cases if x = Nan, Inf, Zero
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
|
||||
(p6) br.cond.spnt SCALB_NAN_INF_ZERO
|
||||
}
|
||||
;;
|
||||
|
||||
// Handle special cases if n = Nan, Inf, Zero
|
||||
{ .mfi
|
||||
getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
|
||||
fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
|
||||
mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
|
||||
}
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p7) frcpa.s0 f8,p11 = f0,f0
|
||||
(p7) br.ret.spnt b0
|
||||
};;
|
||||
|
||||
//
|
||||
// Set denormal on denormal input x and denormal input N
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
|
||||
nop.i 0
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
|
||||
nop.i 999
|
||||
mov GR_min_exp = 0x0fc01 // Exponent of minimum double
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
|
||||
(p9) br.cond.spnt SCALB_NAN_INF_ZERO
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mmi
|
||||
and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
|
||||
(p7) sub GR_Big = r0, GR_Big // Limit for N
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mib
|
||||
cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
|
||||
cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
|
||||
(p8) br.cond.spnt SCALB_X_UNORM // Branch if x=unorm
|
||||
}
|
||||
;;
|
||||
|
||||
SCALB_COMMON2:
|
||||
// Main path continues. Also return here from x=unorm path.
|
||||
// Create biased exponent for 2**N
|
||||
{ .mmi
|
||||
(p6) mov GR_N_as_int = GR_Big // Limit N
|
||||
;;
|
||||
add GR_N_Biased = GR_Bias,GR_N_as_int
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
|
||||
nop.i 0
|
||||
};;
|
||||
setf.exp FR_Two_N = GR_N_Biased // Form 2**N
|
||||
(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
|
||||
and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Adjust 2**N if N was very small or very large
|
||||
// Compute biased result exponent
|
||||
// Branch if N is not an integer
|
||||
//
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
|
||||
nop.i 0
|
||||
{ .mib
|
||||
add GR_exp_Result = GR_exp_X, GR_N_as_int
|
||||
mov GR_min_den_exp = 0x0fc01 - 52 // Exponent of min denorm dble
|
||||
(p9) br.cond.spnt SCALB_N_NOT_INT
|
||||
}
|
||||
{ .mlx
|
||||
nop.m 999
|
||||
movl GR_Scratch = 0x00000000000303FF
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
|
||||
nop.i 0
|
||||
}
|
||||
{ .mlx
|
||||
nop.m 999
|
||||
movl GR_Scratch1= 0x00000000000103FF
|
||||
};;
|
||||
;;
|
||||
|
||||
// Set up necessary status fields
|
||||
//
|
||||
// Raise Denormal operand flag with compare
|
||||
// Do final operation
|
||||
//
|
||||
{ .mfi
|
||||
cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
|
||||
fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
|
||||
cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
|
||||
}
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
|
||||
(p9) br.cond.spnt SCALB_UNDERFLOW // Branch if certain underflow
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mib
|
||||
(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
|
||||
(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
|
||||
(p7) br.ret.sptk b0 // Return from main path
|
||||
}
|
||||
;;
|
||||
|
||||
{ .bbb
|
||||
(p6) br.cond.spnt SCALB_OVERFLOW // Branch if certain overflow
|
||||
(p8) br.cond.spnt SCALB_POSSIBLE_OVERFLOW // Branch if possible overflow
|
||||
(p9) br.cond.spnt SCALB_POSSIBLE_UNDERFLOW // Branch if possible underflow
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if possible underflow.
|
||||
// Resulting exponent: 0x0fc01-52 <= exp_Result < 0x0fc01
|
||||
SCALB_POSSIBLE_UNDERFLOW:
|
||||
//
|
||||
// Here if possible overflow.
|
||||
// Resulting exponent: 0x103fe = exp_Result
|
||||
SCALB_POSSIBLE_OVERFLOW:
|
||||
|
||||
// Set up necessary status fields
|
||||
//
|
||||
// S0 user supplied status
|
||||
// S2 user supplied status + WRE + TD (Overflows)
|
||||
// S3 user supplied status + FZ + TD (Underflows)
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fsetc.s3 0x7F,0x41
|
||||
nop.i 999
|
||||
mov GR_pos_ov_limit = 0x103ff // Exponent for positive overflow
|
||||
fsetc.s3 0x7F,0x41
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fsetc.s2 0x7F,0x42
|
||||
nop.i 999
|
||||
};;
|
||||
mov GR_neg_ov_limit = 0x303ff // Exponent for negative overflow
|
||||
fsetc.s2 0x7F,0x42
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Do final operation
|
||||
// Do final operation with s2 and s3
|
||||
//
|
||||
{ .mfi
|
||||
setf.exp FR_NBig = GR_Scratch
|
||||
fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
setf.exp FR_NBig = GR_neg_ov_limit
|
||||
fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
};;
|
||||
{ .mfi
|
||||
setf.exp FR_Big = GR_Scratch1
|
||||
fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
};;
|
||||
setf.exp FR_Big = GR_pos_ov_limit
|
||||
fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
// Check for overflow or underflow.
|
||||
//
|
||||
// S0 user supplied status
|
||||
// S2 user supplied status + WRE + TD (Overflow)
|
||||
// S3 user supplied status + FZ + TD (Underflow)
|
||||
//
|
||||
//
|
||||
// Restore s3
|
||||
// Restore s2
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fsetc.s3 0x7F,0x40
|
||||
nop.i 999
|
||||
nop.m 0
|
||||
fsetc.s3 0x7F,0x40
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fsetc.s2 0x7F,0x40
|
||||
nop.i 999
|
||||
};;
|
||||
nop.m 0
|
||||
fsetc.s2 0x7F,0x40
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Is the result zero?
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fclass.m.unc p6, p0 = FR_Result3, 0x007
|
||||
nop.i 999
|
||||
}
|
||||
nop.m 0
|
||||
fclass.m p6, p0 = FR_Result3, 0x007
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
addl GR_Tag = 53, r0
|
||||
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
|
||||
nop.i 0
|
||||
};;
|
||||
nop.m 0
|
||||
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Detect masked underflow - Tiny + Inexact Only
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
nop.m 0
|
||||
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
|
||||
nop.i 999
|
||||
};;
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Is result bigger the allowed range?
|
||||
// Branch out for underflow
|
||||
//
|
||||
{ .mfb
|
||||
(p6) addl GR_Tag = 54, r0
|
||||
nop.m 0
|
||||
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
|
||||
(p6) br.cond.spnt SCALB_UNDERFLOW
|
||||
};;
|
||||
(p6) br.cond.spnt SCALB_UNDERFLOW
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Branch out for overflow
|
||||
//
|
||||
{ .mbb
|
||||
nop.m 0
|
||||
(p7) br.cond.spnt SCALB_OVERFLOW
|
||||
(p9) br.cond.spnt SCALB_OVERFLOW
|
||||
};;
|
||||
{ .bbb
|
||||
(p7) br.cond.spnt SCALB_OVERFLOW
|
||||
(p9) br.cond.spnt SCALB_OVERFLOW
|
||||
br.ret.sptk b0 // Return from main path.
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if result overflows
|
||||
SCALB_OVERFLOW:
|
||||
{ .mib
|
||||
alloc r32=ar.pfs,3,0,4,0
|
||||
addl GR_Tag = 53, r0 // Set error tag for overflow
|
||||
br.cond.sptk __libm_error_region // Call error support for overflow
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if result underflows
|
||||
SCALB_UNDERFLOW:
|
||||
{ .mib
|
||||
alloc r32=ar.pfs,3,0,4,0
|
||||
addl GR_Tag = 54, r0 // Set error tag for underflow
|
||||
br.cond.sptk __libm_error_region // Call error support for underflow
|
||||
}
|
||||
;;
|
||||
|
||||
SCALB_NAN_INF_ZERO:
|
||||
|
||||
//
|
||||
// Return from main path.
|
||||
// Before entry, N has been converted to a fp integer in significand of
|
||||
// FR_N_float_int
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 999
|
||||
nop.f 0
|
||||
br.ret.sptk b0;;
|
||||
}
|
||||
|
||||
SCALB_NAN_INF_ZERO:
|
||||
|
||||
// Convert N_float_int to floating point value
|
||||
//
|
||||
// Convert N to a fp integer
|
||||
//
|
||||
{ .mfi
|
||||
addl GR_Scratch = 1,r0
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
|
||||
nop.i 999
|
||||
getf.sig GR_N_as_int = FR_N_float_int
|
||||
fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
|
||||
nop.i 0
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
|
||||
shl GR_Scratch = GR_Scratch,63
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
|
||||
nop.i 0
|
||||
addl GR_Scratch = 1,r0
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
|
||||
nop.i 0
|
||||
};;
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
|
||||
shl GR_Scratch = GR_Scratch,63
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Either X or N is a Nan, return result and possible raise invalid.
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p6) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
nop.m 0
|
||||
(p6) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
(p6) br.ret.spnt b0
|
||||
};;
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfb
|
||||
getf.sig GR_N_as_int = FR_N_float_int
|
||||
(p7) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
nop.m 0
|
||||
(p7) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
(p7) br.ret.spnt b0
|
||||
};;
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// If N + Inf do something special
|
||||
// For N = -Inf, create Int
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p8) fma.d.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
|
||||
(p8) br.ret.spnt b0
|
||||
nop.m 0
|
||||
(p8) fma.d.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
|
||||
(p8) br.ret.spnt b0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p9) fnma.d.s0 FR_Floating_N = FR_Floating_N, f1, f0
|
||||
nop.i 0
|
||||
};;
|
||||
nop.m 0
|
||||
(p9) fnma.d.s0 FR_Floating_N = FR_Floating_N, f1, f0
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// If N==-Inf,return x/(-N)
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
|
||||
(p9) br.ret.spnt b0
|
||||
};;
|
||||
|
||||
//
|
||||
// Convert N_float_int to floating point value
|
||||
//
|
||||
{ .mfi
|
||||
cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
cmp.ne p7,p0 = GR_N_as_int,GR_Scratch
|
||||
(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
|
||||
(p9) br.ret.spnt b0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Is N an integer.
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
nop.m 0
|
||||
(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// If N not an int, return NaN and raise invalid.
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p7) frcpa.s0 FR_Result,p6 = f0,f0
|
||||
(p7) br.ret.spnt b0
|
||||
};;
|
||||
nop.m 0
|
||||
(p7) frcpa.s0 FR_Result,p0 = f0,f0
|
||||
(p7) br.ret.spnt b0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Always return x in other path.
|
||||
// Always return x in other path.
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fma.d.s0 FR_Result = FR_Floating_X,f1,f0
|
||||
br.ret.sptk b0
|
||||
};;
|
||||
nop.m 0
|
||||
fma.d.s0 FR_Result = FR_Floating_X,f1,f0
|
||||
br.ret.sptk b0
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if n not int
|
||||
// Return NaN and raise invalid.
|
||||
SCALB_N_NOT_INT:
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
frcpa.s0 FR_Result,p0 = f0,f0
|
||||
br.ret.sptk b0
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if n=unorm
|
||||
SCALB_N_UNORM:
|
||||
{ .mfb
|
||||
getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
|
||||
br.cond.sptk SCALB_COMMON1 // Return to main path
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if x=unorm
|
||||
SCALB_X_UNORM:
|
||||
{ .mib
|
||||
getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
|
||||
nop.i 0
|
||||
br.cond.sptk SCALB_COMMON2 // Return to main path
|
||||
}
|
||||
;;
|
||||
|
||||
GLOBAL_IEEE754_END(scalb)
|
||||
__libm_error_region:
|
||||
|
||||
SCALB_OVERFLOW:
|
||||
SCALB_UNDERFLOW:
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
|
||||
//
|
||||
// Get stack address of N
|
||||
//
|
||||
.prologue
|
||||
{ .mfi
|
||||
add GR_Parameter_Y=-32,sp
|
||||
add GR_Parameter_Y=-32,sp
|
||||
nop.f 0
|
||||
.save ar.pfs,GR_SAVE_PFS
|
||||
mov GR_SAVE_PFS=ar.pfs
|
||||
mov GR_SAVE_PFS=ar.pfs
|
||||
}
|
||||
//
|
||||
// Adjust sp
|
||||
// Adjust sp
|
||||
//
|
||||
{ .mfi
|
||||
.fframe 64
|
||||
add sp=-64,sp
|
||||
add sp=-64,sp
|
||||
nop.f 0
|
||||
mov GR_SAVE_GP=gp
|
||||
mov GR_SAVE_GP=gp
|
||||
};;
|
||||
|
||||
//
|
||||
// Store N on stack in correct position
|
||||
// Store N on stack in correct position
|
||||
// Locate the address of x on stack
|
||||
//
|
||||
{ .mmi
|
||||
stfd [GR_Parameter_Y] = FR_Norm_N,16
|
||||
add GR_Parameter_X = 16,sp
|
||||
stfd [GR_Parameter_Y] = FR_Norm_N,16
|
||||
add GR_Parameter_X = 16,sp
|
||||
.save b0, GR_SAVE_B0
|
||||
mov GR_SAVE_B0=b0
|
||||
mov GR_SAVE_B0=b0
|
||||
};;
|
||||
|
||||
//
|
||||
@ -503,42 +555,42 @@ SCALB_UNDERFLOW:
|
||||
//
|
||||
.body
|
||||
{ .mib
|
||||
stfd [GR_Parameter_X] = FR_Norm_X
|
||||
add GR_Parameter_RESULT = 0,GR_Parameter_Y
|
||||
stfd [GR_Parameter_X] = FR_Norm_X
|
||||
add GR_Parameter_RESULT = 0,GR_Parameter_Y
|
||||
nop.b 0
|
||||
}
|
||||
{ .mib
|
||||
stfd [GR_Parameter_Y] = FR_Result
|
||||
stfd [GR_Parameter_Y] = FR_Result
|
||||
add GR_Parameter_Y = -16,GR_Parameter_Y
|
||||
br.call.sptk b0=__libm_error_support#
|
||||
br.call.sptk b0=__libm_error_support#
|
||||
};;
|
||||
|
||||
//
|
||||
// Get location of result on stack
|
||||
//
|
||||
{ .mmi
|
||||
add GR_Parameter_RESULT = 48,sp
|
||||
nop.m 0
|
||||
nop.m 0
|
||||
add GR_Parameter_RESULT = 48,sp
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
//
|
||||
// Get the new result
|
||||
// Get the new result
|
||||
//
|
||||
{ .mmi
|
||||
ldfd FR_Result = [GR_Parameter_RESULT]
|
||||
ldfd FR_Result = [GR_Parameter_RESULT]
|
||||
.restore sp
|
||||
add sp = 64,sp
|
||||
mov b0 = GR_SAVE_B0
|
||||
add sp = 64,sp
|
||||
mov b0 = GR_SAVE_B0
|
||||
};;
|
||||
|
||||
//
|
||||
// Restore gp, ar.pfs and return
|
||||
//
|
||||
{ .mib
|
||||
mov gp = GR_SAVE_GP
|
||||
mov ar.pfs = GR_SAVE_PFS
|
||||
br.ret.sptk b0
|
||||
mov gp = GR_SAVE_GP
|
||||
mov ar.pfs = GR_SAVE_PFS
|
||||
br.ret.sptk b0
|
||||
};;
|
||||
|
||||
LOCAL_LIBM_END(__libm_error_region)
|
||||
|
@ -21,60 +21,82 @@
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
//==============================================================
|
||||
// 02/02/00 Initial version
|
||||
// 01/26/01 Scalb completely reworked and now standalone version
|
||||
// 01/26/01 Scalb completely reworked and now standalone version
|
||||
// 05/20/02 Cleaned up namespace and sf0 syntax
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align
|
||||
// 08/06/03 Improved performance
|
||||
//
|
||||
// API
|
||||
//==============================================================
|
||||
// float = scalbf (float x, float n)
|
||||
// float = scalbf (float x, float n)
|
||||
// input floating point f8 and floating point f9
|
||||
// output floating point f8
|
||||
//
|
||||
// int_type = 0 if int is 32 bits
|
||||
// int_type = 1 if int is 64 bits
|
||||
//
|
||||
// Returns x* 2**n using an fma and detects overflow
|
||||
// and underflow.
|
||||
// and underflow.
|
||||
//
|
||||
//
|
||||
// Strategy:
|
||||
// Compute biased exponent of result exp_Result = N + exp_X
|
||||
// Break into ranges:
|
||||
// exp_Result > 0x1007e -> Certain overflow
|
||||
// exp_Result = 0x1007e -> Possible overflow
|
||||
// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path)
|
||||
// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
|
||||
// exp_Result < 0x0ff81 - 23 -> Certain underflow
|
||||
|
||||
FR_Big = f6
|
||||
FR_NBig = f7
|
||||
FR_Floating_X = f8
|
||||
FR_Result = f8
|
||||
FR_Floating_N = f9
|
||||
FR_Result2 = f9
|
||||
FR_Norm_N = f10
|
||||
FR_Result3 = f11
|
||||
FR_Norm_X = f12
|
||||
FR_Result3 = f10
|
||||
FR_Norm_X = f11
|
||||
FR_Two_N = f12
|
||||
FR_N_float_int = f13
|
||||
FR_Two_N = f14
|
||||
FR_Two_to_Big = f15
|
||||
FR_Big = f6
|
||||
FR_NBig = f7
|
||||
FR_Norm_N = f14
|
||||
|
||||
GR_neg_ov_limit= r14
|
||||
GR_big_exp = r14
|
||||
GR_N_Biased = r15
|
||||
GR_Big = r16
|
||||
GR_NBig = r17
|
||||
GR_Scratch = r18
|
||||
GR_Scratch1 = r19
|
||||
GR_exp_Result = r18
|
||||
GR_pos_ov_limit= r19
|
||||
GR_exp_sure_ou = r19
|
||||
GR_Bias = r20
|
||||
GR_N_as_int = r21
|
||||
GR_signexp_X = r22
|
||||
GR_exp_X = r23
|
||||
GR_exp_mask = r24
|
||||
GR_max_exp = r25
|
||||
GR_min_exp = r26
|
||||
GR_min_den_exp = r27
|
||||
GR_Scratch = r28
|
||||
GR_signexp_N = r29
|
||||
GR_exp_N = r30
|
||||
|
||||
GR_SAVE_B0 = r32
|
||||
GR_SAVE_GP = r33
|
||||
@ -89,412 +111,442 @@ GLOBAL_IEEE754_ENTRY(scalbf)
|
||||
|
||||
//
|
||||
// Is x NAN, INF, ZERO, +-?
|
||||
//
|
||||
{ .mfi
|
||||
alloc r32=ar.pfs,0,3,4,0
|
||||
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
|
||||
addl GR_Scratch = 0x019C3F,r0
|
||||
}
|
||||
//
|
||||
// Is y a NAN, INF, ZERO, +-?
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
|
||||
addl GR_Scratch1 = 0x063BF,r0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Convert N to a fp integer
|
||||
// Normalize x
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fnorm.s1 FR_Norm_N = FR_Floating_N
|
||||
nop.i 999
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fnorm.s1 FR_Norm_X = FR_Floating_X
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
//
|
||||
// Create 2*big
|
||||
// Create 2**-big
|
||||
// Normalize x
|
||||
// Branch on special values.
|
||||
//
|
||||
{ .mib
|
||||
setf.exp FR_Big = GR_Scratch
|
||||
nop.i 0
|
||||
(p6) br.cond.spnt SCALBF_NAN_INF_ZERO
|
||||
}
|
||||
{ .mib
|
||||
setf.exp FR_NBig = GR_Scratch1
|
||||
nop.i 0
|
||||
(p7) br.cond.spnt SCALBF_NAN_INF_ZERO
|
||||
};;
|
||||
|
||||
//
|
||||
// Convert N to a fp integer
|
||||
// Create -35000
|
||||
//
|
||||
{ .mfi
|
||||
addl GR_Scratch = 1,r0
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
|
||||
addl GR_NBig = -35000,r0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Put N if a GP register
|
||||
// Convert N_float_int to floating point value
|
||||
// Create 35000
|
||||
// Build the exponent Bias
|
||||
//
|
||||
{ .mii
|
||||
getf.sig GR_N_as_int = FR_N_float_int
|
||||
shl GR_Scratch = GR_Scratch,63
|
||||
addl GR_Big = 35000,r0
|
||||
{ .mfi
|
||||
getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
|
||||
fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
|
||||
mov GR_Bias = 0x0ffff
|
||||
}
|
||||
{ .mfi
|
||||
addl GR_Bias = 0x0FFFF,r0
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
//
|
||||
// Catch those fp values that are beyond 2**64-1
|
||||
// Is N > 35000
|
||||
// Is N < -35000
|
||||
//
|
||||
{ .mfi
|
||||
cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
|
||||
nop.f 0
|
||||
nop.i 0
|
||||
mov GR_Big = 35000 // If N this big then certain overflow
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
|
||||
nop.i 0
|
||||
}
|
||||
{ .mmi
|
||||
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
|
||||
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
|
||||
nop.i 0
|
||||
};;
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
|
||||
fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
|
||||
nop.i 0
|
||||
}
|
||||
//
|
||||
// Is N really an int, only for those non-int indefinites?
|
||||
// Create exp bias.
|
||||
// Normalize n
|
||||
//
|
||||
{ .mfi
|
||||
add GR_N_Biased = GR_Bias,GR_N_as_int
|
||||
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
mov GR_exp_mask = 0x1ffff // Exponent mask
|
||||
fnorm.s1 FR_Norm_N = FR_Floating_N
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Branch and return if N is not an int.
|
||||
// Main path, create 2**N
|
||||
// Is n NAN, INF, ZERO, +-?
|
||||
//
|
||||
{ .mfi
|
||||
setf.exp FR_Two_N = GR_N_Biased
|
||||
nop.i 999
|
||||
mov GR_big_exp = 0x1003e // Exponent at which n is integer
|
||||
fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
|
||||
mov GR_max_exp = 0x1007e // Exponent of maximum float
|
||||
}
|
||||
//
|
||||
// Normalize x
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fnorm.s1 FR_Norm_X = FR_Floating_X
|
||||
(p7) br.cond.spnt SCALBF_N_UNORM // Branch if n=unorm
|
||||
}
|
||||
;;
|
||||
|
||||
SCALBF_COMMON1:
|
||||
// Main path continues. Also return here from u=unorm path.
|
||||
// Handle special cases if x = Nan, Inf, Zero
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
|
||||
(p6) br.cond.spnt SCALBF_NAN_INF_ZERO
|
||||
}
|
||||
;;
|
||||
|
||||
// Handle special cases if n = Nan, Inf, Zero
|
||||
{ .mfi
|
||||
getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
|
||||
fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
|
||||
mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
|
||||
}
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p7) frcpa.s0 f8,p11 = f0,f0
|
||||
(p7) br.ret.spnt b0
|
||||
};;
|
||||
|
||||
//
|
||||
// Set denormal on denormal input x and denormal input N
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
|
||||
nop.i 0
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
|
||||
nop.i 999
|
||||
mov GR_min_exp = 0x0ff81 // Exponent of minimum float
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
|
||||
(p9) br.cond.spnt SCALBF_NAN_INF_ZERO
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mmi
|
||||
and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
|
||||
(p7) sub GR_Big = r0, GR_Big // Limit for N
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mib
|
||||
cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
|
||||
cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
|
||||
(p8) br.cond.spnt SCALBF_X_UNORM // Branch if x=unorm
|
||||
}
|
||||
;;
|
||||
|
||||
SCALBF_COMMON2:
|
||||
// Main path continues. Also return here from x=unorm path.
|
||||
// Create biased exponent for 2**N
|
||||
{ .mmi
|
||||
(p6) mov GR_N_as_int = GR_Big // Limit N
|
||||
;;
|
||||
add GR_N_Biased = GR_Bias,GR_N_as_int
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
|
||||
nop.i 0
|
||||
};;
|
||||
setf.exp FR_Two_N = GR_N_Biased // Form 2**N
|
||||
(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
|
||||
and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Adjust 2**N if N was very small or very large
|
||||
// Compute biased result exponent
|
||||
// Branch if N is not an integer
|
||||
//
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
|
||||
nop.i 0
|
||||
{ .mib
|
||||
add GR_exp_Result = GR_exp_X, GR_N_as_int
|
||||
mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
|
||||
(p9) br.cond.spnt SCALBF_N_NOT_INT
|
||||
}
|
||||
{ .mlx
|
||||
nop.m 999
|
||||
movl GR_Scratch = 0x000000000003007F
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
|
||||
nop.i 0
|
||||
}
|
||||
{ .mlx
|
||||
nop.m 999
|
||||
movl GR_Scratch1= 0x000000000001007F
|
||||
};;
|
||||
;;
|
||||
|
||||
// Set up necessary status fields
|
||||
//
|
||||
// Raise Denormal operand flag with compare
|
||||
// Do final operation
|
||||
//
|
||||
{ .mfi
|
||||
cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
|
||||
fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
|
||||
cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
|
||||
}
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
|
||||
(p9) br.cond.spnt SCALBF_UNDERFLOW // Branch if certain underflow
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mib
|
||||
(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
|
||||
(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
|
||||
(p7) br.ret.sptk b0 // Return from main path
|
||||
}
|
||||
;;
|
||||
|
||||
{ .bbb
|
||||
(p6) br.cond.spnt SCALBF_OVERFLOW // Branch if certain overflow
|
||||
(p8) br.cond.spnt SCALBF_POSSIBLE_OVERFLOW // Branch if possible overflow
|
||||
(p9) br.cond.spnt SCALBF_POSSIBLE_UNDERFLOW // Branch if possible underflow
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if possible underflow.
|
||||
// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
|
||||
SCALBF_POSSIBLE_UNDERFLOW:
|
||||
//
|
||||
// Here if possible overflow.
|
||||
// Resulting exponent: 0x1007e = exp_Result
|
||||
SCALBF_POSSIBLE_OVERFLOW:
|
||||
|
||||
// Set up necessary status fields
|
||||
//
|
||||
// S0 user supplied status
|
||||
// S2 user supplied status + WRE + TD (Overflows)
|
||||
// S3 user supplied status + FZ + TD (Underflows)
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fsetc.s3 0x7F,0x41
|
||||
nop.i 999
|
||||
mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
|
||||
fsetc.s3 0x7F,0x41
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fsetc.s2 0x7F,0x42
|
||||
nop.i 999
|
||||
};;
|
||||
mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
|
||||
fsetc.s2 0x7F,0x42
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Do final operation
|
||||
// Do final operation with s2 and s3
|
||||
//
|
||||
{ .mfi
|
||||
setf.exp FR_NBig = GR_Scratch
|
||||
fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
setf.exp FR_NBig = GR_neg_ov_limit
|
||||
fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
};;
|
||||
{ .mfi
|
||||
setf.exp FR_Big = GR_Scratch1
|
||||
fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
};;
|
||||
setf.exp FR_Big = GR_pos_ov_limit
|
||||
fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
// Check for overflow or underflow.
|
||||
//
|
||||
// S0 user supplied status
|
||||
// S2 user supplied status + WRE + TD (Overflow)
|
||||
// S3 user supplied status + FZ + TD (Underflow)
|
||||
//
|
||||
//
|
||||
// Restore s3
|
||||
// Restore s2
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fsetc.s3 0x7F,0x40
|
||||
nop.i 999
|
||||
nop.m 0
|
||||
fsetc.s3 0x7F,0x40
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fsetc.s2 0x7F,0x40
|
||||
nop.i 999
|
||||
};;
|
||||
nop.m 0
|
||||
fsetc.s2 0x7F,0x40
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Is the result zero?
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fclass.m.unc p6, p0 = FR_Result3, 0x007
|
||||
nop.i 999
|
||||
}
|
||||
nop.m 0
|
||||
fclass.m p6, p0 = FR_Result3, 0x007
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
addl GR_Tag = 55, r0
|
||||
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
|
||||
nop.i 0
|
||||
};;
|
||||
nop.m 0
|
||||
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Detect masked underflow - Tiny + Inexact Only
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
nop.m 0
|
||||
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
|
||||
nop.i 999
|
||||
};;
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Is result bigger the allowed range?
|
||||
// Branch out for underflow
|
||||
//
|
||||
{ .mfb
|
||||
(p6) addl GR_Tag = 56, r0
|
||||
nop.m 0
|
||||
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
|
||||
(p6) br.cond.spnt SCALBF_UNDERFLOW
|
||||
};;
|
||||
(p6) br.cond.spnt SCALBF_UNDERFLOW
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Branch out for overflow
|
||||
//
|
||||
{ .mbb
|
||||
nop.m 0
|
||||
(p7) br.cond.spnt SCALBF_OVERFLOW
|
||||
(p9) br.cond.spnt SCALBF_OVERFLOW
|
||||
};;
|
||||
{ .bbb
|
||||
(p7) br.cond.spnt SCALBF_OVERFLOW
|
||||
(p9) br.cond.spnt SCALBF_OVERFLOW
|
||||
br.ret.sptk b0 // Return from main path.
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if result overflows
|
||||
SCALBF_OVERFLOW:
|
||||
{ .mib
|
||||
alloc r32=ar.pfs,3,0,4,0
|
||||
addl GR_Tag = 55, r0 // Set error tag for overflow
|
||||
br.cond.sptk __libm_error_region // Call error support for overflow
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if result underflows
|
||||
SCALBF_UNDERFLOW:
|
||||
{ .mib
|
||||
alloc r32=ar.pfs,3,0,4,0
|
||||
addl GR_Tag = 56, r0 // Set error tag for underflow
|
||||
br.cond.sptk __libm_error_region // Call error support for underflow
|
||||
}
|
||||
;;
|
||||
|
||||
SCALBF_NAN_INF_ZERO:
|
||||
|
||||
//
|
||||
// Return from main path.
|
||||
// Before entry, N has been converted to a fp integer in significand of
|
||||
// FR_N_float_int
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 999
|
||||
nop.f 0
|
||||
br.ret.sptk b0;;
|
||||
}
|
||||
|
||||
SCALBF_NAN_INF_ZERO:
|
||||
|
||||
// Convert N_float_int to floating point value
|
||||
//
|
||||
// Convert N to a fp integer
|
||||
//
|
||||
{ .mfi
|
||||
addl GR_Scratch = 1,r0
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
|
||||
nop.i 999
|
||||
getf.sig GR_N_as_int = FR_N_float_int
|
||||
fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
|
||||
nop.i 0
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
|
||||
shl GR_Scratch = GR_Scratch,63
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
|
||||
nop.i 0
|
||||
addl GR_Scratch = 1,r0
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
|
||||
nop.i 0
|
||||
};;
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
|
||||
shl GR_Scratch = GR_Scratch,63
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Either X or N is a NaN; return the result and possibly raise invalid.
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p6) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
nop.m 0
|
||||
(p6) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
(p6) br.ret.spnt b0
|
||||
};;
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfb
|
||||
getf.sig GR_N_as_int = FR_N_float_int
|
||||
(p7) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
nop.m 0
|
||||
(p7) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
(p7) br.ret.spnt b0
|
||||
};;
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// If N = +Inf do something special
|
||||
// For N = -Inf, negate N to create +Inf
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p8) fma.s.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
|
||||
(p8) br.ret.spnt b0
|
||||
nop.m 0
|
||||
(p8) fma.s.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
|
||||
(p8) br.ret.spnt b0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p9) fnma.s.s0 FR_Floating_N = FR_Floating_N, f1, f0
|
||||
nop.i 0
|
||||
};;
|
||||
nop.m 0
|
||||
(p9) fnma.s.s0 FR_Floating_N = FR_Floating_N, f1, f0
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// If N==-Inf,return x/(-N)
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
|
||||
(p9) br.ret.spnt b0
|
||||
};;
|
||||
|
||||
//
|
||||
// Convert N_float_int to floating point value
|
||||
//
|
||||
{ .mfi
|
||||
cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
cmp.ne p7,p0 = GR_N_as_int,GR_Scratch
|
||||
(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
|
||||
(p9) br.ret.spnt b0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Is N an integer.
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
nop.m 0
|
||||
(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// If N not an int, return NaN and raise invalid.
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p7) frcpa.s0 FR_Result,p6 = f0,f0
|
||||
(p7) br.ret.spnt b0
|
||||
};;
|
||||
nop.m 0
|
||||
(p7) frcpa.s0 FR_Result,p0 = f0,f0
|
||||
(p7) br.ret.spnt b0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Always return x in other path.
|
||||
// Always return x in other path.
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fma.s.s0 FR_Result = FR_Floating_X,f1,f0
|
||||
br.ret.sptk b0
|
||||
};;
|
||||
nop.m 0
|
||||
fma.s.s0 FR_Result = FR_Floating_X,f1,f0
|
||||
br.ret.sptk b0
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if n not int
|
||||
// Return NaN and raise invalid.
|
||||
SCALBF_N_NOT_INT:
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
frcpa.s0 FR_Result,p0 = f0,f0
|
||||
br.ret.sptk b0
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if n=unorm
|
||||
SCALBF_N_UNORM:
|
||||
{ .mfb
|
||||
getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
|
||||
br.cond.sptk SCALBF_COMMON1 // Return to main path
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if x=unorm
|
||||
SCALBF_X_UNORM:
|
||||
{ .mib
|
||||
getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
|
||||
nop.i 0
|
||||
br.cond.sptk SCALBF_COMMON2 // Return to main path
|
||||
}
|
||||
;;
|
||||
|
||||
GLOBAL_IEEE754_END(scalbf)
|
||||
__libm_error_region:
|
||||
|
||||
SCALBF_OVERFLOW:
|
||||
SCALBF_UNDERFLOW:
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
|
||||
//
|
||||
// Get stack address of N
|
||||
//
|
||||
.prologue
|
||||
{ .mfi
|
||||
add GR_Parameter_Y=-32,sp
|
||||
add GR_Parameter_Y=-32,sp
|
||||
nop.f 0
|
||||
.save ar.pfs,GR_SAVE_PFS
|
||||
mov GR_SAVE_PFS=ar.pfs
|
||||
mov GR_SAVE_PFS=ar.pfs
|
||||
}
|
||||
//
|
||||
// Adjust sp
|
||||
// Adjust sp
|
||||
//
|
||||
{ .mfi
|
||||
.fframe 64
|
||||
add sp=-64,sp
|
||||
add sp=-64,sp
|
||||
nop.f 0
|
||||
mov GR_SAVE_GP=gp
|
||||
mov GR_SAVE_GP=gp
|
||||
};;
|
||||
|
||||
//
|
||||
// Store N on stack in correct position
|
||||
// Store N on stack in correct position
|
||||
// Locate the address of x on stack
|
||||
//
|
||||
{ .mmi
|
||||
stfs [GR_Parameter_Y] = FR_Norm_N,16
|
||||
add GR_Parameter_X = 16,sp
|
||||
stfs [GR_Parameter_Y] = FR_Norm_N,16
|
||||
add GR_Parameter_X = 16,sp
|
||||
.save b0, GR_SAVE_B0
|
||||
mov GR_SAVE_B0=b0
|
||||
mov GR_SAVE_B0=b0
|
||||
};;
|
||||
|
||||
//
|
||||
@ -503,42 +555,42 @@ SCALBF_UNDERFLOW:
|
||||
//
|
||||
.body
|
||||
{ .mib
|
||||
stfs [GR_Parameter_X] = FR_Norm_X
|
||||
add GR_Parameter_RESULT = 0,GR_Parameter_Y
|
||||
stfs [GR_Parameter_X] = FR_Norm_X
|
||||
add GR_Parameter_RESULT = 0,GR_Parameter_Y
|
||||
nop.b 0
|
||||
}
|
||||
{ .mib
|
||||
stfs [GR_Parameter_Y] = FR_Result
|
||||
stfs [GR_Parameter_Y] = FR_Result
|
||||
add GR_Parameter_Y = -16,GR_Parameter_Y
|
||||
br.call.sptk b0=__libm_error_support#
|
||||
br.call.sptk b0=__libm_error_support#
|
||||
};;
|
||||
|
||||
//
|
||||
// Get location of result on stack
|
||||
//
|
||||
{ .mmi
|
||||
add GR_Parameter_RESULT = 48,sp
|
||||
nop.m 0
|
||||
nop.m 0
|
||||
add GR_Parameter_RESULT = 48,sp
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
//
|
||||
// Get the new result
|
||||
// Get the new result
|
||||
//
|
||||
{ .mmi
|
||||
ldfs FR_Result = [GR_Parameter_RESULT]
|
||||
ldfs FR_Result = [GR_Parameter_RESULT]
|
||||
.restore sp
|
||||
add sp = 64,sp
|
||||
mov b0 = GR_SAVE_B0
|
||||
add sp = 64,sp
|
||||
mov b0 = GR_SAVE_B0
|
||||
};;
|
||||
|
||||
//
|
||||
// Restore gp, ar.pfs and return
|
||||
//
|
||||
{ .mib
|
||||
mov gp = GR_SAVE_GP
|
||||
mov ar.pfs = GR_SAVE_PFS
|
||||
br.ret.sptk b0
|
||||
mov gp = GR_SAVE_GP
|
||||
mov ar.pfs = GR_SAVE_PFS
|
||||
br.ret.sptk b0
|
||||
};;
|
||||
|
||||
LOCAL_LIBM_END(__libm_error_region)
|
||||
|
@ -21,60 +21,82 @@
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
//==============================================================
|
||||
// 02/02/00 Initial version
|
||||
// 01/26/01 Scalb completely reworked and now standalone version
|
||||
// 01/26/01 Scalb completely reworked and now standalone version
|
||||
// 05/20/02 Cleaned up namespace and sf0 syntax
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align
|
||||
// 08/06/03 Improved performance
|
||||
//
|
||||
// API
|
||||
//==============================================================
|
||||
// double-extended = scalbl (double-extended x, double-extended n)
|
||||
// long double = scalbl (long double x, long double n)
|
||||
// input floating point f8 and floating point f9
|
||||
// output floating point f8
|
||||
//
|
||||
// int_type = 0 if int is 32 bits
|
||||
// int_type = 1 if int is 64 bits
|
||||
//
|
||||
// Returns x* 2**n using an fma and detects overflow
|
||||
// and underflow.
|
||||
// and underflow.
|
||||
//
|
||||
//
|
||||
// Strategy:
|
||||
// Compute biased exponent of result exp_Result = N + exp_X
|
||||
// Break into ranges:
|
||||
// exp_Result > 0x13ffe -> Certain overflow
|
||||
// exp_Result = 0x13ffe -> Possible overflow
|
||||
// 0x0c001 <= exp_Result < 0x13ffe -> No over/underflow (main path)
|
||||
// 0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow
|
||||
// exp_Result < 0x0c001 - 63 -> Certain underflow
|
||||
|
||||
FR_Big = f6
|
||||
FR_NBig = f7
|
||||
FR_Floating_X = f8
|
||||
FR_Result = f8
|
||||
FR_Floating_N = f9
|
||||
FR_Result2 = f9
|
||||
FR_Norm_N = f10
|
||||
FR_Result3 = f11
|
||||
FR_Norm_X = f12
|
||||
FR_Result3 = f10
|
||||
FR_Norm_X = f11
|
||||
FR_Two_N = f12
|
||||
FR_N_float_int = f13
|
||||
FR_Two_N = f14
|
||||
FR_Two_to_Big = f15
|
||||
FR_Big = f6
|
||||
FR_NBig = f7
|
||||
FR_Norm_N = f14
|
||||
|
||||
GR_neg_ov_limit= r14
|
||||
GR_big_exp = r14
|
||||
GR_N_Biased = r15
|
||||
GR_Big = r16
|
||||
GR_NBig = r17
|
||||
GR_Scratch = r18
|
||||
GR_Scratch1 = r19
|
||||
GR_exp_Result = r18
|
||||
GR_pos_ov_limit= r19
|
||||
GR_exp_sure_ou = r19
|
||||
GR_Bias = r20
|
||||
GR_N_as_int = r21
|
||||
GR_signexp_X = r22
|
||||
GR_exp_X = r23
|
||||
GR_exp_mask = r24
|
||||
GR_max_exp = r25
|
||||
GR_min_exp = r26
|
||||
GR_min_den_exp = r27
|
||||
GR_Scratch = r28
|
||||
GR_signexp_N = r29
|
||||
GR_exp_N = r30
|
||||
|
||||
GR_SAVE_B0 = r32
|
||||
GR_SAVE_GP = r33
|
||||
@ -89,412 +111,442 @@ GLOBAL_IEEE754_ENTRY(scalbl)
|
||||
|
||||
//
|
||||
// Is x NAN, INF, ZERO, +-?
|
||||
//
|
||||
{ .mfi
|
||||
alloc r32=ar.pfs,0,3,4,0
|
||||
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
|
||||
addl GR_Scratch = 0x019C3F,r0
|
||||
}
|
||||
//
|
||||
// Is y a NAN, INF, ZERO, +-?
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
|
||||
addl GR_Scratch1 = 0x063BF,r0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Convert N to a fp integer
|
||||
// Normalize x
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fnorm.s1 FR_Norm_N = FR_Floating_N
|
||||
nop.i 999
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fnorm.s1 FR_Norm_X = FR_Floating_X
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
//
|
||||
// Create 2*big
|
||||
// Create 2**-big
|
||||
// Normalize x
|
||||
// Branch on special values.
|
||||
//
|
||||
{ .mib
|
||||
setf.exp FR_Big = GR_Scratch
|
||||
nop.i 0
|
||||
(p6) br.cond.spnt SCALBL_NAN_INF_ZERO
|
||||
}
|
||||
{ .mib
|
||||
setf.exp FR_NBig = GR_Scratch1
|
||||
nop.i 0
|
||||
(p7) br.cond.spnt SCALBL_NAN_INF_ZERO
|
||||
};;
|
||||
|
||||
//
|
||||
// Convert N to a fp integer
|
||||
// Create -35000
|
||||
//
|
||||
{ .mfi
|
||||
addl GR_Scratch = 1,r0
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
|
||||
addl GR_NBig = -35000,r0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Put N in a GP register
|
||||
// Convert N_float_int to floating point value
|
||||
// Create 35000
|
||||
// Build the exponent Bias
|
||||
//
|
||||
{ .mii
|
||||
getf.sig GR_N_as_int = FR_N_float_int
|
||||
shl GR_Scratch = GR_Scratch,63
|
||||
addl GR_Big = 35000,r0
|
||||
{ .mfi
|
||||
getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
|
||||
fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
|
||||
mov GR_Bias = 0x0ffff
|
||||
}
|
||||
{ .mfi
|
||||
addl GR_Bias = 0x0FFFF,r0
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
//
|
||||
// Catch those fp values that are beyond 2**64-1
|
||||
// Is N > 35000
|
||||
// Is N < -35000
|
||||
//
|
||||
{ .mfi
|
||||
cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
|
||||
nop.f 0
|
||||
nop.i 0
|
||||
mov GR_Big = 35000 // If N this big then certain overflow
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
|
||||
nop.i 0
|
||||
}
|
||||
{ .mmi
|
||||
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
|
||||
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
|
||||
nop.i 0
|
||||
};;
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
|
||||
fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
|
||||
nop.i 0
|
||||
}
|
||||
//
|
||||
// Is N really an int, only for those non-int indefinites?
|
||||
// Create exp bias.
|
||||
// Normalize n
|
||||
//
|
||||
{ .mfi
|
||||
add GR_N_Biased = GR_Bias,GR_N_as_int
|
||||
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
mov GR_exp_mask = 0x1ffff // Exponent mask
|
||||
fnorm.s1 FR_Norm_N = FR_Floating_N
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Branch and return if N is not an int.
|
||||
// Main path, create 2**N
|
||||
// Is n NAN, INF, ZERO, +-?
|
||||
//
|
||||
{ .mfi
|
||||
setf.exp FR_Two_N = GR_N_Biased
|
||||
nop.i 999
|
||||
mov GR_big_exp = 0x1003e // Exponent at which n is integer
|
||||
fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
|
||||
mov GR_max_exp = 0x13ffe // Exponent of maximum long double
|
||||
}
|
||||
//
|
||||
// Normalize x
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fnorm.s1 FR_Norm_X = FR_Floating_X
|
||||
(p7) br.cond.spnt SCALBL_N_UNORM // Branch if n=unorm
|
||||
}
|
||||
;;
|
||||
|
||||
SCALBL_COMMON1:
|
||||
// Main path continues. Also return here from n=unorm path.
|
||||
// Handle special cases if x = Nan, Inf, Zero
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
|
||||
(p6) br.cond.spnt SCALBL_NAN_INF_ZERO
|
||||
}
|
||||
;;
|
||||
|
||||
// Handle special cases if n = Nan, Inf, Zero
|
||||
{ .mfi
|
||||
getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
|
||||
fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
|
||||
mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
|
||||
}
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p7) frcpa.s0 f8,p11 = f0,f0
|
||||
(p7) br.ret.spnt b0
|
||||
};;
|
||||
|
||||
//
|
||||
// Set denormal on denormal input x and denormal input N
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
|
||||
nop.i 0
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
|
||||
nop.i 999
|
||||
mov GR_min_exp = 0x0c001 // Exponent of minimum long double
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
|
||||
(p9) br.cond.spnt SCALBL_NAN_INF_ZERO
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mmi
|
||||
and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
|
||||
(p7) sub GR_Big = r0, GR_Big // Limit for N
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mib
|
||||
cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
|
||||
cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
|
||||
(p8) br.cond.spnt SCALBL_X_UNORM // Branch if x=unorm
|
||||
}
|
||||
;;
|
||||
|
||||
SCALBL_COMMON2:
|
||||
// Main path continues. Also return here from x=unorm path.
|
||||
// Create biased exponent for 2**N
|
||||
{ .mmi
|
||||
(p6) mov GR_N_as_int = GR_Big // Limit N
|
||||
;;
|
||||
add GR_N_Biased = GR_Bias,GR_N_as_int
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
|
||||
nop.i 0
|
||||
};;
|
||||
setf.exp FR_Two_N = GR_N_Biased // Form 2**N
|
||||
(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
|
||||
and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Adjust 2**N if N was very small or very large
|
||||
// Compute biased result exponent
|
||||
// Branch if N is not an integer
|
||||
//
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
|
||||
nop.i 0
|
||||
{ .mib
|
||||
add GR_exp_Result = GR_exp_X, GR_N_as_int
|
||||
mov GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble
|
||||
(p9) br.cond.spnt SCALBL_N_NOT_INT
|
||||
}
|
||||
{ .mlx
|
||||
nop.m 999
|
||||
movl GR_Scratch = 0x0000000000033FFF
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
|
||||
nop.i 0
|
||||
}
|
||||
{ .mlx
|
||||
nop.m 999
|
||||
movl GR_Scratch1= 0x0000000000013FFF
|
||||
};;
|
||||
;;
|
||||
|
||||
// Set up necessary status fields
|
||||
//
|
||||
// Raise Denormal operand flag with compare
|
||||
// Do final operation
|
||||
//
|
||||
{ .mfi
|
||||
cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
|
||||
fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
|
||||
cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
|
||||
}
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
|
||||
(p9) br.cond.spnt SCALBL_UNDERFLOW // Branch if certain underflow
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mib
|
||||
(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
|
||||
(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
|
||||
(p7) br.ret.sptk b0 // Return from main path
|
||||
}
|
||||
;;
|
||||
|
||||
{ .bbb
|
||||
(p6) br.cond.spnt SCALBL_OVERFLOW // Branch if certain overflow
|
||||
(p8) br.cond.spnt SCALBL_POSSIBLE_OVERFLOW // Branch if possible overflow
|
||||
(p9) br.cond.spnt SCALBL_POSSIBLE_UNDERFLOW // Branch if possible underflow
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if possible underflow.
|
||||
// Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001
|
||||
SCALBL_POSSIBLE_UNDERFLOW:
|
||||
//
|
||||
// Here if possible overflow.
|
||||
// Resulting exponent: 0x13ffe = exp_Result
|
||||
SCALBL_POSSIBLE_OVERFLOW:
|
||||
|
||||
// Set up necessary status fields
|
||||
//
|
||||
// S0 user supplied status
|
||||
// S2 user supplied status + WRE + TD (Overflows)
|
||||
// S3 user supplied status + FZ + TD (Underflows)
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fsetc.s3 0x7F,0x41
|
||||
nop.i 999
|
||||
mov GR_pos_ov_limit = 0x13fff // Exponent for positive overflow
|
||||
fsetc.s3 0x7F,0x41
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fsetc.s2 0x7F,0x42
|
||||
nop.i 999
|
||||
};;
|
||||
mov GR_neg_ov_limit = 0x33fff // Exponent for negative overflow
|
||||
fsetc.s2 0x7F,0x42
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Do final operation
|
||||
// Do final operation with s2 and s3
|
||||
//
|
||||
{ .mfi
|
||||
setf.exp FR_NBig = GR_Scratch
|
||||
fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
setf.exp FR_NBig = GR_neg_ov_limit
|
||||
fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
};;
|
||||
{ .mfi
|
||||
setf.exp FR_Big = GR_Scratch1
|
||||
fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
};;
|
||||
setf.exp FR_Big = GR_pos_ov_limit
|
||||
fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
// Check for overflow or underflow.
|
||||
//
|
||||
// S0 user supplied status
|
||||
// S2 user supplied status + WRE + TD (Overflow)
|
||||
// S3 user supplied status + FZ + TD (Underflow)
|
||||
//
|
||||
//
|
||||
// Restore s3
|
||||
// Restore s2
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fsetc.s3 0x7F,0x40
|
||||
nop.i 999
|
||||
nop.m 0
|
||||
fsetc.s3 0x7F,0x40
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fsetc.s2 0x7F,0x40
|
||||
nop.i 999
|
||||
};;
|
||||
nop.m 0
|
||||
fsetc.s2 0x7F,0x40
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Is the result zero?
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fclass.m.unc p6, p0 = FR_Result3, 0x007
|
||||
nop.i 999
|
||||
}
|
||||
nop.m 0
|
||||
fclass.m p6, p0 = FR_Result3, 0x007
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
addl GR_Tag = 51, r0
|
||||
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
|
||||
nop.i 0
|
||||
};;
|
||||
nop.m 0
|
||||
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Detect masked underflow - Tiny + Inexact Only
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
nop.m 0
|
||||
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
|
||||
nop.i 999
|
||||
};;
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Is the result bigger than the allowed range?
|
||||
// Branch out for underflow
|
||||
//
|
||||
{ .mfb
|
||||
(p6) addl GR_Tag = 52, r0
|
||||
nop.m 0
|
||||
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
|
||||
(p6) br.cond.spnt SCALBL_UNDERFLOW
|
||||
};;
|
||||
(p6) br.cond.spnt SCALBL_UNDERFLOW
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Branch out for overflow
|
||||
//
|
||||
{ .mbb
|
||||
nop.m 0
|
||||
(p7) br.cond.spnt SCALBL_OVERFLOW
|
||||
(p9) br.cond.spnt SCALBL_OVERFLOW
|
||||
};;
|
||||
{ .bbb
|
||||
(p7) br.cond.spnt SCALBL_OVERFLOW
|
||||
(p9) br.cond.spnt SCALBL_OVERFLOW
|
||||
br.ret.sptk b0 // Return from main path.
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if result overflows
|
||||
SCALBL_OVERFLOW:
|
||||
{ .mib
|
||||
alloc r32=ar.pfs,3,0,4,0
|
||||
addl GR_Tag = 51, r0 // Set error tag for overflow
|
||||
br.cond.sptk __libm_error_region // Call error support for overflow
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if result underflows
|
||||
SCALBL_UNDERFLOW:
|
||||
{ .mib
|
||||
alloc r32=ar.pfs,3,0,4,0
|
||||
addl GR_Tag = 52, r0 // Set error tag for underflow
|
||||
br.cond.sptk __libm_error_region // Call error support for underflow
|
||||
}
|
||||
;;
|
||||
|
||||
SCALBL_NAN_INF_ZERO:
|
||||
|
||||
//
|
||||
// Return from main path.
|
||||
// Before entry, N has been converted to a fp integer in significand of
|
||||
// FR_N_float_int
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 999
|
||||
nop.f 0
|
||||
br.ret.sptk b0;;
|
||||
}
|
||||
|
||||
SCALBL_NAN_INF_ZERO:
|
||||
|
||||
// Convert N_float_int to floating point value
|
||||
//
|
||||
// Convert N to a fp integer
|
||||
//
|
||||
{ .mfi
|
||||
addl GR_Scratch = 1,r0
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
|
||||
nop.i 999
|
||||
getf.sig GR_N_as_int = FR_N_float_int
|
||||
fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
|
||||
nop.i 0
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
|
||||
shl GR_Scratch = GR_Scratch,63
|
||||
};;
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
|
||||
nop.i 0
|
||||
addl GR_Scratch = 1,r0
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
|
||||
nop.i 0
|
||||
};;
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
|
||||
shl GR_Scratch = GR_Scratch,63
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Either X or N is a NaN; return the result and possibly raise invalid.
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p6) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
nop.m 0
|
||||
(p6) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
(p6) br.ret.spnt b0
|
||||
};;
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mfb
|
||||
getf.sig GR_N_as_int = FR_N_float_int
|
||||
(p7) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
nop.m 0
|
||||
(p7) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
|
||||
(p7) br.ret.spnt b0
|
||||
};;
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// If N = +Inf do something special
|
||||
// For N = -Inf, negate N to create +Inf
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p8) fma.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
|
||||
(p8) br.ret.spnt b0
|
||||
nop.m 0
|
||||
(p8) fma.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
|
||||
(p8) br.ret.spnt b0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p9) fnma.s0 FR_Floating_N = FR_Floating_N, f1, f0
|
||||
nop.i 0
|
||||
};;
|
||||
nop.m 0
|
||||
(p9) fnma.s0 FR_Floating_N = FR_Floating_N, f1, f0
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// If N==-Inf,return x/(-N)
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
|
||||
(p9) br.ret.spnt b0
|
||||
};;
|
||||
|
||||
//
|
||||
// Convert N_float_int to floating point value
|
||||
//
|
||||
{ .mfi
|
||||
cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
|
||||
fcvt.xf FR_N_float_int = FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
cmp.ne p7,p0 = GR_N_as_int,GR_Scratch
|
||||
(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
|
||||
(p9) br.ret.spnt b0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Is N an integer.
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
|
||||
nop.i 0
|
||||
};;
|
||||
nop.m 0
|
||||
(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// If N not an int, return NaN and raise invalid.
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
(p7) frcpa.s0 FR_Result,p6 = f0,f0
|
||||
(p7) br.ret.spnt b0
|
||||
};;
|
||||
nop.m 0
|
||||
(p7) frcpa.s0 FR_Result,p0 = f0,f0
|
||||
(p7) br.ret.spnt b0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Always return x in other path.
|
||||
// Always return x in other path.
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fma.s0 FR_Result = FR_Floating_X,f1,f0
|
||||
br.ret.sptk b0
|
||||
};;
|
||||
nop.m 0
|
||||
fma.s0 FR_Result = FR_Floating_X,f1,f0
|
||||
br.ret.sptk b0
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if n not int
|
||||
// Return NaN and raise invalid.
|
||||
SCALBL_N_NOT_INT:
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
frcpa.s0 FR_Result,p0 = f0,f0
|
||||
br.ret.sptk b0
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if n=unorm
|
||||
SCALBL_N_UNORM:
|
||||
{ .mfb
|
||||
getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
|
||||
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
|
||||
br.cond.sptk SCALBL_COMMON1 // Return to main path
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if x=unorm
|
||||
SCALBL_X_UNORM:
|
||||
{ .mib
|
||||
getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
|
||||
nop.i 0
|
||||
br.cond.sptk SCALBL_COMMON2 // Return to main path
|
||||
}
|
||||
;;
|
||||
|
||||
GLOBAL_IEEE754_END(scalbl)
|
||||
__libm_error_region:
|
||||
|
||||
SCALBL_OVERFLOW:
|
||||
SCALBL_UNDERFLOW:
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
|
||||
//
|
||||
// Get stack address of N
|
||||
//
|
||||
.prologue
|
||||
{ .mfi
|
||||
add GR_Parameter_Y=-32,sp
|
||||
add GR_Parameter_Y=-32,sp
|
||||
nop.f 0
|
||||
.save ar.pfs,GR_SAVE_PFS
|
||||
mov GR_SAVE_PFS=ar.pfs
|
||||
mov GR_SAVE_PFS=ar.pfs
|
||||
}
|
||||
//
|
||||
// Adjust sp
|
||||
// Adjust sp
|
||||
//
|
||||
{ .mfi
|
||||
.fframe 64
|
||||
add sp=-64,sp
|
||||
add sp=-64,sp
|
||||
nop.f 0
|
||||
mov GR_SAVE_GP=gp
|
||||
mov GR_SAVE_GP=gp
|
||||
};;
|
||||
|
||||
//
|
||||
// Store N on stack in correct position
|
||||
// Store N on stack in correct position
|
||||
// Locate the address of x on stack
|
||||
//
|
||||
{ .mmi
|
||||
stfe [GR_Parameter_Y] = FR_Norm_N,16
|
||||
add GR_Parameter_X = 16,sp
|
||||
stfe [GR_Parameter_Y] = FR_Norm_N,16
|
||||
add GR_Parameter_X = 16,sp
|
||||
.save b0, GR_SAVE_B0
|
||||
mov GR_SAVE_B0=b0
|
||||
mov GR_SAVE_B0=b0
|
||||
};;
|
||||
|
||||
//
|
||||
@ -503,42 +555,42 @@ SCALBL_UNDERFLOW:
|
||||
//
|
||||
.body
|
||||
{ .mib
|
||||
stfe [GR_Parameter_X] = FR_Norm_X
|
||||
add GR_Parameter_RESULT = 0,GR_Parameter_Y
|
||||
stfe [GR_Parameter_X] = FR_Norm_X
|
||||
add GR_Parameter_RESULT = 0,GR_Parameter_Y
|
||||
nop.b 0
|
||||
}
|
||||
{ .mib
|
||||
stfe [GR_Parameter_Y] = FR_Result
|
||||
stfe [GR_Parameter_Y] = FR_Result
|
||||
add GR_Parameter_Y = -16,GR_Parameter_Y
|
||||
br.call.sptk b0=__libm_error_support#
|
||||
br.call.sptk b0=__libm_error_support#
|
||||
};;
|
||||
|
||||
//
|
||||
// Get location of result on stack
|
||||
//
|
||||
{ .mmi
|
||||
add GR_Parameter_RESULT = 48,sp
|
||||
nop.m 0
|
||||
nop.m 0
|
||||
add GR_Parameter_RESULT = 48,sp
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
//
|
||||
// Get the new result
|
||||
// Get the new result
|
||||
//
|
||||
{ .mmi
|
||||
ldfe FR_Result = [GR_Parameter_RESULT]
|
||||
ldfe FR_Result = [GR_Parameter_RESULT]
|
||||
.restore sp
|
||||
add sp = 64,sp
|
||||
mov b0 = GR_SAVE_B0
|
||||
add sp = 64,sp
|
||||
mov b0 = GR_SAVE_B0
|
||||
};;
|
||||
|
||||
//
|
||||
// Restore gp, ar.pfs and return
|
||||
//
|
||||
{ .mib
|
||||
mov gp = GR_SAVE_GP
|
||||
mov ar.pfs = GR_SAVE_PFS
|
||||
br.ret.sptk b0
|
||||
mov gp = GR_SAVE_GP
|
||||
mov ar.pfs = GR_SAVE_PFS
|
||||
br.ret.sptk b0
|
||||
};;
|
||||
|
||||
LOCAL_LIBM_END(__libm_error_region)
|
||||
|
@ -850,6 +850,7 @@ SINH_UNORM:
|
||||
|
||||
GLOBAL_IEEE754_END(sinh)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -689,6 +689,7 @@ SINH_UNORM:
|
||||
|
||||
GLOBAL_IEEE754_END(sinhf)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -1055,6 +1055,7 @@ SINH_HUGE:
|
||||
|
||||
GLOBAL_IEEE754_END(sinhl)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
|
||||
|
@ -252,6 +252,7 @@ GLOBAL_IEEE754_ENTRY(sqrt)
|
||||
}
|
||||
// END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
|
||||
GLOBAL_IEEE754_END(sqrt)
|
||||
|
||||
// Stack operations when calling error support.
|
||||
// (1) (2) (3) (call) (4)
|
||||
// sp -> + psp -> + psp -> + sp -> +
|
||||
|
@ -204,6 +204,7 @@ GLOBAL_IEEE754_ENTRY(sqrtf)
|
||||
//
|
||||
GLOBAL_IEEE754_END(sqrtf)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mii
|
||||
|
@ -221,6 +221,7 @@ alloc r32= ar.pfs,0,5,4,0
|
||||
|
||||
// END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
|
||||
GLOBAL_IEEE754_END(sqrtl)
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -16,8 +16,18 @@ import_c() {
|
||||
echo "$1 $libm_dir/$2 $3"
|
||||
}
|
||||
|
||||
import_c DUMMY libm_support.h libm_support.h
|
||||
import_c DUMMY libm_error.c libm_error.c
|
||||
dummy_files="
|
||||
libm_cpu_defs.h
|
||||
libm_error_codes.h
|
||||
libm_support.h
|
||||
libm_error.c
|
||||
"
|
||||
|
||||
for f in $dummy_files
|
||||
do
|
||||
import_c DUMMY $f $f
|
||||
done
|
||||
|
||||
import_c scalblnf scalblnf.c s_scalblnf.c
|
||||
|
||||
for f in acos acosh asin atanh cosh exp2 exp10 fmod log2 pow remainder \
|
||||
|
@ -7,10 +7,13 @@ BEGIN {
|
||||
getline;
|
||||
while (!match($0, "^// WARRANTY DISCLAIMER")) {
|
||||
print;
|
||||
getline;
|
||||
if (!getline) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
getline;
|
||||
printf \
|
||||
if (getline)
|
||||
{
|
||||
printf \
|
||||
"// Redistribution and use in source and binary forms, with or without\n" \
|
||||
"// modification, are permitted provided that the following conditions are\n" \
|
||||
"// met:\n" \
|
||||
@ -25,10 +28,11 @@ BEGIN {
|
||||
"// * The name of Intel Corporation may not be used to endorse or promote\n" \
|
||||
"// products derived from this software without specific prior written\n" \
|
||||
"// permission.\n\n";
|
||||
if (LICENSE_ONLY == "y") {
|
||||
do {
|
||||
print;
|
||||
} while (getline);
|
||||
if (LICENSE_ONLY == "y") {
|
||||
do {
|
||||
print;
|
||||
} while (getline);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -115,7 +119,6 @@ BEGIN {
|
||||
print
|
||||
getline;
|
||||
}
|
||||
getline;
|
||||
printf "%s_END(%s)\n", type, name;
|
||||
if (match(name, "^exp10[fl]?$")) {
|
||||
t=substr(name,6)
|
||||
|
@ -16,6 +16,7 @@ import_s() {
|
||||
# $2 = source file-name
|
||||
# $3 = destination file-name
|
||||
echo "Importing $1 from $2 -> $3"
|
||||
rm -f $3
|
||||
awk -f import_file.awk FUNC=$1 $2 > $3
|
||||
}
|
||||
|
||||
@ -24,19 +25,82 @@ import_c() {
|
||||
# $2 = source file-name
|
||||
# $3 = destination file-name
|
||||
echo "Importing $1 from $2 -> $3"
|
||||
rm -f $3
|
||||
awk -f import_file.awk LICENSE_ONLY=y $2 > $3
|
||||
}
|
||||
|
||||
do_imports() {
|
||||
while read func_pattern src_file dst_file; do
|
||||
if [ "$(expr $src_file : '.*\(c\)$')" = "c" ]; then
|
||||
case $src_file in
|
||||
*.[ch])
|
||||
import_c "$func_pattern" "$src_file" "$dst_file"
|
||||
else
|
||||
;;
|
||||
*)
|
||||
import_s "$func_pattern" "$src_file" "$dst_file"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
done
|
||||
}
|
||||
|
||||
./gen_import_file_list $libm_dir > import_file_list
|
||||
|
||||
do_imports < import_file_list
|
||||
|
||||
emptyfiles="
|
||||
e_gamma_r.c
|
||||
e_gammaf_r.c
|
||||
e_gammal_r.c
|
||||
s_sincos.c
|
||||
s_sincosf.c
|
||||
s_sincosl.c
|
||||
t_exp.c
|
||||
w_acosh.c
|
||||
w_acoshf.c
|
||||
w_acoshl.c
|
||||
w_atanh.c
|
||||
w_atanhf.c
|
||||
w_atanhl.c
|
||||
w_exp10.c
|
||||
w_exp10f.c
|
||||
w_exp10l.c
|
||||
w_exp2.c
|
||||
w_exp2f.c
|
||||
w_exp2l.c
|
||||
w_expl.c
|
||||
w_lgamma_r.c
|
||||
w_lgammaf_r.c
|
||||
w_lgammal_r.c
|
||||
w_log2.c
|
||||
w_log2f.c
|
||||
w_log2l.c
|
||||
w_sinh.c
|
||||
w_sinhf.c
|
||||
w_sinhl.c
|
||||
"
|
||||
for f in $emptyfiles
|
||||
do
|
||||
rm -f $f
|
||||
echo "/* Not needed. */" > $f
|
||||
done
|
||||
|
||||
removedfiles="
|
||||
libm_atan2_reg.S
|
||||
s_ldexp.S
|
||||
s_ldexpf.S
|
||||
s_ldexpl.S
|
||||
s_scalbn.S
|
||||
s_scalbnf.S
|
||||
s_scalbnl.S
|
||||
"
|
||||
|
||||
rm -f $removedfiles
|
||||
|
||||
for f in lgammaf_r.c lgammal_r.c lgamma_r.c
|
||||
do
|
||||
import_c $f $libm_dir/$f e_$f
|
||||
done
|
||||
|
||||
for f in lgamma.c lgammaf.c lgammal.c
|
||||
do
|
||||
import_c $f $libm_dir/$f w_$f
|
||||
done
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -47,6 +47,7 @@
|
||||
// 09/15/02 Fixed bug on the branch lgamma_negrecursion
|
||||
// 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align
|
||||
// 07/22/03 Reformatted some data tables
|
||||
//
|
||||
//*********************************************************************
|
||||
//
|
||||
@ -951,19 +952,32 @@ data8 0xD28D3312983E98A0,0xBFFF //S2
|
||||
//
|
||||
data8 0x8090F777D7942F73,0x4001 // PR01
|
||||
data8 0xE5B521193CF61E63,0x4000 // PR11
|
||||
data8 0xC02C000000001939,0x0000000000000233 // (-15;-14)
|
||||
data8 0xC02A000000016124,0x0000000000002BFB // (-14;-13)
|
||||
data8 0xC02800000011EED9,0x0000000000025CBB // (-13;-12)
|
||||
data8 0xC026000000D7322A,0x00000000001E1095 // (-12;-11)
|
||||
data8 0xC0240000093F2777,0x00000000013DD3DC // (-11;-10)
|
||||
data8 0xC02200005C7768FB,0x000000000C9539B9 // (-10;-9)
|
||||
data8 0xC02000034028B3F9,0x000000007570C565 // (-9;-8)
|
||||
data8 0xC01C0033FDEDFE1F,0x00000007357E670E // (-8;-7)
|
||||
data8 0xC018016B25897C8D,0x000000346DC5D639 // (-7;-6)
|
||||
data8 0xC014086A57F0B6D9,0x0000010624DD2F1B // (-6;-5)
|
||||
data8 0xC010284E78599581,0x0000051EB851EB85 // (-5;-4)
|
||||
data8 0xC009260DBC9E59AF,0x000028F5C28F5C29 // (-4;-3)
|
||||
data8 0xC003A7FC9600F86C,0x0000666666666666 // (-3;-2)
|
||||
data8 0xC02C000000001939 // (-15;-14)
|
||||
data8 0x0000000000000233 // (-15;-14)
|
||||
data8 0xC02A000000016124 // (-14;-13)
|
||||
data8 0x0000000000002BFB // (-14;-13)
|
||||
data8 0xC02800000011EED9 // (-13;-12)
|
||||
data8 0x0000000000025CBB // (-13;-12)
|
||||
data8 0xC026000000D7322A // (-12;-11)
|
||||
data8 0x00000000001E1095 // (-12;-11)
|
||||
data8 0xC0240000093F2777 // (-11;-10)
|
||||
data8 0x00000000013DD3DC // (-11;-10)
|
||||
data8 0xC02200005C7768FB // (-10;-9)
|
||||
data8 0x000000000C9539B9 // (-10;-9)
|
||||
data8 0xC02000034028B3F9 // (-9;-8)
|
||||
data8 0x000000007570C565 // (-9;-8)
|
||||
data8 0xC01C0033FDEDFE1F // (-8;-7)
|
||||
data8 0x00000007357E670E // (-8;-7)
|
||||
data8 0xC018016B25897C8D // (-7;-6)
|
||||
data8 0x000000346DC5D639 // (-7;-6)
|
||||
data8 0xC014086A57F0B6D9 // (-6;-5)
|
||||
data8 0x0000010624DD2F1B // (-6;-5)
|
||||
data8 0xC010284E78599581 // (-5;-4)
|
||||
data8 0x0000051EB851EB85 // (-5;-4)
|
||||
data8 0xC009260DBC9E59AF // (-4;-3)
|
||||
data8 0x000028F5C28F5C29 // (-4;-3)
|
||||
data8 0xC003A7FC9600F86C // (-3;-2)
|
||||
data8 0x0000666666666666 // (-3;-2)
|
||||
data8 0xCC15879606130890,0x4000 // PR21
|
||||
data8 0xB42FE3281465E1CC,0x4000 // PR31
|
||||
//
|
||||
@ -971,19 +985,32 @@ data8 0x828185F0B95C9916,0x4001 // PR00
|
||||
//
|
||||
data8 0xD4D3C819E4E5654B,0x4000 // PR10
|
||||
data8 0xA82FBBA4FCC75298,0x4000 // PR20
|
||||
data8 0xC02DFFFFFFFFFE52,0x000000000000001C // (-15;-14)
|
||||
data8 0xC02BFFFFFFFFE6C7,0x00000000000001A6 // (-14;-13)
|
||||
data8 0xC029FFFFFFFE9EDC,0x0000000000002BFB // (-13;-12)
|
||||
data8 0xC027FFFFFFEE1127,0x000000000001EEC8 // (-12;-11)
|
||||
data8 0xC025FFFFFF28CDD4,0x00000000001E1095 // (-11;-10)
|
||||
data8 0xC023FFFFF6C0D7C0,0x000000000101B2B3 // (-10;-9)
|
||||
data8 0xC021FFFFA3884BD0,0x000000000D6BF94D // (-9;-8)
|
||||
data8 0xC01FFFF97F8159CF,0x00000000C9539B89 // (-8;-7)
|
||||
data8 0xC01BFFCBF76B86F0,0x00000007357E670E // (-7;-6)
|
||||
data8 0xC017FE92F591F40D,0x000000346DC5D639 // (-6;-5)
|
||||
data8 0xC013F7577A6EEAFD,0x00000147AE147AE1 // (-5;-4)
|
||||
data8 0xC00FA471547C2FE5,0x00000C49BA5E353F // (-4;-3)
|
||||
data8 0xC005FB410A1BD901,0x000053F7CED91687 // (-3;-2)
|
||||
data8 0xC02DFFFFFFFFFE52 // (-15;-14)
|
||||
data8 0x000000000000001C // (-15;-14)
|
||||
data8 0xC02BFFFFFFFFE6C7 // (-14;-13)
|
||||
data8 0x00000000000001A6 // (-14;-13)
|
||||
data8 0xC029FFFFFFFE9EDC // (-13;-12)
|
||||
data8 0x0000000000002BFB // (-13;-12)
|
||||
data8 0xC027FFFFFFEE1127 // (-12;-11)
|
||||
data8 0x000000000001EEC8 // (-12;-11)
|
||||
data8 0xC025FFFFFF28CDD4 // (-11;-10)
|
||||
data8 0x00000000001E1095 // (-11;-10)
|
||||
data8 0xC023FFFFF6C0D7C0 // (-10;-9)
|
||||
data8 0x000000000101B2B3 // (-10;-9)
|
||||
data8 0xC021FFFFA3884BD0 // (-9;-8)
|
||||
data8 0x000000000D6BF94D // (-9;-8)
|
||||
data8 0xC01FFFF97F8159CF // (-8;-7)
|
||||
data8 0x00000000C9539B89 // (-8;-7)
|
||||
data8 0xC01BFFCBF76B86F0 // (-7;-6)
|
||||
data8 0x00000007357E670E // (-7;-6)
|
||||
data8 0xC017FE92F591F40D // (-6;-5)
|
||||
data8 0x000000346DC5D639 // (-6;-5)
|
||||
data8 0xC013F7577A6EEAFD // (-5;-4)
|
||||
data8 0x00000147AE147AE1 // (-5;-4)
|
||||
data8 0xC00FA471547C2FE5 // (-4;-3)
|
||||
data8 0x00000C49BA5E353F // (-4;-3)
|
||||
data8 0xC005FB410A1BD901 // (-3;-2)
|
||||
data8 0x000053F7CED91687 // (-3;-2)
|
||||
data8 0x80151BB918A293AA,0x4000 // PR30
|
||||
data8 0xB3C9F8F47422A314,0x400B // PRN
|
||||
//
|
||||
@ -3538,6 +3565,7 @@ lgamma_libm_err:
|
||||
};;
|
||||
GLOBAL_LIBM_END(__libm_lgamma)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -47,6 +47,7 @@
|
||||
// 09/16/02 Improved accuracy on intervals reduced to [1;1.25]
|
||||
// 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align
|
||||
// 07/22/03 Reformatted some data tables
|
||||
//
|
||||
//*********************************************************************
|
||||
//
|
||||
@ -685,19 +686,26 @@ data8 0x3FF1029A9DD542B4,0xBFFAD37C209D3B25 // A6,A5
|
||||
data8 0x405385E6FD9BE7EA // A0
|
||||
data8 0x478895F1C0000000 // Overflow boundary
|
||||
data8 0x400062D97D26B523,0xC00A03E1529FF023 // A6,A5
|
||||
data8 0x4069204C51E566CE,0 // A0
|
||||
data8 0x4069204C51E566CE // A0
|
||||
data8 0x0000000000000000 // pad
|
||||
data8 0x40101476B38FD501,0xC0199DE7B387C0FC // A6,A5
|
||||
data8 0x407EB8DAEC83D759,0 // A0
|
||||
data8 0x407EB8DAEC83D759 // A0
|
||||
data8 0x0000000000000000 // pad
|
||||
data8 0x401FDB008D65125A,0xC0296B506E665581 // A6,A5
|
||||
data8 0x409226D93107EF66,0 // A0
|
||||
data8 0x409226D93107EF66 // A0
|
||||
data8 0x0000000000000000 // pad
|
||||
data8 0x402FB3EAAF3E7B2D,0xC039521142AD8E0D // A6,A5
|
||||
data8 0x40A4EFA4F072792E,0 // A0
|
||||
data8 0x40A4EFA4F072792E // A0
|
||||
data8 0x0000000000000000 // pad
|
||||
data8 0x403FA024C66B2563,0xC0494569F250E691 // A6,A5
|
||||
data8 0x40B7B747C9235BB8,0 // A0
|
||||
data8 0x40B7B747C9235BB8 // A0
|
||||
data8 0x0000000000000000 // pad
|
||||
data8 0x404F9607D6DA512C,0xC0593F0B2EDDB4BC // A6,A5
|
||||
data8 0x40CA7E29C5F16DE2,0 // A0
|
||||
data8 0x40CA7E29C5F16DE2 // A0
|
||||
data8 0x0000000000000000 // pad
|
||||
data8 0x405F90C5F613D98D,0xC0693BD130E50AAF // A6,A5
|
||||
data8 0x40DD4495238B190C,0 // A0
|
||||
data8 0x40DD4495238B190C // A0
|
||||
data8 0x0000000000000000 // pad
|
||||
//
|
||||
// polynomial approximation of ln(sin(Pi*x)/(Pi*x)), |x| <= 0.5
|
||||
data8 0xBFD58731A486E820,0xBFA4452CC28E15A9 // S16,S14
|
||||
@ -2133,6 +2141,7 @@ lgammaf_libm_err:
|
||||
};;
|
||||
GLOBAL_LIBM_END(__libm_lgammaf)
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -7622,6 +7622,7 @@ lgammal_singularity:
|
||||
GLOBAL_LIBM_END(__libm_lgammal)
|
||||
|
||||
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -21,20 +21,20 @@
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
@ -44,38 +44,51 @@
|
||||
// 02/06/02 Corrected to handle 32- or 64-bit integers
|
||||
// 05/20/02 Cleaned up namespace and sf0 syntax
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align
|
||||
// 08/25/03 Improved performance
|
||||
//
|
||||
// API
|
||||
//==============================================================
|
||||
// float = __libm_scalblnf (float x, long int n, int long_int_type)
|
||||
// float __libm_scalblnf (float x, long int n, int long_int_type)
|
||||
// input floating point f8 and long int n (r33)
|
||||
// input long_int_type = 0 if long int defined as 32 bits, = 1 if 64 bits
|
||||
//
|
||||
// output floating point f8
|
||||
//
|
||||
|
||||
// Returns x* 2**n using an fma and detects overflow
|
||||
// and underflow.
|
||||
// and underflow.
|
||||
//
|
||||
//
|
||||
// Strategy:
|
||||
// Compute biased exponent of result exp_Result = N + exp_X
|
||||
// Break into ranges:
|
||||
// exp_Result > 0x1007e -> Certain overflow
|
||||
// exp_Result = 0x1007e -> Possible overflow
|
||||
// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path)
|
||||
// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
|
||||
// exp_Result < 0x0ff81 - 23 -> Certain underflow
|
||||
|
||||
FR_Big = f6
|
||||
FR_NBig = f7
|
||||
FR_Floating_X = f8
|
||||
FR_Result = f8
|
||||
FR_Result2 = f9
|
||||
FR_Result3 = f11
|
||||
FR_Norm_X = f12
|
||||
FR_Two_N = f14
|
||||
FR_Two_to_Big = f15
|
||||
FR_Result3 = f10
|
||||
FR_Norm_X = f11
|
||||
FR_Two_N = f12
|
||||
|
||||
GR_neg_ov_limit= r14
|
||||
GR_N_Biased = r15
|
||||
GR_Big = r16
|
||||
GR_NBig = r17
|
||||
GR_Scratch = r18
|
||||
GR_Scratch1 = r19
|
||||
GR_exp_Result = r18
|
||||
GR_pos_ov_limit= r19
|
||||
GR_Bias = r20
|
||||
GR_N_as_int = r21
|
||||
GR_signexp_X = r22
|
||||
GR_exp_X = r23
|
||||
GR_exp_mask = r24
|
||||
GR_max_exp = r25
|
||||
GR_min_exp = r26
|
||||
GR_min_den_exp = r27
|
||||
|
||||
GR_SAVE_B0 = r32
|
||||
GR_SAVE_GP = r33
|
||||
@ -93,243 +106,298 @@ GLOBAL_LIBM_ENTRY(__libm_scalblnf)
|
||||
// Build the exponent Bias
|
||||
//
|
||||
{ .mfi
|
||||
alloc r32=ar.pfs,3,0,4,0
|
||||
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
|
||||
addl GR_Bias = 0x0FFFF,r0
|
||||
getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
|
||||
fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
|
||||
mov GR_Bias = 0x0ffff
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Is N zero?
|
||||
// Normalize x
|
||||
// Do we need to sign extend input (long_int_type = 0)?
|
||||
// Is long integer type 32 bits?
|
||||
//
|
||||
{ .mfi
|
||||
cmp.eq.unc p6,p0 = r33,r0
|
||||
fnorm.s1 FR_Norm_X = FR_Floating_X
|
||||
cmp.eq.unc p8,p9 = r34,r0
|
||||
mov GR_Big = 35000 // If N this big then certain overflow
|
||||
fnorm.s1 FR_Norm_X = FR_Floating_X
|
||||
cmp.eq p8,p9 = r34,r0
|
||||
}
|
||||
;;
|
||||
|
||||
{ .mii
|
||||
(p9) mov GR_N_as_int = r33 // Get n directly if long int 64 bits
|
||||
(p8) sxt4 GR_N_as_int = r33 // Sign extend n if long int 32 bits
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Normalize x
|
||||
// Branch and return special values.
|
||||
// Create -35000
|
||||
// Create 35000
|
||||
//
|
||||
// Sign extend N if long int is 32 bits
|
||||
{ .mfi
|
||||
addl GR_Big = 35000,r0
|
||||
(p9) mov GR_N_as_int = r33 // Copy N if long int is 64 bits
|
||||
fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
|
||||
(p8) sxt4 GR_N_as_int = r33 // Sign extend N if long int is 32 bits
|
||||
}
|
||||
{ .mfi
|
||||
mov GR_NBig = -35000 // If N this small then certain underflow
|
||||
nop.f 0
|
||||
mov GR_max_exp = 0x1007e // Exponent of maximum float
|
||||
}
|
||||
;;
|
||||
|
||||
// Create biased exponent for 2**N
|
||||
{ .mfi
|
||||
add GR_N_Biased = GR_Bias,GR_N_as_int
|
||||
nop.f 0
|
||||
cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow?
|
||||
}
|
||||
{ .mfb
|
||||
addl GR_NBig = -35000,r0
|
||||
(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
|
||||
(p7) br.ret.spnt b0
|
||||
};;
|
||||
{ .mib
|
||||
cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
|
||||
mov GR_min_exp = 0x0ff81 // Exponent of minimum float
|
||||
(p9) br.cond.spnt SCALBNF_UNORM // Branch if x=unorm
|
||||
}
|
||||
;;
|
||||
|
||||
SCALBNF_COMMON:
|
||||
// Main path continues. Also return here from x=unorm path.
|
||||
// Create 2**N
|
||||
.pred.rel "mutex",p7,p8
|
||||
{ .mfi
|
||||
setf.exp FR_Two_N = GR_N_Biased
|
||||
nop.f 0
|
||||
(p7) mov GR_N_as_int = GR_Big // Limit max N
|
||||
}
|
||||
{ .mfi
|
||||
(p8) mov GR_N_as_int = GR_NBig // Limit min N
|
||||
nop.f 0
|
||||
(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Build the exponent Bias
|
||||
// Return x when N = 0
|
||||
// Create biased exponent for 2**N for N big
|
||||
// Is N zero?
|
||||
//
|
||||
{ .mfi
|
||||
setf.exp FR_Two_N = GR_N_Biased
|
||||
(p7) add GR_N_Biased = GR_Bias,GR_N_as_int
|
||||
nop.f 0
|
||||
addl GR_Scratch1 = 0x063BF,r0
|
||||
cmp.eq.or p6,p0 = r33,r0
|
||||
}
|
||||
{ .mfb
|
||||
addl GR_Scratch = 0x019C3F,r0
|
||||
(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
|
||||
(p6) br.ret.spnt b0
|
||||
};;
|
||||
{ .mfi
|
||||
mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
|
||||
nop.f 0
|
||||
mov GR_exp_mask = 0x1ffff // Exponent mask
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Create 2**N for N big
|
||||
// Return x when N = 0 or X = Nan, Inf, Zero
|
||||
//
|
||||
{ .mfi
|
||||
(p7) setf.exp FR_Two_N = GR_N_Biased
|
||||
nop.f 0
|
||||
mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
|
||||
}
|
||||
{ .mfb
|
||||
and GR_exp_X = GR_exp_mask, GR_signexp_X
|
||||
(p6) fma.s.s0 FR_Result = FR_Floating_X, f1, f0
|
||||
(p6) br.ret.spnt b0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Create 2**big
|
||||
// Create 2**-big
|
||||
// Is N > 35000
|
||||
// Is N < -35000
|
||||
// Raise Denormal operand flag with compare
|
||||
// Main path, create 2**N
|
||||
// Compute biased result exponent
|
||||
//
|
||||
{ .mfi
|
||||
setf.exp FR_NBig = GR_Scratch1
|
||||
nop.f 0
|
||||
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
|
||||
}
|
||||
{ .mfi
|
||||
setf.exp FR_Big = GR_Scratch
|
||||
add GR_exp_Result = GR_exp_X, GR_N_as_int
|
||||
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
|
||||
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
|
||||
};;
|
||||
mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Adjust 2**N if N was very small or very large
|
||||
// Do final operation
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
|
||||
nop.i 0
|
||||
cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
|
||||
fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
|
||||
cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
|
||||
}
|
||||
{ .mlx
|
||||
nop.m 999
|
||||
movl GR_Scratch = 0x000000000003007F
|
||||
};;
|
||||
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
|
||||
nop.i 0
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
nop.f 0
|
||||
(p9) br.cond.spnt SCALBNF_UNDERFLOW // Branch if certain underflow
|
||||
}
|
||||
{ .mlx
|
||||
nop.m 999
|
||||
movl GR_Scratch1= 0x000000000001007F
|
||||
};;
|
||||
;;
|
||||
|
||||
// Set up necessary status fields
|
||||
{ .mib
|
||||
(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
|
||||
(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
|
||||
(p7) br.ret.sptk b0 // Return from main path
|
||||
}
|
||||
;;
|
||||
|
||||
{ .bbb
|
||||
(p6) br.cond.spnt SCALBNF_OVERFLOW // Branch if certain overflow
|
||||
(p8) br.cond.spnt SCALBNF_POSSIBLE_OVERFLOW // Branch if possible overflow
|
||||
(p9) br.cond.spnt SCALBNF_POSSIBLE_UNDERFLOW // Branch if possible underflow
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if possible underflow.
|
||||
// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
|
||||
SCALBNF_POSSIBLE_UNDERFLOW:
|
||||
//
|
||||
// Here if possible overflow.
|
||||
// Resulting exponent: 0x1007e = exp_Result
|
||||
SCALBNF_POSSIBLE_OVERFLOW:
|
||||
|
||||
// Set up necessary status fields
|
||||
//
|
||||
// S0 user supplied status
|
||||
// S2 user supplied status + WRE + TD (Overflows)
|
||||
// S3 user supplied status + FZ + TD (Underflows)
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
nop.m 0
|
||||
fsetc.s3 0x7F,0x41
|
||||
nop.i 999
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
nop.m 0
|
||||
fsetc.s2 0x7F,0x42
|
||||
nop.i 999
|
||||
};;
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Do final operation
|
||||
// Do final operation with s2 and s3
|
||||
//
|
||||
{ .mfi
|
||||
setf.exp FR_NBig = GR_Scratch
|
||||
fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
setf.exp FR_NBig = GR_neg_ov_limit
|
||||
fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
};;
|
||||
{ .mfi
|
||||
setf.exp FR_Big = GR_Scratch1
|
||||
fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 999
|
||||
};;
|
||||
setf.exp FR_Big = GR_pos_ov_limit
|
||||
fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
// Check for overflow or underflow.
|
||||
// Restore s3
|
||||
// Restore s2
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
nop.m 0
|
||||
fsetc.s3 0x7F,0x40
|
||||
nop.i 999
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
nop.m 0
|
||||
fsetc.s2 0x7F,0x40
|
||||
nop.i 999
|
||||
};;
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Is the result zero?
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fclass.m.unc p6, p0 = FR_Result3, 0x007
|
||||
nop.i 999
|
||||
}
|
||||
nop.m 0
|
||||
fclass.m p6, p0 = FR_Result3, 0x007
|
||||
nop.i 0
|
||||
}
|
||||
{ .mfi
|
||||
addl GR_Tag = 205, r0
|
||||
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
|
||||
nop.i 0
|
||||
};;
|
||||
nop.m 0
|
||||
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Detect masked underflow - Tiny + Inexact Only
|
||||
//
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
nop.m 0
|
||||
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
|
||||
nop.i 999
|
||||
};;
|
||||
nop.i 0
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Is result bigger than the allowed range?
|
||||
// Branch out for underflow
|
||||
//
|
||||
{ .mfb
|
||||
(p6) addl GR_Tag = 206, r0
|
||||
nop.m 0
|
||||
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
|
||||
(p6) br.cond.spnt scalbnf_UNDERFLOW
|
||||
};;
|
||||
(p6) br.cond.spnt SCALBNF_UNDERFLOW
|
||||
}
|
||||
;;
|
||||
|
||||
//
|
||||
// Branch out for overflow
|
||||
//
|
||||
{ .mbb
|
||||
nop.m 0
|
||||
(p7) br.cond.spnt scalbnf_OVERFLOW
|
||||
(p9) br.cond.spnt scalbnf_OVERFLOW
|
||||
};;
|
||||
|
||||
//
|
||||
// Return from main path.
|
||||
//
|
||||
{ .mfb
|
||||
nop.m 999
|
||||
nop.f 0
|
||||
br.ret.sptk b0;;
|
||||
{ .bbb
|
||||
(p7) br.cond.spnt SCALBNF_OVERFLOW
|
||||
(p9) br.cond.spnt SCALBNF_OVERFLOW
|
||||
br.ret.sptk b0 // Return from main path.
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if result overflows
|
||||
SCALBNF_OVERFLOW:
|
||||
{ .mib
|
||||
alloc r32=ar.pfs,3,0,4,0
|
||||
addl GR_Tag = 205, r0 // Set error tag for overflow
|
||||
br.cond.sptk __libm_error_region // Call error support for overflow
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if result underflows
|
||||
SCALBNF_UNDERFLOW:
|
||||
{ .mib
|
||||
alloc r32=ar.pfs,3,0,4,0
|
||||
addl GR_Tag = 206, r0 // Set error tag for underflow
|
||||
br.cond.sptk __libm_error_region // Call error support for underflow
|
||||
}
|
||||
;;
|
||||
|
||||
// Here if x=unorm
|
||||
SCALBNF_UNORM:
|
||||
{ .mib
|
||||
getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
|
||||
nop.i 0
|
||||
br.cond.sptk SCALBNF_COMMON // Return to main path
|
||||
}
|
||||
;;
|
||||
|
||||
|
||||
GLOBAL_LIBM_END(__libm_scalblnf)
|
||||
__libm_error_region:
|
||||
|
||||
scalbnf_OVERFLOW:
|
||||
scalbnf_UNDERFLOW:
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
|
||||
//
|
||||
// Get stack address of N
|
||||
//
|
||||
.prologue
|
||||
{ .mfi
|
||||
add GR_Parameter_Y=-32,sp
|
||||
add GR_Parameter_Y=-32,sp
|
||||
nop.f 0
|
||||
.save ar.pfs,GR_SAVE_PFS
|
||||
mov GR_SAVE_PFS=ar.pfs
|
||||
mov GR_SAVE_PFS=ar.pfs
|
||||
}
|
||||
//
|
||||
// Adjust sp
|
||||
// Adjust sp
|
||||
//
|
||||
{ .mfi
|
||||
.fframe 64
|
||||
add sp=-64,sp
|
||||
add sp=-64,sp
|
||||
nop.f 0
|
||||
mov GR_SAVE_GP=gp
|
||||
mov GR_SAVE_GP=gp
|
||||
};;
|
||||
|
||||
//
|
||||
// Store N on stack in correct position
|
||||
// Store N on stack in correct position
|
||||
// Locate the address of x on stack
|
||||
//
|
||||
{ .mmi
|
||||
st8 [GR_Parameter_Y] = GR_N_as_int,16
|
||||
add GR_Parameter_X = 16,sp
|
||||
st8 [GR_Parameter_Y] = GR_N_as_int,16
|
||||
add GR_Parameter_X = 16,sp
|
||||
.save b0, GR_SAVE_B0
|
||||
mov GR_SAVE_B0=b0
|
||||
mov GR_SAVE_B0=b0
|
||||
};;
|
||||
|
||||
//
|
||||
@ -338,42 +406,42 @@ scalbnf_UNDERFLOW:
|
||||
//
|
||||
.body
|
||||
{ .mib
|
||||
stfs [GR_Parameter_X] = FR_Norm_X
|
||||
add GR_Parameter_RESULT = 0,GR_Parameter_Y
|
||||
stfs [GR_Parameter_X] = FR_Norm_X
|
||||
add GR_Parameter_RESULT = 0,GR_Parameter_Y
|
||||
nop.b 0
|
||||
}
|
||||
{ .mib
|
||||
stfs [GR_Parameter_Y] = FR_Result
|
||||
stfs [GR_Parameter_Y] = FR_Result
|
||||
add GR_Parameter_Y = -16,GR_Parameter_Y
|
||||
br.call.sptk b0=__libm_error_support#
|
||||
br.call.sptk b0=__libm_error_support#
|
||||
};;
|
||||
|
||||
//
|
||||
// Get location of result on stack
|
||||
//
|
||||
{ .mmi
|
||||
add GR_Parameter_RESULT = 48,sp
|
||||
nop.m 0
|
||||
nop.m 0
|
||||
add GR_Parameter_RESULT = 48,sp
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
//
|
||||
// Get the new result
|
||||
// Get the new result
|
||||
//
|
||||
{ .mmi
|
||||
ldfs FR_Result = [GR_Parameter_RESULT]
|
||||
ldfs FR_Result = [GR_Parameter_RESULT]
|
||||
.restore sp
|
||||
add sp = 64,sp
|
||||
mov b0 = GR_SAVE_B0
|
||||
add sp = 64,sp
|
||||
mov b0 = GR_SAVE_B0
|
||||
};;
|
||||
|
||||
//
|
||||
// Restore gp, ar.pfs and return
|
||||
//
|
||||
{ .mib
|
||||
mov gp = GR_SAVE_GP
|
||||
mov ar.pfs = GR_SAVE_PFS
|
||||
br.ret.sptk b0
|
||||
mov gp = GR_SAVE_GP
|
||||
mov ar.pfs = GR_SAVE_PFS
|
||||
br.ret.sptk b0
|
||||
};;
|
||||
|
||||
LOCAL_LIBM_END(__libm_error_region)
|
||||
|
@ -46,12 +46,13 @@
|
||||
// 03/19/02 Added stack unwind around call to __libm_cis_large
|
||||
// 09/05/02 Work range is widened by strengthening the reduction (3 parts of Pi/16)
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align
|
||||
//
|
||||
// 08/08/03 Improved performance
|
||||
// 02/11/04 cis is moved to the separate file.
|
||||
//
|
||||
// API
|
||||
//==============================================================
|
||||
// 1) double _Complex cis(double)
|
||||
// 2) void sincos(double, double*s, double*c)
|
||||
// 3) __libm_sincos - internal LIBM function, that accepts
|
||||
// 1) void sincos(double, double*s, double*c)
|
||||
// 2) __libm_sincos - internal LIBM function, that accepts
|
||||
// argument in f8 and returns cosine through f8, sine through f9
|
||||
//
|
||||
// Overview of operation
|
||||
@ -65,12 +66,12 @@
|
||||
// nfloat = Round result to integer (round-to-nearest)
|
||||
//
|
||||
// r = x - nfloat * pi/2^k
|
||||
// Do this as ((((x - nfloat * HIGH(pi/2^k))) -
|
||||
// nfloat * LOW(pi/2^k)) -
|
||||
// Do this as ((((x - nfloat * HIGH(pi/2^k))) -
|
||||
// nfloat * LOW(pi/2^k)) -
|
||||
// nfloat * LOWEST(pi/2^k) for increased accuracy.
|
||||
// pi/2^k is stored as two numbers that when added make pi/2^k.
|
||||
// pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k)
|
||||
// HIGH and LOW parts are rounded to zero values,
|
||||
// HIGH and LOW parts are rounded to zero values,
|
||||
// and LOWEST is rounded to nearest one.
|
||||
//
|
||||
// x = (nfloat * pi/2^k) + r
|
||||
@ -166,15 +167,14 @@
|
||||
// Registers used
|
||||
//==============================================================
|
||||
// general input registers:
|
||||
// r14 -> r19
|
||||
// r32 -> r49
|
||||
// r14 -> r39
|
||||
|
||||
// predicate registers used:
|
||||
// p6 -> p14
|
||||
|
||||
//
|
||||
// floating-point registers used
|
||||
// f9 -> f15
|
||||
// f32 -> f100
|
||||
// f32 -> f67
|
||||
|
||||
// Assembly macros
|
||||
//==============================================================
|
||||
@ -246,38 +246,32 @@ cis_Q = f67
|
||||
cis_pResSin = r33
|
||||
cis_pResCos = r34
|
||||
|
||||
cis_exp_limit = r35
|
||||
cis_r_signexp = r36
|
||||
cis_AD_beta_table = r37
|
||||
cis_r_sincos = r38
|
||||
|
||||
cis_r_exp = r39
|
||||
cis_r_17_ones = r40
|
||||
|
||||
cis_GR_sig_inv_pi_by_16 = r14
|
||||
cis_GR_rshf_2to61 = r15
|
||||
cis_GR_rshf = r16
|
||||
cis_GR_exp_2tom61 = r17
|
||||
cis_GR_n = r18
|
||||
|
||||
cis_GR_n_sin = r19
|
||||
cis_GR_m_sin = r41
|
||||
cis_GR_32m_sin = r41
|
||||
cis_exp_limit = r20
|
||||
cis_r_signexp = r21
|
||||
cis_AD_1 = r22
|
||||
cis_r_sincos = r23
|
||||
cis_r_exp = r24
|
||||
cis_r_17_ones = r25
|
||||
cis_GR_m_sin = r26
|
||||
cis_GR_32m_sin = r26
|
||||
cis_GR_n_cos = r27
|
||||
cis_GR_m_cos = r28
|
||||
cis_GR_32m_cos = r28
|
||||
cis_AD_2_sin = r29
|
||||
cis_AD_2_cos = r30
|
||||
cis_gr_tmp = r31
|
||||
|
||||
cis_GR_n_cos = r42
|
||||
cis_GR_m_cos = r43
|
||||
cis_GR_32m_cos = r43
|
||||
|
||||
cis_AD_2_sin = r44
|
||||
cis_AD_2_cos = r45
|
||||
|
||||
cis_gr_tmp = r46
|
||||
GR_SAVE_B0 = r47
|
||||
GR_SAVE_GP = r48
|
||||
rB0_SAVED = r49
|
||||
GR_SAVE_PFS = r50
|
||||
GR_SAVE_PR = r51
|
||||
cis_AD_1 = r52
|
||||
GR_SAVE_B0 = r35
|
||||
GR_SAVE_GP = r36
|
||||
rB0_SAVED = r37
|
||||
GR_SAVE_PFS = r38
|
||||
GR_SAVE_PR = r39
|
||||
|
||||
RODATA
|
||||
|
||||
@ -408,14 +402,14 @@ LOCAL_OBJECT_END(double_sin_cos_beta_k4)
|
||||
GLOBAL_IEEE754_ENTRY(sincos)
|
||||
// cis_GR_sig_inv_pi_by_16 = significand of 16/pi
|
||||
{ .mlx
|
||||
alloc GR_SAVE_PFS = ar.pfs, 0, 21, 0, 0
|
||||
getf.exp cis_r_signexp = cis_Arg
|
||||
movl cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A
|
||||
|
||||
|
||||
}
|
||||
// cis_GR_rshf_2to61 = 1.1000 2^(63+63-2)
|
||||
{ .mlx
|
||||
addl cis_AD_1 = @ltoff(double_cis_pi), gp
|
||||
movl cis_GR_rshf_2to61 = 0x47b8000000000000
|
||||
movl cis_GR_rshf_2to61 = 0x47b8000000000000
|
||||
};;
|
||||
|
||||
{ .mfi
|
||||
@ -430,12 +424,11 @@ GLOBAL_IEEE754_ENTRY(sincos)
|
||||
br.cond.sptk _CIS_COMMON
|
||||
};;
|
||||
GLOBAL_IEEE754_END(sincos)
|
||||
LOCAL_LIBM_ENTRY(cis)
|
||||
LOCAL_LIBM_END(cis)
|
||||
|
||||
GLOBAL_LIBM_ENTRY(__libm_sincos)
|
||||
// cis_GR_sig_inv_pi_by_16 = significand of 16/pi
|
||||
{ .mlx
|
||||
alloc GR_SAVE_PFS = ar.pfs,0,21,0,0
|
||||
getf.exp cis_r_signexp = cis_Arg
|
||||
movl cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A
|
||||
}
|
||||
// cis_GR_rshf_2to61 = 1.1000 2^(63+63-2)
|
||||
@ -443,11 +436,12 @@ GLOBAL_LIBM_ENTRY(__libm_sincos)
|
||||
addl cis_AD_1 = @ltoff(double_cis_pi), gp
|
||||
movl cis_GR_rshf_2to61 = 0x47b8000000000000
|
||||
};;
|
||||
|
||||
// p14 set for __libm_sincos and cis
|
||||
{ .mfi
|
||||
ld8 cis_AD_1 = [cis_AD_1]
|
||||
fnorm.s1 cis_NORM_f8 = cis_Arg
|
||||
cmp.eq p14, p13 = r0, r0
|
||||
cmp.eq p14, p13 = r0, r0
|
||||
}
|
||||
// cis_GR_exp_2tom61 = exponent of scaling factor 2^-61
|
||||
{ .mib
|
||||
@ -476,10 +470,15 @@ _CIS_COMMON:
|
||||
// 2^-61 for scaling Nfloat
|
||||
// 0x1001a is register_bias + 27.
|
||||
// So if f8 >= 2^27, go to large arguments routine
|
||||
{ .mmi
|
||||
getf.exp cis_r_signexp = cis_Arg
|
||||
setf.exp cis_2TOM61 = cis_GR_exp_2tom61
|
||||
{ .mfi
|
||||
alloc GR_SAVE_PFS = ar.pfs, 3, 5, 0, 0
|
||||
fclass.m p11,p0 = cis_Arg, 0x0b // Test for x=unorm
|
||||
mov cis_exp_limit = 0x1001a
|
||||
}
|
||||
{ .mib
|
||||
setf.exp cis_2TOM61 = cis_GR_exp_2tom61
|
||||
nop.i 0
|
||||
(p6) br.cond.spnt _CIS_SPECIAL_ARGS
|
||||
};;
|
||||
|
||||
// Load the two pieces of pi/16
|
||||
@ -488,9 +487,11 @@ _CIS_COMMON:
|
||||
{ .mmb
|
||||
ldfe cis_Pi_by_16_hi = [cis_AD_1],16
|
||||
setf.d cis_RSHF = cis_GR_rshf
|
||||
(p6) br.cond.spnt _CIS_SPECIAL_ARGS
|
||||
(p11) br.cond.spnt _CIS_UNORM // Branch if x=unorm
|
||||
};;
|
||||
|
||||
_CIS_COMMON2:
|
||||
// Return here if x=unorm
|
||||
// Create constant inexact set
|
||||
{ .mmi
|
||||
ldfe cis_Pi_by_16_lo = [cis_AD_1],16
|
||||
@ -498,23 +499,18 @@ _CIS_COMMON:
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
// Select exponent (17 lsb)
|
||||
{ .mfi
|
||||
ldfe cis_Pi_by_16_lowest = [cis_AD_1],16
|
||||
nop.f 0
|
||||
nop.i 0
|
||||
dep.z cis_r_exp = cis_r_signexp, 0, 17
|
||||
};;
|
||||
|
||||
// Start loading P, Q coefficients
|
||||
{ .mib
|
||||
ldfpd cis_P4,cis_Q4 = [cis_AD_1],16
|
||||
dep.z cis_r_exp = cis_r_signexp, 0, 17
|
||||
nop.b 0
|
||||
};;
|
||||
|
||||
// p10 is true if we must call routines to handle larger arguments
|
||||
// p10 is true if f8 exp is >= 0x1001a
|
||||
{ .mmb
|
||||
ldfpd cis_P3,cis_Q3 = [cis_AD_1],16
|
||||
ldfpd cis_P4,cis_Q4 = [cis_AD_1],16
|
||||
cmp.ge p10, p0 = cis_r_exp, cis_exp_limit
|
||||
(p10) br.cond.spnt _CIS_LARGE_ARGS // go to |x| >= 2^27 path
|
||||
};;
|
||||
@ -523,39 +519,33 @@ _CIS_COMMON:
|
||||
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
|
||||
// rightmost bits of significand
|
||||
{ .mfi
|
||||
ldfpd cis_P2,cis_Q2 = [cis_AD_1],16
|
||||
ldfpd cis_P3,cis_Q3 = [cis_AD_1],16
|
||||
fma.s1 cis_W_2TO61_RSH = cis_NORM_f8,cis_SIG_INV_PI_BY_16_2TO61,cis_RSHF_2TO61
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
// get N = (int)cis_int_Nfloat
|
||||
// cis_NFLOAT = Round_Int_Nearest(cis_W)
|
||||
{ .mmf
|
||||
getf.sig cis_GR_n = cis_W_2TO61_RSH
|
||||
ldfpd cis_P2,cis_Q2 = [cis_AD_1],16
|
||||
fms.s1 cis_NFLOAT = cis_W_2TO61_RSH,cis_2TOM61,cis_RSHF
|
||||
};;
|
||||
|
||||
// cis_r = -cis_Nfloat * cis_Pi_by_16_hi + x
|
||||
{ .mfi
|
||||
ldfpd cis_P1,cis_Q1 = [cis_AD_1], 16
|
||||
fms.s1 cis_NFLOAT = cis_W_2TO61_RSH,cis_2TOM61,cis_RSHF
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
// get N = (int)cis_int_Nfloat
|
||||
{ .mfi
|
||||
getf.sig cis_GR_n = cis_W_2TO61_RSH
|
||||
nop.f 0
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
// Add 2^(k-1) (which is in cis_r_sincos) to N
|
||||
// cis_r = -cis_Nfloat * cis_Pi_by_16_hi + x
|
||||
// cis_r = cis_r -cis_Nfloat * cis_Pi_by_16_lo
|
||||
{ .mfi
|
||||
add cis_GR_n_cos = 0x8, cis_GR_n
|
||||
fnma.s1 cis_r = cis_NFLOAT,cis_Pi_by_16_hi,cis_NORM_f8
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
//Get M (least k+1 bits of N)
|
||||
// Add 2^(k-1) (which is in cis_r_sincos) to N
|
||||
{ .mmi
|
||||
add cis_GR_n_cos = 0x8, cis_GR_n
|
||||
;;
|
||||
//Get M (least k+1 bits of N)
|
||||
and cis_GR_m_sin = 0x1f,cis_GR_n
|
||||
and cis_GR_m_cos = 0x1f,cis_GR_n_cos
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
{ .mmi
|
||||
@ -565,9 +555,10 @@ _CIS_COMMON:
|
||||
};;
|
||||
|
||||
// Add 32*M to address of sin_cos_beta table
|
||||
{ .mmi
|
||||
// cis_r = cis_r -cis_Nfloat * cis_Pi_by_16_lo
|
||||
{ .mfi
|
||||
add cis_AD_2_sin = cis_GR_32m_sin, cis_AD_1
|
||||
nop.m 0
|
||||
fnma.s1 cis_r = cis_NFLOAT, cis_Pi_by_16_lo, cis_r
|
||||
shl cis_GR_32m_cos = cis_GR_m_cos,5
|
||||
};;
|
||||
|
||||
@ -580,7 +571,6 @@ _CIS_COMMON:
|
||||
|
||||
{ .mfi
|
||||
ldfe cis_Sm_cos = [cis_AD_2_cos], 16
|
||||
fnma.s1 cis_r = cis_NFLOAT, cis_Pi_by_16_lo, cis_r
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
@ -604,7 +594,7 @@ _CIS_COMMON:
|
||||
|
||||
{ .mfi
|
||||
ldfe cis_Cm_cos = [cis_AD_2_cos]
|
||||
fma.s1 cis_P_temp1 = cis_rsq, cis_P4, cis_P3
|
||||
fma.s1 cis_P_temp1 = cis_rsq, cis_P4, cis_P3
|
||||
nop.i 0
|
||||
}
|
||||
|
||||
@ -636,6 +626,12 @@ _CIS_COMMON:
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fmpy.s1 cis_rcub = cis_r_exact, cis_rsq // get r^3
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fma.s1 cis_Q = cis_rsq, cis_Q_temp2, cis_Q1
|
||||
@ -647,12 +643,6 @@ _CIS_COMMON:
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fmpy.s1 cis_rcub = cis_r_exact, cis_rsq // get r^3
|
||||
nop.i 0
|
||||
};;
|
||||
|
||||
{ .mfi
|
||||
nop.m 0
|
||||
fma.s1 cis_Q_sin = cis_srsq_sin,cis_Q, cis_Sm_sin
|
||||
@ -717,7 +707,17 @@ _CIS_SPECIAL_ARGS:
|
||||
stfd [cis_pResCos] = cis_Cos_res
|
||||
br.ret.sptk b0 // common exit for sincos main path
|
||||
};;
|
||||
|
||||
_CIS_UNORM:
|
||||
// Here if x=unorm
|
||||
{ .mfb
|
||||
getf.exp cis_r_signexp = cis_NORM_f8 // Get signexp of x
|
||||
fcmp.eq.s0 p11,p0 = cis_Arg, f0 // Dummy op to set denorm
|
||||
br.cond.sptk _CIS_COMMON2 // Return to main path
|
||||
};;
|
||||
|
||||
GLOBAL_LIBM_END(__libm_sincos)
|
||||
|
||||
//// |x| >= 2^27 path ///////
|
||||
.proc _CIS_LARGE_ARGS
|
||||
_CIS_LARGE_ARGS:
|
||||
|
@ -792,6 +792,7 @@ GLOBAL_LIBM_END(__libm_sincos_large)
|
||||
|
||||
|
||||
|
||||
|
||||
GLOBAL_LIBM_ENTRY(__libm_sin_large)
|
||||
|
||||
{ .mlx
|
||||
@ -821,6 +822,7 @@ alloc GR_Table_Base = ar.pfs,0,12,2,0
|
||||
}
|
||||
|
||||
GLOBAL_LIBM_END(__libm_sin_large)
|
||||
|
||||
GLOBAL_LIBM_ENTRY(__libm_cos_large)
|
||||
|
||||
{ .mlx
|
||||
@ -2673,6 +2675,7 @@ SINCOS_SPECIAL:
|
||||
}
|
||||
GLOBAL_LIBM_END(__libm_cos_large)
|
||||
|
||||
|
||||
// *******************************************************************
|
||||
// *******************************************************************
|
||||
// *******************************************************************
|
||||
|
@ -47,12 +47,12 @@
|
||||
// 03/19/02 Added stack unwind around call to __libm_cisf_large
|
||||
// 09/05/02 Work range is widened by reduction strengthen (2 parts of Pi/16)
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align
|
||||
// 02/11/04 cisf is moved to the separate file.
|
||||
|
||||
// API
|
||||
//==============================================================
|
||||
// 1) float _Complex cisf(float)
|
||||
// 2) void sincosf(float, float*s, float*c)
|
||||
// 3) __libm_sincosf - internal LIBM function, that accepts
|
||||
// 1) void sincosf(float, float*s, float*c)
|
||||
// 2) __libm_sincosf - internal LIBM function, that accepts
|
||||
// argument in f8 and returns cosine through f8, sine through f9
|
||||
|
||||
//
|
||||
@ -400,7 +400,7 @@ GLOBAL_IEEE754_ENTRY(sincosf)
|
||||
{ .mlx
|
||||
alloc GR_SAVE_PFS = ar.pfs, 0, 21, 0, 0
|
||||
movl cisf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // 16/pi signd
|
||||
|
||||
|
||||
}
|
||||
// cis_GR_rshf_2to61 = 1.1000 2^(63+63-2)
|
||||
{ .mlx
|
||||
@ -420,8 +420,7 @@ GLOBAL_IEEE754_ENTRY(sincosf)
|
||||
br.cond.sptk _CISF_COMMON
|
||||
};;
|
||||
GLOBAL_IEEE754_END(sincosf)
|
||||
LOCAL_LIBM_ENTRY(cisf)
|
||||
LOCAL_LIBM_END(cisf)
|
||||
|
||||
GLOBAL_LIBM_ENTRY(__libm_sincosf)
|
||||
{ .mlx
|
||||
// cisf_GR_sig_inv_pi_by_16 = significand of 16/pi
|
||||
@ -438,7 +437,7 @@ GLOBAL_LIBM_ENTRY(__libm_sincosf)
|
||||
{ .mfi
|
||||
ld8 cisf_AD_1 = [cisf_AD_1]
|
||||
fnorm.s1 cisf_NORM_f8 = cisf_Arg
|
||||
cmp.eq p14, p13 = r0, r0
|
||||
cmp.eq p14, p13 = r0, r0
|
||||
}
|
||||
// cisf_GR_exp_2tom61 = exponent of scaling factor 2^-61
|
||||
{ .mib
|
||||
@ -499,7 +498,7 @@ _CISF_COMMON:
|
||||
// p10 is true if f8 exp is >= 0x10017
|
||||
{ .mmb
|
||||
ldfpd cisf_P1,cisf_Q1 = [cisf_AD_1], 16
|
||||
cmp.ge p10, p0 = cisf_r_exp, cisf_exp_limit
|
||||
cmp.ge p10, p0 = cisf_r_exp, cisf_exp_limit
|
||||
(p10) br.cond.spnt _CISF_LARGE_ARGS // go to |x| >= 2^24 path
|
||||
};;
|
||||
|
||||
@ -521,7 +520,7 @@ _CISF_COMMON:
|
||||
|
||||
// N = (int)cisf_int_Nfloat
|
||||
{ .mfi
|
||||
getf.sig cisf_GR_n = cisf_W_2TO61_RSH
|
||||
getf.sig cisf_GR_n = cisf_W_2TO61_RSH
|
||||
nop.f 0
|
||||
nop.i 0
|
||||
};;
|
||||
@ -537,7 +536,7 @@ _CISF_COMMON:
|
||||
|
||||
//Get M (least k+1 bits of N)
|
||||
{ .mmi
|
||||
and cisf_GR_m_sin = 0x1f,cisf_GR_n
|
||||
and cisf_GR_m_sin = 0x1f,cisf_GR_n
|
||||
and cisf_GR_m_cos = 0x1f,cisf_GR_n_cos
|
||||
nop.i 0
|
||||
};;
|
||||
@ -552,7 +551,7 @@ _CISF_COMMON:
|
||||
{ .mmf
|
||||
ldfpd cisf_Sm_sin, cisf_Cm_sin = [cisf_AD_2_sin]
|
||||
ldfpd cisf_Sm_cos, cisf_Cm_cos = [cisf_AD_2_cos]
|
||||
fclass.m.unc p10,p0 = cisf_Arg,0x0b
|
||||
fclass.m.unc p10,p0 = cisf_Arg,0x0b
|
||||
};;
|
||||
|
||||
{ .mfi
|
||||
@ -679,6 +678,7 @@ _CISF_RETURN:
|
||||
br.ret.sptk b0 // exit for sincos
|
||||
};;
|
||||
GLOBAL_LIBM_END(__libm_sincosf)
|
||||
|
||||
//// |x| >= 2^24 path ///////
|
||||
.proc _CISF_LARGE_ARGS
|
||||
_CISF_LARGE_ARGS:
|
||||
@ -728,7 +728,7 @@ _CISF_LARGE_ARGS:
|
||||
{ .mfb
|
||||
nop.m 0
|
||||
fma.s.s0 cisf_Sin_res = cisf_Sin_res, f1, f0
|
||||
(p14) br.cond.sptk _CISF_RETURN
|
||||
(p14) br.cond.sptk _CISF_RETURN
|
||||
};;
|
||||
|
||||
{ .mmb
|
||||
|
@ -1,7 +1,7 @@
|
||||
.file "libm_sincosl.asm"
|
||||
.file "libm_sincosl.s"
|
||||
|
||||
|
||||
// Copyright (c) 2000 - 2003, Intel Corporation
|
||||
// Copyright (c) 2000 - 2004, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
|
||||
@ -43,6 +43,9 @@
|
||||
// 05/13/02 Initial version of sincosl (based on libm's sinl and cosl)
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align;
|
||||
// used data8 for long double table values
|
||||
// 10/13/03 Corrected .file name
|
||||
// 02/11/04 cisl is moved to the separate file.
|
||||
// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader
|
||||
//
|
||||
//*********************************************************************
|
||||
//
|
||||
@ -50,9 +53,8 @@
|
||||
//
|
||||
// API's
|
||||
//==============================================================
|
||||
// 1) long double _Complex cisl(long double)
|
||||
// 2) void sincosl(long double, long double*s, long double*c)
|
||||
// 3) __libm_sincosl - internal LIBM function, that accepts
|
||||
// 1) void sincosl(long double, long double*s, long double*c)
|
||||
// 2) __libm_sincosl - internal LIBM function, that accepts
|
||||
// argument in f8 and returns cosine through f8, sine through f9
|
||||
//
|
||||
//
|
||||
@ -65,7 +67,7 @@
|
||||
// f32-f121
|
||||
//
|
||||
// General Purpose Registers:
|
||||
// r32-r47
|
||||
// r32-r61
|
||||
//
|
||||
// Predicate Registers: p6-p15
|
||||
//
|
||||
@ -775,20 +777,6 @@ FR_Tmp = f94
|
||||
sincos_pResSin = r34
|
||||
sincos_pResCos = r35
|
||||
|
||||
GR_sig_inv_pi = r14
|
||||
GR_rshf_2to64 = r15
|
||||
GR_exp_2tom64 = r16
|
||||
GR_rshf = r17
|
||||
GR_ad_p = r18
|
||||
GR_ad_d = r19
|
||||
GR_ad_pp = r20
|
||||
GR_ad_qq = r21
|
||||
GR_ad_c = r22
|
||||
GR_ad_s = r23
|
||||
GR_ad_ce = r24
|
||||
GR_ad_se = r25
|
||||
GR_ad_m14 = r26
|
||||
GR_ad_s1 = r27
|
||||
GR_exp_m2_to_m3= r36
|
||||
GR_N_Inc = r37
|
||||
GR_Cis = r38
|
||||
@ -803,6 +791,20 @@ GR_N_SignS = r45
|
||||
GR_N_SignC = r46
|
||||
GR_N_SinCos = r47
|
||||
|
||||
GR_sig_inv_pi = r48
|
||||
GR_rshf_2to64 = r49
|
||||
GR_exp_2tom64 = r50
|
||||
GR_rshf = r51
|
||||
GR_ad_p = r52
|
||||
GR_ad_d = r53
|
||||
GR_ad_pp = r54
|
||||
GR_ad_qq = r55
|
||||
GR_ad_c = r56
|
||||
GR_ad_s = r57
|
||||
GR_ad_ce = r58
|
||||
GR_ad_se = r59
|
||||
GR_ad_m14 = r60
|
||||
GR_ad_s1 = r61
|
||||
|
||||
// For unwind support
|
||||
GR_SAVE_B0 = r39
|
||||
@ -814,7 +816,7 @@ GR_SAVE_PFS = r41
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(sincosl)
|
||||
{ .mlx ///////////////////////////// 1 /////////////////
|
||||
alloc r32 = ar.pfs,3,13,2,0
|
||||
alloc r32 = ar.pfs,3,27,2,0
|
||||
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
|
||||
}
|
||||
{ .mlx
|
||||
@ -834,11 +836,9 @@ GLOBAL_IEEE754_ENTRY(sincosl)
|
||||
};;
|
||||
GLOBAL_IEEE754_END(sincosl)
|
||||
|
||||
LOCAL_LIBM_ENTRY(cisl)
|
||||
LOCAL_LIBM_END(cisl)
|
||||
GLOBAL_LIBM_ENTRY(__libm_sincosl)
|
||||
{ .mlx ///////////////////////////// 1 /////////////////
|
||||
alloc r32 = ar.pfs,3,14,2,0
|
||||
alloc r32 = ar.pfs,3,27,2,0
|
||||
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
|
||||
}
|
||||
{ .mlx
|
||||
@ -2447,6 +2447,7 @@ SINCOSL_SPECIAL:
|
||||
|
||||
GLOBAL_LIBM_END(__libm_sincosl)
|
||||
|
||||
|
||||
// *******************************************************************
|
||||
// *******************************************************************
|
||||
// *******************************************************************
|
||||
@ -2461,7 +2462,7 @@ GLOBAL_LIBM_END(__libm_sincosl)
|
||||
// c is in f9
|
||||
// N is in r8
|
||||
// Be sure to allocate at least 2 GP registers as output registers for
|
||||
// __libm_pi_by_2_reduce. This routine uses r49-50. These are used as
|
||||
// __libm_pi_by_2_reduce. This routine uses r62-63. These are used as
|
||||
// scratch registers within the __libm_pi_by_2_reduce routine (for speed).
|
||||
//
|
||||
// We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127. We
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1134,3 +1134,4 @@ ASINH_UNORM:
|
||||
;;
|
||||
|
||||
GLOBAL_LIBM_END(asinh)
|
||||
|
||||
|
@ -1344,3 +1344,4 @@ near_0:
|
||||
GLOBAL_LIBM_END(asinhl)
|
||||
|
||||
|
||||
|
||||
|
@ -553,3 +553,4 @@ ATANF_X_INF_NAN_ZERO:
|
||||
;;
|
||||
|
||||
GLOBAL_LIBM_END(atanf)
|
||||
|
||||
|
@ -812,6 +812,7 @@ GLOBAL_IEEE754_ENTRY(atanl)
|
||||
;;
|
||||
|
||||
GLOBAL_IEEE754_END(atanl)
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(atan2l)
|
||||
|
||||
{ .mfi
|
||||
@ -1951,6 +1952,7 @@ ATANL_ArgY_Not_INF:
|
||||
;;
|
||||
|
||||
GLOBAL_IEEE754_END(atan2l)
|
||||
|
||||
LOCAL_LIBM_ENTRY(__libm_error_region)
|
||||
.prologue
|
||||
{ .mfi
|
||||
|
@ -35,7 +35,7 @@
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http: //www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
//==============================================================
|
||||
|
@ -35,7 +35,7 @@
|
||||
//
|
||||
// Intel Corporation is the author of this code, and requests that all
|
||||
// problem reports or change requests be submitted to it directly at
|
||||
// http: //www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
||||
//
|
||||
// History
|
||||
//==============================================================
|
||||
@ -762,3 +762,4 @@ GLOBAL_LIBM_END(cbrtf)
|
||||
|
||||
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
||||
.file "sincos.s"
|
||||
|
||||
|
||||
// Copyright (c) 2000 - 2003, Intel Corporation
|
||||
// Copyright (c) 2000 - 2004, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
|
||||
@ -51,6 +51,8 @@
|
||||
// 06/03/02 Insure inexact flag set for large arg result
|
||||
// 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16)
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align
|
||||
// 08/08/03 Improved performance
|
||||
// 10/28/04 Saved sincos_r_sincos to avoid clobber by dynamic loader
|
||||
|
||||
// API
|
||||
//==============================================================
|
||||
@ -170,11 +172,11 @@
|
||||
// Registers used
|
||||
//==============================================================
|
||||
// general input registers:
|
||||
// r14 -> r19
|
||||
// r32 -> r45
|
||||
// r14 -> r26
|
||||
// r32 -> r35
|
||||
|
||||
// predicate registers used:
|
||||
// p6 -> p14
|
||||
// p6 -> p11
|
||||
|
||||
// floating-point registers used
|
||||
// f9 -> f15
|
||||
@ -236,16 +238,6 @@ fp_tmp = f61
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
sincos_AD_1 = r33
|
||||
sincos_AD_2 = r34
|
||||
sincos_exp_limit = r35
|
||||
sincos_r_signexp = r36
|
||||
sincos_AD_beta_table = r37
|
||||
sincos_r_sincos = r38
|
||||
|
||||
sincos_r_exp = r39
|
||||
sincos_r_17_ones = r40
|
||||
|
||||
sincos_GR_sig_inv_pi_by_16 = r14
|
||||
sincos_GR_rshf_2to61 = r15
|
||||
sincos_GR_rshf = r16
|
||||
@ -254,11 +246,18 @@ sincos_GR_n = r18
|
||||
sincos_GR_m = r19
|
||||
sincos_GR_32m = r19
|
||||
sincos_GR_all_ones = r19
|
||||
sincos_AD_1 = r20
|
||||
sincos_AD_2 = r21
|
||||
sincos_exp_limit = r22
|
||||
sincos_r_signexp = r23
|
||||
sincos_r_17_ones = r24
|
||||
sincos_r_sincos = r25
|
||||
sincos_r_exp = r26
|
||||
|
||||
gr_tmp = r41
|
||||
GR_SAVE_PFS = r41
|
||||
GR_SAVE_B0 = r42
|
||||
GR_SAVE_GP = r43
|
||||
GR_SAVE_PFS = r33
|
||||
GR_SAVE_B0 = r34
|
||||
GR_SAVE_GP = r35
|
||||
GR_SAVE_r_sincos = r36
|
||||
|
||||
|
||||
RODATA
|
||||
@ -405,7 +404,7 @@ LOCAL_OBJECT_END(double_sin_cos_beta_k4)
|
||||
GLOBAL_IEEE754_ENTRY(sin)
|
||||
|
||||
{ .mlx
|
||||
alloc r32 = ar.pfs, 1, 13, 0, 0
|
||||
getf.exp sincos_r_signexp = f8
|
||||
movl sincos_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // signd of 16/pi
|
||||
}
|
||||
{ .mlx
|
||||
@ -427,10 +426,11 @@ GLOBAL_IEEE754_ENTRY(sin)
|
||||
;;
|
||||
|
||||
GLOBAL_IEEE754_END(sin)
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(cos)
|
||||
|
||||
{ .mlx
|
||||
alloc r32 = ar.pfs, 1, 13, 0, 0
|
||||
getf.exp sincos_r_signexp = f8
|
||||
movl sincos_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // signd of 16/pi
|
||||
}
|
||||
{ .mlx
|
||||
@ -464,7 +464,6 @@ _SINCOS_COMMON:
|
||||
// Form two constants we need
|
||||
// 16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
|
||||
// 1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
|
||||
// fcmp used to set denormal, and invalid on snans
|
||||
{ .mfi
|
||||
setf.sig sincos_SIG_INV_PI_BY_16_2TO61 = sincos_GR_sig_inv_pi_by_16
|
||||
fclass.m p6,p0 = f8, 0xe7 // if x = 0,inf,nan
|
||||
@ -480,10 +479,15 @@ _SINCOS_COMMON:
|
||||
// 2^-61 for scaling Nfloat
|
||||
// 0x1001a is register_bias + 27.
|
||||
// So if f8 >= 2^27, go to large argument routines
|
||||
{ .mmi
|
||||
getf.exp sincos_r_signexp = f8
|
||||
{ .mfi
|
||||
alloc r32 = ar.pfs, 1, 4, 0, 0
|
||||
fclass.m p11,p0 = f8, 0x0b // Test for x=unorm
|
||||
mov sincos_GR_all_ones = -1 // For "inexact" constant creation
|
||||
}
|
||||
{ .mib
|
||||
setf.exp sincos_2TOM61 = sincos_GR_exp_2tom61
|
||||
addl gr_tmp = -1,r0 // For "inexact" constant creation
|
||||
nop.i 999
|
||||
(p6) br.cond.spnt _SINCOS_SPECIAL_ARGS
|
||||
}
|
||||
;;
|
||||
|
||||
@ -493,41 +497,31 @@ _SINCOS_COMMON:
|
||||
{ .mmb
|
||||
ldfe sincos_Pi_by_16_1 = [sincos_AD_1],16
|
||||
setf.d sincos_RSHF = sincos_GR_rshf
|
||||
(p6) br.cond.spnt _SINCOS_SPECIAL_ARGS
|
||||
(p11) br.cond.spnt _SINCOS_UNORM // Branch if x=unorm
|
||||
}
|
||||
;;
|
||||
|
||||
_SINCOS_COMMON2:
|
||||
// Return here if x=unorm
|
||||
// Create constant used to set inexact
|
||||
{ .mmi
|
||||
ldfe sincos_Pi_by_16_2 = [sincos_AD_1],16
|
||||
setf.sig fp_tmp = gr_tmp // constant for inexact set
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
{ .mfi
|
||||
ldfe sincos_Pi_by_16_3 = [sincos_AD_1],16
|
||||
nop.f 999
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
// Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading
|
||||
{ .mmi
|
||||
ldfpd sincos_P4,sincos_Q4 = [sincos_AD_1],16
|
||||
nop.m 999
|
||||
setf.sig fp_tmp = sincos_GR_all_ones
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
// Select exponent (17 lsb)
|
||||
{ .mmi
|
||||
ldfpd sincos_P3,sincos_Q3 = [sincos_AD_1],16
|
||||
nop.m 999
|
||||
{ .mfi
|
||||
ldfe sincos_Pi_by_16_3 = [sincos_AD_1],16
|
||||
nop.f 999
|
||||
dep.z sincos_r_exp = sincos_r_signexp, 0, 17
|
||||
}
|
||||
;;
|
||||
};;
|
||||
|
||||
// Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading
|
||||
// p10 is true if we must call routines to handle larger arguments
|
||||
// p10 is true if f8 exp is >= 0x1001a (2^27)
|
||||
{ .mmb
|
||||
ldfpd sincos_P2,sincos_Q2 = [sincos_AD_1],16
|
||||
ldfpd sincos_P4,sincos_Q4 = [sincos_AD_1],16
|
||||
cmp.ge p10,p0 = sincos_r_exp,sincos_exp_limit
|
||||
(p10) br.cond.spnt _SINCOS_LARGE_ARGS // Go to "large args" routine
|
||||
};;
|
||||
@ -536,66 +530,61 @@ _SINCOS_COMMON:
|
||||
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
|
||||
// rightmost bits of significand
|
||||
{ .mfi
|
||||
ldfpd sincos_P1,sincos_Q1 = [sincos_AD_1],16
|
||||
ldfpd sincos_P3,sincos_Q3 = [sincos_AD_1],16
|
||||
fma.s1 sincos_W_2TO61_RSH = sincos_NORM_f8,sincos_SIG_INV_PI_BY_16_2TO61,sincos_RSHF_2TO61
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
// get N = (int)sincos_int_Nfloat
|
||||
// sincos_NFLOAT = Round_Int_Nearest(sincos_W)
|
||||
// This is done by scaling back by 2^-61 and subtracting the shift constant
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fms.s1 sincos_NFLOAT = sincos_W_2TO61_RSH,sincos_2TOM61,sincos_RSHF
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
|
||||
// get N = (int)sincos_int_Nfloat
|
||||
{ .mfi
|
||||
{ .mmf
|
||||
getf.sig sincos_GR_n = sincos_W_2TO61_RSH
|
||||
nop.f 999
|
||||
nop.i 999
|
||||
ldfpd sincos_P2,sincos_Q2 = [sincos_AD_1],16
|
||||
fms.s1 sincos_NFLOAT = sincos_W_2TO61_RSH,sincos_2TOM61,sincos_RSHF
|
||||
};;
|
||||
|
||||
// Add 2^(k-1) (which is in sincos_r_sincos) to N
|
||||
// sincos_r = -sincos_Nfloat * sincos_Pi_by_16_1 + x
|
||||
{ .mfi
|
||||
add sincos_GR_n = sincos_GR_n, sincos_r_sincos
|
||||
ldfpd sincos_P1,sincos_Q1 = [sincos_AD_1],16
|
||||
fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
// Get M (least k+1 bits of N)
|
||||
// Add 2^(k-1) (which is in sincos_r_sincos) to N
|
||||
{ .mmi
|
||||
and sincos_GR_m = 0x1f,sincos_GR_n;;
|
||||
add sincos_GR_n = sincos_GR_n, sincos_r_sincos
|
||||
;;
|
||||
// Get M (least k+1 bits of N)
|
||||
and sincos_GR_m = 0x1f,sincos_GR_n
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
// sincos_r = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2, sincos_r
|
||||
shl sincos_GR_32m = sincos_GR_m,5
|
||||
};;
|
||||
|
||||
// Add 32*M to address of sin_cos_beta table
|
||||
// For sin denorm. - set uflow
|
||||
{ .mfi
|
||||
add sincos_AD_2 = sincos_GR_32m, sincos_AD_1
|
||||
(p8) fclass.m.unc p10,p0 = f8,0x0b // For sin denorm. - set uflow
|
||||
(p8) fclass.m.unc p10,p0 = f8,0x0b
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
// Load Sin and Cos table value using obtained index m (sincos_AD_2)
|
||||
{ .mfi
|
||||
ldfe sincos_Sm = [sincos_AD_2],16
|
||||
(p9) fclass.m.unc p11,p0 = f8,0x0b // For cos denorm - set denorm
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
// sincos_r = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
|
||||
{ .mfi
|
||||
ldfe sincos_Cm = [sincos_AD_2]
|
||||
fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2, sincos_r
|
||||
nop.f 999
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
// get rsq = r*r
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
ldfe sincos_Cm = [sincos_AD_2]
|
||||
fma.s1 sincos_rsq = sincos_r, sincos_r, f0 // r^2 = r*r
|
||||
nop.i 999
|
||||
}
|
||||
@ -660,7 +649,6 @@ _SINCOS_COMMON:
|
||||
fma.s1 sincos_Q = sincos_rsq, sincos_Q_temp2, sincos_Q1
|
||||
nop.i 999
|
||||
}
|
||||
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fma.s1 sincos_P = sincos_rsq, sincos_P_temp2, sincos_P1
|
||||
@ -675,7 +663,6 @@ _SINCOS_COMMON:
|
||||
fma.s1 sincos_Q = sincos_srsq,sincos_Q, sincos_Sm
|
||||
nop.i 999
|
||||
}
|
||||
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
fma.s1 sincos_P = sincos_rcub,sincos_P, sincos_r_exact
|
||||
@ -683,19 +670,12 @@ _SINCOS_COMMON:
|
||||
};;
|
||||
|
||||
// If sin(denormal), force underflow to be set
|
||||
.pred.rel "mutex",p10,p11
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
(p10) fmpy.d.s0 fp_tmp = f8,f8 // forces underflow flag
|
||||
nop.i 999 // for denormal sine args
|
||||
}
|
||||
{ .mfi
|
||||
nop.m 999
|
||||
(p11) fma.d.s0 fp_tmp = f8,f1, f8 // forces denormal flag
|
||||
nop.i 999 // for denormal cosine args
|
||||
(p10) fmpy.d.s0 fp_tmp = sincos_NORM_f8,sincos_NORM_f8
|
||||
nop.i 999
|
||||
};;
|
||||
|
||||
|
||||
// Final calculation
|
||||
// result = C[m]*P + Q
|
||||
{ .mfb
|
||||
@ -724,13 +704,22 @@ _SINCOS_SPECIAL_ARGS:
|
||||
br.ret.sptk b0 // Exit for x = 0/Inf/NaN path
|
||||
};;
|
||||
|
||||
_SINCOS_UNORM:
|
||||
// Here if x=unorm
|
||||
{ .mfb
|
||||
getf.exp sincos_r_signexp = sincos_NORM_f8 // Get signexp of x
|
||||
fcmp.eq.s0 p11,p0 = f8, f0 // Dummy op to set denorm flag
|
||||
br.cond.sptk _SINCOS_COMMON2 // Return to main path
|
||||
};;
|
||||
|
||||
GLOBAL_IEEE754_END(cos)
|
||||
|
||||
//////////// x >= 2^27 - large arguments routine call ////////////
|
||||
LOCAL_LIBM_ENTRY(__libm_callout_sincos)
|
||||
_SINCOS_LARGE_ARGS:
|
||||
.prologue
|
||||
{ .mfi
|
||||
mov sincos_GR_all_ones = -1 // 0xffffffff
|
||||
mov GR_SAVE_r_sincos = sincos_r_sincos // Save sin or cos
|
||||
nop.f 999
|
||||
.save ar.pfs,GR_SAVE_PFS
|
||||
mov GR_SAVE_PFS = ar.pfs
|
||||
@ -753,7 +742,7 @@ _SINCOS_LARGE_ARGS:
|
||||
};;
|
||||
|
||||
{ .mbb
|
||||
cmp.ne p9,p0 = sincos_r_sincos, r0 // set p9 if cos
|
||||
cmp.ne p9,p0 = GR_SAVE_r_sincos, r0 // set p9 if cos
|
||||
nop.b 999
|
||||
(p9) br.call.sptk.many b0 = __libm_cos_large# // cos(large_X)
|
||||
};;
|
||||
|
@ -408,6 +408,7 @@ GLOBAL_IEEE754_ENTRY(sinf)
|
||||
};;
|
||||
|
||||
GLOBAL_IEEE754_END(sinf)
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(cosf)
|
||||
|
||||
{ .mlx
|
||||
@ -657,6 +658,7 @@ _SINCOSF_SPECIAL_ARGS:
|
||||
};;
|
||||
|
||||
GLOBAL_IEEE754_END(cosf)
|
||||
|
||||
//////////// x >= 2^24 - large arguments routine call ////////////
|
||||
LOCAL_LIBM_ENTRY(__libm_callout_sincosf)
|
||||
_SINCOSF_LARGE_ARGS:
|
||||
|
@ -1,7 +1,7 @@
|
||||
.file "sincosl.s"
|
||||
|
||||
|
||||
// Copyright (c) 2000 - 2003, Intel Corporation
|
||||
// Copyright (c) 2000 - 2004, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
|
||||
@ -47,6 +47,8 @@
|
||||
// 05/13/02 Changed interface to __libm_pi_by_2_reduce
|
||||
// 02/10/03 Reordered header: .section, .global, .proc, .align;
|
||||
// used data8 for long double table values
|
||||
// 10/13/03 Corrected final .endp name to match .proc
|
||||
// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader
|
||||
//
|
||||
//*********************************************************************
|
||||
//
|
||||
@ -63,8 +65,7 @@
|
||||
// f32-f99
|
||||
//
|
||||
// General Purpose Registers:
|
||||
// r32-r43
|
||||
// r44-r45 (Used to pass arguments to pi_by_2 reduce routine)
|
||||
// r32-r58
|
||||
//
|
||||
// Predicate Registers: p6-p13
|
||||
//
|
||||
@ -715,20 +716,6 @@ FR_PP_1_lo = f98
|
||||
FR_ArgPrime = f99
|
||||
FR_inexact = f100
|
||||
|
||||
GR_sig_inv_pi = r14
|
||||
GR_rshf_2to64 = r15
|
||||
GR_exp_2tom64 = r16
|
||||
GR_rshf = r17
|
||||
GR_ad_p = r18
|
||||
GR_ad_d = r19
|
||||
GR_ad_pp = r20
|
||||
GR_ad_qq = r21
|
||||
GR_ad_c = r22
|
||||
GR_ad_s = r23
|
||||
GR_ad_ce = r24
|
||||
GR_ad_se = r25
|
||||
GR_ad_m14 = r26
|
||||
GR_ad_s1 = r27
|
||||
GR_exp_m2_to_m3= r36
|
||||
GR_N_Inc = r37
|
||||
GR_Sin_or_Cos = r38
|
||||
@ -739,6 +726,21 @@ GR_exp_2_to_63 = r42
|
||||
GR_exp_2_to_m3 = r43
|
||||
GR_exp_2_to_24 = r44
|
||||
|
||||
GR_sig_inv_pi = r45
|
||||
GR_rshf_2to64 = r46
|
||||
GR_exp_2tom64 = r47
|
||||
GR_rshf = r48
|
||||
GR_ad_p = r49
|
||||
GR_ad_d = r50
|
||||
GR_ad_pp = r51
|
||||
GR_ad_qq = r52
|
||||
GR_ad_c = r53
|
||||
GR_ad_s = r54
|
||||
GR_ad_ce = r55
|
||||
GR_ad_se = r56
|
||||
GR_ad_m14 = r57
|
||||
GR_ad_s1 = r58
|
||||
|
||||
// Added for unwind support
|
||||
|
||||
GR_SAVE_B0 = r39
|
||||
@ -750,7 +752,7 @@ GR_SAVE_PFS = r41
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(sinl)
|
||||
{ .mlx
|
||||
alloc r32 = ar.pfs,0,12,2,0
|
||||
alloc r32 = ar.pfs,0,27,2,0
|
||||
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
|
||||
}
|
||||
{ .mlx
|
||||
@ -772,9 +774,10 @@ GLOBAL_IEEE754_ENTRY(sinl)
|
||||
;;
|
||||
|
||||
GLOBAL_IEEE754_END(sinl)
|
||||
|
||||
GLOBAL_IEEE754_ENTRY(cosl)
|
||||
{ .mlx
|
||||
alloc r32 = ar.pfs,0,12,2,0
|
||||
alloc r32 = ar.pfs,0,27,2,0
|
||||
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
|
||||
}
|
||||
{ .mlx
|
||||
@ -2285,6 +2288,7 @@ SINCOSL_SPECIAL:
|
||||
}
|
||||
|
||||
GLOBAL_IEEE754_END(cosl)
|
||||
|
||||
// *******************************************************************
|
||||
// *******************************************************************
|
||||
// *******************************************************************
|
||||
@ -2299,7 +2303,7 @@ GLOBAL_IEEE754_END(cosl)
|
||||
// c is in f9
|
||||
// N is in r8
|
||||
// Be sure to allocate at least 2 GP registers as output registers for
|
||||
// __libm_pi_by_2_reduce. This routine uses r49-50. These are used as
|
||||
// __libm_pi_by_2_reduce. This routine uses r59-60. These are used as
|
||||
// scratch registers within the __libm_pi_by_2_reduce routine (for speed).
|
||||
//
|
||||
// We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127. We
|
||||
@ -2356,6 +2360,6 @@ SINCOSL_ARG_TOO_LARGE:
|
||||
br.cond.sptk SINCOSL_NORMAL_R // Branch if |r|>=2^-3 for |x| >= 2^63
|
||||
};;
|
||||
|
||||
.endp
|
||||
LOCAL_LIBM_END(__libm_callout)
|
||||
.type __libm_pi_by_2_reduce#,@function
|
||||
.global __libm_pi_by_2_reduce#
|
||||
|
@ -922,3 +922,4 @@ erf_denormal:
|
||||
|
||||
GLOBAL_LIBM_END(erf)
|
||||
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user