45b86625d7
gcc: * Makefile.in (LIB1ASMSRC): Don't export. (libgcc.mvars): Don't emit LIB1ASMFUNCS, LIB1ASMSRC. * config/arm/arm.c: Update lib1funcs.asm filename. * config/arm/linux-eabi.h: Likewise. * config/arm/bpabi-v6m.S, config/arm/bpabi.S, config/arm/ieee754-df.S, config/arm/ieee754-sf.S: Move to ../libgcc/config/arm. * config/arm/lib1funcs.asm: Move to ../libgcc/config/arm/lib1funcs.S. * config/arm/t-arm (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/arm/t-arm-elf (LIB1ASMFUNCS): Remove. * config/arm/t-bpabi: Likewise. * config/arm/t-linux (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/arm/t-linux-eabi (LIB1ASMFUNCS): Remove. * config/arm/t-strongarm-elf: Likewise. * config/arm/t-symbian: Likewise. * config/arm/t-vxworks: Likewise. * config/arm/t-wince-pe: Likewise. * config/avr/libgcc.S: Move to ../libgcc/config/avr. * config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/bfin/lib1funcs.asm: Move to ../libgcc/config/bfin/lib1funcs.S. * config/bfin/t-bfin: Remove. * config/bfin/t-bfin-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/bfin/t-bfin-linux: Likewise. * config/bfin/t-bfin-uclinux: Likewise. * config/c6x/lib1funcs.asm: Move to ../libgcc/config/c6x/lib1funcs.S. * config/c6x/t-c6x-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/fr30/lib1funcs.asm: Move to ../libgcc/config/fr30/lib1funcs.S. * config/fr30/t-fr30 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/frv/lib1funcs.asm: Move to ../libgcc/config/frv/lib1funcs.S. * config/frv/t-frv (CROSS_LIBGCC1, LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/h8300/fixunssfsi.c: Update lib1funcs.asm filename. * config/h8300/lib1funcs.asm: Move to ../libgcc/config/h8300/lib1funcs.S. * config/h8300/t-h8300 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/i386/cygwin.asm: Move to ../libgcc/config/i386/cygwin.S. * config/i386/t-cygming (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/i386/t-interix: Likewise. * config/ia64/lib1funcs.asm: Move to ../libgcc/config/ia64/lib1funcs.S. * config/ia64/t-hpux (LIB1ASMFUNCS, LIBGCC1_TEST): Remove. 
* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/iq2000/t-iq2000 (LIBGCC1, CROSS_LIBGCC1): Remove. * config/m32c/m32c.c: Update m32c-lib1.S filename. * config/m32c/m32c-lib1.S: Move to ../libgcc/config/m32c/lib1funcs.S. * config/m32c/t-m32c (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/m32r/t-linux (CROSS_LIBGCC1, LIBGCC1, LIBGCC1_TEST): Remove. * config/m68k/lb1sf68.asm: Move to ../libgcc/config/m68k/lb1sf68.S. * config/m68k/t-floatlib (LIB1ASMSRC, LIB1ASMFUNCS): New file. * config/mcore/lib1.asm: Move to ../libgcc/config/mcore/lib1funcs.S. * config/mcore/t-mcore (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/mep/mep-lib1.asm: Move to ../libgcc/config/mep/lib1funcs.S. * config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/mips/mips16.S: Move to ../libgcc/config/mips. * config/mips/t-libgcc-mips16: Remove. * config/mips/t-sr71k (LIBGCC1, CROSS_LIBGCC1): Remove. * config/pa/milli64.S: Move to ../libgcc/config/pa. * config/pa/t-linux (LIB1ASMFUNCS, LIB1ASMSRC): Remove. * config/pa/t-linux64: Likewise. * config/picochip/libgccExtras/fake_libgcc.asm: Move to ../libgcc/config/picochip/lib1funcs.S. * config/picochip/t-picochip (LIB1ASMFUNCS, LIB1ASMSRC): Remove. * config/sh/lib1funcs.asm: Move to ../libgcc/config/sh/lib1funcs.S. * config/sh/lib1funcs.h: Move to ../libgcc/config/sh. * config/sh/sh.h: Update lib1funcs.asm filename. * config/sh/t-linux (LIB1ASMFUNCS_CACHE): Remove. * config/sh/t-netbsd: Likewise. * config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): Remove. * config/sh/t-sh64 (LIB1ASMFUNCS): Remove. * config/sparc/lb1spc.asm: Move to ../libgcc/config/sparc/lb1spc.S. * config/sparc/lb1spl.asm: Remove. * config/sparc/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config/sparc/t-leon: Likewise. * config/spu/t-spu-elf (LIBGCC1, CROSS_LIBGCC1): Remove. * config/v850/lib1funcs.asm: Move to ../libgcc/config/v850/lib1funcs.S. 
* config/v850/t-v850 (LIB1ASMSRC, LIB1ASMFUNCS): Remove * config/vax/lib1funcs.asm: Move to ../libgcc/config/vax/lib1funcs.S. * config/vax/t-linux: Remove. * config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S: Move to ../libgcc/config/xtensa. * config/xtensa/lib1funcs.asm: Move to ../libgcc/config/xtensa/lib1funcs.S. * config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Remove. * config.gcc (bfin*-rtems*): Remove bfin/t-bfin from tmake_file. (bfin*-*): Likewise. (mips64*-*-linux*, mipsisa64*-*-linux*): Remove mips/t-libgcc-mips16 from tmake_file. (mips*-*-linux*): Likewise. (mips*-sde-elf*): Likewise. (mipsisa32-*-elf*, mipsisa32el-*-elf*, mipsisa32r2-*-elf*) (mipsisa32r2el-*-elf*, mipsisa64-*-elf*, mipsisa64el-*-elf*) (mipsisa64r2-*-elf*, mipsisa64r2el-*-elf*): Likewise. (mipsisa64sb1-*-elf*, mipsisa64sb1el-*-elf*): Likewise. (mips-*-elf*, mipsel-*-elf*): Likewise. (mips64-*-elf*, mips64el-*-elf*): Likewise. (mips64orion-*-elf*, mips64orionel-*-elf*): Likewise. (mips*-*-rtems*): Likewise. (mipstx39-*-elf*, mipstx39el-*-elf*): Likewise. (vax-*-linux*): Remove vax/t-linux from tmake_file. libgcc: * Makefile.in ($(lib1asmfuncs-o), $(lib1asmfuncs-s-o)): Use $(srcdir) to refer to $(LIB1ASMSRC). Use $<. * config/arm/bpabi-v6m.S, config/arm/bpabi.S, config/arm/ieee754-df.S, config/arm/ieee754-sf.S, config/arm/lib1funcs.S: New files. * config/arm/libunwind.S [!__symbian__]: Use lib1funcs.S. * config/arm/t-arm: New file. * config/arm/t-bpabi (LIB1ASMFUNCS): Set. * config/arm/t-elf, config/arm/t-linux, config/arm/t-linux-eabi, config/arm/t-strongarm-elf: New files. * config/arm/t-symbian (LIB1ASMFUNCS): Set. * config/arm/t-vxworks, config/arm/t-wince-pe: New files. * config/avr/lib1funcs.S: New file. * config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Set. * config/bfin/lib1funcs.S, config/bfin/t-bfin: New files. * config/c6x/lib1funcs.S: New file. * config/c6x/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Set. * config/fr30/lib1funcs.S, config/fr30/t-fr30: New files. 
* config/frv/lib1funcs.S: New file. * config/frv/t-frv (LIB1ASMSRC, LIB1ASMFUNCS): Set. * config/h8300/lib1funcs.S, config/h8300/t-h8300: New files. * config/i386/cygwin.S, config/i386/t-chkstk: New files. * config/ia64/__divxf3.asm: Rename to ... * config/ia64/__divxf3.S: ... this. Adapt lib1funcs.asm filename. * config/ia64/_fixtfdi.asm: Rename to ... * config/ia64/_fixtfdi.S: ... this. Adapt lib1funcs.asm filename. * config/ia64/_fixunstfdi.asm: Rename to ... * config/ia64/_fixunstfdi.S: ... this. Adapt lib1funcs.asm filename. * config/ia64/_floatditf.asm: Rename to ... * config/ia64/_floatditf.S: ... this. Adapt lib1funcs.asm filename. * config/ia64/lib1funcs.S: New file. * config/ia64/t-hpux (LIB1ASMFUNCS): Set. * config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Set. * config/ia64/t-softfp-compat (libgcc1-tf-compats): Adapt suffix. * config/m32c/lib1funcs.S, config/m32c/t-m32c: New files. * config/m68k/lb1sf68.S, config/m68k/t-floatlib: New files. * config/mcore/lib1funcs.S, config/mcore/t-mcore: New files. * config/mep/lib1funcs.S: New file. * config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Set. * config/mips/mips16.S: New file. * config/mips/t-mips16 (LIB1ASMSRC, LIB1ASMFUNCS): Set. * config/pa/milli64.S: New file. * config/pa/t-linux, config/pa/t-linux64: New files. * config/picochip/lib1funcs.S: New file. * config/picochip/t-picochip (LIB1ASMSRC, LIB1ASMFUNCS): Set. * config/sh/lib1funcs.S, config/sh/lib1funcs.h: New files. * config/sh/t-linux (LIB1ASMFUNCS_CACHE): Set. * config/sh/t-netbsd: New file. * config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): Set. Use $(srcdir) to refer to lib1funcs.S, adapt filename. * config/sh/t-sh64: New file. * config/sparc/lb1spc.S: New file. * config/sparc/t-softmul (LIB1ASMSRC): Adapt sparc/lb1spc.asm filename. * config/v850/lib1funcs.S, config/v850/t-v850: New files. * config/vax/lib1funcs.S, config/vax/t-linux: New files. 
* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S, config/xtensa/lib1funcs.S: New files. * config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Set. * config.host (arm-wrs-vxworks): Add arm/t-arm, arm/t-vxworks to tmake_file. (arm*-*-freebsd*): Add arm/t-arm, arm/t-strongarm-elf to tmake_file. (arm*-*-netbsdelf*): Add arm/t-arm to tmake_file. (arm*-*-linux*): Likewise. Add arm/t-elf, arm/t-bpabi, arm/t-linux-eabi to tmake_file for arm*-*-linux-*eabi, add arm/t-linux otherwise. (arm*-*-uclinux*): Add arm/t-arm, arm/t-elf to tmake_file. (arm*-*-ecos-elf): Likewise. (arm*-*-eabi*, arm*-*-symbianelf*): Likewise. (arm*-*-rtems*): Likewise. (arm*-*-elf): Likewise. (arm*-wince-pe*): Add arm/t-arm, arm/t-wince-pe to tmake_file. (avr-*-rtems*): Add to tmake_file, add avr/t-avr. (bfin*-elf*): Add bfin/t-bfin to tmake_file. (bfin*-uclinux*): Likewise. (bfin*-linux-uclibc*): Likewise. (bfin*-rtems*): Likewise. (bfin*-*): Likewise. (fido-*-elf): Merge into m68k-*-elf*. (fr30-*-elf)): Add fr30/t-fr30 to tmake_file. (frv-*-*linux*): Add frv/t-frv to tmake_file. (h8300-*-rtems*): Add h8300/t-h8300 to tmake_file. (h8300-*-elf*): Likewise. (hppa*64*-*-linux*): Add pa/t-linux, pa/t-linux64 to tmake_file. (hppa*-*-linux*): Add pa/t-linux to tmake_file. (i[34567]86-*-cygwin*): Add i386/t-chkstk to tmake_file. (i[34567]86-*-mingw*): Likewise. (x86_64-*-mingw*): Likewise. (i[34567]86-*-interix3*): Likewise. (ia64*-*-hpux*): Add ia64/t-ia64, ia64/t-hpux to tmake_file. (ia64-hp-*vms*): Add ia64/t-ia64 to tmake_file. (m68k-*-elf*): Also handle fido-*-elf. Add m68k/t-floatlib to tmake_file. (m68k-*-uclinux*): Add m68k/t-floatlib to tmake_file. (m68k-*-linux*): Likewise. (m68k-*-rtems*): Likewise. (mcore-*-elf): Add mcore/t-mcore to tmake_file. (sh-*-elf*, sh[12346l]*-*-elf*): Add sh/t-sh64 to tmake_file for sh64*-*-*. (sh-*-linux*, sh[2346lbe]*-*-linux*): Add sh/t-sh to tmake_file. Add sh/t-sh64 to tmake_file for sh64*-*-linux*. 
(sh-*-netbsdelf*, shl*-*-netbsdelf*, sh5-*-netbsd*) (sh5l*-*-netbsd*, sh64-*-netbsd*, sh64l*-*-netbsd*): Add sh/t-sh, sh/t-netbsd to tmake_file. Add sh/t-sh64 to tmake_file for sh5*-*-netbsd*, sh64*-netbsd*. (sh-*-rtems*): Add sh/t-sh to tmake_file. (sh-wrs-vxworks): Likewise. (sparc-*-linux*): Add sparc/t-softmul to tmake_file except for *-leon[3-9]*. (v850*-*-*): Add v850/t-v850 to tmake_file. (vax-*-linux*): Add vax/t-linux to tmake_file. (m32c-*-elf*, m32c-*-rtems*): Add m32c/t-m32c to tmake_file. From-SVN: r180773
796 lines
15 KiB
ArmAsm
796 lines
15 KiB
ArmAsm
/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc.
|
|
Contributed by James E. Wilson <wilson@cygnus.com>.
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3, or (at your option)
|
|
any later version.
|
|
|
|
GCC is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
Under Section 7 of GPL version 3, you are granted additional
|
|
permissions described in the GCC Runtime Library Exception, version
|
|
3.1, as published by the Free Software Foundation.
|
|
|
|
You should have received a copy of the GNU General Public License and
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#ifdef L__divxf3
// __divxf3: 80-bit IEEE double-extended division.
//
// The sequence is the minimum latency alternative from the Intel IA-64
// Optimization Guide.
//
// In:   farg0 = dividend (a), farg1 = divisor (b).
// Out:  fret0 = quotient.
// Uses: f10-f14, p6, p7.
//
// frcpa leaves its result in f10 and sets p6.  When p6 is set, f10 is
// treated as a reciprocal estimate y0 and refined by the predicated steps
// below.  When p6 is clear, all of those steps are skipped and f10 is
// returned unchanged through the p7 path (p7 is computed as !p6).
//
// In a SHARED build __divtf3 is emitted as an alternate symbol name for the
// same entry point, for backward compatibility.

	.text
	.align 16
	.global __divxf3
	.proc __divxf3
__divxf3:
#ifdef SHARED
	.global __divtf3
__divtf3:
#endif
	cmp.eq	p7, p0 = r0, r0			// p7 = 1
	frcpa.s0	f10, p6 = farg0, farg1	// f10 = y0, p6 = refine
	;;
(p6)	cmp.ne	p7, p0 = r0, r0			// p7 = !p6
	.pred.rel.mutex p6, p7
(p6)	fnma.s1	f11 = farg1, f10, f1		// e0 = 1 - b*y0
(p6)	fmpy.s1	f12 = farg0, f10		// q0 = a*y0
	;;
(p6)	fmpy.s1	f13 = f11, f11			// f13 = e0*e0
(p6)	fma.s1	f14 = f11, f11, f11		// f14 = e0*e0 + e0
	;;
(p6)	fma.s1	f11 = f13, f13, f11		// f11 = f13*f13 + e0
(p6)	fma.s1	f13 = f14, f10, f10		// y1 = f14*y0 + y0
	;;
(p6)	fma.s1	f10 = f13, f11, f10		// y2 = y1*f11 + y0
(p6)	fnma.s1	f11 = farg1, f12, farg0		// r0 = a - b*q0
	;;
(p6)	fma.s1	f11 = f11, f10, f12		// q1 = r0*y2 + q0
(p6)	fnma.s1	f12 = farg1, f10, f1		// e1 = 1 - b*y2
	;;
(p6)	fma.s1	f10 = f12, f10, f10		// y3 = e1*y2 + y2
(p6)	fnma.s1	f12 = farg1, f11, farg0		// r1 = a - b*q1
	;;
(p6)	fma.s0	fret0 = f12, f10, f11		// result = r1*y3 + q1
(p7)	mov	fret0 = f10			// frcpa result used directly
	br.ret.sptk rp
	.endp __divxf3
#endif
|
|
|
|
#ifdef L__divdf3
// __divdf3: 64-bit IEEE double division.
//
// The sequence is the minimum latency alternative from the Intel IA-64
// Optimization Guide.
//
// In:   farg0 = dividend (a), farg1 = divisor (b).
// Out:  fret0 = quotient.
// Uses: f10-f13, p6, p7 (f8 is also reused as a temporary).
//
// frcpa leaves its result in f10 and sets p6.  The refinement steps run only
// when p6 is set; otherwise f10 is returned as is via the p7 path, where p7
// is computed as !p6.

	.text
	.align 16
	.global __divdf3
	.proc __divdf3
__divdf3:
	cmp.eq	p7, p0 = r0, r0			// p7 = 1
	frcpa.s0	f10, p6 = farg0, farg1	// f10 = y0, p6 = refine
	;;
(p6)	cmp.ne	p7, p0 = r0, r0			// p7 = !p6
	.pred.rel.mutex p6, p7
(p6)	fmpy.s1	f11 = farg0, f10		// q0 = a*y0
(p6)	fnma.s1	f12 = farg1, f10, f1		// e0 = 1 - b*y0
	;;
(p6)	fma.s1	f11 = f12, f11, f11		// q1 = e0*q0 + q0
(p6)	fmpy.s1	f13 = f12, f12			// e1 = e0*e0
	;;
(p6)	fma.s1	f10 = f12, f10, f10		// y1 = e0*y0 + y0
(p6)	fma.s1	f11 = f13, f11, f11		// q2 = e1*q1 + q1
	;;
(p6)	fmpy.s1	f12 = f13, f13			// e2 = e1*e1
(p6)	fma.s1	f10 = f13, f10, f10		// y2 = e1*y1 + y1
	;;
(p6)	fma.d.s1	f11 = f12, f11, f11	// q3 = e2*q2 + q2 (double)
(p6)	fma.s1	f10 = f12, f10, f10		// y3 = e2*y2 + y2
	;;
(p6)	fnma.d.s1	f8 = farg1, f11, farg0	// r = a - b*q3 (double)
	;;
(p6)	fma.d	fret0 = f8, f10, f11		// result = r*y3 + q3
(p7)	mov	fret0 = f10			// frcpa result used directly
	br.ret.sptk rp
	;;
	.endp __divdf3
#endif
|
|
|
|
#ifdef L__divsf3
// __divsf3: 32-bit IEEE float division.
//
// The sequence is the minimum latency alternative from the Intel IA-64
// Optimization Guide.
//
// In:   farg0 = dividend (a), farg1 = divisor (b).
// Out:  fret0 = quotient.
// Uses: f8, f9 (overwritten as temporaries), f10, p6, p7.
//
// frcpa leaves its result in f10 and sets p6.  The refinement steps run only
// when p6 is set; otherwise f10 is returned as is via the p7 path, where p7
// is computed as !p6.

	.text
	.align 16
	.global __divsf3
	.proc __divsf3
__divsf3:
	cmp.eq	p7, p0 = r0, r0			// p7 = 1
	frcpa.s0	f10, p6 = farg0, farg1	// f10 = y0, p6 = refine
	;;
(p6)	cmp.ne	p7, p0 = r0, r0			// p7 = !p6
	.pred.rel.mutex p6, p7
(p6)	fmpy.s1	f8 = farg0, f10			// q0 = a*y0
(p6)	fnma.s1	f9 = farg1, f10, f1		// e0 = 1 - b*y0
	;;
(p6)	fma.s1	f8 = f9, f8, f8			// q1 = e0*q0 + q0
(p6)	fmpy.s1	f9 = f9, f9			// e1 = e0*e0
	;;
(p6)	fma.s1	f8 = f9, f8, f8			// q2 = e1*q1 + q1
(p6)	fmpy.s1	f9 = f9, f9			// e2 = e1*e1
	;;
(p6)	fma.d.s1	f10 = f9, f8, f8	// q3 = e2*q2 + q2 (double)
	;;
(p6)	fnorm.s.s0	fret0 = f10		// round q3 to single
(p7)	mov	fret0 = f10			// frcpa result used directly
	br.ret.sptk rp
	;;
	.endp __divsf3
#endif
|
|
|
|
#ifdef L__divdi3
// __divdi3: signed 64-bit integer division.
//
// The sequence is the minimum latency alternative from the Intel IA-64
// Optimization Guide.
//
// In:   in0 = dividend (a), in1 = divisor (b).
// Out:  ret0 = quotient, truncated.
// Uses: f8-f13, p6, p7.
// A zero divisor executes "break 1".

	.text
	.align 16
	.global __divdi3
	.proc __divdi3
__divdi3:
	.regstk 2,0,0,0
	// Move both operands into FP registers and test the divisor:
	// p7 is set when in1 == 0.
	setf.sig	f8 = in0
	setf.sig	f9 = in1
	cmp.ne.unc	p0, p7 = 0, in1
	;;
	// Signed integer to FP conversion, so that the operands are not
	// treated as unsigned.
	fcvt.xf	f8 = f8
	fcvt.xf	f9 = f9
(p7)	break	1
	;;
	// Reciprocal approximation: f10 = y0, p6 = refine.
	frcpa.s1	f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fnma.s1	f11 = f9, f10, f1		// e  = 1 - b*y0
(p6)	fmpy.s1	f12 = f8, f10			// q0 = a*y0
	;;
(p6)	fmpy.s1	f13 = f11, f11			// e2 = e*e
(p6)	fma.s1	f12 = f11, f12, f12		// q1 = e*q0 + q0
	;;
(p6)	fma.s1	f10 = f11, f10, f10		// y1 = e*y0 + y0
(p6)	fma.s1	f11 = f13, f12, f12		// q2 = e2*q1 + q1
	;;
(p6)	fma.s1	f10 = f13, f10, f10		// y2 = e2*y1 + y1
(p6)	fnma.s1	f12 = f9, f11, f8		// r  = a - b*q2
	;;
(p6)	fma.s1	f10 = f12, f10, f11		// q3 = r*y2 + q2
	;;
	// Convert the quotient to a signed integer, truncating.
	fcvt.fx.trunc.s1	f10 = f10
	;;
	// Move the result back to the general registers.
	getf.sig	ret0 = f10
	br.ret.sptk rp
	;;
	.endp __divdi3
#endif
|
|
|
|
#ifdef L__moddi3
// __moddi3: signed 64-bit integer remainder.
//
// The sequence is the minimum latency alternative from the Intel IA-64
// Optimization Guide.
//
// In:   in0 = dividend (a), in1 = divisor (b).
// Out:  ret0 = a - q*b, where q is the truncated quotient.
// Uses: f8-f14, p6, p7; in1 is overwritten with -b.
// A zero divisor executes "break 1".

	.text
	.align 16
	.global __moddi3
	.proc __moddi3
__moddi3:
	.regstk 2,0,0,0
	// Move both operands into FP registers; f14 keeps the integer
	// dividend for the final multiply-add.  p7 is set when in1 == 0.
	setf.sig	f14 = in0
	setf.sig	f9 = in1
	cmp.ne.unc	p0, p7 = 0, in1
	;;
	// Signed integer to FP conversion, so that the operands are not
	// treated as unsigned.
	fcvt.xf	f8 = f14
	fcvt.xf	f9 = f9
(p7)	break	1
	;;
	// Reciprocal approximation: f10 = y0, p6 = refine.
	frcpa.s1	f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fmpy.s1	f12 = f8, f10			// q0 = a*y0
(p6)	fnma.s1	f11 = f9, f10, f1		// e  = 1 - b*y0
	;;
(p6)	fma.s1	f12 = f11, f12, f12		// q1 = e*q0 + q0
(p6)	fmpy.s1	f13 = f11, f11			// e2 = e*e
	;;
(p6)	fma.s1	f10 = f11, f10, f10		// y1 = e*y0 + y0
(p6)	fma.s1	f11 = f13, f12, f12		// q2 = e2*q1 + q1
	;;
	sub	in1 = r0, in1			// in1 = -b
(p6)	fma.s1	f10 = f13, f10, f10		// y2 = e2*y1 + y1
(p6)	fnma.s1	f12 = f9, f11, f8		// r  = a - b*q2
	;;
	setf.sig	f9 = in1		// f9 = -b (integer)
(p6)	fma.s1	f10 = f12, f10, f11		// q3 = r*y2 + q2
	;;
	// Convert the quotient to a signed integer, truncating.
	fcvt.fx.trunc.s1	f10 = f10
	;;
	// r = q * (-b) + a
	xma.l	f10 = f10, f9, f14
	;;
	// Move the result back to the general registers.
	getf.sig	ret0 = f10
	br.ret.sptk rp
	;;
	.endp __moddi3
#endif
|
|
|
|
#ifdef L__udivdi3
// __udivdi3: unsigned 64-bit integer division.
//
// The sequence is the minimum latency alternative from the Intel IA-64
// Optimization Guide.
//
// In:   in0 = dividend (a), in1 = divisor (b).
// Out:  ret0 = quotient, truncated.
// Uses: f8-f13, p6, p7.
// A zero divisor executes "break 1".

	.text
	.align 16
	.global __udivdi3
	.proc __udivdi3
__udivdi3:
	.regstk 2,0,0,0
	// Move both operands into FP registers and test the divisor:
	// p7 is set when in1 == 0.
	setf.sig	f8 = in0
	setf.sig	f9 = in1
	cmp.ne.unc	p0, p7 = 0, in1
	;;
	// Unsigned integer to FP conversion, to avoid FP software-assist
	// faults.
	fcvt.xuf.s1	f8 = f8
	fcvt.xuf.s1	f9 = f9
(p7)	break	1
	;;
	// Reciprocal approximation: f10 = y0, p6 = refine.
	frcpa.s1	f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fnma.s1	f11 = f9, f10, f1		// e  = 1 - b*y0
(p6)	fmpy.s1	f12 = f8, f10			// q0 = a*y0
	;;
(p6)	fmpy.s1	f13 = f11, f11			// e2 = e*e
(p6)	fma.s1	f12 = f11, f12, f12		// q1 = e*q0 + q0
	;;
(p6)	fma.s1	f10 = f11, f10, f10		// y1 = e*y0 + y0
(p6)	fma.s1	f11 = f13, f12, f12		// q2 = e2*q1 + q1
	;;
(p6)	fma.s1	f10 = f13, f10, f10		// y2 = e2*y1 + y1
(p6)	fnma.s1	f12 = f9, f11, f8		// r  = a - b*q2
	;;
(p6)	fma.s1	f10 = f12, f10, f11		// q3 = r*y2 + q2
	;;
	// Convert the quotient to an unsigned integer, truncating.
	fcvt.fxu.trunc.s1	f10 = f10
	;;
	// Move the result back to the general registers.
	getf.sig	ret0 = f10
	br.ret.sptk rp
	;;
	.endp __udivdi3
#endif
|
|
|
|
#ifdef L__umoddi3
// __umoddi3: unsigned 64-bit integer remainder.
//
// The sequence is the minimum latency alternative from the Intel IA-64
// Optimization Guide.
//
// In:   in0 = dividend (a), in1 = divisor (b).
// Out:  ret0 = a - q*b, where q is the truncated quotient.
// Uses: f8-f14, p6, p7; in1 is overwritten with -b.
// A zero divisor executes "break 1".

	.text
	.align 16
	.global __umoddi3
	.proc __umoddi3
__umoddi3:
	.regstk 2,0,0,0
	// Move both operands into FP registers; f14 keeps the integer
	// dividend for the final multiply-add.  p7 is set when in1 == 0.
	setf.sig	f14 = in0
	setf.sig	f9 = in1
	cmp.ne.unc	p0, p7 = 0, in1
	;;
	// Unsigned integer to FP conversion, to avoid FP software assist
	// faults.
	fcvt.xuf.s1	f8 = f14
	fcvt.xuf.s1	f9 = f9
(p7)	break	1
	;;
	// Reciprocal approximation: f10 = y0, p6 = refine.
	frcpa.s1	f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fmpy.s1	f12 = f8, f10			// q0 = a*y0
(p6)	fnma.s1	f11 = f9, f10, f1		// e  = 1 - b*y0
	;;
(p6)	fma.s1	f12 = f11, f12, f12		// q1 = e*q0 + q0
(p6)	fmpy.s1	f13 = f11, f11			// e2 = e*e
	;;
(p6)	fma.s1	f10 = f11, f10, f10		// y1 = e*y0 + y0
(p6)	fma.s1	f11 = f13, f12, f12		// q2 = e2*q1 + q1
	;;
	sub	in1 = r0, in1			// in1 = -b
(p6)	fma.s1	f10 = f13, f10, f10		// y2 = e2*y1 + y1
(p6)	fnma.s1	f12 = f9, f11, f8		// r  = a - b*q2
	;;
	setf.sig	f9 = in1		// f9 = -b (integer)
(p6)	fma.s1	f10 = f12, f10, f11		// q3 = r*y2 + q2
	;;
	// Convert the quotient to an unsigned integer, truncating.
	fcvt.fxu.trunc.s1	f10 = f10
	;;
	// r = q * (-b) + a
	xma.l	f10 = f10, f9, f14
	;;
	// Move the result back to the general registers.
	getf.sig	ret0 = f10
	br.ret.sptk rp
	;;
	.endp __umoddi3
#endif
|
|
|
|
#ifdef L__divsi3
// __divsi3: signed 32-bit integer division.
//
// The sequence is the minimum latency alternative from the Intel IA-64
// Optimization Guide.
//
// In:   in0 = dividend (a), in1 = divisor (b).  Only the low 32 bits of
//       each are used; both registers are sign-extended in place.
// Out:  ret0 = quotient, truncated.
// Uses: r2, f8-f11, p6, p7.
// A zero divisor executes "break 1".

	.text
	.align 16
	.global __divsi3
	.proc __divsi3
__divsi3:
	.regstk 2,0,0,0
	// Check divide by zero.  This compare sits in the same instruction
	// group as the sxt4 of in1, so it still sees the register as passed
	// by the caller.  Use the 32-bit compare so that only the bits that
	// form the divisor are tested; p7 is set when they are all zero.
	cmp4.ne.unc	p0, p7 = 0, in1
	sxt4	in0 = in0
	sxt4	in1 = in1
	;;
	setf.sig	f8 = in0
	setf.sig	f9 = in1
(p7)	break	1
	;;
	mov	r2 = 0x0ffdd			// biased exponent for 2^-34
	fcvt.xf	f8 = f8				// a as FP
	fcvt.xf	f9 = f9				// b as FP
	;;
	setf.exp	f11 = r2		// f11 = 2^-34
	frcpa.s1	f10, p6 = f8, f9	// f10 = y0, p6 = refine
	;;
(p6)	fmpy.s1	f8 = f8, f10			// q0 = a*y0
(p6)	fnma.s1	f9 = f9, f10, f1		// e  = 1 - b*y0
	;;
(p6)	fma.s1	f8 = f9, f8, f8			// q1 = e*q0 + q0
(p6)	fma.s1	f9 = f9, f9, f11		// e1 = e*e + 2^-34
	;;
(p6)	fma.s1	f10 = f9, f8, f8		// q2 = e1*q1 + q1
	;;
	// Convert the quotient to a signed integer, truncating.
	fcvt.fx.trunc.s1	f10 = f10
	;;
	getf.sig	ret0 = f10
	br.ret.sptk rp
	;;
	.endp __divsi3
#endif
|
|
|
|
#ifdef L__modsi3
// __modsi3: signed 32-bit integer remainder.
//
// The sequence is the minimum latency alternative from the Intel IA-64
// Optimization Guide.
//
// In:   in0 = dividend (a), in1 = divisor (b).  Both registers are
//       sign-extended from 32 bits in place before use.
// Out:  ret0 = a - q*b, where q is the truncated quotient.
// Uses: r2, f8-f13, p6, p7; in1 is overwritten with -b.
// A zero divisor executes "break 1".

	.text
	.align 16
	.global __modsi3
	.proc __modsi3
__modsi3:
	.regstk 2,0,0,0
	mov	r2 = 0x0ffdd			// biased exponent for 2^-34
	sxt4	in0 = in0
	sxt4	in1 = in1
	;;
	// f13 keeps the integer dividend for the final multiply-add.
	setf.sig	f13 = in0
	setf.sig	f9 = in1
	// Check divide by zero: p7 is set when in1 == 0.
	cmp.ne.unc	p0, p7 = 0, in1
	;;
	sub	in1 = r0, in1			// in1 = -b
	fcvt.xf	f8 = f13			// a as FP
	fcvt.xf	f9 = f9				// b as FP
	;;
	setf.exp	f11 = r2		// f11 = 2^-34
	frcpa.s1	f10, p6 = f8, f9	// f10 = y0, p6 = refine
(p7)	break	1
	;;
(p6)	fmpy.s1	f12 = f8, f10			// q0 = a*y0
(p6)	fnma.s1	f10 = f9, f10, f1		// e  = 1 - b*y0
	;;
	setf.sig	f9 = in1		// f9 = -b (integer)
(p6)	fma.s1	f12 = f10, f12, f12		// q1 = e*q0 + q0
(p6)	fma.s1	f10 = f10, f10, f11		// e1 = e*e + 2^-34
	;;
(p6)	fma.s1	f10 = f10, f12, f12		// q2 = e1*q1 + q1
	;;
	// Convert the quotient to a signed integer, truncating.
	fcvt.fx.trunc.s1	f10 = f10
	;;
	// r = q * (-b) + a
	xma.l	f10 = f10, f9, f13
	;;
	getf.sig	ret0 = f10
	br.ret.sptk rp
	;;
	.endp __modsi3
#endif
|
|
|
|
#ifdef L__udivsi3
// __udivsi3: unsigned 32-bit integer division.
//
// The sequence is the minimum latency alternative from the Intel IA-64
// Optimization Guide.
//
// In:   in0 = dividend (a), in1 = divisor (b).  Both registers are
//       zero-extended from 32 bits in place before use.
// Out:  ret0 = quotient, truncated.
// Uses: r2, f8-f11, p6, p7.
// A zero divisor executes "break 1".

	.text
	.align 16
	.global __udivsi3
	.proc __udivsi3
__udivsi3:
	.regstk 2,0,0,0
	mov	r2 = 0x0ffdd			// biased exponent for 2^-34
	zxt4	in0 = in0
	zxt4	in1 = in1
	;;
	setf.sig	f8 = in0
	setf.sig	f9 = in1
	// Check divide by zero: p7 is set when in1 == 0.
	cmp.ne.unc	p0, p7 = 0, in1
	;;
	fcvt.xf	f8 = f8				// a as FP
	fcvt.xf	f9 = f9				// b as FP
(p7)	break	1
	;;
	setf.exp	f11 = r2		// f11 = 2^-34
	frcpa.s1	f10, p6 = f8, f9	// f10 = y0, p6 = refine
	;;
(p6)	fmpy.s1	f8 = f8, f10			// q0 = a*y0
(p6)	fnma.s1	f9 = f9, f10, f1		// e  = 1 - b*y0
	;;
(p6)	fma.s1	f8 = f9, f8, f8			// q1 = e*q0 + q0
(p6)	fma.s1	f9 = f9, f9, f11		// e1 = e*e + 2^-34
	;;
(p6)	fma.s1	f10 = f9, f8, f8		// q2 = e1*q1 + q1
	;;
	// Convert the quotient to an unsigned integer, truncating.
	fcvt.fxu.trunc.s1	f10 = f10
	;;
	getf.sig	ret0 = f10
	br.ret.sptk rp
	;;
	.endp __udivsi3
#endif
|
|
|
|
#ifdef L__umodsi3
// __umodsi3: unsigned 32-bit integer remainder.
//
// The sequence is the minimum latency alternative from the Intel IA-64
// Optimization Guide.
//
// In:   in0 = dividend (a), in1 = divisor (b).  Both registers are
//       zero-extended from 32 bits in place before use.
// Out:  ret0 = a - q*b, where q is the truncated quotient.
// Uses: r2, f8-f13, p6, p7; in1 is overwritten with -b.
// A zero divisor executes "break 1".

	.text
	.align 16
	.global __umodsi3
	.proc __umodsi3
__umodsi3:
	.regstk 2,0,0,0
	mov	r2 = 0x0ffdd			// biased exponent for 2^-34
	zxt4	in0 = in0
	zxt4	in1 = in1
	;;
	// f13 keeps the integer dividend for the final multiply-add.
	setf.sig	f13 = in0
	setf.sig	f9 = in1
	// Check divide by zero: p7 is set when in1 == 0.
	cmp.ne.unc	p0, p7 = 0, in1
	;;
	sub	in1 = r0, in1			// in1 = -b
	fcvt.xf	f8 = f13			// a as FP
	fcvt.xf	f9 = f9				// b as FP
	;;
	setf.exp	f11 = r2		// f11 = 2^-34
	frcpa.s1	f10, p6 = f8, f9	// f10 = y0, p6 = refine
(p7)	break	1
	;;
(p6)	fmpy.s1	f12 = f8, f10			// q0 = a*y0
(p6)	fnma.s1	f10 = f9, f10, f1		// e  = 1 - b*y0
	;;
	setf.sig	f9 = in1		// f9 = -b (integer)
(p6)	fma.s1	f12 = f10, f12, f12		// q1 = e*q0 + q0
(p6)	fma.s1	f10 = f10, f10, f11		// e1 = e*e + 2^-34
	;;
(p6)	fma.s1	f10 = f10, f12, f12		// q2 = e1*q1 + q1
	;;
	// Convert the quotient to an unsigned integer, truncating.
	fcvt.fxu.trunc.s1	f10 = f10
	;;
	// r = q * (-b) + a
	xma.l	f10 = f10, f9, f13
	;;
	getf.sig	ret0 = f10
	br.ret.sptk rp
	;;
	.endp __umodsi3
#endif
|
|
|
|
#ifdef L__save_stack_nonlocal
// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
//
// Fills the four 8-byte words of save_area:
//
//	save_area +  0	stack_pointer (in1)
//	save_area +  8	ar.bsp
//	save_area + 16	ar.rnat
//	save_area + 24	ar.pfs as returned by alloc
//
// Notes on save/restore stack nonlocal: ar.bsp is read here but the restore
// side writes ar.bspstore.  ar.bsp can be read at all times, independent of
// the RSE mode, but it is read-only, so the value has to be restored through
// ar.bspstore.  That is fine because ar.bsp == ar.bspstore once "flushrs" has
// executed.
//
// ar.rsc is rewritten with only bits 2..4 of its original value while
// ar.bsp and ar.rnat are read, and with those bits plus the two low bits
// set before returning.
//
// Uses: r2, r16-r19; in0 is modified.

	.text
	.align 16
	.global __ia64_save_stack_nonlocal
	.proc __ia64_save_stack_nonlocal
__ia64_save_stack_nonlocal:
	{ .mmf
	  alloc	r18 = ar.pfs, 2, 0, 0, 0	// r18 = ar.pfs
	  mov	r19 = ar.rsc			// r19 = current ar.rsc
	  ;;
	}
	{ .mmi
	  flushrs
	  st8	[in0] = in1, 24			// +0: stack_pointer; in0 += 24
	  and	r19 = 0x1c, r19			// keep bits 2..4 only
	  ;;
	}
	{ .mmi
	  st8	[in0] = r18, -16		// +24: pfs; in0 = save_area + 8
	  mov	ar.rsc = r19
	  or	r19 = 0x3, r19			// value for the final ar.rsc
	  ;;
	}
	{ .mmi
	  mov	r16 = ar.bsp
	  mov	r17 = ar.rnat
	  adds	r2 = 8, in0			// r2 = save_area + 16
	  ;;
	}
	{ .mmi
	  st8	[in0] = r16			// +8: bsp
	  st8	[r2] = r17			// +16: rnat
	}
	{ .mib
	  mov	ar.rsc = r19
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_save_stack_nonlocal
#endif
|
|
|
|
#ifdef L__nonlocal_goto
// void __ia64_nonlocal_goto(void *target_label, void *save_area,
//			     void *static_chain);
//
// Reloads state from the four 8-byte words of save_area and returns to
// target_label:
//
//	save_area +  0	-> r12
//	save_area +  8	-> ar.bspstore
//	save_area + 16	-> ar.rnat
//	save_area + 24	-> ar.pfs
//
// static_chain is copied to r15 and rp is set to target_label, so the
// final br.ret transfers control there.
//
// ar.rsc is rewritten with only bits 2..4 of its original value while
// ar.bspstore and ar.rnat are written and loadrs executes, and with those
// bits plus the two low bits set before returning.
//
// Uses: r15-r20; in1 is modified.

	.text
	.align 16
	.global __ia64_nonlocal_goto
	.proc __ia64_nonlocal_goto
__ia64_nonlocal_goto:
	{ .mmi
	  alloc	r20 = ar.pfs, 3, 0, 0, 0
	  ld8	r12 = [in1], 8			// r12 = save_area[0]
	  mov.ret.sptk rp = in0, .L0		// rp = target_label
	  ;;
	}
	{ .mmf
	  ld8	r16 = [in1], 8			// r16 = save_area[1]
	  mov	r19 = ar.rsc			// r19 = current ar.rsc
	  ;;
	}
	{ .mmi
	  flushrs
	  ld8	r17 = [in1], 8			// r17 = save_area[2]
	  and	r19 = 0x1c, r19			// keep bits 2..4 only
	  ;;
	}
	{ .mmi
	  ld8	r18 = [in1]			// r18 = save_area[3]
	  mov	ar.rsc = r19
	  or	r19 = 0x3, r19			// value for the final ar.rsc
	  ;;
	}
	{ .mmi
	  mov	ar.bspstore = r16
	  ;;
	  mov	ar.rnat = r17
	  ;;
	}
	{ .mmi
	  loadrs
	  invala
	  mov	r15 = in2			// r15 = static_chain
	  ;;
	}
.L0:	{ .mib
	  mov	ar.rsc = r19
	  mov	ar.pfs = r18
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_nonlocal_goto
#endif
|
|
|
|
#ifdef L__restore_stack_nonlocal
// void __ia64_restore_stack_nonlocal(void *save_area)
//
// This is mostly the same as nonlocal_goto above, without the target label
// and static chain handling.
// ??? This has not been tested yet.
//
// Reloads state from the four 8-byte words of save_area:
//
//	save_area +  0	-> r12
//	save_area +  8	-> ar.bspstore
//	save_area + 16	-> ar.rnat
//	save_area + 24	-> ar.pfs
//
// ar.rsc is rewritten with only bits 2..4 of its original value while
// ar.bspstore and ar.rnat are written and loadrs executes, and with those
// bits plus the two low bits set before returning.
//
// Uses: r16-r20; in0 is modified.
//
// NOTE(review): the alloc below declares 4 input registers although the
// prototype has a single argument -- confirm whether that is intended.
// NOTE(review): the .L0 label is not referenced anywhere in this routine.

	.text
	.align 16
	.global __ia64_restore_stack_nonlocal
	.proc __ia64_restore_stack_nonlocal
__ia64_restore_stack_nonlocal:
	{ .mmf
	  alloc	r20 = ar.pfs, 4, 0, 0, 0
	  ld8	r12 = [in0], 8			// r12 = save_area[0]
	  ;;
	}
	{ .mmb
	  ld8	r16 = [in0], 8			// r16 = save_area[1]
	  mov	r19 = ar.rsc			// r19 = current ar.rsc
	  ;;
	}
	{ .mmi
	  flushrs
	  ld8	r17 = [in0], 8			// r17 = save_area[2]
	  and	r19 = 0x1c, r19			// keep bits 2..4 only
	  ;;
	}
	{ .mmf
	  ld8	r18 = [in0]			// r18 = save_area[3]
	  mov	ar.rsc = r19
	  ;;
	}
	{ .mmi
	  mov	ar.bspstore = r16
	  ;;
	  mov	ar.rnat = r17
	  or	r19 = 0x3, r19			// value for the final ar.rsc
	  ;;
	}
	{ .mmf
	  loadrs
	  invala
	  ;;
	}
.L0:	{ .mib
	  mov	ar.rsc = r19
	  mov	ar.pfs = r18
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_restore_stack_nonlocal
#endif
|
|
|
|
#ifdef L__trampoline
// __ia64_trampoline: out-of-line body of the nested function trampoline.
//
// Keeping the code here rather than on the stack means the icache never
// has to be flushed, and it makes the on-stack trampoline smaller.
//
// The on-stack trampoline has the following form:
//
//		+-------------------+ >
//	TRAMP:	| __ia64_trampoline | |
//		+-------------------+  > fake function descriptor
//		| TRAMP+16          | |
//		+-------------------+ >
//		| target descriptor |
//		+-------------------+
//		| static link       |
//		+-------------------+
//
// The code below reads the target descriptor pointer and the static link
// through r1, so it expects r1 to hold TRAMP+16 (the second word of the
// fake descriptor) on entry.  It then leaves:
//
//	r15 = static link
//	r1  = second word of the target descriptor
//	b6  = first word of the target descriptor (branch target)
//
// Uses: r2, r3, b6.

	.text
	.align 16
	.global __ia64_trampoline
	.proc __ia64_trampoline
__ia64_trampoline:
	{ .mmi
	  ld8	r2 = [r1], 8			// r2 = target descriptor
	  ;;
	  ld8	r15 = [r1]			// r15 = static link
	}
	{ .mmi
	  ld8	r3 = [r2], 8			// r3 = descriptor word 0
	  ;;
	  ld8	r1 = [r2]			// r1 = descriptor word 1
	  mov	b6 = r3
	}
	{ .bbb
	  br.sptk.many b6
	  ;;
	}
	.endp __ia64_trampoline
#endif
|
|
|
|
#ifdef SHARED
// Thunks for backward compatibility.  Each one is a single branch from an
// old "tf" symbol name to the corresponding "xf" routine, and is only
// assembled for the shared library.  Every thunk selects .text explicitly,
// like the other routines in this file.

#ifdef L_fixtfdi
	// __fixtfti -> __fixxfti
	.text
	.align 16
	.global __fixtfti
	.proc __fixtfti
__fixtfti:
	{ .bbb
	  br.sptk.many __fixxfti
	  ;;
	}
	.endp __fixtfti
#endif

#ifdef L_fixunstfdi
	// __fixunstfti -> __fixunsxfti
	.text
	.align 16
	.global __fixunstfti
	.proc __fixunstfti
__fixunstfti:
	{ .bbb
	  br.sptk.many __fixunsxfti
	  ;;
	}
	.endp __fixunstfti
#endif

#ifdef L_floatditf
	// __floattitf -> __floattixf
	.text
	.align 16
	.global __floattitf
	.proc __floattitf
__floattitf:
	{ .bbb
	  br.sptk.many __floattixf
	  ;;
	}
	.endp __floattitf
#endif

#endif
|