MIPS-specific optimizations.
This commit is contained in:
parent
280ad607c7
commit
af43a56542
|
@ -0,0 +1,38 @@
|
||||||
|
/* Copyright (C) 2002 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
|
||||||
|
#include <sgidefs.h>
|
||||||
|
|
||||||
|
|
||||||
|
#if (_MIPS_ISA >= _MIPS_ISA_MIPS2)
|
||||||
|
|
||||||
|
double
|
||||||
|
__ieee754_sqrt (double x)
|
||||||
|
{
|
||||||
|
double z;
|
||||||
|
__asm__ ("sqrt.d %0,%1" : "=f" (z) : "f" (x));
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/e_sqrt.c>
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,39 @@
|
||||||
|
/* Copyright (C) 2002 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
|
||||||
|
#include <sgidefs.h>
|
||||||
|
|
||||||
|
|
||||||
|
#if (_MIPS_ISA >= _MIPS_ISA_MIPS2)
|
||||||
|
|
||||||
|
float
|
||||||
|
__ieee754_sqrtf (float x)
|
||||||
|
{
|
||||||
|
float z;
|
||||||
|
__asm__ ("sqrt.s %0,%1" : "=f" (z) : "f" (x));
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/flt-32/e_sqrtf.c>
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
/* Set floating-point environment exception handling.
|
||||||
|
Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <fenv.h>
|
||||||
|
#include <fpu_control.h>
|
||||||
|
|
||||||
|
/* Set the exception flags selected by EXCEPTS to the state stored in
   *FLAGP.  Bits of EXCEPTS outside FE_ALL_EXCEPT are ignored, and bits of
   *FLAGP not selected by EXCEPTS have no effect.  Always returns 0.  */
int
fesetexceptflag (const fexcept_t *flagp, int excepts)
{
  /* Hold the control/status word in fpu_control_t, the type the
     _FPU_GETCW/_FPU_SETCW macros are written for.  fexcept_t is a
     narrower type on MIPS, so using it here would drop the upper bits
     of the register on the read and write them back as zero.  */
  fpu_control_t temp;

  /* Get the current exceptions.  */
  _FPU_GETCW (temp);

  /* Make sure the flags we want restored are legal.  */
  excepts &= FE_ALL_EXCEPT;

  /* Now clear the bits called for, and copy them in from flagp.  Note that
     we ignore all non-flag bits from *flagp, so they don't matter.  */
  temp = (temp & ~excepts) | (*flagp & excepts);

  _FPU_SETCW (temp);

  /* Success.  */
  return 0;
}
|
|
@ -0,0 +1,130 @@
|
||||||
|
/* Copyright (C) 2002 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <endian.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* void *memcpy(void *s1, const void *s2, size_t n);
|
||||||
|
|
||||||
|
This routine could be optimized further for MIPS64, but this is left
|
||||||
|
as an exercise for the future. When it is done, the file should be kept
|
||||||
|
as a sister file to this one, and placed in the sysdeps/mips/mips64
|
||||||
|
directory. */
|
||||||
|
|
||||||
|
/* Pick the partial-word load/store instructions according to byte order,
   so the copy code can use the endian-neutral names LWHI/LWLO/SWHI/SWLO.
   In the code that follows, LWHI/SWHI are applied at the current address
   and LWLO at that address plus 3; an LWHI/LWLO pair reads one word from
   an address that need not be word aligned.  */
#if __BYTE_ORDER == __BIG_ENDIAN
|
||||||
|
# define LWHI lwl /* high part is left in big-endian */
|
||||||
|
# define SWHI swl /* high part is left in big-endian */
|
||||||
|
# define LWLO lwr /* low part is right in big-endian */
|
||||||
|
# define SWLO swr /* low part is right in big-endian */
|
||||||
|
#else
|
||||||
|
# define LWHI lwr /* high part is right in little-endian */
|
||||||
|
# define SWHI swr /* high part is right in little-endian */
|
||||||
|
# define LWLO lwl /* low part is left in little-endian */
|
||||||
|
# define SWLO swl /* low part is left in little-endian */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* memcpy: copy a2 bytes from the address in a1 to the address in a0 and
   return the original a0 in v0.

   Paths taken below:
     - fewer than 8 bytes: byte loop at $last8;
     - a0 and a1 have the same low two address bits: one LWHI/SWHI pair
       brings both to a word boundary, then $lop8w copies 32 bytes per
       iteration, $lop1w copies one word per iteration, and $last8
       copies the remaining bytes;
     - otherwise ($shift): only a0 is brought to a word boundary, and each
       source word is read with an LWHI/LWLO pair and stored with sw.

   The body is assembled under .set noreorder, so the instruction written
   after each branch or jump is its delay slot and executes whether or
   not the branch is taken.  */
ENTRY (memcpy)
|
||||||
|
.set noreorder
|
||||||
|
|
||||||
|
slti t0, a2, 8 # Less than 8?
|
||||||
|
bne t0, zero, $last8 # Yes: copy byte by byte
|
||||||
|
move v0, a0 # (delay slot) Setup exit value before too late
|
||||||
|
|
||||||
|
xor t0, a1, a0 # Find a0/a1 displacement
|
||||||
|
andi t0, 0x3 # Keep only the low two address bits
|
||||||
|
bne t0, zero, $shift # Go handle the unaligned case
|
||||||
|
subu t1, zero, a1 # (delay slot) t1 = -a1
|
||||||
|
andi t1, 0x3 # a0/a1 are aligned, but are we
|
||||||
|
beq t1, zero, $chk8w # starting in the middle of a word?
|
||||||
|
subu a2, t1 # (delay slot) a2 -= bytes up to the word boundary
|
||||||
|
LWHI t0, 0(a1) # Yes we are... take care of that
|
||||||
|
addu a1, t1 # Advance source to the word boundary
|
||||||
|
SWHI t0, 0(a0) # Store the same leading partial word
|
||||||
|
addu a0, t1 # Advance destination by the same amount
|
||||||
|
|
||||||
|
$chk8w: andi t0, a2, 0x1f # 32 or more bytes left?
|
||||||
|
beq t0, a2, $chk1w # No: skip the 8-word loop
|
||||||
|
subu a3, a2, t0 # (delay slot) Yes: a3 = bytes covered by the loop
|
||||||
|
addu a3, a1 # a3 = end address of loop
|
||||||
|
move a2, t0 # a2 = what will be left after loop
|
||||||
|
$lop8w: lw t0, 0(a1) # Loop taking 8 words at a time
|
||||||
|
lw t1, 4(a1)
|
||||||
|
lw t2, 8(a1)
|
||||||
|
lw t3, 12(a1)
|
||||||
|
lw t4, 16(a1)
|
||||||
|
lw t5, 20(a1)
|
||||||
|
lw t6, 24(a1)
|
||||||
|
lw t7, 28(a1)
|
||||||
|
addiu a0, 32 # Bump both pointers first; the stores
|
||||||
|
addiu a1, 32 # below use negative offsets from a0
|
||||||
|
sw t0, -32(a0)
|
||||||
|
sw t1, -28(a0)
|
||||||
|
sw t2, -24(a0)
|
||||||
|
sw t3, -20(a0)
|
||||||
|
sw t4, -16(a0)
|
||||||
|
sw t5, -12(a0)
|
||||||
|
sw t6, -8(a0)
|
||||||
|
bne a1, a3, $lop8w # Repeat until the source reaches a3
|
||||||
|
sw t7, -4(a0) # (delay slot) last store of the iteration
|
||||||
|
|
||||||
|
$chk1w: andi t0, a2, 0x3 # 4 or more bytes left?
|
||||||
|
beq t0, a2, $last8 # No: go straight to the byte loop
|
||||||
|
subu a3, a2, t0 # (delay slot) Yes, handle them one word at a time
|
||||||
|
addu a3, a1 # a3 again end address
|
||||||
|
move a2, t0 # a2 = bytes left after the word loop
|
||||||
|
$lop1w: lw t0, 0(a1) # Copy one word per iteration
|
||||||
|
addiu a0, 4
|
||||||
|
addiu a1, 4
|
||||||
|
bne a1, a3, $lop1w # Repeat until the source reaches a3
|
||||||
|
sw t0, -4(a0) # (delay slot) store the word just loaded
|
||||||
|
|
||||||
|
$last8: blez a2, $lst8e # Handle the remaining bytes, one at a time
|
||||||
|
addu a3, a2, a1 # (delay slot) a3 = source end address
|
||||||
|
$lst8l: lb t0, 0(a1) # Copy one byte per iteration
|
||||||
|
addiu a0, 1
|
||||||
|
addiu a1, 1
|
||||||
|
bne a1, a3, $lst8l # Repeat until the source reaches a3
|
||||||
|
sb t0, -1(a0) # (delay slot) store the byte just loaded
|
||||||
|
$lst8e: jr ra # Return to the caller; v0 was set on entry
|
||||||
|
nop # (delay slot)
|
||||||
|
|
||||||
|
$shift: subu a3, zero, a0 # Src and Dest unaligned
|
||||||
|
andi a3, 0x3 # (unoptimized case...)
|
||||||
|
beq a3, zero, $shft1 # Destination is already word aligned
|
||||||
|
subu a2, a3 # (delay slot) a2 = bytes left
|
||||||
|
LWHI t0, 0(a1) # Take care of first odd part
|
||||||
|
LWLO t0, 3(a1) # LWHI and LWLO together read one unaligned word
|
||||||
|
addu a1, a3 # Advance source by the alignment byte count
|
||||||
|
SWHI t0, 0(a0) # Store only up to the destination word boundary
|
||||||
|
addu a0, a3 # Destination is now word aligned
|
||||||
|
$shft1: andi t0, a2, 0x3 # t0 = bytes left over after whole words
|
||||||
|
subu a3, a2, t0 # a3 = bytes to copy as whole words
|
||||||
|
addu a3, a1 # a3 = source end address of the word loop
|
||||||
|
$shfth: LWHI t1, 0(a1) # Limp through, word by word
|
||||||
|
LWLO t1, 3(a1)
|
||||||
|
addiu a0, 4
|
||||||
|
addiu a1, 4
|
||||||
|
bne a1, a3, $shfth # Repeat until the source reaches a3
|
||||||
|
sw t1, -4(a0) # (delay slot) aligned store to the destination
|
||||||
|
b $last8 # Handle anything which may be left
|
||||||
|
move a2, t0 # (delay slot) a2 = remaining byte count
|
||||||
|
|
||||||
|
.set reorder
|
||||||
|
END (memcpy)
|
|
@ -0,0 +1,83 @@
|
||||||
|
/* Copyright (C) 2002 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <endian.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* void *memset(void *s, int c, size_t n).
|
||||||
|
|
||||||
|
This routine could be optimized further for MIPS64, but this is left
|
||||||
|
as an exercise for the future. When it is done, the file should be kept
|
||||||
|
as a sister file to this one, and placed in the sysdeps/mips/mips64
|
||||||
|
directory. */
|
||||||
|
|
||||||
|
/* Pick the partial-word store instruction according to byte order, so the
   fill code can use the endian-neutral name SWHI.  It is used once below,
   at the start address, before that address is advanced to the next word
   boundary.  */
#if __BYTE_ORDER == __BIG_ENDIAN
|
||||||
|
# define SWHI swl /* high part is left in big-endian */
|
||||||
|
#else
|
||||||
|
# define SWHI swr /* high part is right in little-endian */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* memset: fill a2 bytes starting at the address in a0 with the low byte
   of a1 and return the original a0 in v0.

   Steps taken below:
     - fewer than 8 bytes: byte loop at $last8;
     - otherwise the fill byte is replicated into all four bytes of a1,
       one SWHI store brings a0 to a word boundary, $loopw stores two
       words per iteration, $chkl stores one more word if at least four
       bytes remain, and $last8 stores the remaining bytes.

   The body is assembled under .set noreorder, so the instruction written
   after each branch or jump is its delay slot and executes whether or
   not the branch is taken.  */
ENTRY (memset)
|
||||||
|
.set noreorder
|
||||||
|
|
||||||
|
slti t1, a2, 8 # Less than 8?
|
||||||
|
bne t1, zero, $last8 # Yes: fill byte by byte
|
||||||
|
move v0, a0 # (delay slot) Setup exit value before too late
|
||||||
|
|
||||||
|
beq a1, zero, $ueven # If zero pattern, no need to extend
|
||||||
|
andi a1, 0xff # (delay slot) Avoid problems with bogus arguments
|
||||||
|
sll t0, a1, 8 # t0 = fill byte shifted into bits 8-15
|
||||||
|
or a1, t0 # a1 = fill byte in both low bytes
|
||||||
|
sll t0, a1, 16 # t0 = that halfword shifted into bits 16-31
|
||||||
|
or a1, t0 # a1 is now pattern in full word
|
||||||
|
|
||||||
|
$ueven: subu t0, zero, a0 # Unaligned address?
|
||||||
|
andi t0, 0x3 # t0 = bytes up to the next word boundary (0-3)
|
||||||
|
beq t0, zero, $chkw # No: already word aligned
|
||||||
|
subu a2, t0 # (delay slot) a2 -= alignment byte count
|
||||||
|
SWHI a1, 0(a0) # Yes, handle first unaligned part
|
||||||
|
addu a0, t0 # Now both a0 and a2 are updated
|
||||||
|
|
||||||
|
$chkw: andi t0, a2, 0x7 # Enough left for one loop iteration?
|
||||||
|
beq t0, a2, $chkl # No: fewer than 8 bytes left
|
||||||
|
subu a3, a2, t0 # (delay slot) a3 = bytes covered by the loop
|
||||||
|
addu a3, a0 # a3 is last loop address +1
|
||||||
|
move a2, t0 # a2 is now # of bytes left after loop
|
||||||
|
$loopw: addiu a0, 8 # Handle 2 words per iteration
|
||||||
|
sw a1, -8(a0) # First word of the pair
|
||||||
|
bne a0, a3, $loopw # Repeat until a0 reaches a3
|
||||||
|
sw a1, -4(a0) # (delay slot) second word of the pair
|
||||||
|
|
||||||
|
$chkl: andi t0, a2, 0x4 # Check if there is at least a full
|
||||||
|
beq t0, zero, $last8 # word remaining after the loop
|
||||||
|
subu a2, t0 # (delay slot) a2 -= 4 if so, else unchanged
|
||||||
|
sw a1, 0(a0) # Yes...
|
||||||
|
addiu a0, 4 # Step past the word just stored
|
||||||
|
|
||||||
|
$last8: blez a2, $exit # Handle the remaining bytes (if cnt>0)
|
||||||
|
addu a3, a2, a0 # a3 is last address +1
|
||||||
|
$lst8l: addiu a0, 1 # Store one byte per iteration
|
||||||
|
bne a0, a3, $lst8l # Repeat until a0 reaches a3
|
||||||
|
sb a1, -1(a0) # (delay slot) store the low byte of a1
|
||||||
|
$exit: j ra # Return to the caller; v0 was set on entry
|
||||||
|
nop # (delay slot)
|
||||||
|
|
||||||
|
.set reorder
|
||||||
|
END (memset)
|
Loading…
Reference in New Issue