Make mantissa type of mp_no configurable
The mantissa of mp_no is intended to take only integral values. This is a relatively good choice for powerpc due to its 4 fpus, but not for other architectures, which suffer due to this choice. This change makes the default mantissa a long integer and allows powerpc to override it. Additionally, some operations have been optimized for integer manipulation, resulting in a significant improvement in performance.
This commit is contained in:
parent
fce14d4e9c
commit
6f2e90e78f
19
ChangeLog
19
ChangeLog
@ -1,3 +1,22 @@
|
||||
2013-03-26 Siddhesh Poyarekar <siddhesh@redhat.com>
|
||||
|
||||
* sysdeps/ieee754/dbl-64/mpa-arch.h: New file.
|
||||
* sysdeps/ieee754/dbl-64/mpa.c (norm): Use MANTISSA_T and
|
||||
MANTISSA_STORE_T to store computations on mantissa. Use
|
||||
macros for rounding and division.
|
||||
(denorm): Likewise.
|
||||
(__dbl_mp): Likewise.
|
||||
(add_magnitudes): Likewise.
|
||||
(sub_magnitudes): Likewise.
|
||||
(__mul): Likewise.
|
||||
(__sqr): Likewise.
|
||||
* sysdeps/ieee754/dbl-64/mpa.h: Include mpa-arch.h. Define
|
||||
powers of two in terms of TWOPOW macro.
|
||||
(mp_no): Make type of mantissa as MANTISSA_T.
|
||||
[!RADIXI]: Define RADIXI.
|
||||
[!TWO52]: Define TWO52.
|
||||
* sysdeps/powerpc/power4/fpu/mpa-arch.h: New file.
|
||||
|
||||
2013-03-25 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
|
||||
|
||||
* sysdeps/powerpc/fpu/s_llround.c: Fix libm ABI issue with missing
|
||||
|
47
sysdeps/ieee754/dbl-64/mpa-arch.h
Normal file
47
sysdeps/ieee754/dbl-64/mpa-arch.h
Normal file
@ -0,0 +1,47 @@
|
||||
/* Overridable constants and operations.
|
||||
Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef long mantissa_t;
|
||||
typedef int64_t mantissa_store_t;
|
||||
|
||||
#define TWOPOW(i) (1L << i)
|
||||
|
||||
#define RADIX_EXP 24
|
||||
#define RADIX TWOPOW (RADIX_EXP) /* 2^24 */
|
||||
|
||||
/* Divide D by RADIX and put the remainder in R. D must be a non-negative
|
||||
integral value. */
|
||||
#define DIV_RADIX(d, r) \
|
||||
({ \
|
||||
r = d & (RADIX - 1); \
|
||||
d >>= RADIX_EXP; \
|
||||
})
|
||||
|
||||
/* Put the integer component of a double X in R and retain the fraction in
|
||||
X. This is used in extracting mantissa digits for MP_NO by using the
|
||||
integer portion of the current value of the number as the current mantissa
|
||||
digit and then scaling by RADIX to get the next mantissa digit in the same
|
||||
manner. */
|
||||
#define INTEGER_OF(x, i) \
|
||||
({ \
|
||||
i = (mantissa_t) x; \
|
||||
x -= i; \
|
||||
})
|
||||
|
||||
/* Align IN down to F. The code assumes that F is a power of two. */
|
||||
#define ALIGN_DOWN_TO(in, f) ((in) & -(f))
|
@ -125,7 +125,8 @@ norm (const mp_no *x, double *y, int p)
|
||||
{
|
||||
#define R RADIXI
|
||||
long i;
|
||||
double a, c, u, v, z[5];
|
||||
double c;
|
||||
mantissa_t a, u, v, z[5];
|
||||
if (p < 5)
|
||||
{
|
||||
if (p == 1)
|
||||
@ -147,17 +148,14 @@ norm (const mp_no *x, double *y, int p)
|
||||
|
||||
for (i = 2; i < 5; i++)
|
||||
{
|
||||
z[i] = X[i] * a;
|
||||
u = (z[i] + CUTTER) - CUTTER;
|
||||
if (u > z[i])
|
||||
u -= RADIX;
|
||||
z[i] -= u;
|
||||
z[i - 1] += u * RADIXI;
|
||||
mantissa_store_t d, r;
|
||||
d = X[i] * (mantissa_store_t) a;
|
||||
DIV_RADIX (d, r);
|
||||
z[i] = r;
|
||||
z[i - 1] += d;
|
||||
}
|
||||
|
||||
u = (z[3] + TWO71) - TWO71;
|
||||
if (u > z[3])
|
||||
u -= TWO19;
|
||||
u = ALIGN_DOWN_TO (z[3], TWO19);
|
||||
v = z[3] - u;
|
||||
|
||||
if (v == TWO18)
|
||||
@ -200,7 +198,8 @@ denorm (const mp_no *x, double *y, int p)
|
||||
{
|
||||
long i, k;
|
||||
long p2 = p;
|
||||
double c, u, z[5];
|
||||
double c;
|
||||
mantissa_t u, z[5];
|
||||
|
||||
#define R RADIXI
|
||||
if (EX < -44 || (EX == -44 && X[1] < TWO5))
|
||||
@ -280,9 +279,7 @@ denorm (const mp_no *x, double *y, int p)
|
||||
z[3] = X[k];
|
||||
}
|
||||
|
||||
u = (z[3] + TWO57) - TWO57;
|
||||
if (u > z[3])
|
||||
u -= TWO5;
|
||||
u = ALIGN_DOWN_TO (z[3], TWO5);
|
||||
|
||||
if (u == z[3])
|
||||
{
|
||||
@ -330,7 +327,6 @@ __dbl_mp (double x, mp_no *y, int p)
|
||||
{
|
||||
long i, n;
|
||||
long p2 = p;
|
||||
double u;
|
||||
|
||||
/* Sign. */
|
||||
if (x == ZERO)
|
||||
@ -356,11 +352,7 @@ __dbl_mp (double x, mp_no *y, int p)
|
||||
n = MIN (p2, 4);
|
||||
for (i = 1; i <= n; i++)
|
||||
{
|
||||
u = (x + TWO52) - TWO52;
|
||||
if (u > x)
|
||||
u -= ONE;
|
||||
Y[i] = u;
|
||||
x -= u;
|
||||
INTEGER_OF (x, Y[i]);
|
||||
x *= RADIX;
|
||||
}
|
||||
for (; i <= p2; i++)
|
||||
@ -377,7 +369,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
||||
{
|
||||
long i, j, k;
|
||||
long p2 = p;
|
||||
double zk;
|
||||
mantissa_t zk;
|
||||
|
||||
EZ = EX;
|
||||
|
||||
@ -445,7 +437,7 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
||||
{
|
||||
long i, j, k;
|
||||
long p2 = p;
|
||||
double zk;
|
||||
mantissa_t zk;
|
||||
|
||||
EZ = EX;
|
||||
i = p2;
|
||||
@ -621,9 +613,9 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
||||
{
|
||||
long i, j, k, ip, ip2;
|
||||
long p2 = p;
|
||||
double u, zk;
|
||||
mantissa_store_t zk;
|
||||
const mp_no *a;
|
||||
double *diag;
|
||||
mantissa_store_t *diag;
|
||||
|
||||
/* Is z=0? */
|
||||
if (__glibc_unlikely (X[0] * Y[0] == ZERO))
|
||||
@ -680,11 +672,11 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
||||
|
||||
/* Precompute sums of diagonal elements so that we can directly use them
|
||||
later. See the next comment to know we why need them. */
|
||||
diag = alloca (k * sizeof (double));
|
||||
double d = ZERO;
|
||||
diag = alloca (k * sizeof (mantissa_store_t));
|
||||
mantissa_store_t d = ZERO;
|
||||
for (i = 1; i <= ip; i++)
|
||||
{
|
||||
d += X[i] * Y[i];
|
||||
d += X[i] * (mantissa_store_t) Y[i];
|
||||
diag[i] = d;
|
||||
}
|
||||
while (i < k)
|
||||
@ -697,18 +689,15 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
||||
if (k % 2 == 0)
|
||||
/* We want to add this only once, but since we subtract it in the sum
|
||||
of products above, we add twice. */
|
||||
zk += 2 * X[lim] * Y[lim];
|
||||
zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
|
||||
|
||||
for (i = k - p2, j = p2; i < j; i++, j--)
|
||||
zk += (X[i] + X[j]) * (Y[i] + Y[j]);
|
||||
zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
|
||||
|
||||
zk -= diag[k - 1];
|
||||
|
||||
u = (zk + CUTTER) - CUTTER;
|
||||
if (u > zk)
|
||||
u -= RADIX;
|
||||
Z[k--] = zk - u;
|
||||
zk = u * RADIXI;
|
||||
DIV_RADIX (zk, Z[k]);
|
||||
k--;
|
||||
}
|
||||
|
||||
/* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
|
||||
@ -731,18 +720,15 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
||||
if (k % 2 == 0)
|
||||
/* We want to add this only once, but since we subtract it in the sum
|
||||
of products above, we add twice. */
|
||||
zk += 2 * X[lim] * Y[lim];
|
||||
zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
|
||||
|
||||
for (i = 1, j = k - 1; i < j; i++, j--)
|
||||
zk += (X[i] + X[j]) * (Y[i] + Y[j]);
|
||||
zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
|
||||
|
||||
zk -= diag[k - 1];
|
||||
|
||||
u = (zk + CUTTER) - CUTTER;
|
||||
if (u > zk)
|
||||
u -= RADIX;
|
||||
Z[k--] = zk - u;
|
||||
zk = u * RADIXI;
|
||||
DIV_RADIX (zk, Z[k]);
|
||||
k--;
|
||||
}
|
||||
Z[k] = zk;
|
||||
|
||||
@ -774,7 +760,7 @@ SECTION
|
||||
__sqr (const mp_no *x, mp_no *y, int p)
|
||||
{
|
||||
long i, j, k, ip;
|
||||
double u, yk;
|
||||
mantissa_store_t yk;
|
||||
|
||||
/* Is z=0? */
|
||||
if (__glibc_unlikely (X[0] == ZERO))
|
||||
@ -798,11 +784,11 @@ __sqr (const mp_no *x, mp_no *y, int p)
|
||||
|
||||
while (k > p)
|
||||
{
|
||||
double yk2 = 0.0;
|
||||
mantissa_store_t yk2 = 0;
|
||||
long lim = k / 2;
|
||||
|
||||
if (k % 2 == 0)
|
||||
yk += X[lim] * X[lim];
|
||||
yk += X[lim] * (mantissa_store_t) X[lim];
|
||||
|
||||
/* In __mul, this loop (and the one within the next while loop) run
|
||||
between a range to calculate the mantissa as follows:
|
||||
@ -814,36 +800,30 @@ __sqr (const mp_no *x, mp_no *y, int p)
|
||||
result. For cases where the range size is even, the mid-point needs
|
||||
to be added separately (above). */
|
||||
for (i = k - p, j = p; i < j; i++, j--)
|
||||
yk2 += X[i] * X[j];
|
||||
yk2 += X[i] * (mantissa_store_t) X[j];
|
||||
|
||||
yk += 2.0 * yk2;
|
||||
|
||||
u = (yk + CUTTER) - CUTTER;
|
||||
if (u > yk)
|
||||
u -= RADIX;
|
||||
Y[k--] = yk - u;
|
||||
yk = u * RADIXI;
|
||||
DIV_RADIX (yk, Y[k]);
|
||||
k--;
|
||||
}
|
||||
|
||||
while (k > 1)
|
||||
{
|
||||
double yk2 = 0.0;
|
||||
mantissa_store_t yk2 = 0;
|
||||
long lim = k / 2;
|
||||
|
||||
if (k % 2 == 0)
|
||||
yk += X[lim] * X[lim];
|
||||
yk += X[lim] * (mantissa_store_t) X[lim];
|
||||
|
||||
/* Likewise for this loop. */
|
||||
for (i = 1, j = k - 1; i < j; i++, j--)
|
||||
yk2 += X[i] * X[j];
|
||||
yk2 += X[i] * (mantissa_store_t) X[j];
|
||||
|
||||
yk += 2.0 * yk2;
|
||||
|
||||
u = (yk + CUTTER) - CUTTER;
|
||||
if (u > yk)
|
||||
u -= RADIX;
|
||||
Y[k--] = yk - u;
|
||||
yk = u * RADIXI;
|
||||
DIV_RADIX (yk, Y[k]);
|
||||
k--;
|
||||
}
|
||||
Y[k] = yk;
|
||||
|
||||
|
@ -35,6 +35,7 @@
|
||||
/* Common types and definition */
|
||||
/************************************************************************/
|
||||
|
||||
#include <mpa-arch.h>
|
||||
|
||||
/* The mp_no structure holds the details of a multi-precision floating point
|
||||
number.
|
||||
@ -61,7 +62,7 @@
|
||||
typedef struct
|
||||
{
|
||||
int e;
|
||||
double d[40];
|
||||
mantissa_t d[40];
|
||||
} mp_no;
|
||||
|
||||
typedef union
|
||||
@ -82,9 +83,13 @@ extern const mp_no mptwo;
|
||||
|
||||
#define ABS(x) ((x) < 0 ? -(x) : (x))
|
||||
|
||||
#define RADIX 0x1.0p24 /* 2^24 */
|
||||
#define RADIXI 0x1.0p-24 /* 2^-24 */
|
||||
#define CUTTER 0x1.0p76 /* 2^76 */
|
||||
#ifndef RADIXI
|
||||
# define RADIXI 0x1.0p-24 /* 2^-24 */
|
||||
#endif
|
||||
|
||||
#ifndef TWO52
|
||||
# define TWO52 0x1.0p52 /* 2^52 */
|
||||
#endif
|
||||
|
||||
#define ZERO 0.0 /* 0 */
|
||||
#define MZERO -0.0 /* 0 with the sign bit set */
|
||||
@ -92,13 +97,13 @@ extern const mp_no mptwo;
|
||||
#define MONE -1.0 /* -1 */
|
||||
#define TWO 2.0 /* 2 */
|
||||
|
||||
#define TWO5 0x1.0p5 /* 2^5 */
|
||||
#define TWO8 0x1.0p8 /* 2^52 */
|
||||
#define TWO10 0x1.0p10 /* 2^10 */
|
||||
#define TWO18 0x1.0p18 /* 2^18 */
|
||||
#define TWO19 0x1.0p19 /* 2^19 */
|
||||
#define TWO23 0x1.0p23 /* 2^23 */
|
||||
#define TWO52 0x1.0p52 /* 2^52 */
|
||||
#define TWO5 TWOPOW (5) /* 2^5 */
|
||||
#define TWO8 TWOPOW (8) /* 2^52 */
|
||||
#define TWO10 TWOPOW (10) /* 2^10 */
|
||||
#define TWO18 TWOPOW (18) /* 2^18 */
|
||||
#define TWO19 TWOPOW (19) /* 2^19 */
|
||||
#define TWO23 TWOPOW (23) /* 2^23 */
|
||||
|
||||
#define TWO57 0x1.0p57 /* 2^57 */
|
||||
#define TWO71 0x1.0p71 /* 2^71 */
|
||||
#define TWOM1032 0x1.0p-1032 /* 2^-1032 */
|
||||
|
56
sysdeps/powerpc/power4/fpu/mpa-arch.h
Normal file
56
sysdeps/powerpc/power4/fpu/mpa-arch.h
Normal file
@ -0,0 +1,56 @@
|
||||
/* Overridable constants and operations.
|
||||
Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
typedef double mantissa_t;
|
||||
typedef double mantissa_store_t;
|
||||
|
||||
#define TWOPOW(i) (0x1.0p##i)
|
||||
|
||||
#define RADIX TWOPOW (24) /* 2^24 */
|
||||
#define CUTTER TWOPOW (76) /* 2^76 */
|
||||
#define RADIXI 0x1.0p-24 /* 2^-24 */
|
||||
#define TWO52 TWOPOW (52) /* 2^52 */
|
||||
|
||||
/* Divide D by RADIX and put the remainder in R. */
|
||||
#define DIV_RADIX(d,r) \
|
||||
({ \
|
||||
double u = ((d) + CUTTER) - CUTTER; \
|
||||
if (u > (d)) \
|
||||
u -= RADIX; \
|
||||
r = (d) - u; \
|
||||
(d) = u * RADIXI; \
|
||||
})
|
||||
|
||||
/* Put the integer component of a double X in R and retain the fraction in
|
||||
X. */
|
||||
#define INTEGER_OF(x, r) \
|
||||
({ \
|
||||
double u = ((x) + TWO52) - TWO52; \
|
||||
if (u > (x)) \
|
||||
u -= ONE; \
|
||||
(r) = u; \
|
||||
(x) -= u; \
|
||||
})
|
||||
|
||||
/* Align IN down to a multiple of F, where F is a power of two. */
|
||||
#define ALIGN_DOWN_TO(in, f) \
|
||||
({ \
|
||||
double factor = f * TWO52; \
|
||||
double u = (in + factor) - factor; \
|
||||
if (u > in) \
|
||||
u -= f; \
|
||||
u; \
|
||||
})
|
Loading…
x
Reference in New Issue
Block a user