From 31d3cc00b0cc5205b4b4efd73c911cfddff444c6 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 25 Oct 2011 00:56:33 -0400 Subject: [PATCH] Cleanup FMA4 patch Move the FMA4 code into its own section. Avoid some of the duplication of data resulting from the double use of source files. --- ChangeLog | 50 ++++++++++++++ sysdeps/ieee754/dbl-64/branred.c | 10 ++- sysdeps/ieee754/dbl-64/doasin.c | 8 ++- sysdeps/ieee754/dbl-64/dosincos.c | 16 ++++- sysdeps/ieee754/dbl-64/e_asin.c | 12 +++- sysdeps/ieee754/dbl-64/e_atan2.c | 16 ++++- sysdeps/ieee754/dbl-64/e_exp.c | 12 +++- sysdeps/ieee754/dbl-64/e_log.c | 8 ++- sysdeps/ieee754/dbl-64/e_pow.c | 24 +++++-- sysdeps/ieee754/dbl-64/halfulp.c | 8 ++- sysdeps/ieee754/dbl-64/mpa.c | 61 +++++++++------- sysdeps/ieee754/dbl-64/mpa.h | 2 +- sysdeps/ieee754/dbl-64/mpatan.c | 29 +++++--- sysdeps/ieee754/dbl-64/mpatan.h | 73 +++++++++++--------- sysdeps/ieee754/dbl-64/mpatan2.c | 15 ++-- sysdeps/ieee754/dbl-64/mpexp.c | 43 +++++++----- sysdeps/ieee754/dbl-64/mpexp.h | 70 +++++++++++-------- sysdeps/ieee754/dbl-64/mpsqrt.c | 18 +++-- sysdeps/ieee754/dbl-64/mpsqrt.h | 29 +++++--- sysdeps/ieee754/dbl-64/mptan.c | 11 ++- sysdeps/ieee754/dbl-64/s_sin.c | 64 +++++++++++++---- sysdeps/ieee754/dbl-64/s_tan.c | 12 +++- sysdeps/ieee754/dbl-64/sincos32.c | 50 ++++++++++---- sysdeps/ieee754/dbl-64/slowexp.c | 12 +++- sysdeps/ieee754/dbl-64/slowpow.c | 12 +++- sysdeps/x86_64/fpu/multiarch/brandred-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/doasin-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/e_log-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/mpa-fma4.c | 2 + sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c | 2 + sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c | 2 + sysdeps/x86_64/fpu/multiarch/mplog-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c | 2 + sysdeps/x86_64/fpu/multiarch/mptan-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c | 2 +- sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c | 1 + 47 files changed, 494 insertions(+), 198 deletions(-) diff --git a/ChangeLog b/ChangeLog index 992ad4cb2a..2383ec8229 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,53 @@ +2011-10-25 Ulrich Drepper + + * sysdeps/ieee754/dbl-64/branred.c: Move FMA4 code into separate + .text section. Avoid duplicate constants. + * sysdeps/ieee754/dbl-64/doasin.c: Likewise. + * sysdeps/ieee754/dbl-64/dosincos.c: Likewise. + * sysdeps/ieee754/dbl-64/e_asin.c: Likewise. + * sysdeps/ieee754/dbl-64/e_atan2.c: Likewise. + * sysdeps/ieee754/dbl-64/e_exp.c: Likewise. + * sysdeps/ieee754/dbl-64/e_log.c: Likewise. + * sysdeps/ieee754/dbl-64/e_pow.c: Likewise. + * sysdeps/ieee754/dbl-64/halfulp.c: Likewise. + * sysdeps/ieee754/dbl-64/mpa.c: Likewise. + * sysdeps/ieee754/dbl-64/mpa.h: Likewise. + * sysdeps/ieee754/dbl-64/mpatan.c: Likewise. + * sysdeps/ieee754/dbl-64/mpatan.h: Likewise. + * sysdeps/ieee754/dbl-64/mpatan2.c: Likewise. + * sysdeps/ieee754/dbl-64/mpexp.c: Likewise. + * sysdeps/ieee754/dbl-64/mpexp.h: Likewise. + * sysdeps/ieee754/dbl-64/mpsqrt.c: Likewise. + * sysdeps/ieee754/dbl-64/mpsqrt.h: Likewise. + * sysdeps/ieee754/dbl-64/mptan.c: Likewise. + * sysdeps/ieee754/dbl-64/s_sin.c: Likewise. + * sysdeps/ieee754/dbl-64/s_tan.c: Likewise. + * sysdeps/ieee754/dbl-64/sincos32.c: Likewise. + * sysdeps/ieee754/dbl-64/slowexp.c: Likewise. + * sysdeps/ieee754/dbl-64/slowpow.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/brandred-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/doasin-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/e_log-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/mpa-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/mplog-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/mptan-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: Likewise. + 2011-10-24 Ulrich Drepper * sysdeps/x86_64/dla.h: Move to ... diff --git a/sysdeps/ieee754/dbl-64/branred.c b/sysdeps/ieee754/dbl-64/branred.c index 76015f0c5c..c8483034af 100644 --- a/sysdeps/ieee754/dbl-64/branred.c +++ b/sysdeps/ieee754/dbl-64/branred.c @@ -1,7 +1,7 @@ /* * IBM Accurate Mathematical Library * Written by International Business Machines Corp. - * Copyright (C) 2001 Free Software Foundation, Inc. + * Copyright (C) 2001, 2011 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -38,6 +38,10 @@ #include "branred.h" #include "math_private.h" +#ifndef SECTION +# define SECTION +#endif + /*******************************************************************/ /* Routine branred() performs range reduction of a double number */ @@ -45,7 +49,9 @@ /* x=n*pi/2+(a+aa), abs(a+aa) #include "math_private.h" +#ifndef SECTION +# define SECTION +#endif + /********************************************************************/ /* Compute arcsin(x,dx,v) of double-length number (x+dx) the result */ /* stored in v where v= v[0]+v[1] =arcsin(x+dx) */ /********************************************************************/ -void __doasin(double x, double dx, double v[]) { +void +SECTION +__doasin(double x, double dx, double v[]) { #include "doasin.h" diff --git a/sysdeps/ieee754/dbl-64/dosincos.c b/sysdeps/ieee754/dbl-64/dosincos.c index 712d585b9e..e8890ff8de 100644 --- a/sysdeps/ieee754/dbl-64/dosincos.c +++ b/sysdeps/ieee754/dbl-64/dosincos.c @@ -39,6 +39,10 @@ #include "dosincos.h" #include "math_private.h" +#ifndef SECTION +# define SECTION +#endif + extern const union { int4 i[880]; @@ -52,7 +56,9 @@ extern const union /*(x+dx) between 0 and PI/4 */ /***********************************************************************/ -void __dubsin(double x, double dx, double v[]) { +void +SECTION +__dubsin(double x, double dx, double v[]) { double r,s,c,cc,d,dd,d2,dd2,e,ee, sn,ssn,cs,ccs,ds,dss,dc,dcc; #ifndef DLA_FMS @@ -106,7 +112,9 @@ void __dubsin(double x, double dx, double v[]) { /*(x+dx) between 0 and PI/4 */ /**********************************************************************/ -void __dubcos(double x, double dx, double v[]) { +void +SECTION +__dubcos(double x, double dx, double v[]) { double r,s,c,cc,d,dd,d2,dd2,e,ee, sn,ssn,cs,ccs,ds,dss,dc,dcc; #ifndef DLA_FMS @@ -172,7 +180,9 @@ void __dubcos(double x, double dx, double v[]) { /* Routine receive Double-Length number (x+dx) and computes cos(x+dx) */ /* as Double-Length number and store it in array v */ /**********************************************************************/ -void __docos(double x, double dx, double v[]) { +void +SECTION +__docos(double x, double dx, double v[]) { double y,yy,p,w[2]; if (x>0) {y=x; yy=dx;} else {y=-x; yy=-dx;} diff --git a/sysdeps/ieee754/dbl-64/e_asin.c b/sysdeps/ieee754/dbl-64/e_asin.c index cd4cc2e2c2..65319c0b58 100644 --- a/sysdeps/ieee754/dbl-64/e_asin.c +++ b/sysdeps/ieee754/dbl-64/e_asin.c @@ -42,6 +42,10 @@ #include "uasncs.h" #include "math_private.h" +#ifndef SECTION +# define SECTION +#endif + void __doasin(double x, double dx, double w[]); void __dubsin(double x, double dx, double v[]); void __dubcos(double x, double dx, double v[]); @@ -53,7 +57,9 @@ double __cos32(double x, double res, double res1); /* An ultimate asin routine. Given an IEEE double machine number x */ /* it computes the correctly rounded (to nearest) value of arcsin(x) */ /***************************************************************************/ -double __ieee754_asin(double x){ +double +SECTION +__ieee754_asin(double x){ double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2]; mynumber u,v; int4 k,m,n; @@ -334,7 +340,9 @@ strong_alias (__ieee754_asin, __asin_finite) /* */ /*******************************************************************/ -double __ieee754_acos(double x) +double +SECTION +__ieee754_acos(double x) { double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2],eps; #if 0 diff --git a/sysdeps/ieee754/dbl-64/e_atan2.c b/sysdeps/ieee754/dbl-64/e_atan2.c index 9caacccf4c..64dae3e8d5 100644 --- a/sysdeps/ieee754/dbl-64/e_atan2.c +++ b/sysdeps/ieee754/dbl-64/e_atan2.c @@ -44,6 +44,10 @@ #include "atnat2.h" #include "math_private.h" +#ifndef SECTION +# define SECTION +#endif + /************************************************************************/ /* An ultimate atan2 routine. Given two IEEE double machine numbers y,x */ /* it computes the correctly rounded (to nearest) value of atan2(y,x). */ @@ -59,7 +63,9 @@ static double signArctan2(double y,double z) static double normalized(double ,double,double ,double); void __mpatan2(mp_no *,mp_no *,mp_no *,int); -double __ieee754_atan2(double y,double x) { +double +SECTION +__ieee754_atan2(double y,double x) { int i,de,ux,dx,uy,dy; #if 0 @@ -384,7 +390,9 @@ strong_alias (__ieee754_atan2, __atan2_finite) #endif /* Treat the Denormalized case */ -static double normalized(double ax,double ay,double y, double z) +static double +SECTION +normalized(double ax,double ay,double y, double z) { int p; mp_no mpx,mpy,mpz,mperr,mpz2,mpt1; p=6; @@ -394,7 +402,9 @@ static double normalized(double ax,double ay,double y, double z) return signArctan2(y,z); } /* Stage 3: Perform a multi-Precision computation */ -static double atan2Mp(double x,double y,const int pr[]) +static double +SECTION +atan2Mp(double x,double y,const int pr[]) { double z1,z2; int i,p; diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c index 48bbb05ed8..e7a839d42e 100644 --- a/sysdeps/ieee754/dbl-64/e_exp.c +++ b/sysdeps/ieee754/dbl-64/e_exp.c @@ -40,13 +40,19 @@ #include "uexp.tbl" #include "math_private.h" +#ifndef SECTION +# define SECTION +#endif + double __slowexp(double); /***************************************************************************/ /* An ultimate exp routine. Given an IEEE double machine number x */ /* it computes the correctly rounded (to nearest) value of e^x */ /***************************************************************************/ -double __ieee754_exp(double x) { +double +SECTION +__ieee754_exp(double x) { double bexp, t, eps, del, base, y, al, bet, res, rem, cor; mynumber junk1, junk2, binexp = {{0,0}}; #if 0 @@ -156,7 +162,9 @@ strong_alias (__ieee754_exp, __exp_finite) /*else return e^(x + xx) (always positive ) */ /************************************************************************/ -double __exp1(double x, double xx, double error) { +double +SECTION +__exp1(double x, double xx, double error) { double bexp, t, eps, del, base, y, al, bet, res, rem, cor; mynumber junk1, junk2, binexp = {{0,0}}; #if 0 diff --git a/sysdeps/ieee754/dbl-64/e_log.c b/sysdeps/ieee754/dbl-64/e_log.c index 7a0a26f251..e45520eba8 100644 --- a/sysdeps/ieee754/dbl-64/e_log.c +++ b/sysdeps/ieee754/dbl-64/e_log.c @@ -41,13 +41,19 @@ #include "MathLib.h" #include "math_private.h" +#ifndef SECTION +# define SECTION +#endif + void __mplog(mp_no *, mp_no *, int); /*********************************************************************/ /* An ultimate log routine. Given an IEEE double machine number x */ /* it computes the correctly rounded (to nearest) value of log(x). */ /*********************************************************************/ -double __ieee754_log(double x) { +double +SECTION +__ieee754_log(double x) { #define M 4 static const int pr[M]={8,10,18,32}; int i,j,n,ux,dx,p; diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c index 94b1ab8961..350e93986d 100644 --- a/sysdeps/ieee754/dbl-64/e_pow.c +++ b/sysdeps/ieee754/dbl-64/e_pow.c @@ -43,6 +43,10 @@ #include "upow.tbl" #include "math_private.h" +#ifndef SECTION +# define SECTION +#endif + double __exp1(double x, double xx, double error); static double log1(double x, double *delta, double *error); @@ -55,7 +59,9 @@ static int checkint(double x); /* An ultimate power routine. Given two IEEE double machine numbers y,x */ /* it computes the correctly rounded (to nearest) value of X^y. */ /***************************************************************************/ -double __ieee754_pow(double x, double y) { +double +SECTION +__ieee754_pow(double x, double y) { double z,a,aa,error, t,a1,a2,y1,y2; #if 0 double gor=1.0; @@ -160,7 +166,9 @@ strong_alias (__ieee754_pow, __pow_finite) /**************************************************************************/ /* Computing x^y using more accurate but more slow log routine */ /**************************************************************************/ -static double power1(double x, double y) { +static double +SECTION +power1(double x, double y) { double z,a,aa,error, t,a1,a2,y1,y2; z = my_log2(x,&aa,&error); t = y*134217729.0; @@ -183,7 +191,9 @@ static double power1(double x, double y) { /* + the parameter delta. */ /* The result is bounded by error (rightmost argument) */ /****************************************************************************/ -static double log1(double x, double *delta, double *error) { +static double +SECTION +log1(double x, double *delta, double *error) { int i,j,m; #if 0 int n; @@ -275,7 +285,9 @@ static double log1(double x, double *delta, double *error) { /* Computing log(x)(x is left argument).The result is return double + delta.*/ /* The result is bounded by error (right argument) */ /****************************************************************************/ -static double my_log2(double x, double *delta, double *error) { +static double +SECTION +my_log2(double x, double *delta, double *error) { int i,j,m; #if 0 int n; @@ -369,7 +381,9 @@ static double my_log2(double x, double *delta, double *error) { /* Routine receives a double x and checks if it is an integer. If not */ /* it returns 0, else it returns 1 if even or -1 if odd. */ /**********************************************************************/ -static int checkint(double x) { +static int +SECTION +checkint(double x) { union {int4 i[2]; double x;} u; int k,m,n; #if 0 diff --git a/sysdeps/ieee754/dbl-64/halfulp.c b/sysdeps/ieee754/dbl-64/halfulp.c index 31bd2daf4d..6018309427 100644 --- a/sysdeps/ieee754/dbl-64/halfulp.c +++ b/sysdeps/ieee754/dbl-64/halfulp.c @@ -40,6 +40,10 @@ #include #include "math_private.h" +#ifndef SECTION +# define SECTION +#endif + static const int4 tab54[32] = { 262143, 11585, 1782, 511, 210, 107, 63, 42, 30, 22, 17, 14, 12, 10, 9, 7, @@ -47,7 +51,9 @@ static const int4 tab54[32] = { 3, 3, 3, 3, 3, 3, 3, 3 }; -double __halfulp(double x, double y) +double +SECTION +__halfulp(double x, double y) { mynumber v; double z,u,uu; diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c index ad5a639c4b..39c640882b 100644 --- a/sysdeps/ieee754/dbl-64/mpa.c +++ b/sysdeps/ieee754/dbl-64/mpa.c @@ -47,11 +47,18 @@ #include "mpa.h" #include "mpa2.h" #include /* For MIN() */ + +#ifndef SECTION +# define SECTION +#endif + +#ifndef NO___ACR /* mcr() compares the sizes of the mantissas of two multiple precision */ /* numbers. Mantissas are compared regardless of the signs of the */ /* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */ /* disregarded. */ -static int mcr(const mp_no *x, const mp_no *y, int p) { +static int +mcr(const mp_no *x, const mp_no *y, int p) { int i; for (i=1; i<=p; i++) { if (X[i] == Y[i]) continue; @@ -61,9 +68,9 @@ static int mcr(const mp_no *x, const mp_no *y, int p) { } - /* acr() compares the absolute values of two multiple precision numbers */ -static int __acr(const mp_no *x, const mp_no *y, int p) { +int +__acr(const mp_no *x, const mp_no *y, int p) { int i; if (X[0] == ZERO) { @@ -79,10 +86,11 @@ static int __acr(const mp_no *x, const mp_no *y, int p) { return i; } +#endif #if 0 -/* cr90 compares the values of two multiple precision numbers */ +/* cr() compares the values of two multiple precision numbers */ static int __cr(const mp_no *x, const mp_no *y, int p) { int i; @@ -119,8 +127,6 @@ static void __cpymn(const mp_no *x, int m, mp_no *y, int n) { EY = EX; k=MIN(m,n); for (i=0; i <= k; i++) Y[i] = X[i]; for ( ; i <= n; i++) Y[i] = ZERO; - - return; } #endif @@ -177,7 +183,6 @@ static void norm(const mp_no *x, double *y, int p) for (i=1; i>EX; i--) c *= RADIXI; *y = c; - return; #undef R } @@ -225,8 +230,6 @@ static void denorm(const mp_no *x, double *y, int p) c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10); *y = c*TWOM1032; - return; - #undef R } @@ -252,7 +255,9 @@ void __mp_dbl(const mp_no *x, double *y, int p) { /* number *y. If the precision p is too small the result is truncated. x is */ /* left unchanged. */ -void __dbl_mp(double x, mp_no *y, int p) { +void +SECTION +__dbl_mp(double x, mp_no *y, int p) { int i,n; double u; @@ -273,7 +278,6 @@ void __dbl_mp(double x, mp_no *y, int p) { if (u>x) u -= ONE; Y[i] = u; x -= u; x *= RADIX; } for ( ; i<=p; i++) Y[i] = ZERO; - return; } @@ -283,7 +287,9 @@ void __dbl_mp(double x, mp_no *y, int p) { /* No guard digit is used. The result equals the exact sum, truncated. */ /* *x & *y are left unchanged. */ -static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { +static void +SECTION +add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { int i,j,k; @@ -325,7 +331,9 @@ static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { /* or y&z. One guard digit is used. The error is less than one ulp. */ /* *x & *y are left unchanged. */ -static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { +static void +SECTION +sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { int i,j,k; @@ -372,8 +380,6 @@ static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { Z[k++] = Z[i++]; for (; k <= p; ) Z[k++] = ZERO; - - return; } @@ -381,7 +387,9 @@ static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { /* but not x&z or y&z. One guard digit is used. The error is less than */ /* one ulp. *x & *y are left unchanged. */ -void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) { +void +SECTION +__add(const mp_no *x, const mp_no *y, mp_no *z, int p) { int n; @@ -397,7 +405,6 @@ void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) { else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; } else Z[0] = ZERO; } - return; } @@ -405,7 +412,9 @@ void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) { /* overlap but not x&z or y&z. One guard digit is used. The error is */ /* less than one ulp. *x & *y are left unchanged. */ -void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { +void +SECTION +__sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { int n; @@ -421,7 +430,6 @@ void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; } else Z[0] = ZERO; } - return; } @@ -430,7 +438,9 @@ void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { /* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp. */ /* *x & *y are left unchanged. */ -void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { +void +SECTION +__mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { int i, i1, i2, j, k, k2; double u; @@ -461,7 +471,6 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { EZ = EX + EY; Z[0] = X[0] * Y[0]; - return; } @@ -470,7 +479,9 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { /* 2.001*r**(1-p) for p>3. */ /* *x=0 is not permissible. *x is left unchanged. */ -static void __inv(const mp_no *x, mp_no *y, int p) { +static +SECTION +void __inv(const mp_no *x, mp_no *y, int p) { int i; #if 0 int l; @@ -493,7 +504,6 @@ static void __inv(const mp_no *x, mp_no *y, int p) { __sub(&mptwo,y,&z,p); __mul(&w,&z,y,p); } - return; } @@ -502,11 +512,12 @@ static void __inv(const mp_no *x, mp_no *y, int p) { /* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3 */ /* and 3.001*r**(1-p) for p>3. *y=0 is not permissible. */ -void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) { +void +SECTION +__dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) { mp_no w; if (X[0] == ZERO) Z[0] = ZERO; else {__inv(y,&w,p); __mul(x,&w,z,p);} - return; } diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h index 3ca0ca5f00..5647ab7b4f 100644 --- a/sysdeps/ieee754/dbl-64/mpa.h +++ b/sysdeps/ieee754/dbl-64/mpa.h @@ -64,7 +64,7 @@ typedef union { int i[2]; double d; } number; #define ABS(x) ((x) < 0 ? -(x) : (x)) -// int __acr(const mp_no *, const mp_no *, int); +int __acr(const mp_no *, const mp_no *, int); // int __cr(const mp_no *, const mp_no *, int); void __cpy(const mp_no *, mp_no *, int); // void __cpymn(const mp_no *, int, mp_no *, int); diff --git a/sysdeps/ieee754/dbl-64/mpatan.c b/sysdeps/ieee754/dbl-64/mpatan.c index ee21c25138..f40873ea59 100644 --- a/sysdeps/ieee754/dbl-64/mpatan.c +++ b/sysdeps/ieee754/dbl-64/mpatan.c @@ -1,8 +1,7 @@ - /* * IBM Accurate Mathematical Library * written by International Business Machines Corp. - * Copyright (C) 2001 Free Software Foundation + * Copyright (C) 2001, 2011 Free Software Foundation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -34,10 +33,18 @@ #include "endian.h" #include "mpa.h" + +#ifndef SECTION +# define SECTION +#endif + +#include "mpatan.h" + void __mpsqrt(mp_no *, mp_no *, int); -void __mpatan(mp_no *x, mp_no *y, int p) { -#include "mpatan.h" +void +SECTION +__mpatan(mp_no *x, mp_no *y, int p) { int i,m,n; double dx; @@ -54,19 +61,19 @@ void __mpatan(mp_no *x, mp_no *y, int p) { mp_no mps,mpsm,mpt,mpt1,mpt2,mpt3; - /* Choose m and initiate mpone, mptwo & mptwoim1 */ + /* Choose m and initiate mpone, mptwo & mptwoim1 */ if (EX>0) m=7; else if (EX<0) m=0; else { __mp_dbl(x,&dx,p); dx=ABS(dx); for (m=6; m>0; m--) - {if (dx>xm[m].d) break;} + {if (dx>__atan_xm[m].d) break;} } mpone.e = mptwo.e = mptwoim1.e = 1; mpone.d[0] = mpone.d[1] = mptwo.d[0] = mptwoim1.d[0] = ONE; mptwo.d[1] = TWO; - /* Reduce x m times */ + /* Reduce x m times */ __mul(x,x,&mpsm,p); if (m==0) __cpy(x,&mps,p); else { @@ -82,8 +89,8 @@ void __mpatan(mp_no *x, mp_no *y, int p) { __mpsqrt(&mpsm,&mps,p); mps.d[0] = X[0]; } - /* Evaluate a truncated power series for Atan(s) */ - n=np[p]; mptwoim1.d[1] = twonm1[p].d; + /* Evaluate a truncated power series for Atan(s) */ + n=__atan_np[p]; mptwoim1.d[1] = __atan_twonm1[p].d; __dvd(&mpsm,&mptwoim1,&mpt,p); for (i=n-1; i>1; i--) { mptwoim1.d[1] -= TWO; @@ -94,8 +101,8 @@ void __mpatan(mp_no *x, mp_no *y, int p) { __mul(&mps,&mpt,&mpt1,p); __sub(&mps,&mpt1,&mpt,p); - /* Compute Atan(x) */ - mptwoim1.d[1] = twom[m].d; + /* Compute Atan(x) */ + mptwoim1.d[1] = __atan_twom[m].d; __mul(&mptwoim1,&mpt,y,p); return; diff --git a/sysdeps/ieee754/dbl-64/mpatan.h b/sysdeps/ieee754/dbl-64/mpatan.h index d420ff3408..003b06c695 100644 --- a/sysdeps/ieee754/dbl-64/mpatan.h +++ b/sysdeps/ieee754/dbl-64/mpatan.h @@ -1,8 +1,7 @@ - /* * IBM Accurate Mathematical Library * Written by International Business Machines Corp. - * Copyright (C) 2001 Free Software Foundation, Inc. + * Copyright (C) 2001, 2011 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -29,9 +28,18 @@ #ifndef MPATAN_H #define MPATAN_H +extern const number __atan_xm[8] attribute_hidden; +extern const number __atan_twonm1[33] attribute_hidden; +extern const number __atan_twom[8] attribute_hidden; +extern const number __atan_one attribute_hidden; +extern const number __atan_two attribute_hidden; +extern const int __atan_np[33] attribute_hidden; + + +#ifndef AVOID_MPATAN_H #ifdef BIG_ENDI - static const number - xm[8] = { /* x[m] */ + const number + __atan_xm[8] = { /* x[m] */ /**/ {{0x00000000, 0x00000000} }, /* 0.0 */ /**/ {{0x3f8930be, 0x00000000} }, /* 0.0123 */ /**/ {{0x3f991687, 0x00000000} }, /* 0.0245 */ @@ -40,9 +48,9 @@ /**/ {{0x3fc95810, 0x00000000} }, /* 0.198 */ /**/ {{0x3fda7ef9, 0x00000000} }, /* 0.414 */ /**/ {{0x3ff00000, 0x00000000} }, /* 1.0 */ - }; - static const number - twonm1[33] = { /* 2n-1 */ + }; + const number + __atan_twonm1[33] = { /* 2n-1 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ @@ -76,10 +84,10 @@ /**/ {{0x405b4000, 0x00000000} }, /* 109 */ /**/ {{0x405c4000, 0x00000000} }, /* 113 */ /**/ {{0x405d4000, 0x00000000} }, /* 117 */ - }; + }; - static const number - twom[8] = { /* 2**m */ + const number + __atan_twom[8] = { /* 2**m */ /**/ {{0x3ff00000, 0x00000000} }, /* 1.0 */ /**/ {{0x40000000, 0x00000000} }, /* 2.0 */ /**/ {{0x40100000, 0x00000000} }, /* 4.0 */ @@ -88,17 +96,17 @@ /**/ {{0x40400000, 0x00000000} }, /* 32.0 */ /**/ {{0x40500000, 0x00000000} }, /* 64.0 */ /**/ {{0x40600000, 0x00000000} }, /* 128.0 */ - }; + }; - static const number -/**/ one = {{0x3ff00000, 0x00000000} }, /* 1 */ -/**/ two = {{0x40000000, 0x00000000} }; /* 2 */ + const number +/**/ __atan_one = {{0x3ff00000, 0x00000000} }, /* 1 */ +/**/ __atan_two = {{0x40000000, 0x00000000} }; /* 2 */ #else #ifdef LITTLE_ENDI - static const number - xm[8] = { /* x[m] */ + const number + __atan_xm[8] = { /* x[m] */ /**/ {{0x00000000, 0x00000000} }, /* 0.0 */ /**/ {{0x00000000, 0x3f8930be} }, /* 0.0123 */ /**/ {{0x00000000, 0x3f991687} }, /* 0.0245 */ @@ -107,9 +115,9 @@ /**/ {{0x00000000, 0x3fc95810} }, /* 0.198 */ /**/ {{0x00000000, 0x3fda7ef9} }, /* 0.414 */ /**/ {{0x00000000, 0x3ff00000} }, /* 1.0 */ - }; - static const number - twonm1[33] = { /* 2n-1 */ + }; + const number +__atan_twonm1[33] = { /* 2n-1 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ @@ -143,10 +151,10 @@ /**/ {{0x00000000, 0x405b4000} }, /* 109 */ /**/ {{0x00000000, 0x405c4000} }, /* 113 */ /**/ {{0x00000000, 0x405d4000} }, /* 117 */ - }; + }; - static const number - twom[8] = { /* 2**m */ + const number + __atan_twom[8] = { /* 2**m */ /**/ {{0x00000000, 0x3ff00000} }, /* 1.0 */ /**/ {{0x00000000, 0x40000000} }, /* 2.0 */ /**/ {{0x00000000, 0x40100000} }, /* 4.0 */ @@ -155,20 +163,21 @@ /**/ {{0x00000000, 0x40400000} }, /* 32.0 */ /**/ {{0x00000000, 0x40500000} }, /* 64.0 */ /**/ {{0x00000000, 0x40600000} }, /* 128.0 */ - }; + }; - static const number -/**/ one = {{0x00000000, 0x3ff00000} }, /* 1 */ -/**/ two = {{0x00000000, 0x40000000} }; /* 2 */ + const number +/**/ __atan_one = {{0x00000000, 0x3ff00000} }, /* 1 */ +/**/ __atan_two = {{0x00000000, 0x40000000} }; /* 2 */ #endif #endif -#define ONE one.d -#define TWO two.d - - static const int - np[33] = { 0, 0, 0, 0, 6, 8,10,11,13,15,17,19,21,23,25,27,28, - 30,32,34,36,38,40,42,43,45,47,49,51,53,55,57,59}; + const int + __atan_np[33] = { 0, 0, 0, 0, 6, 8,10,11,13,15,17,19,21,23,25,27,28, + 30,32,34,36,38,40,42,43,45,47,49,51,53,55,57,59}; #endif +#endif + +#define ONE __atan_one.d +#define TWO __atan_two.d diff --git a/sysdeps/ieee754/dbl-64/mpatan2.c b/sysdeps/ieee754/dbl-64/mpatan2.c index 8977ec9042..1deb056417 100644 --- a/sysdeps/ieee754/dbl-64/mpatan2.c +++ b/sysdeps/ieee754/dbl-64/mpatan2.c @@ -1,8 +1,7 @@ - /* * IBM Accurate Mathematical Library * written by International Business Machines Corp. - * Copyright (C) 2001 Free Software Foundation + * Copyright (C) 2001, 2011 Free Software Foundation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -38,18 +37,24 @@ #include "mpa.h" +#ifndef SECTION +# define SECTION +#endif + void __mpsqrt(mp_no *, mp_no *, int); void __mpatan(mp_no *, mp_no *, int); /* Multi-Precision Atan2(y,x) function subroutine, for p >= 4. */ /* y=0 is not permitted if x<=0. No error messages are given. */ -void __mpatan2(mp_no *y, mp_no *x, mp_no *z, int p) { +void +SECTION +__mpatan2(mp_no *y, mp_no *x, mp_no *z, int p) { static const double ZERO = 0.0, ONE = 1.0; mp_no mpone = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; + 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, + 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; mp_no mpt1,mpt2,mpt3; diff --git a/sysdeps/ieee754/dbl-64/mpexp.c b/sysdeps/ieee754/dbl-64/mpexp.c index e2ab71b2cc..b0cffe2fe5 100644 --- a/sysdeps/ieee754/dbl-64/mpexp.c +++ b/sysdeps/ieee754/dbl-64/mpexp.c @@ -1,8 +1,7 @@ - /* * IBM Accurate Mathematical Library * written by International Business Machines Corp. - * Copyright (C) 2001 Free Software Foundation + * Copyright (C) 2001, 2011 Free Software Foundation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -34,34 +33,40 @@ #include "mpa.h" #include "mpexp.h" +#ifndef SECTION +# define SECTION +#endif + /* Multi-Precision exponential function subroutine (for p >= 4, */ /* 2**(-55) <= abs(x) <= 1024). */ -void __mpexp(mp_no *x, mp_no *y, int p) { +void +SECTION +__mpexp(mp_no *x, mp_no *y, int p) { int i,j,k,m,m1,m2,n; double a,b; static const int np[33] = {0,0,0,0,3,3,4,4,5,4,4,5,5,5,6,6,6,6,6,6, - 6,6,6,6,7,7,7,7,8,8,8,8,8}; + 6,6,6,6,7,7,7,7,8,8,8,8,8}; static const int m1p[33]= {0,0,0,0,17,23,23,28,27,38,42,39,43,47,43,47,50,54, - 57,60,64,67,71,74,68,71,74,77,70,73,76,78,81}; + 57,60,64,67,71,74,68,71,74,77,70,73,76,78,81}; static const int m1np[7][18] = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0,36,48,60,72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0,24,32,40,48,56,64,72, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0,17,23,29,35,41,47,53,59,65, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0,23,28,33,38,42,47,52,57,62,66, 0, 0}, - { 0, 0, 0, 0, 0, 0, 0, 0,27, 0, 0,39,43,47,51,55,59,63}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,43,47,50,54}}; + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0,36,48,60,72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0,24,32,40,48,56,64,72, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0,17,23,29,35,41,47,53,59,65, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0,23,28,33,38,42,47,52,57,62,66, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0,27, 0, 0,39,43,47,51,55,59,63}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,43,47,50,54}}; mp_no mpone = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; + 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, + 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; mp_no mpk = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; + 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, + 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; mp_no mps,mpak,mpt1,mpt2; /* Choose m,n and compute a=2**(-m) */ - n = np[p]; m1 = m1p[p]; a = twomm1[p].d; + n = np[p]; m1 = m1p[p]; a = __mpexp_twomm1[p].d; for (i=0; iEX; i--) a *= RADIX; b = X[1]*RADIXI; m2 = 24*EX; @@ -81,12 +86,12 @@ void __mpexp(mp_no *x, mp_no *y, int p) { /* Evaluate the polynomial. Put result in mpt2 */ mpone.e=1; mpone.d[0]=ONE; mpone.d[1]=ONE; - mpk.e = 1; mpk.d[0] = ONE; mpk.d[1]=nn[n].d; + mpk.e = 1; mpk.d[0] = ONE; mpk.d[1]=__mpexp_nn[n].d; __dvd(&mps,&mpk,&mpt1,p); __add(&mpone,&mpt1,&mpak,p); for (k=n-1; k>1; k--) { __mul(&mps,&mpak,&mpt1,p); - mpk.d[1]=nn[k].d; + mpk.d[1]=__mpexp_nn[k].d; __dvd(&mpt1,&mpk,&mpt2,p); __add(&mpone,&mpt2,&mpak,p); } diff --git a/sysdeps/ieee754/dbl-64/mpexp.h b/sysdeps/ieee754/dbl-64/mpexp.h index a0b08ccb41..7985060a8c 100644 --- a/sysdeps/ieee754/dbl-64/mpexp.h +++ b/sysdeps/ieee754/dbl-64/mpexp.h @@ -1,7 +1,7 @@ /* * IBM Accurate Mathematical Library * Written by International Business Machines Corp. - * Copyright (C) 2001 Free Software Foundation, Inc. + * Copyright (C) 2001, 2011 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -28,9 +28,20 @@ #ifndef MPEXP_H #define MPEXP_H +extern const number __mpexp_twomm1[33] attribute_hidden; +extern const number __mpexp_nn[9] attribute_hidden; +extern const number __mpexp_radix attribute_hidden; +extern const number __mpexp_radixi attribute_hidden; +extern const number __mpexp_zero attribute_hidden; +extern const number __mpexp_one attribute_hidden; +extern const number __mpexp_two attribute_hidden; +extern const number __mpexp_half attribute_hidden; + + +#ifndef AVOID_MPEXP_H #ifdef BIG_ENDI - static const number - twomm1[33] = { /* 2**-m1 */ + const number + __mpexp_twomm1[33] = { /* 2**-m1 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ @@ -65,8 +76,8 @@ /**/ {{0x3b100000, 0x00000000} }, /* 2**-78 */ /**/ {{0x3ae00000, 0x00000000} }, /* 2**-81 */ }; - static const number - nn[9]={ /* n */ + const number + __mpexp_nn[9]={ /* n */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ /**/ {{0x3ff00000, 0x00000000} }, /* 1 */ /**/ {{0x40000000, 0x00000000} }, /* 2 */ @@ -78,18 +89,18 @@ /**/ {{0x40200000, 0x00000000} }, /* 8 */ }; - static const number -/**/ radix = {{0x41700000, 0x00000000} }, /* 2**24 */ -/**/ radixi = {{0x3e700000, 0x00000000} }, /* 2**-24 */ -/**/ zero = {{0x00000000, 0x00000000} }, /* 0 */ -/**/ one = {{0x3ff00000, 0x00000000} }, /* 1 */ -/**/ two = {{0x40000000, 0x00000000} }, /* 2 */ -/**/ half = {{0x3fe00000, 0x00000000} }; /* 1/2 */ + const number +/**/ __mpexp_radix = {{0x41700000, 0x00000000} }, /* 2**24 */ +/**/ __mpexp_radixi = {{0x3e700000, 0x00000000} }, /* 2**-24 */ +/**/ __mpexp_zero = {{0x00000000, 0x00000000} }, /* 0 */ +/**/ __mpexp_one = {{0x3ff00000, 0x00000000} }, /* 1 */ +/**/ __mpexp_two = {{0x40000000, 0x00000000} }, /* 2 */ +/**/ __mpexp_half = {{0x3fe00000, 0x00000000} }; /* 1/2 */ #else #ifdef LITTLE_ENDI - static const number - twomm1[33] = { /* 2**-m1 */ + const number + __mpexp_twomm1[33] = { /* 2**-m1 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ @@ -124,8 +135,8 @@ /**/ {{0x00000000, 0x3b100000} }, /* 2**-78 */ /**/ {{0x00000000, 0x3ae00000} }, /* 2**-81 */ }; - static const number - nn[9]={ /* n */ + const number + __mpexp_nn[9]={ /* n */ /**/ {{0x00000000, 0x00000000} }, /* 0 */ /**/ {{0x00000000, 0x3ff00000} }, /* 1 */ /**/ {{0x00000000, 0x40000000} }, /* 2 */ @@ -137,22 +148,23 @@ /**/ {{0x00000000, 0x40200000} }, /* 8 */ }; - static const number -/**/ radix = {{0x00000000, 0x41700000} }, /* 2**24 */ -/**/ radixi = {{0x00000000, 0x3e700000} }, /* 2**-24 */ -/**/ zero = {{0x00000000, 0x00000000} }, /* 0 */ -/**/ one = {{0x00000000, 0x3ff00000} }, /* 1 */ -/**/ two = {{0x00000000, 0x40000000} }, /* 2 */ -/**/ half = {{0x00000000, 0x3fe00000} }; /* 1/2 */ + const number +/**/ __mpexp_radix = {{0x00000000, 0x41700000} }, /* 2**24 */ +/**/ __mpexp_radixi = {{0x00000000, 0x3e700000} }, /* 2**-24 */ +/**/ __mpexp_zero = {{0x00000000, 0x00000000} }, /* 0 */ +/**/ __mpexp_one = {{0x00000000, 0x3ff00000} }, /* 1 */ +/**/ __mpexp_two = {{0x00000000, 0x40000000} }, /* 2 */ +/**/ __mpexp_half = {{0x00000000, 0x3fe00000} }; /* 1/2 */ +#endif #endif #endif -#define RADIX radix.d -#define RADIXI radixi.d -#define ZERO zero.d -#define ONE one.d -#define TWO two.d -#define HALF half.d +#define RADIX __mpexp_radix.d +#define RADIXI __mpexp_radixi.d +#define ZERO __mpexp_zero.d +#define ONE __mpexp_one.d +#define TWO __mpexp_two.d +#define HALF __mpexp_half.d #endif diff --git a/sysdeps/ieee754/dbl-64/mpsqrt.c b/sysdeps/ieee754/dbl-64/mpsqrt.c index bea623296b..d1a80f9091 100644 --- a/sysdeps/ieee754/dbl-64/mpsqrt.c +++ b/sysdeps/ieee754/dbl-64/mpsqrt.c @@ -32,6 +32,12 @@ #include "endian.h" #include "mpa.h" +#ifndef SECTION +# define SECTION +#endif + +#include "mpsqrt.h" + /****************************************************************************/ /* Multi-Precision square root function subroutine for precision p >= 4. */ /* The relative error is bounded by 3.501*r**(1-p), where r=2**24. */ @@ -42,9 +48,9 @@ static double fastiroot(double); -void __mpsqrt(mp_no *x, mp_no *y, int p) { -#include "mpsqrt.h" - +void +SECTION +__mpsqrt(mp_no *x, mp_no *y, int p) { int i,m,ex,ey; double dx,dy; mp_no @@ -64,7 +70,7 @@ void __mpsqrt(mp_no *x, mp_no *y, int p) { __mp_dbl(&mpxn,&dx,p); dy=fastiroot(dx); __dbl_mp(dy,&mpu,p); __mul(&mpxn,&mphalf,&mpz,p); - m=mp[p]; + m=__mpsqrt_mp[p]; for (i=0; i0.8) { __sub(&hp,&c,&a,p); - __c32(&a,&c,&b,p); + __c32(&a,&c,&b,p); } else __c32(&c,&b,&a,p); /* b=cos(0.5*(res+res1)) */ __dbl_mp(x,&c,p); /* c = x */ __sub(&b,&c,&a,p); - /* if a>0 return max(res,res1), otherwise return min(res,res1) */ + /* if a>0 return max(res,res1), otherwise return min(res,res1) */ if (a.d[0]>0) return (res>res1)?res:res1; else return (res= 0) return res; /* if result was really computed by halfulp */ - /* else, if result was not really computed by halfulp */ + /* else, if result was not really computed by halfulp */ p = 10; /* p=precision */ __dbl_mp(x,&mpx,p); __dbl_mp(y,&mpy,p); diff --git a/sysdeps/x86_64/fpu/multiarch/brandred-fma4.c b/sysdeps/x86_64/fpu/multiarch/brandred-fma4.c index 93fb5a11a2..f4f68ac962 100644 --- a/sysdeps/x86_64/fpu/multiarch/brandred-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/brandred-fma4.c @@ -1,3 +1,4 @@ #define __branred __branred_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/doasin-fma4.c b/sysdeps/x86_64/fpu/multiarch/doasin-fma4.c index d7ba67e60b..53eb419472 100644 --- a/sysdeps/x86_64/fpu/multiarch/doasin-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/doasin-fma4.c @@ -1,3 +1,4 @@ #define __doasin __doasin_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c b/sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c index 02b420bbd5..1578b2fce0 100644 --- a/sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c @@ -1,5 +1,6 @@ #define __docos __docos_fma4 #define __dubcos __dubcos_fma4 #define __dubsin __dubsin_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c index 938bc84187..2657c31f49 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c @@ -6,5 +6,6 @@ #define __dubcos __dubcos_fma4 #define __dubsin __dubsin_fma4 #define __sin32 __sin32_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c index 84a6f86349..f4e986293e 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c @@ -5,5 +5,6 @@ #define __mpatan2 __mpatan2_fma4 #define __mul __mul_fma4 #define __sub __sub_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c index 942dfffd78..ae6eb67603 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c @@ -1,5 +1,6 @@ #define __ieee754_exp __ieee754_exp_fma4 #define __exp1 __exp1_fma4 #define __slowexp __slowexp_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c index 0be66d160c..a2346cc618 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c @@ -3,5 +3,6 @@ #define __add __add_fma4 #define __dbl_mp __dbl_mp_fma4 #define __sub __sub_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c index 20313be389..5b3ea8e103 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c @@ -1,5 +1,6 @@ #define __ieee754_pow __ieee754_pow_fma4 #define __exp1 __exp1_fma4 #define __slowpow __slowpow_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c index 3fc223e613..a00c17c016 100644 --- a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c @@ -1,3 +1,4 @@ #define __halfulp __halfulp_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c index 7b9e2ef8d1..f8ed8f3464 100644 --- a/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c @@ -6,5 +6,7 @@ #define NO___CPY 1 #define NO___MP_DBL 1 +#define NO___ACR 1 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c index 942974b26e..fbd3bd49a2 100644 --- a/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c @@ -4,5 +4,7 @@ #define __mpsqrt __mpsqrt_fma4 #define __mul __mul_fma4 #define __sub __sub_fma4 +#define AVOID_MPATAN_H 1 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c index e7c469e4ca..e6e44d49b0 100644 --- a/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c @@ -4,5 +4,6 @@ #define __mpatan __mpatan_fma4 #define __mpsqrt __mpsqrt_fma4 #define __mul __mul_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c index 021970c670..07ca6e9ad0 100644 --- a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c @@ -3,5 +3,7 @@ #define __dbl_mp __dbl_mp_fma4 #define __dvd __dvd_fma4 #define __mul __mul_fma4 +#define AVOID_MPEXP_H 1 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c index 9581eaf5c8..b4733118d7 100644 --- a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c @@ -3,5 +3,6 @@ #define __mpexp __mpexp_fma4 #define __mul __mul_fma4 #define __sub __sub_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c index 43b6493db2..f8a1ba2d92 100644 --- a/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c @@ -2,5 +2,7 @@ #define __dbl_mp __dbl_mp_fma4 #define __mul __mul_fma4 #define __sub __sub_fma4 +#define AVOID_MPSQRT_H 1 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c b/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c index 767924edf0..fb4a9d48ca 100644 --- a/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c @@ -2,5 +2,6 @@ #define __c32 __c32_fma4 #define __dvd __dvd_fma4 #define __mpranred __mpranred_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c index a8f0977649..9e83e6cdab 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c @@ -4,5 +4,6 @@ #define __mpatan __mpatan_fma4 #define __mul __mul_fma4 #define __sub __sub_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c index 97cef8bd42..2501af981a 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c @@ -7,5 +7,6 @@ #define __mpcos1 __mpcos1_fma4 #define __mpsin __mpsin_fma4 #define __mpsin1 __mpsin1_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c index c3cefc2e66..d7dab3ca9e 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c @@ -4,6 +4,6 @@ #define __mpranred __mpranred_fma4 #define __mptan __mptan_fma4 #define __sub __sub_fma4 - +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c b/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c index f0d2d27575..ebbfa18cca 100644 --- a/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c @@ -10,5 +10,6 @@ #define __dbl_mp __dbl_mp_fma4 #define __mul __mul_fma4 #define __sub __sub_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c index 83cb359901..3bcde84233 100644 --- a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c @@ -4,5 +4,6 @@ #define __mpexp __mpexp_fma4 #define __mul __mul_fma4 #define __sub __sub_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c index 744f3f6e53..69d69823bb 100644 --- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c @@ -6,5 +6,6 @@ #define __mul __mul_fma4 #define __sub __sub_fma4 #define __halfulp __halfulp_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) #include