/* Copyright (C) 2007 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. In addition to the permissions in the GNU General Public License, the Free Software Foundation gives you unlimited permission to link the compiled version of this file into combinations with other programs, and to distribute those combinations without any restriction coming from the use of this file. (The General Public License restrictions do apply in other respects; for example, they cover modification of the file, and distribution when not linked into a combine executable.) GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING. If not, write to the Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "bid_internal.h" /***************************************************************************** * BID64_round_integral_exact ****************************************************************************/ #if DECIMAL_CALL_BY_REFERENCE void bid64_round_integral_exact (UINT64 * pres, UINT64 * px _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT64 x = *px; #if !DECIMAL_GLOBAL_ROUNDING unsigned int rnd_mode = *prnd_mode; #endif #else UINT64 bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; int exp; // unbiased exponent // Note: C1 represents the significand (UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; int q, ind, shift; UINT64 C1; // UINT64 res is C* at first - represents up to 16 decimal digits <= 54 bits UINT128 fstar = { {0x0ull, 0x0ull} }; UINT128 P128; x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative // check for NaNs and infinities if ((x & MASK_NAN) == MASK_NAN) { // check for NaN if ((x & 0x0003ffffffffffffull) > 999999999999999ull) x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits else x = x & 0xfe03ffffffffffffull; // clear G6-G12 if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN // set invalid flag *pfpsf |= INVALID_EXCEPTION; // return quiet (SNaN) res = x & 0xfdffffffffffffffull; } else { // QNaN res = x; } BID_RETURN (res); } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } // if x is 0 or non-canonical return 0 preserving the sign bit and // the preferred exponent of MAX(Q(x), 0) if (C1 == 0) { if (exp < 0) exp = 0; res = x_sign | (((UINT64) exp + 398) << 53); BID_RETURN (res); } // x is a finite non-zero number (not 0, non-canonical, or special) switch (rnd_mode) { case ROUNDING_TO_NEAREST: case ROUNDING_TIES_AWAY: // return 0 if (exp <= -(p+1)) if (exp <= -17) { res = x_sign | 0x31c0000000000000ull; *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } break; case ROUNDING_DOWN: // return 0 if (exp <= -p) if (exp <= -16) { if (x_sign) { res = 0xb1c0000000000001ull; } else { res = 0x31c0000000000000ull; } *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } break; case ROUNDING_UP: // return 0 if (exp <= -p) if (exp <= -16) { if (x_sign) { res = 0xb1c0000000000000ull; } else { res = 0x31c0000000000001ull; } *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } break; case ROUNDING_TO_ZERO: // return 0 if (exp <= -p) if (exp <= -16) { res = x_sign | 0x31c0000000000000ull; *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } break; } // end switch () // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { q = nr_digits[x_nr_bits - 1].digits1; if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); } switch (rnd_mode) { case ROUNDING_TO_NEAREST: if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate C1 = C1 + midpoint64[ind - 1]; // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by // shiftright128[] and maskhigh128[] // 1 <= x <= 16 // kx = 10^(-x) = ten2mk64[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // if (0 < f* < 10^(-x)) then the result is a midpoint // if floor(C*) is even then C* = floor(C*) - logical right // shift; C* has p decimal digits, correct by Prop. 1) // else if floor(C*) is odd C* = floor(C*)-1 (logical right // shift; C* has p decimal digits, correct by Pr. 1) // else // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // n = C* * 10^(e+x) if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (0 < f* < 10^(-x)) then the result is a midpoint // since round_to_even, subtract 1 if current result is odd if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0) && (fstar.w[0] < ten2mk64[ind - 1])) { res--; } // determine inexactness of the rounding of C* // if (0 < f* - 1/2 < 10^(-x)) then // the result is exact // else // if (f* - 1/2 > T*) then // the result is inexact if (ind - 1 <= 2) { if (fstar.w[0] > 0x8000000000000000ull) { // f* > 1/2 and the result may be exact // fstar.w[0] - 0x8000000000000000ull is f* - 1/2 if ((fstar.w[0] - 0x8000000000000000ull) > ten2mk64[ind - 1]) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } // else the result is exact } else { // the result is inexact; f2* <= 1/2 // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } } else { // if 3 <= ind - 1 <= 21 if (fstar.w[1] > onehalf128[ind - 1] || (fstar.w[1] == onehalf128[ind - 1] && fstar.w[0])) { // f2* > 1/2 and the result may be exact // Calculate f2* - 1/2 if (fstar.w[1] > onehalf128[ind - 1] || fstar.w[0] > ten2mk64[ind - 1]) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } // else the result is exact } else { // the result is inexact; f2* <= 1/2 // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } break; case ROUNDING_TIES_AWAY: if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate C1 = C1 + midpoint64[ind - 1]; // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by // shiftright128[] and maskhigh128[] // 1 <= x <= 16 // kx = 10^(-x) = ten2mk64[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // if (0 < f* < 10^(-x)) then the result is a midpoint // C* = floor(C*) - logical right shift; C* has p decimal digits, // correct by Prop. 1) // else // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // n = C* * 10^(e+x) if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // midpoints are already rounded correctly // determine inexactness of the rounding of C* // if (0 < f* - 1/2 < 10^(-x)) then // the result is exact // else // if (f* - 1/2 > T*) then // the result is inexact if (ind - 1 <= 2) { if (fstar.w[0] > 0x8000000000000000ull) { // f* > 1/2 and the result may be exact // fstar.w[0] - 0x8000000000000000ull is f* - 1/2 if ((fstar.w[0] - 0x8000000000000000ull) > ten2mk64[ind - 1]) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } // else the result is exact } else { // the result is inexact; f2* <= 1/2 // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } } else { // if 3 <= ind - 1 <= 21 if (fstar.w[1] > onehalf128[ind - 1] || (fstar.w[1] == onehalf128[ind - 1] && fstar.w[0])) { // f2* > 1/2 and the result may be exact // Calculate f2* - 1/2 if (fstar.w[1] > onehalf128[ind - 1] || fstar.w[0] > ten2mk64[ind - 1]) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } // else the result is exact } else { // the result is inexact; f2* <= 1/2 // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } break; case ROUNDING_DOWN: if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 fits in 64 bits // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by // shiftright128[] and maskhigh128[] // 1 <= x <= 16 // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) { if (x_sign) { // if negative and not exact, increment magnitude res++; } *pfpsf |= INEXACT_EXCEPTION; } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); } else { // if exp < 0 and q + exp <= 0 // the result is +0 or -1 if (x_sign) { res = 0xb1c0000000000001ull; } else { res = 0x31c0000000000000ull; } *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } break; case ROUNDING_UP: if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 fits in 64 bits // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by // shiftright128[] and maskhigh128[] // 1 <= x <= 16 // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) { if (!x_sign) { // if positive and not exact, increment magnitude res++; } *pfpsf |= INEXACT_EXCEPTION; } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); } else { // if exp < 0 and q + exp <= 0 // the result is -0 or +1 if (x_sign) { res = 0xb1c0000000000000ull; } else { res = 0x31c0000000000001ull; } *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } break; case ROUNDING_TO_ZERO: if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 fits in 127 bits // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by // shiftright128[] and maskhigh128[] // 1 <= x <= 16 // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) { *pfpsf |= INEXACT_EXCEPTION; } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } break; } // end switch () BID_RETURN (res); } /***************************************************************************** * BID64_round_integral_nearest_even ****************************************************************************/ #if DECIMAL_CALL_BY_REFERENCE void bid64_round_integral_nearest_even (UINT64 * pres, UINT64 * px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT64 x = *px; #else UINT64 bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; int q, ind, shift; UINT64 C1; UINT128 fstar; UINT128 P128; x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative // check for NaNs and infinities if ((x & MASK_NAN) == MASK_NAN) { // check for NaN if ((x & 0x0003ffffffffffffull) > 999999999999999ull) x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits else x = x & 0xfe03ffffffffffffull; // clear G6-G12 if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN // set invalid flag *pfpsf |= INVALID_EXCEPTION; // return quiet (SNaN) res = x & 0xfdffffffffffffffull; } else { // QNaN res = x; } BID_RETURN (res); } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } // if x is 0 or non-canonical if (C1 == 0) { if (exp < 0) exp = 0; res = x_sign | (((UINT64) exp + 398) << 53); BID_RETURN (res); } // x is a finite non-zero number (not 0, non-canonical, or special) // return 0 if (exp <= -(p+1)) if (exp <= -17) { res = x_sign | 0x31c0000000000000ull; BID_RETURN (res); } // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { q = nr_digits[x_nr_bits - 1].digits1; if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; the exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate C1 = C1 + midpoint64[ind - 1]; // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by // shiftright128[] and maskhigh128[] // 1 <= x <= 16 // kx = 10^(-x) = ten2mk64[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // if (0 < f* < 10^(-x)) then the result is a midpoint // if floor(C*) is even then C* = floor(C*) - logical right // shift; C* has p decimal digits, correct by Prop. 1) // else if floor(C*) is odd C* = floor(C*)-1 (logical right // shift; C* has p decimal digits, correct by Pr. 1) // else // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // n = C* * 10^(e+x) if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (0 < f* < 10^(-x)) then the result is a midpoint // since round_to_even, subtract 1 if current result is odd if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0) && (fstar.w[0] < ten2mk64[ind - 1])) { res--; } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; BID_RETURN (res); } } /***************************************************************************** * BID64_round_integral_negative *****************************************************************************/ #if DECIMAL_CALL_BY_REFERENCE void bid64_round_integral_negative (UINT64 * pres, UINT64 * px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT64 x = *px; #else UINT64 bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; int q, ind, shift; UINT64 C1; // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits UINT128 fstar; UINT128 P128; x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative // check for NaNs and infinities if ((x & MASK_NAN) == MASK_NAN) { // check for NaN if ((x & 0x0003ffffffffffffull) > 999999999999999ull) x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits else x = x & 0xfe03ffffffffffffull; // clear G6-G12 if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN // set invalid flag *pfpsf |= INVALID_EXCEPTION; // return quiet (SNaN) res = x & 0xfdffffffffffffffull; } else { // QNaN res = x; } BID_RETURN (res); } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } // if x is 0 or non-canonical if (C1 == 0) { if (exp < 0) exp = 0; res = x_sign | (((UINT64) exp + 398) << 53); BID_RETURN (res); } // x is a finite non-zero number (not 0, non-canonical, or special) // return 0 if (exp <= -p) if (exp <= -16) { if (x_sign) { res = 0xb1c0000000000001ull; } else { res = 0x31c0000000000000ull; } BID_RETURN (res); } // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { q = nr_digits[x_nr_bits - 1].digits1; if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q // need to shift right -exp digits from the coefficient; the exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 fits in 64 bits // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by // shiftright128[] and maskhigh128[] // 1 <= x <= 16 // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact if (x_sign && ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1]))) { // if negative and not exact, increment magnitude res++; } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); } else { // if exp < 0 and q + exp <= 0 // the result is +0 or -1 if (x_sign) { res = 0xb1c0000000000001ull; } else { res = 0x31c0000000000000ull; } BID_RETURN (res); } } /***************************************************************************** * BID64_round_integral_positive ****************************************************************************/ #if DECIMAL_CALL_BY_REFERENCE void bid64_round_integral_positive (UINT64 * pres, UINT64 * px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT64 x = *px; #else UINT64 bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; int q, ind, shift; UINT64 C1; // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits UINT128 fstar; UINT128 P128; x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative // check for NaNs and infinities if ((x & MASK_NAN) == MASK_NAN) { // check for NaN if ((x & 0x0003ffffffffffffull) > 999999999999999ull) x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits else x = x & 0xfe03ffffffffffffull; // clear G6-G12 if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN // set invalid flag *pfpsf |= INVALID_EXCEPTION; // return quiet (SNaN) res = x & 0xfdffffffffffffffull; } else { // QNaN res = x; } BID_RETURN (res); } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } // if x is 0 or non-canonical if (C1 == 0) { if (exp < 0) exp = 0; res = x_sign | (((UINT64) exp + 398) << 53); BID_RETURN (res); } // x is a finite non-zero number (not 0, non-canonical, or special) // return 0 if (exp <= -p) if (exp <= -16) { if (x_sign) { res = 0xb1c0000000000000ull; } else { res = 0x31c0000000000001ull; } BID_RETURN (res); } // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { q = nr_digits[x_nr_bits - 1].digits1; if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q // need to shift right -exp digits from the coefficient; the exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 fits in 64 bits // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by // shiftright128[] and maskhigh128[] // 1 <= x <= 16 // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact if (!x_sign && ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1]))) { // if positive and not exact, increment magnitude res++; } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); } else { // if exp < 0 and q + exp <= 0 // the result is -0 or +1 if (x_sign) { res = 0xb1c0000000000000ull; } else { res = 0x31c0000000000001ull; } BID_RETURN (res); } } /***************************************************************************** * BID64_round_integral_zero ****************************************************************************/ #if DECIMAL_CALL_BY_REFERENCE void bid64_round_integral_zero (UINT64 * pres, UINT64 * px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT64 x = *px; #else UINT64 bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; int q, ind, shift; UINT64 C1; // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits UINT128 P128; x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative // check for NaNs and infinities if ((x & MASK_NAN) == MASK_NAN) { // check for NaN if ((x & 0x0003ffffffffffffull) > 999999999999999ull) x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits else x = x & 0xfe03ffffffffffffull; // clear G6-G12 if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN // set invalid flag *pfpsf |= INVALID_EXCEPTION; // return quiet (SNaN) res = x & 0xfdffffffffffffffull; } else { // QNaN res = x; } BID_RETURN (res); } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } // if x is 0 or non-canonical if (C1 == 0) { if (exp < 0) exp = 0; res = x_sign | (((UINT64) exp + 398) << 53); BID_RETURN (res); } // x is a finite non-zero number (not 0, non-canonical, or special) // return 0 if (exp <= -p) if (exp <= -16) { res = x_sign | 0x31c0000000000000ull; BID_RETURN (res); } // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { q = nr_digits[x_nr_bits - 1].digits1; if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; the exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 fits in 127 bits // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by // shiftright128[] and maskhigh128[] // 1 <= x <= 16 // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; // redundant fstar.w[1] = 0; // redundant fstar.w[0] = P128.w[0]; } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); // redundant fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; // redundant fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact // if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind-1])){ // // redundant // } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; BID_RETURN (res); } } /***************************************************************************** * BID64_round_integral_nearest_away ****************************************************************************/ #if DECIMAL_CALL_BY_REFERENCE void bid64_round_integral_nearest_away (UINT64 * pres, UINT64 * px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT64 x = *px; #else UINT64 bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; int q, ind, shift; UINT64 C1; UINT128 P128; x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative // check for NaNs and infinities if ((x & MASK_NAN) == MASK_NAN) { // check for NaN if ((x & 0x0003ffffffffffffull) > 999999999999999ull) x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits else x = x & 0xfe03ffffffffffffull; // clear G6-G12 if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN // set invalid flag *pfpsf |= INVALID_EXCEPTION; // return quiet (SNaN) res = x & 0xfdffffffffffffffull; } else { // QNaN res = x; } BID_RETURN (res); } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } // if x is 0 or non-canonical if (C1 == 0) { if (exp < 0) exp = 0; res = x_sign | (((UINT64) exp + 398) << 53); BID_RETURN (res); } // x is a finite non-zero number (not 0, non-canonical, or special) // return 0 if (exp <= -(p+1)) if (exp <= -17) { res = x_sign | 0x31c0000000000000ull; BID_RETURN (res); } // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { q = nr_digits[x_nr_bits - 1].digits1; if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; the exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate C1 = C1 + midpoint64[ind - 1]; // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by // shiftright128[] and maskhigh128[] // 1 <= x <= 16 // kx = 10^(-x) = ten2mk64[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // if (0 < f* < 10^(-x)) then the result is a midpoint // C* = floor(C*) - logical right shift; C* has p decimal digits, // correct by Prop. 1) // else // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // n = C* * 10^(e+x) if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); } // midpoints are already rounded correctly // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; BID_RETURN (res); } }