/* Copyright (C) 2007 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. In addition to the permissions in the GNU General Public License, the Free Software Foundation gives you unlimited permission to link the compiled version of this file into combinations with other programs, and to distribute those combinations without any restriction coming from the use of this file. (The General Public License restrictions do apply in other respects; for example, they cover modification of the file, and distribution when not linked into a combine executable.) GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING. If not, write to the Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /***************************************************************************** * * BID128 fma x * y + z * ****************************************************************************/ #include "bid_conf.h" #include "bid_functions.h" #include "bid_internal.h" static void rounding_correction ( unsigned int rnd_mode, unsigned int is_inexact_lt_midpoint, unsigned int is_inexact_gt_midpoint, unsigned int is_midpoint_lt_even, unsigned int is_midpoint_gt_even, int unbexp, UINT128 * ptrres, _IDEC_flags * ptrfpsf) { // unbiased true exponent unbexp may be larger than emax UINT128 res = *ptrres; // expected to have the correct sign and coefficient // (the exponent field is ignored, as unbexp is used instead) UINT64 sign, exp; UINT64 C_hi, C_lo; // general correction from RN to RA, RM, RP, RZ // Note: if the result is negative, then is_inexact_lt_midpoint, // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even // have to be considered as if determined for the absolute value of the // result (so they seem to be reversed) if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || is_midpoint_lt_even || is_midpoint_gt_even) { *ptrfpsf |= INEXACT_EXCEPTION; } // apply correction to result calculated with unbounded exponent sign = res.w[1] & MASK_SIGN; exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax C_hi = res.w[1] & MASK_COEFF; C_lo = res.w[0]; if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) && is_midpoint_gt_even))) || (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) && is_midpoint_gt_even)))) { // C = C + 1 C_lo = C_lo + 1; if (C_lo == 0) C_hi = C_hi + 1; if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) { // C = 10^34 => rounding overflow C_hi = 0x0000314dc6448d93ull; C_lo = 0x38c15b0a00000000ull; // 10^33 // exp = exp + EXP_P1; unbexp = unbexp + 1; exp = (UINT64) (unbexp + 6176) << 49; } } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) && ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) || (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) { // C = C - 1 C_lo = C_lo - 1; if (C_lo == 0xffffffffffffffffull) C_hi--; // check if we crossed into the lower decade if (exp > 0 && C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) { // 10^33 - 1 C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 C_lo = 0x378d8e63ffffffffull; // exp = exp - EXP_P1; unbexp = unbexp - 1; exp = (UINT64) (unbexp + 6176) << 49; } } else { ; // the result is already correct } if (unbexp > expmax) { // 6111 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); exp = 0; if (!sign) { // result is positive if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf C_hi = 0x7800000000000000ull; C_lo = 0x0000000000000000ull; } else { // res = +MAXFP = (10^34-1) * 10^emax C_hi = 0x5fffed09bead87c0ull; C_lo = 0x378d8e63ffffffffull; } } else { // result is negative if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf C_hi = 0xf800000000000000ull; C_lo = 0x0000000000000000ull; } else { // res = -MAXFP = -(10^34-1) * 10^emax C_hi = 0xdfffed09bead87c0ull; C_lo = 0x378d8e63ffffffffull; } } } // assemble the result res.w[1] = sign | exp | C_hi; res.w[0] = C_lo; *ptrres = res; } static void add256 (UINT256 x, UINT256 y, UINT256 * pz) { // *z = x + yl assume the sum fits in 256 bits UINT256 z; z.w[0] = x.w[0] + y.w[0]; if (z.w[0] < x.w[0]) { x.w[1]++; if (x.w[1] == 0x0000000000000000ull) { x.w[2]++; if (x.w[2] == 0x0000000000000000ull) { x.w[3]++; } } } z.w[1] = x.w[1] + y.w[1]; if (z.w[1] < x.w[1]) { x.w[2]++; if (x.w[2] == 0x0000000000000000ull) { x.w[3]++; } } z.w[2] = x.w[2] + y.w[2]; if (z.w[2] < x.w[2]) { x.w[3]++; } z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible *pz = z; } static void sub256 (UINT256 x, UINT256 y, UINT256 * pz) { // *z = x - y; assume x >= y UINT256 z; z.w[0] = x.w[0] - y.w[0]; if (z.w[0] > x.w[0]) { x.w[1]--; if (x.w[1] == 0xffffffffffffffffull) { x.w[2]--; if (x.w[2] == 0xffffffffffffffffull) { x.w[3]--; } } } z.w[1] = x.w[1] - y.w[1]; if (z.w[1] > x.w[1]) { x.w[2]--; if (x.w[2] == 0xffffffffffffffffull) { x.w[3]--; } } z.w[2] = x.w[2] - y.w[2]; if (z.w[2] > x.w[2]) { x.w[3]--; } z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y *pz = z; } static int __bid_nr_digits256 (UINT256 R256) { int ind; // determine the number of decimal digits in R256 if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) { // between 1 and 19 digits for (ind = 1; ind <= 19; ind++) { if (R256.w[0] < __bid_ten2k64[ind]) { break; } } // ind digits } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && (R256.w[1] < __bid_ten2k128[0].w[1] || (R256.w[1] == __bid_ten2k128[0].w[1] && R256.w[0] < __bid_ten2k128[0].w[0]))) { // 20 digits ind = 20; } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) { // between 21 and 38 digits for (ind = 1; ind <= 18; ind++) { if (R256.w[1] < __bid_ten2k128[ind].w[1] || (R256.w[1] == __bid_ten2k128[ind].w[1] && R256.w[0] < __bid_ten2k128[ind].w[0])) { break; } } // ind + 20 digits ind = ind + 20; } else if (((R256.w[3] == 0x0 && (R256.w[2] < __bid_ten2k256[0].w[2])) || (R256.w[2] == __bid_ten2k256[0].w[2] && R256.w[1] < __bid_ten2k256[0].w[1]) || (R256.w[2] == __bid_ten2k256[0].w[2] && R256.w[1] == __bid_ten2k256[0].w[1] && R256.w[0] < __bid_ten2k256[0].w[0]))) { // 39 digits ind = 39; } else { // between 40 and 68 digits for (ind = 1; ind <= 29; ind++) { if (R256.w[3] < __bid_ten2k256[ind].w[3] || (R256.w[3] == __bid_ten2k256[ind].w[3] && R256.w[2] < __bid_ten2k256[ind].w[2]) || (R256.w[3] == __bid_ten2k256[ind].w[3] && R256.w[2] == __bid_ten2k256[ind].w[2] && R256.w[1] < __bid_ten2k256[ind].w[1]) || (R256.w[3] == __bid_ten2k256[ind].w[3] && R256.w[2] == __bid_ten2k256[ind].w[2] && R256.w[1] == __bid_ten2k256[ind].w[1] && R256.w[0] < __bid_ten2k256[ind].w[0])) { break; } } // ind + 39 digits ind = ind + 39; } return (ind); } // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so // use the rounding information from ptr_is_* to avoid a double rounding error static void add_and_round (int q3, int q4, int e4, int delta, int p34, UINT64 z_sign, UINT64 p_sign, UINT128 C3, UINT256 C4, int rnd_mode, int *ptr_is_midpoint_lt_even, int *ptr_is_midpoint_gt_even, int *ptr_is_inexact_lt_midpoint, int *ptr_is_inexact_gt_midpoint, _IDEC_flags * ptrfpsf, UINT128 * ptrres) { int scale; int x0; int ind; UINT64 R64; UINT128 P128, R128; UINT192 P192, R192; UINT256 R256; int is_midpoint_lt_even = 0; int is_midpoint_gt_even = 0; int is_inexact_lt_midpoint = 0; int is_inexact_gt_midpoint = 0; int is_midpoint_lt_even0 = 0; int is_midpoint_gt_even0 = 0; int is_inexact_lt_midpoint0 = 0; int is_inexact_gt_midpoint0 = 0; int incr_exp = 0; int is_tiny = 0; int lt_half_ulp = 0; int eq_half_ulp = 0; // int gt_half_ulp = 0; UINT128 res = *ptrres; // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66 scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1 // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6)) if (scale == 0) { R256.w[3] = 0x0ull; R256.w[2] = 0x0ull; R256.w[1] = C3.w[1]; R256.w[0] = C3.w[0]; } else if (scale <= 19) { // 10^scale fits in 64 bits P128.w[1] = 0; P128.w[0] = __bid_ten2k64[scale]; __mul_128x128_to_256 (R256, P128, C3); } else if (scale <= 38) { // 10^scale fits in 128 bits __mul_128x128_to_256 (R256, __bid_ten2k128[scale - 20], C3); } else if (scale <= 57) { // 39 <= scale <= 57 // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits // (10^67 has 223 bits; 10^69 has 230 bits); // must split the computation: // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits __mul_64x128_to_128 (R128, __bid_ten2k64[scale - 38], C3); // now multiply R128 by 10^38 __mul_128x128_to_256 (R256, R128, __bid_ten2k128[18]); } else { // 58 <= scale <= 66 // 10^scale takes between 193 and 220 bits, // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits) // must split the computation: // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because // 10^(scale-38) takes more than 64 bits, C3 will take less than 64 __mul_64x128_to_128 (R128, C3.w[0], __bid_ten2k128[scale - 58]); // now calculate 10*38 * 10^(scale-38) * C3 __mul_128x128_to_256 (R256, R128, __bid_ten2k128[18]); } // C3 * 10^scale is now in R256 // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is // possible // add/subtract C4 and C3 * 10^scale; the exponent is e4 if (p_sign == z_sign) { // R256 = C4 + R256 // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact, // but may require rounding add256 (C4, R256, &R256); } else { // if (p_sign != z_sign) { // R256 = C4 - R256 // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact, // but may require rounding // compare first R256 = C3 * 10^scale and C4 if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) || (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) || (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] && R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4 // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact, // but may require rounding sub256 (R256, C4, &R256); // flip p_sign too, because the result has the sign of z p_sign = z_sign; } else { // if C4 > C3 * 10^scale // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact, // but may require rounding sub256 (C4, R256, &R256); } // if the result is pure zero, the sign depends on the rounding mode // (x*y and z had opposite signs) if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull && R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) { if (rnd_mode != ROUNDING_DOWN) p_sign = 0x0000000000000000ull; else p_sign = 0x8000000000000000ull; // the exponent is max (e4, expmin) if (e4 < -6176) e4 = expmin; // assemble result res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49); res.w[0] = 0x0; *ptrres = res; return; } } // determine the number of decimal digits in R256 ind = __bid_nr_digits256 (R256); // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind; // round to the destination precision, with unbounded exponent if (ind <= p34) { // result rounded to the destination precision with unbounded exponent // is exact if (ind + e4 < p34 + expmin) { is_tiny = 1; // applies to all rounding modes } res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1]; res.w[0] = R256.w[0]; // Note: res is correct only if expmin <= e4 <= expmax } else { // if (ind > p34) // if more than P digits, round to nearest to P digits // round R256 to p34 digits x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 if (ind <= 38) { P128.w[1] = R256.w[1]; P128.w[0] = R256.w[0]; __bid_round128_19_38 (ind, x0, P128, &R128, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); } else if (ind <= 57) { P192.w[2] = R256.w[2]; P192.w[1] = R256.w[1]; P192.w[0] = R256.w[0]; __bid_round192_39_57 (ind, x0, P192, &R192, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R128.w[1] = R192.w[1]; R128.w[0] = R192.w[0]; } else { // if (ind <= 68) __bid_round256_58_76 (ind, x0, R256, &R256, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R128.w[1] = R256.w[1]; R128.w[0] = R256.w[0]; } // the rounded result has p34 = 34 digits e4 = e4 + x0 + incr_exp; if (rnd_mode == ROUNDING_TO_NEAREST) { if (e4 < expmin) { is_tiny = 1; // for other rounding modes apply correction } } else { // for RM, RP, RZ, RA apply correction in order to determine tininess // but do not save the result; apply the correction to // (-1)^p_sign * significand * 10^0 P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1]; P128.w[0] = R128.w[0]; rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, 0, &P128, ptrfpsf); scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 // the number of digits in the significand is p34 = 34 if (e4 + scale < expmin) { is_tiny = 1; } } ind = p34; // the number of decimal digits in the signifcand of res res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN res.w[0] = R128.w[0]; // Note: res is correct only if expmin <= e4 <= expmax // set the inexact flag after rounding with bounded exponent, if any } // at this point we have the result rounded with unbounded exponent in // res and we know its tininess: // res = (-1)^p_sign * significand * 10^e4, // where q (significand) = ind <= p34 // Note: res is correct only if expmin <= e4 <= expmax // check for overflow if RN if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) { res.w[1] = p_sign | 0x7800000000000000ull; res.w[0] = 0x0000000000000000ull; *ptrres = res; *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); return; // BID_RETURN (res) } // else not overflow or not RN, so continue // if (e4 >= expmin) we have the result rounded with bounded exponent if (e4 < expmin) { x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res // where the result rounded [at most] once is // (-1)^p_sign * significand_res * 10^e4 // avoid double rounding error is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; is_midpoint_lt_even0 = is_midpoint_lt_even; is_midpoint_gt_even0 = is_midpoint_gt_even; is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; if (x0 > ind) { // nothing is left of res when moving the decimal point left x0 digits is_inexact_lt_midpoint = 1; res.w[1] = p_sign | 0x0000000000000000ull; res.w[0] = 0x0000000000000000ull; e4 = expmin; } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 // this is <, =, or > 1/2 ulp // compare the ind-digit value in the significand of res with // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is // less than, equal to, or greater than 1/2 ulp (significand of res) R128.w[1] = res.w[1] & MASK_COEFF; R128.w[0] = res.w[0]; if (ind <= 19) { if (R128.w[0] < __bid_midpoint64[ind - 1]) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; } else if (R128.w[0] == __bid_midpoint64[ind - 1]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp // gt_half_ulp = 1; is_inexact_gt_midpoint = 1; } } else { // if (ind <= 38) { if (R128.w[1] < __bid_midpoint128[ind - 20].w[1] || (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && R128.w[0] < __bid_midpoint128[ind - 20].w[0])) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; } else if (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && R128.w[0] == __bid_midpoint128[ind - 20].w[0]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp // gt_half_ulp = 1; is_inexact_gt_midpoint = 1; } } if (lt_half_ulp || eq_half_ulp) { // res = +0.0 * 10^expmin res.w[1] = 0x0000000000000000ull; res.w[0] = 0x0000000000000000ull; } else { // if (gt_half_ulp) // res = +1 * 10^expmin res.w[1] = 0x0000000000000000ull; res.w[0] = 0x0000000000000001ull; } res.w[1] = p_sign | res.w[1]; e4 = expmin; } else { // if (1 <= x0 <= ind - 1 <= 33) // round the ind-digit result to ind - x0 digits if (ind <= 18) { // 2 <= ind <= 18 __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); res.w[1] = 0x0; res.w[0] = R64; } else if (ind <= 38) { P128.w[1] = res.w[1] & MASK_COEFF; P128.w[0] = res.w[0]; __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); } e4 = e4 + x0; // expmin // we want the exponent to be expmin, so if incr_exp = 1 then // multiply the rounded result by 10 - it will still fit in 113 bits if (incr_exp) { // 64 x 128 -> 128 P128.w[1] = res.w[1] & MASK_COEFF; P128.w[0] = res.w[0]; __mul_64x128_to_128 (res, __bid_ten2k64[1], P128); } res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF); // avoid a double rounding error if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && is_midpoint_lt_even) { // double rounding error upward // res = res - 1 res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) res.w[1]--; // Note: a double rounding error upward is not possible; for this // the result after the first rounding would have to be 99...95 // (35 digits in all), possibly followed by a number of zeros; this // is not possible in Cases (2)-(6) or (15)-(17) which may get here is_midpoint_lt_even = 0; is_inexact_lt_midpoint = 1; } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && is_midpoint_gt_even) { // double rounding error downward // res = res + 1 res.w[0]++; if (res.w[0] == 0) res.w[1]++; is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { is_inexact_gt_midpoint = 1; } if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else { ; } } } // res contains the correct result // apply correction if not rounding to nearest if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e4, &res, ptrfpsf); } if (is_midpoint_lt_even || is_midpoint_gt_even || is_inexact_lt_midpoint || is_inexact_gt_midpoint) { // set the inexact flag *ptrfpsf |= INEXACT_EXCEPTION; if (is_tiny) *ptrfpsf |= UNDERFLOW_EXCEPTION; } *ptr_is_midpoint_lt_even = is_midpoint_lt_even; *ptr_is_midpoint_gt_even = is_midpoint_gt_even; *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; *ptrres = res; return; } #if DECIMAL_CALL_BY_REFERENCE void __bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT128 x = *px, y = *py, z = *pz; #if !DECIMAL_GLOBAL_ROUNDING unsigned int rnd_mode = *prnd_mode; #endif #else UINT128 __bid128_fma (UINT128 x, UINT128 y, UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif UINT128 res = {{ 0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull }}; UINT64 x_sign, y_sign, z_sign, p_sign; UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp; int true_p_exp; UINT128 C1, C2, C3; UINT256 C4; int q1 = 0, q2 = 0, q3 = 0, q4; int e1, e2, e3, e4; int scale, ind, delta, x0; int p34 = P34; // used to modify the limit on the number of digits BID_UI64DOUBLE tmp; int x_nr_bits, y_nr_bits, z_nr_bits; unsigned int save_fpsf; int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0; int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; int incr_exp = 0; int lsb; int lt_half_ulp = 0; int eq_half_ulp = 0; int gt_half_ulp = 0; int is_tiny = 0; UINT64 R64, tmp64; UINT128 P128, R128; UINT192 P192, R192; UINT256 R256; // the following are based on the table of special cases for fma; the NaN // behavior is similar to that of the IPF fma // identify cases where at least one operand is NaN if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) // check first for non-canonical NaN payload if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && (y.w[0] > 0x38c15b09ffffffffull))) { y.w[1] = y.w[1] & 0xffffc00000000000ull; y.w[0] = 0x0ull; } if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN // set invalid flag *pfpsf |= INVALID_EXCEPTION; // return quiet (y) res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] res.w[0] = y.w[0]; } else { // y is QNaN // return y res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] res.w[0] = y.w[0]; // if z = SNaN or x = SNaN signal invalid exception if ((z.w[1] & MASK_SNAN) == MASK_SNAN || (x.w[1] & MASK_SNAN) == MASK_SNAN) { // set invalid flag *pfpsf |= INVALID_EXCEPTION; } } BID_RETURN (res) } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) // check first for non-canonical NaN payload if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && (z.w[0] > 0x38c15b09ffffffffull))) { z.w[1] = z.w[1] & 0xffffc00000000000ull; z.w[0] = 0x0ull; } if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN // set invalid flag *pfpsf |= INVALID_EXCEPTION; // return quiet (z) res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] res.w[0] = z.w[0]; } else { // z is QNaN // return z res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] res.w[0] = z.w[0]; // if x = SNaN signal invalid exception if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // set invalid flag *pfpsf |= INVALID_EXCEPTION; } } BID_RETURN (res) } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) // check first for non-canonical NaN payload if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && (x.w[0] > 0x38c15b09ffffffffull))) { x.w[1] = x.w[1] & 0xffffc00000000000ull; x.w[0] = 0x0ull; } if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN // set invalid flag *pfpsf |= INVALID_EXCEPTION; // return quiet (x) res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] res.w[0] = x.w[0]; } else { // x is QNaN // return x res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] res.w[0] = x.w[0]; } BID_RETURN (res) } // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check // for non-canonical values x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative C1.w[1] = x.w[1] & MASK_COEFF; C1.w[0] = x.w[0]; if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf // if x is not infinity check for non-canonical values - treated as zero if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 // non-canonical x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits C1.w[1] = 0; // significand high C1.w[0] = 0; // significand low } else { // G0_G1 != 11 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits if (C1.w[1] > 0x0001ed09bead87c0ull || (C1.w[1] == 0x0001ed09bead87c0ull && C1.w[0] > 0x378d8e63ffffffffull)) { // x is non-canonical if coefficient is larger than 10^34 -1 C1.w[1] = 0; C1.w[0] = 0; } else { // canonical ; } } } y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative C2.w[1] = y.w[1] & MASK_COEFF; C2.w[0] = y.w[0]; if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf // if y is not infinity check for non-canonical values - treated as zero if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 // non-canonical y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits C2.w[1] = 0; // significand high C2.w[0] = 0; // significand low } else { // G0_G1 != 11 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits if (C2.w[1] > 0x0001ed09bead87c0ull || (C2.w[1] == 0x0001ed09bead87c0ull && C2.w[0] > 0x378d8e63ffffffffull)) { // y is non-canonical if coefficient is larger than 10^34 -1 C2.w[1] = 0; C2.w[0] = 0; } else { // canonical ; } } } z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative C3.w[1] = z.w[1] & MASK_COEFF; C3.w[0] = z.w[0]; if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf // if z is not infinity check for non-canonical values - treated as zero if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 // non-canonical z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits C3.w[1] = 0; // significand high C3.w[0] = 0; // significand low } else { // G0_G1 != 11 z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits if (C3.w[1] > 0x0001ed09bead87c0ull || (C3.w[1] == 0x0001ed09bead87c0ull && C3.w[0] > 0x378d8e63ffffffffull)) { // z is non-canonical if coefficient is larger than 10^34 -1 C3.w[1] = 0; C3.w[0] = 0; } else { // canonical ; } } } p_sign = x_sign ^ y_sign; // sign of the product // identify cases where at least one operand is infinity if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf if (p_sign == z_sign) { res.w[1] = z_sign | MASK_INF; res.w[0] = 0x0; } else { // return QNaN Indefinite res.w[1] = 0x7c00000000000000ull; res.w[0] = 0x0000000000000000ull; // set invalid flag *pfpsf |= INVALID_EXCEPTION; } } else { // z = 0 or z = f res.w[1] = p_sign | MASK_INF; res.w[0] = 0x0; } } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf if (p_sign == z_sign) { res.w[1] = z_sign | MASK_INF; res.w[0] = 0x0; } else { // return QNaN Indefinite res.w[1] = 0x7c00000000000000ull; res.w[0] = 0x0000000000000000ull; // set invalid flag *pfpsf |= INVALID_EXCEPTION; } } else { // z = 0 or z = f res.w[1] = p_sign | MASK_INF; res.w[0] = 0x0; } } else { // y = 0 // return QNaN Indefinite res.w[1] = 0x7c00000000000000ull; res.w[0] = 0x0000000000000000ull; // set invalid flag *pfpsf |= INVALID_EXCEPTION; } BID_RETURN (res) } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf // x = f, necessarily if ((p_sign != z_sign) || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) { // return QNaN Indefinite res.w[1] = 0x7c00000000000000ull; res.w[0] = 0x0000000000000000ull; // set invalid flag *pfpsf |= INVALID_EXCEPTION; } else { res.w[1] = z.w[1]; res.w[0] = z.w[0]; } } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0 // z = 0, f, inf // return QNaN Indefinite res.w[1] = 0x7c00000000000000ull; res.w[0] = 0x0000000000000000ull; // set invalid flag *pfpsf |= INVALID_EXCEPTION; } else { // x = f and z = 0, f, necessarily res.w[1] = p_sign | MASK_INF; res.w[0] = 0x0; } BID_RETURN (res) } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf // x = 0, f and y = 0, f, necessarily res.w[1] = z_sign | MASK_INF; res.w[0] = 0x0; BID_RETURN (res) } true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; if (true_p_exp < -6176) p_exp = 0; // cannot be less than EXP_MIN // MACH DEBUG else if (true_p_exp > 6111) // p_exp = (UINT64) (6111 + 6176) << 49; // cannot be more than EXP_MAX else p_exp = (UINT64) (true_p_exp + 6176) << 49; if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0 // the result is 0 if (p_exp < z_exp) res.w[1] = p_exp; // preferred exponent else res.w[1] = z_exp; // preferred exponent if (p_sign == z_sign) { res.w[1] |= z_sign; res.w[0] = 0x0; } else { // x * y and z have opposite signs if (rnd_mode == ROUNDING_DOWN) { // res = -0.0 res.w[1] |= MASK_SIGN; res.w[0] = 0x0; } else { // res = +0.0 // res.w[1] |= 0x0; res.w[0] = 0x0; } } BID_RETURN (res) } // from this point on, we may need to know the number of decimal digits // in the significands of x, y, z when x, y, z != 0 if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite) // q1 = nr. of decimal digits in x // determine first the nr. of bits in x if (C1.w[1] == 0) { if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 // split the 64-bit value in two 32-bit halves to avoid rounding errors if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 tmp.d = (double) (C1.w[0] >> 32); // exact conversion x_nr_bits = 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } else { // x < 2^32 tmp.d = (double) (C1.w[0]); // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } } else { // if x < 2^53 tmp.d = (double) C1.w[0]; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) tmp.d = (double) C1.w[1]; // exact conversion x_nr_bits = 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } q1 = __bid_nr_digits[x_nr_bits - 1].digits; if (q1 == 0) { q1 = __bid_nr_digits[x_nr_bits - 1].digits1; if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) q1++; } } if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite) if (C2.w[1] == 0) { if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53 // split the 64-bit value in two 32-bit halves to avoid rounding errors if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32 tmp.d = (double) (C2.w[0] >> 32); // exact conversion y_nr_bits = 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } else { // y < 2^32 tmp.d = (double) C2.w[0]; // exact conversion y_nr_bits = ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } } else { // if y < 2^53 tmp.d = (double) C2.w[0]; // exact conversion y_nr_bits = ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1]) tmp.d = (double) C2.w[1]; // exact conversion y_nr_bits = 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } q2 = __bid_nr_digits[y_nr_bits].digits; if (q2 == 0) { q2 = __bid_nr_digits[y_nr_bits].digits1; if (C2.w[1] > __bid_nr_digits[y_nr_bits].threshold_hi || (C2.w[1] == __bid_nr_digits[y_nr_bits].threshold_hi && C2.w[0] >= __bid_nr_digits[y_nr_bits].threshold_lo)) q2++; } } if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite) if (C3.w[1] == 0) { if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53 // split the 64-bit value in two 32-bit halves to avoid rounding errors if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32 tmp.d = (double) (C3.w[0] >> 32); // exact conversion z_nr_bits = 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } else { // z < 2^32 tmp.d = (double) C3.w[0]; // exact conversion z_nr_bits = ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } } else { // if z < 2^53 tmp.d = (double) C3.w[0]; // exact conversion z_nr_bits = ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1]) tmp.d = (double) C3.w[1]; // exact conversion z_nr_bits = 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } q3 = __bid_nr_digits[z_nr_bits].digits; if (q3 == 0) { q3 = __bid_nr_digits[z_nr_bits].digits1; if (C3.w[1] > __bid_nr_digits[z_nr_bits].threshold_hi || (C3.w[1] == __bid_nr_digits[z_nr_bits].threshold_hi && C3.w[0] >= __bid_nr_digits[z_nr_bits].threshold_lo)) q3++; } } if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { // x = 0 or y = 0 // z = f, necessarily; for 0 + z return z, with the preferred exponent // the result is z, but need to get the preferred exponent if (z_exp <= p_exp) { // the preferred exponent is z_exp res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1]; res.w[0] = C3.w[0]; } else { // if (p_exp < z_exp) the preferred exponent is p_exp // return (C3 * 10^scale) * 10^(z_exp - scale) // where scale = min (p34-q3, (z_exp-p_exp) >> 49) scale = p34 - q3; ind = (z_exp - p_exp) >> 49; if (ind < scale) scale = ind; if (scale == 0) { res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant res.w[0] = z.w[0]; } else if (q3 <= 19) { // z fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits // 64 x 64 C3.w[0] * __bid_ten2k64[scale] __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); } else { // 10^scale fits in 128 bits // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]); } } else { // z fits in 128 bits, but 10^scale must fit in 64 bits // 64 x 128 __bid_ten2k64[scale] * C3 __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); } // subtract scale from the exponent z_exp = z_exp - ((UINT64) scale << 49); res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; } BID_RETURN (res) } else { ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f } e1 = (x_exp >> 49) - 6176; // unbiased exponent of x e2 = (y_exp >> 49) - 6176; // unbiased exponent of y e3 = (z_exp >> 49) - 6176; // unbiased exponent of z e4 = e1 + e2; // unbiased exponent of the exact x * y // calculate C1 * C2 and its number of decimal digits, q4 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits // where 2 <= q1 + q2 <= 68 // calculate C4 = C1 * C2 and determine q C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0; if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits C4.w[0] = C1.w[0] * C2.w[0]; // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 if (C4.w[0] < __bid_ten2k64[q1 + q2 - 1]) q4 = q1 + q2 - 1; // q4 in [1, 18] else q4 = q1 + q2; // q4 in [2, 19] // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]); // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20 if (C4.w[1] == 0 && C4.w[0] < __bid_ten2k64[19]) { // 19 = q1+q2-1 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; q4 = 19; // 19 = q1 + q2 - 1 } else { // if (C4.w[1] == 0) // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; // else // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; q4 = 20; // 20 = q1 + q2 } } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38 // C4 = C1 * C2 fits in 64 or 128 bits // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits) // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits if (q1 <= 19) { __mul_128x64_to_128 (C4, C1.w[0], C2); } else { // q2 <= 19 __mul_128x64_to_128 (C4, C2.w[0], C1); } // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 if (C4.w[1] < __bid_ten2k128[q1 + q2 - 21].w[1] || (C4.w[1] == __bid_ten2k128[q1 + q2 - 21].w[1] && C4.w[0] < __bid_ten2k128[q1 + q2 - 21].w[0])) { // if (C4.w[1] == 0) // q4 = 20, necessarily // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; // else // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; q4 = q1 + q2 - 1; // q4 in [20, 37] } else { // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; q4 = q1 + q2; // q4 in [21, 38] } } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits // both C1 and C2 fit in 128 bits (actually in 113 bits) // may replace this by 128x128_to192 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39 if (C4.w[2] == 0 && (C4.w[1] < __bid_ten2k128[18].w[1] || (C4.w[1] == __bid_ten2k128[18].w[1] && C4.w[0] < __bid_ten2k128[18].w[0]))) { // 18 = 38 - 20 = q1+q2-1 - 20 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; q4 = 38; // 38 = q1 + q2 - 1 } else { // if (C4.w[2] == 0) // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; // else // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; q4 = 39; // 39 = q1 + q2 } } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57 // C4 = C1 * C2 fits in 128 or 192 bits // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits) // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one // may fit in 64 bits if (C1.w[1] == 0) { // C1 fits in 64 bits // __mul_64x128_full (REShi64, RESlo128, A64, B128) __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); } else if (C2.w[1] == 0) { // C2 fits in 64 bits // __mul_64x128_full (REShi64, RESlo128, A64, B128) __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); } else { // both C1 and C2 require 128 bits // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 } // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 if (C4.w[2] < __bid_ten2k256[q1 + q2 - 40].w[2] || (C4.w[2] == __bid_ten2k256[q1 + q2 - 40].w[2] && (C4.w[1] < __bid_ten2k256[q1 + q2 - 40].w[1] || (C4.w[1] == __bid_ten2k256[q1 + q2 - 40].w[1] && C4.w[0] < __bid_ten2k256[q1 + q2 - 40].w[0])))) { // if (C4.w[2] == 0) // q4 = 39, necessarily // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; // else // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; q4 = q1 + q2 - 1; // q4 in [39, 56] } else { // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; q4 = q1 + q2; // q4 in [40, 57] } } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one // may fit in 64 bits if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192 } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192 } else { // C1 * C2 will fit in 192 bits or in 256 bits __mul_128x128_to_256 (C4, C1, C2); } // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58 if (C4.w[3] == 0 && (C4.w[2] < __bid_ten2k256[18].w[2] || (C4.w[2] == __bid_ten2k256[18].w[2] && (C4.w[1] < __bid_ten2k256[18].w[1] || (C4.w[1] == __bid_ten2k256[18].w[1] && C4.w[0] < __bid_ten2k256[18].w[0]))))) { // 18 = 57 - 39 = q1+q2-1 - 39 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; q4 = 57; // 57 = q1 + q2 - 1 } else { // if (C4.w[3] == 0) // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; // else // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; q4 = 58; // 58 = q1 + q2 } } else { // if 59 <= q1 + q2 <= 68 // C4 = C1 * C2 fits in 192 or 256 bits // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits) // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in // 64 bits // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 if (C4.w[3] < __bid_ten2k256[q1 + q2 - 40].w[3] || (C4.w[3] == __bid_ten2k256[q1 + q2 - 40].w[3] && (C4.w[2] < __bid_ten2k256[q1 + q2 - 40].w[2] || (C4.w[2] == __bid_ten2k256[q1 + q2 - 40].w[2] && (C4.w[1] < __bid_ten2k256[q1 + q2 - 40].w[1] || (C4.w[1] == __bid_ten2k256[q1 + q2 - 40].w[1] && C4.w[0] < __bid_ten2k256[q1 + q2 - 40].w[0])))))) { // if (C4.w[3] == 0) // q4 = 58, necessarily // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; // else // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; q4 = q1 + q2 - 1; // q4 in [58, 67] } else { // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; q4 = q1 + q2; // q4 in [59, 68] } } if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved *pfpsf = 0; if (q4 > p34) { // truncate C4 to p34 digits into res // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 x0 = q4 - p34; if (q4 <= 38) { P128.w[1] = C4.w[1]; P128.w[0] = C4.w[0]; __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); } else if (q4 <= 57) { // 35 <= q4 <= 57 P192.w[2] = C4.w[2]; P192.w[1] = C4.w[1]; P192.w[0] = C4.w[0]; __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); res.w[0] = R192.w[0]; res.w[1] = R192.w[1]; } else { // if (q4 <= 68) __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); res.w[0] = R256.w[0]; res.w[1] = R256.w[1]; } e4 = e4 + x0; if (incr_exp) { e4 = e4 + 1; } q4 = p34; // res is now the coefficient of the result rounded to the destination // precision, with unbounded exponent; the exponent is e4; q4=digits(res) } else { // if (q4 <= p34) // C4 * 10^e4 is the result rounded to the destination precision, with // unbounded exponent (which is exact) if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) { // e4 is too large, but can be brought within range by scaling up C4 scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2 // res = (C4 * 10^scale) * 10^expmax if (q4 <= 19) { // C4 fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits // 64 x 64 C4.w[0] * __bid_ten2k64[scale] __mul_64x64_to_128MACH (res, C4.w[0], __bid_ten2k64[scale]); } else { // 10^scale fits in 128 bits // 64 x 128 C4.w[0] * __bid_ten2k128[scale - 20] __mul_128x64_to_128 (res, C4.w[0], __bid_ten2k128[scale - 20]); } } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits // 64 x 128 __bid_ten2k64[scale] * CC43 __mul_128x64_to_128 (res, __bid_ten2k64[scale], C4); } e4 = e4 - scale; // expmax q4 = q4 + scale; } else { res.w[1] = C4.w[1]; res.w[0] = C4.w[0]; } // res is the coefficient of the result rounded to the destination // precision, with unbounded exponent (it has q4 digits); the exponent // is e4 (exact result) } // check for overflow if (q4 + e4 > p34 + expmax) { if (rnd_mode == ROUNDING_TO_NEAREST) { res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf res.w[0] = 0x0000000000000000ull; *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); } else { res.w[1] = p_sign | res.w[1]; rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e4, &res, pfpsf); } *pfpsf |= save_fpsf; BID_RETURN (res) } // check for underflow if (q4 + e4 < expmin + P34) { is_tiny = 1; // the result is tiny if (e4 < expmin) { // if e4 < expmin, we must truncate more of res x0 = expmin - e4; // x0 >= 1 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; is_midpoint_lt_even0 = is_midpoint_lt_even; is_midpoint_gt_even0 = is_midpoint_gt_even; is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; // the number of decimal digits in res is q4 if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17 __bid_round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); if (incr_exp) { // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 R64 = __bid_ten2k64[q4 - x0]; } // res.w[1] = 0; (from above) res.w[0] = R64; } else { // if (q4 <= 34) // 19 <= q4 <= 38 P128.w[1] = res.w[1]; P128.w[0] = res.w[0]; __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); if (incr_exp) { // increase coefficient by a factor of 10; this will be <= 10^33 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 // res.w[1] = 0; res.w[0] = __bid_ten2k64[q4 - x0]; } else { // 20 <= q4 - x0 <= 37 res.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0]; res.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1]; } } } e4 = e4 + x0; // expmin } else if (x0 == q4) { // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin // determine relationship with 1/2 ulp if (q4 <= 19) { if (res.w[0] < __bid_midpoint64[q4 - 1]) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; } else if (res.w[0] == __bid_midpoint64[q4 - 1]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp // gt_half_ulp = 1; is_inexact_gt_midpoint = 1; } } else { // if (q4 <= 34) if (res.w[1] < __bid_midpoint128[q4 - 20].w[1] || (res.w[1] == __bid_midpoint128[q4 - 20].w[1] && res.w[0] < __bid_midpoint128[q4 - 20].w[0])) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; } else if (res.w[1] == __bid_midpoint128[q4 - 20].w[1] && res.w[0] == __bid_midpoint128[q4 - 20].w[0]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp // gt_half_ulp = 1; is_inexact_gt_midpoint = 1; } } if (lt_half_ulp || eq_half_ulp) { // res = +0.0 * 10^expmin res.w[1] = 0x0000000000000000ull; res.w[0] = 0x0000000000000000ull; } else { // if (gt_half_ulp) // res = +1 * 10^expmin res.w[1] = 0x0000000000000000ull; res.w[0] = 0x0000000000000001ull; } e4 = expmin; } else { // if (x0 > q4) // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin res.w[1] = 0; res.w[0] = 0; e4 = expmin; is_inexact_lt_midpoint = 1; } // avoid a double rounding error if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && is_midpoint_lt_even) { // double rounding error upward // res = res - 1 res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) res.w[1]--; // Note: a double rounding error upward is not possible; for this // the result after the first rounding would have to be 99...95 // (35 digits in all), possibly followed by a number of zeros; this // not possible for f * f + 0 is_midpoint_lt_even = 0; is_inexact_lt_midpoint = 1; } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && is_midpoint_gt_even) { // double rounding error downward // res = res + 1 res.w[0]++; if (res.w[0] == 0) res.w[1]++; is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { is_inexact_gt_midpoint = 1; } if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else { ; } } else { // if e4 >= emin then q4 < P and the result is tiny and exact if (e3 < e4) { // if (e3 < e4) the preferred exponent is e3 // return (C4 * 10^scale) * 10^(e4 - scale) // where scale = min (p34-q4, (e4 - e3)) scale = p34 - q4; ind = e4 - e3; if (ind < scale) scale = ind; if (scale == 0) { ; // res and e4 are unchanged } else if (q4 <= 19) { // C4 fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits // 64 x 64 res.w[0] * __bid_ten2k64[scale] __mul_64x64_to_128MACH (res, res.w[0], __bid_ten2k64[scale]); } else { // 10^scale fits in 128 bits // 64 x 128 res.w[0] * __bid_ten2k128[scale - 20] __mul_128x64_to_128 (res, res.w[0], __bid_ten2k128[scale - 20]); } } else { // res fits in 128 bits, but 10^scale must fit in 64 bits // 64 x 128 __bid_ten2k64[scale] * C3 __mul_128x64_to_128 (res, __bid_ten2k64[scale], res); } // subtract scale from the exponent e4 = e4 - scale; } } // check for inexact result if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || is_midpoint_lt_even || is_midpoint_gt_even) { // set the inexact flag and the underflow flag *pfpsf |= INEXACT_EXCEPTION; *pfpsf |= UNDERFLOW_EXCEPTION; } res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e4, &res, pfpsf); } *pfpsf |= save_fpsf; BID_RETURN (res) } // no overflow and no underflow if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || is_midpoint_lt_even || is_midpoint_gt_even) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e4, &res, pfpsf); } if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact // need to ensure that the result has the preferred exponent p_exp = res.w[1] & MASK_EXP; if (z_exp < p_exp) { // the preferred exponent is z_exp // signficand of res in C3 C3.w[1] = res.w[1] & MASK_COEFF; C3.w[0] = res.w[0]; // the number of decimal digits of x * y is q4 <= 34 // Note: the coefficient fits in 128 bits // return (C3 * 10^scale) * 10^(p_exp - scale) // where scale = min (p34-q4, (p_exp-z_exp) >> 49) scale = p34 - q4; ind = (p_exp - z_exp) >> 49; if (ind < scale) scale = ind; // subtract scale from the exponent p_exp = p_exp - ((UINT64) scale << 49); if (scale == 0) { ; // leave res unchanged } else if (q4 <= 19) { // x * y fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits // 64 x 64 C3.w[0] * __bid_ten2k64[scale] __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); } else { // 10^scale fits in 128 bits // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]); } res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits // 64 x 128 __bid_ten2k64[scale] * C3 __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; } } // else leave the result as it is, because p_exp <= z_exp } *pfpsf |= save_fpsf; BID_RETURN (res) } // else we have f * f + f // continue with x = f, y = f, z = f delta = q3 + e3 - q4 - e4; delta_ge_zero: if (delta >= 0) { if (p34 <= delta - 1 || // Case (1') (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A) // check for overflow, which can occur only in Case (1') if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) { // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary // condition for (q3 + e3) > (p34 + expmax) if (rnd_mode == ROUNDING_TO_NEAREST) { res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf res.w[0] = 0x0000000000000000ull; *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); } else { if (p_sign == z_sign) { is_inexact_lt_midpoint = 1; } else { is_inexact_gt_midpoint = 1; } // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3) scale = p34 - q3; if (scale == 0) { res.w[1] = z_sign | C3.w[1]; res.w[0] = C3.w[0]; } else { if (q3 <= 19) { // C3 fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits // 64 x 64 C3.w[0] * __bid_ten2k64[scale] __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); } else { // 10^scale fits in 128 bits // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]); } } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits // 64 x 128 __bid_ten2k64[scale] * C3 __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); } // the coefficient in res has q3 + scale = p34 digits } e3 = e3 - scale; res.w[1] = z_sign | res.w[1]; rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e3, &res, pfpsf); } BID_RETURN (res) } // res = z if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3) // return (C3 * 10^scale) * 10^(z_exp - scale) // where scale = min (p34-q3, z_exp-EMIN) scale = p34 - q3; ind = e3 + 6176; if (ind < scale) scale = ind; if (scale == 0) { res.w[1] = C3.w[1]; res.w[0] = C3.w[0]; } else if (q3 <= 19) { // z fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits // 64 x 64 C3.w[0] * __bid_ten2k64[scale] __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); } else { // 10^scale fits in 128 bits // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]); } } else { // z fits in 128 bits, but 10^scale must fit in 64 bits // 64 x 128 __bid_ten2k64[scale] * C3 __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); } // the coefficient in res has q3 + scale digits // subtract scale from the exponent z_exp = z_exp - ((UINT64) scale << 49); e3 = e3 - scale; res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; if (scale + q3 < p34) *pfpsf |= UNDERFLOW_EXCEPTION; } else { scale = 0; res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1]; res.w[0] = C3.w[0]; } // use the following to avoid double rounding errors when operating on // mixed formats in rounding to nearest, and for correcting the result // if not rounding to nearest if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) { // there is a gap of exactly one digit between the scaled C3 and C4 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case if ((q3 <= 19 && C3.w[0] != __bid_ten2k64[q3 - 1]) || (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != __bid_ten2k64[19])) || (q3 >= 21 && (C3.w[1] != __bid_ten2k128[q3 - 21].w[1] || C3.w[0] != __bid_ten2k128[q3 - 21].w[0]))) { // C3 * 10^ scale != 10^(q3-1) // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull || // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value } else { // if C3 * 10^scale = 10^(q3+scale-1) // ok from above e3 = (z_exp >> 49) - 6176; // the result is always inexact if (q4 == 1) { R64 = C4.w[0]; } else { // if q4 > 1 then truncate C4 from q4 digits to 1 digit; // x = q4-1, 1 <= x <= 67 and check if this operation is exact if (q4 <= 18) { // 2 <= q4 <= 18 __bid_round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); } else if (q4 <= 38) { P128.w[1] = C4.w[1]; P128.w[0] = C4.w[0]; __bid_round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R64 = R128.w[0]; // one decimal digit } else if (q4 <= 57) { P192.w[2] = C4.w[2]; P192.w[1] = C4.w[1]; P192.w[0] = C4.w[0]; __bid_round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R64 = R192.w[0]; // one decimal digit } else { // if (q4 <= 68) __bid_round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R64 = R256.w[0]; // one decimal digit } if (incr_exp) { R64 = 10; } } if (q4 == 1 && C4.w[0] == 5) { is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 1; is_midpoint_gt_even = 0; } else if ((e3 == expmin) || R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) { // result does not change is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else { is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; // result decremented is 10^(q3+scale) - 1 if ((q3 + scale) <= 19) { res.w[1] = 0; res.w[0] = __bid_ten2k64[q3 + scale]; } else { // if ((q3 + scale + 1) <= 35) res.w[1] = __bid_ten2k128[q3 + scale - 20].w[1]; res.w[0] = __bid_ten2k128[q3 + scale - 20].w[0]; } res.w[0] = res.w[0] - 1; // borrow never occurs z_exp = z_exp - EXP_P1; e3 = e3 - 1; res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; } if (e3 == expmin) { if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) { ; // result not tiny (in round-to-nearest mode) } else { *pfpsf |= UNDERFLOW_EXCEPTION; } } } // end 10^(q3+scale-1) // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } else { if (p_sign == z_sign) { // if (z_sign), set as if for absolute value is_inexact_lt_midpoint = 1; } else { // if (p_sign != z_sign) // if (z_sign), set as if for absolute value is_inexact_gt_midpoint = 1; } *pfpsf |= INEXACT_EXCEPTION; } // the result is always inexact => set the inexact flag // Determine tininess: // if (exp > expmin) // the result is not tiny // else // if exp = emin // if (q3 + scale < p34) // the result is tiny // else // if (q3 + scale = p34) // if (C3 * 10^scale > 10^33) // the result is not tiny // else // if C3 * 10^scale = 10^33 // if (xy * z > 0) // the result is not tiny // else // if (xy * z < 0) // if (z > 0) // if rnd_mode != RP // the result is tiny // else // if RP // the result is not tiny // else // if (z < 0) // if rnd_mode != RM // the result is tiny // else // if RM // the result is not tiny // endif // endif // endif // endif // endif // endif if (((e3 == expmin) && ((q3 + scale) < p34)) || ((e3 == expmin) && ((q3 + scale) == p34) && ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull) && // 10^33_high (res.w[0] == 0x38c15b0a00000000ull) && // 10^33_low (z_sign != p_sign) && ((!z_sign && rnd_mode != ROUNDING_UP) || (z_sign && rnd_mode != ROUNDING_DOWN)))) { *pfpsf |= UNDERFLOW_EXCEPTION; } if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e3, &res, pfpsf); } BID_RETURN (res) } else if (p34 == delta) { // Case (1''B) // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3 // and C3 can be scaled up to p34 digits if needed // scale C3 to p34 digits if needed scale = p34 - q3; // 0 <= scale <= p34 - 1 if (scale == 0) { res.w[1] = C3.w[1]; res.w[0] = C3.w[0]; } else if (q3 <= 19) { // z fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits // 64 x 64 C3.w[0] * __bid_ten2k64[scale] __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); } else { // 10^scale fits in 128 bits // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]); } } else { // z fits in 128 bits, but 10^scale must fit in 64 bits // 64 x 128 __bid_ten2k64[scale] * C3 __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); } // subtract scale from the exponent z_exp = z_exp - ((UINT64) scale << 49); e3 = e3 - scale; // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits // determine whether x * y is less than, equal to, or greater than // 1/2 ulp (z) if (q4 <= 19) { if (C4.w[0] < __bid_midpoint64[q4 - 1]) { // < 1/2 ulp lt_half_ulp = 1; } else if (C4.w[0] == __bid_midpoint64[q4 - 1]) { // = 1/2 ulp eq_half_ulp = 1; } else { // > 1/2 ulp gt_half_ulp = 1; } } else if (q4 <= 38) { if (C4.w[2] == 0 && (C4.w[1] < __bid_midpoint128[q4 - 20].w[1] || (C4.w[1] == __bid_midpoint128[q4 - 20].w[1] && C4.w[0] < __bid_midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp lt_half_ulp = 1; } else if (C4.w[2] == 0 && C4.w[1] == __bid_midpoint128[q4 - 20].w[1] && C4.w[0] == __bid_midpoint128[q4 - 20].w[0]) { // = 1/2 ulp eq_half_ulp = 1; } else { // > 1/2 ulp gt_half_ulp = 1; } } else if (q4 <= 58) { if (C4.w[3] == 0 && (C4.w[2] < __bid_midpoint192[q4 - 39].w[2] || (C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && C4.w[1] < __bid_midpoint192[q4 - 39].w[1]) || (C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && C4.w[1] == __bid_midpoint192[q4 - 39].w[1] && C4.w[0] < __bid_midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp lt_half_ulp = 1; } else if (C4.w[3] == 0 && C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && C4.w[1] == __bid_midpoint192[q4 - 39].w[1] && C4.w[0] == __bid_midpoint192[q4 - 39].w[0]) { // = 1/2 ulp eq_half_ulp = 1; } else { // > 1/2 ulp gt_half_ulp = 1; } } else { if (C4.w[3] < __bid_midpoint256[q4 - 59].w[3] || (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && C4.w[2] < __bid_midpoint256[q4 - 59].w[2]) || (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && C4.w[1] < __bid_midpoint256[q4 - 59].w[1]) || (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && C4.w[1] == __bid_midpoint256[q4 - 59].w[1] && C4.w[0] < __bid_midpoint256[q4 - 59].w[0])) { // < 1/2 ulp lt_half_ulp = 1; } else if (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && C4.w[1] == __bid_midpoint256[q4 - 59].w[1] && C4.w[0] == __bid_midpoint256[q4 - 59].w[0]) { // = 1/2 ulp eq_half_ulp = 1; } else { // > 1/2 ulp gt_half_ulp = 1; } } if (p_sign == z_sign) { if (lt_half_ulp) { res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; // use the following to avoid double rounding errors when operating on // mixed formats in rounding to nearest is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { // add 1 ulp to the significand res.w[0]++; if (res.w[0] == 0x0ull) res.w[1]++; // check for rounding overflow, when coeff == 10^34 if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull && res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34 e3 = e3 + 1; // coeff = 10^33 z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP; res.w[1] = 0x0000314dc6448d93ull; res.w[0] = 0x38c15b0a00000000ull; } // end add 1 ulp res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; if (eq_half_ulp) { is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value } else { is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value } } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) // leave unchanged res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value } // the result is always inexact, and never tiny // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; // check for overflow if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) { res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf res.w[0] = 0x0000000000000000ull; *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); BID_RETURN (res) } if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e3, &res, pfpsf); } } else { // if (p_sign != z_sign) // consider two cases, because C3 * 10^scale = 10^33 is a special case if (res.w[1] != 0x0000314dc6448d93ull || res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 if (lt_half_ulp) { res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; // use the following to avoid double rounding errors when operating // on mixed formats in rounding to nearest is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { // subtract 1 ulp from the significand res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) res.w[1]--; res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; if (eq_half_ulp) { is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value } else { is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value } } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) // leave unchanged res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value } // the result is always inexact, and never tiny // check for overflow for RN if (e3 > expmax) { if (rnd_mode == ROUNDING_TO_NEAREST) { res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf res.w[0] = 0x0000000000000000ull; *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); } else { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e3, &res, pfpsf); } BID_RETURN (res) } // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e3, &res, pfpsf); } } else { // if C3 * 10^scale = 10^33 e3 = (z_exp >> 49) - 6176; if (e3 > expmin) { // the result is exact if exp > expmin and C4 = d*10^(q4-1), // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact if (q4 == 1) { // if q4 = 1 the result is exact // result coefficient = 10^34 - C4 res.w[1] = 0x0001ed09bead87c0ull; res.w[0] = 0x378d8e6400000000ull - C4.w[0]; z_exp = z_exp - EXP_P1; e3 = e3 - 1; res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; } else { // if q4 > 1 then truncate C4 from q4 digits to 1 digit; // x = q4-1, 1 <= x <= 67 and check if this operation is exact if (q4 <= 18) { // 2 <= q4 <= 18 __bid_round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); } else if (q4 <= 38) { P128.w[1] = C4.w[1]; P128.w[0] = C4.w[0]; __bid_round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R64 = R128.w[0]; // one decimal digit } else if (q4 <= 57) { P192.w[2] = C4.w[2]; P192.w[1] = C4.w[1]; P192.w[0] = C4.w[0]; __bid_round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R64 = R192.w[0]; // one decimal digit } else { // if (q4 <= 68) __bid_round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R64 = R256.w[0]; // one decimal digit } if (!is_midpoint_lt_even && !is_midpoint_gt_even && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // the result is exact: 10^34 - R64 // incr_exp = 0 with certainty z_exp = z_exp - EXP_P1; e3 = e3 - 1; res.w[1] = z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull; res.w[0] = 0x378d8e6400000000ull - R64; } else { // We want R64 to be the top digit of C4, but we actually // obtained (C4 * 10^(-q4+1))RN; a correction may be needed, // because the top digit is (C4 * 10^(-q4+1))RZ // however, if incr_exp = 1 then R64 = 10 with certainty if (incr_exp) { R64 = 10; } // the result is inexact as C4 has more than 1 significant digit // and C3 * 10^scale = 10^33 // example of case that is treated here: // 100...0 * 10^e3 - 0.41 * 10^e3 = // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1) // note that (e3 > expmin} // in order to round, subtract R64 from 10^34 and then compare // C4 - R64 * 10^(q4-1) with 1/2 ulp // calculate 10^34 - R64 res.w[1] = 0x0001ed09bead87c0ull; res.w[0] = 0x378d8e6400000000ull - R64; z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand // calculate C4 - R64 * 10^(q4-1); this is a rare case and // R64 is small, 1 <= R64 <= 9 e3 = e3 - 1; if (is_inexact_lt_midpoint) { is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; } else if (is_inexact_gt_midpoint) { is_inexact_gt_midpoint = 0; is_inexact_lt_midpoint = 1; } else if (is_midpoint_lt_even) { is_midpoint_lt_even = 0; is_midpoint_gt_even = 1; } else if (is_midpoint_gt_even) { is_midpoint_gt_even = 0; is_midpoint_lt_even = 1; } else { ; } // the result is always inexact, and never tiny // check for overflow for RN if (e3 > expmax) { if (rnd_mode == ROUNDING_TO_NEAREST) { res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf res.w[0] = 0x0000000000000000ull; *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); } else { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e3, &res, pfpsf); } BID_RETURN (res) } // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e3, &res, pfpsf); } z_exp = res.w[1] & MASK_EXP; e3 = (z_exp >> 49) - 6176; } // end result is inexact } // end q4 > 1 } else { // if (e3 = emin) // if e3 = expmin the result is also tiny (the condition for // tininess is C4 > 050...0 [q4 digits] which is met because // the msd of C4 is not zero) // the result is tiny and inexact in all rounding modes; // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp, // gt_half_ulp to calculate) // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp if (gt_half_ulp) { // res = 10^33 - 1 res.w[1] = 0x0000314dc6448d93ull; res.w[0] = 0x38c15b09ffffffffull; } else { res.w[1] = 0x0000314dc6448d93ull; res.w[0] = 0x38c15b0a00000000ull; } res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later if (eq_half_ulp) { is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value } else if (lt_half_ulp) { is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value } else { // if (gt_half_ulp) is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value } if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e3, &res, pfpsf); } } // end e3 = emin // set the inexact flag (if the result was not exact) if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || is_midpoint_lt_even || is_midpoint_gt_even) *pfpsf |= INEXACT_EXCEPTION; } // end 10^33 } // end if (p_sign != z_sign) res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; BID_RETURN (res) } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) (q3 <= delta && delta + q4 <= p34) || // Case (3) (delta < q3 && p34 < delta + q4) || // Case (4) (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5) (delta + q4 < q3)) &&// Case (6) !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6) // the result has the sign of z if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) (delta < q3 && p34 < delta + q4)) { // Case (4) // round first the sum x * y + z with unbounded exponent // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1, // 1 <= scale <= 33 // calculate res = C3 * 10^scale scale = p34 - q3; x0 = delta + q4 - p34; } else if (delta + q4 < q3) { // Case (6) // make Case (6) look like Case (3) or Case (5) with scale = 0 // by scaling up C4 by 10^(q3 - delta - q4) scale = q3 - delta - q4; // 1 <= scale <= 33 if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits // 64 x 64 C4.w[0] * __bid_ten2k64[scale] __mul_64x64_to_128MACH (P128, C4.w[0], __bid_ten2k64[scale]); } else { // 10^scale fits in 128 bits // 64 x 128 C4.w[0] * __bid_ten2k128[scale - 20] __mul_128x64_to_128 (P128, C4.w[0], __bid_ten2k128[scale - 20]); } } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits // 64 x 128 __bid_ten2k64[scale] * C4 __mul_128x64_to_128 (P128, __bid_ten2k64[scale], C4); } C4.w[0] = P128.w[0]; C4.w[1] = P128.w[1]; // e4 does not need adjustment, as it is not used from this point on scale = 0; x0 = 0; // now Case (6) looks like Case (3) or Case (5) with scale = 0 } else { // if Case (3) or Case (5) // Note: Case (3) is similar to Case (2), but scale differs and the // result is exact, unless it is tiny (so x0 = 0 when calculating the // result with unbounded exponent) // calculate first the sum x * y + z with unbounded exponent (exact) // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1, // 1 <= scale <= 33 // calculate res = C3 * 10^scale scale = delta + q4 - q3; x0 = 0; // Note: the comments which follow refer [mainly] to Case (2)] } case2_repeat: if (scale == 0) { // this could happen e.g. if we return to case2_repeat // or in Case (4) res.w[1] = C3.w[1]; res.w[0] = C3.w[0]; } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits // 64 x 64 C3.w[0] * __bid_ten2k64[scale] __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); } else { // 10^scale fits in 128 bits // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]); } } else { // z fits in 128 bits, but 10^scale must fit in 64 bits // 64 x 128 __bid_ten2k64[scale] * C3 __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); } // e3 is already calculated e3 = e3 - scale; // now res = C3 * 10^scale and e3 = e3 - scale // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat // because the result was too small // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34, // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67) // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of // the rounding fits in 128 bits!) // x0 = delta + q4 - p34 (calculated before reaching case2_repeat) // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34 if (x0 == 0) { // this could happen only if we return to case2_repeat, or // for Case (6) R128.w[1] = C4.w[1]; R128.w[0] = C4.w[0]; } else if (q4 <= 18) { // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17 __bid_round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); if (incr_exp) { // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 R64 = __bid_ten2k64[q4 - x0]; } R128.w[1] = 0; R128.w[0] = R64; } else if (q4 <= 38) { // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37 P128.w[1] = C4.w[1]; P128.w[0] = C4.w[0]; __bid_round128_19_38 (q4, x0, P128, &R128, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); if (incr_exp) { // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 R128.w[0] = __bid_ten2k64[q4 - x0]; // R128.w[1] stays 0 } else { // 20 <= q4 - x0 <= 37 R128.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0]; R128.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1]; } } } else if (q4 <= 57) { // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56 P192.w[2] = C4.w[2]; P192.w[1] = C4.w[1]; P192.w[0] = C4.w[0]; __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); // R192.w[2] is always 0 if (incr_exp) { // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 R192.w[0] = __bid_ten2k64[q4 - x0]; // R192.w[1] stays 0 // R192.w[2] stays 0 } else { // 20 <= q4 - x0 <= 33 R192.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0]; R192.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1]; // R192.w[2] stays 0 } } R128.w[1] = R192.w[1]; R128.w[0] = R192.w[0]; } else { // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67 __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); // R256.w[3] and R256.w[2] are always 0 if (incr_exp) { // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 R256.w[0] = __bid_ten2k64[q4 - x0]; // R256.w[1] stays 0 // R256.w[2] stays 0 // R256.w[3] stays 0 } else { // 20 <= q4 - x0 <= 33 R256.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0]; R256.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1]; // R256.w[2] stays 0 // R256.w[3] stays 0 } } R128.w[1] = R256.w[1]; R128.w[0] = R256.w[0]; } // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4, // rounded to nearest, which were copied into R128 if (z_sign == p_sign) { lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale // the sum can result in [up to] p34 or p34 + 1 digits res.w[0] = res.w[0] + R128.w[0]; res.w[1] = res.w[1] + R128.w[1]; if (res.w[0] < R128.w[0]) res.w[1]++; // carry // if res > 10^34 - 1 need to increase x0 and decrease scale by 1 if (res.w[1] > 0x0001ed09bead87c0ull || (res.w[1] == 0x0001ed09bead87c0ull && res.w[0] > 0x378d8e63ffffffffull)) { // avoid double rounding error is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; is_midpoint_lt_even0 = is_midpoint_lt_even; is_midpoint_gt_even0 = is_midpoint_gt_even; is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; P128.w[1] = res.w[1]; P128.w[0] = res.w[0]; __bid_round128_19_38 (35, 1, P128, &res, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); // incr_exp is 0 with certainty in this case // avoid a double rounding error if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && is_midpoint_lt_even) { // double rounding error upward // res = res - 1 res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) res.w[1]--; // Note: a double rounding error upward is not possible; for this // the result after the first rounding would have to be 99...95 // (35 digits in all), possibly followed by a number of zeros; this // not possible in Cases (2)-(6) or (15)-(17) which may get here is_midpoint_lt_even = 0; is_inexact_lt_midpoint = 1; } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && is_midpoint_gt_even) { // double rounding error downward // res = res + 1 res.w[0]++; if (res.w[0] == 0) res.w[1]++; is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { is_inexact_gt_midpoint = 1; } if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else { ; } // adjust exponent e3 = e3 + 1; if (!is_midpoint_lt_even && !is_midpoint_gt_even && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { is_inexact_lt_midpoint = 1; } } } else { // this is the result rounded with unbounded exponent, unless a // correction is needed res.w[1] = res.w[1] & MASK_COEFF; if (lsb == 1) { if (is_midpoint_gt_even) { // res = res + 1 is_midpoint_gt_even = 0; is_midpoint_lt_even = 1; res.w[0]++; if (res.w[0] == 0x0) res.w[1]++; // check for rounding overflow if (res.w[1] == 0x0001ed09bead87c0ull && res.w[0] == 0x378d8e6400000000ull) { // res = 10^34 => rounding overflow res.w[1] = 0x0000314dc6448d93ull; res.w[0] = 0x38c15b0a00000000ull; // 10^33 e3++; } } else if (is_midpoint_lt_even) { // res = res - 1 is_midpoint_lt_even = 0; is_midpoint_gt_even = 1; res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) res.w[1]--; // if the result is pure zero, the sign depends on the rounding // mode (x*y and z had opposite signs) if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { if (rnd_mode != ROUNDING_DOWN) z_sign = 0x0000000000000000ull; else z_sign = 0x8000000000000000ull; // the exponent is max (e3, expmin) res.w[1] = 0x0; res.w[0] = 0x0; BID_RETURN (res) } } else { ; } } } } else { // if (z_sign != p_sign) lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4 // used to swap rounding indicators if p_sign != z_sign // the sum can result in [up to] p34 or p34 - 1 digits tmp64 = res.w[0]; res.w[0] = res.w[0] - R128.w[0]; res.w[1] = res.w[1] - R128.w[1]; if (res.w[0] > tmp64) res.w[1]--; // borrow // if res < 10^33 and exp > expmin need to decrease x0 and // increase scale by 1 if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || (res.w[1] == 0x0000314dc6448d93ull && res.w[0] < 0x38c15b0a00000000ull)) || (is_inexact_lt_midpoint && res.w[1] == 0x0000314dc6448d93ull && res.w[0] == 0x38c15b0a00000000ull)) && x0 >= 1) { x0 = x0 - 1; // first restore e3, otherwise it will be too small e3 = e3 + scale; scale = scale + 1; is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; incr_exp = 0; goto case2_repeat; } // else this is the result rounded with unbounded exponent // because the result has opposite sign to that of C4 which was // rounded, need to change the rounding indicators if (is_inexact_lt_midpoint) { is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; } else if (is_inexact_gt_midpoint) { is_inexact_gt_midpoint = 0; is_inexact_lt_midpoint = 1; } else if (lsb == 0) { if (is_midpoint_lt_even) { is_midpoint_lt_even = 0; is_midpoint_gt_even = 1; } else if (is_midpoint_gt_even) { is_midpoint_gt_even = 0; is_midpoint_lt_even = 1; } else { ; } } else if (lsb == 1) { if (is_midpoint_lt_even) { // res = res + 1 res.w[0]++; if (res.w[0] == 0x0) res.w[1]++; // check for rounding overflow if (res.w[1] == 0x0001ed09bead87c0ull && res.w[0] == 0x378d8e6400000000ull) { // res = 10^34 => rounding overflow res.w[1] = 0x0000314dc6448d93ull; res.w[0] = 0x38c15b0a00000000ull; // 10^33 e3++; } } else if (is_midpoint_gt_even) { // res = res - 1 res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) res.w[1]--; // if the result is pure zero, the sign depends on the rounding // mode (x*y and z had opposite signs) if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { if (rnd_mode != ROUNDING_DOWN) z_sign = 0x0000000000000000ull; else z_sign = 0x8000000000000000ull; // the exponent is max (e3, expmin) res.w[1] = 0x0; res.w[0] = 0x0; BID_RETURN (res) } } else { ; } } else { ; } } // check for underflow if (e3 < expmin) { // the result is tiny, so we must truncate more of res is_tiny = 1; x0 = expmin - e3; is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; is_midpoint_lt_even0 = is_midpoint_lt_even; is_midpoint_gt_even0 = is_midpoint_gt_even; is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; // determine the number of decimal digits in res if (res.w[1] == 0x0) { // between 1 and 19 digits for (ind = 1; ind <= 19; ind++) { if (res.w[0] < __bid_ten2k64[ind]) { break; } } // ind digits } else if ((res.w[1] < __bid_ten2k128[0].w[1] || (res.w[1] == __bid_ten2k128[0].w[1] && res.w[0] < __bid_ten2k128[0].w[0]))) { // 20 digits ind = 20; } else { // between 21 and 38 digits for (ind = 1; ind <= 18; ind++) { if (res.w[1] < __bid_ten2k128[ind].w[1] || (res.w[1] == __bid_ten2k128[ind].w[1] && res.w[0] < __bid_ten2k128[ind].w[0])) { break; } } // ind + 20 digits ind = ind + 20; } // at this point ind >= x0; because delta >= 2 on this path, the case // ind = x0 cab occur only in Case (2) or case (3), when C3 has one // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), // the signs of x * y and z are opposite, and through cancellation // the most significant decimal digit in res has the weight // 10^(emin-1); however, it is clear that in this case the most // significant digit is 9, so the result before rounding is // 0.9... * 10^emin // Otherwise, ind > x0 because there are non-zero decimal digits in the // result with weight of at least 10^emin, and correction for underflow // can be carried out using the round*_*_2_* () routines if (x0 == ind) { // the result before rounding is 0.9... * 10^emin res.w[1] = 0x0; res.w[0] = 0x1; is_inexact_gt_midpoint = 1; } else if (ind <= 18) { // check that 2 <= ind // 2 <= ind <= 18, 1 <= x0 <= 17 __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); if (incr_exp) { // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 R64 = __bid_ten2k64[q4 - x0]; } res.w[1] = 0; res.w[0] = R64; } else if (ind <= 38) { // 19 <= ind <= 38 P128.w[1] = res.w[1]; P128.w[0] = res.w[0]; __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); if (incr_exp) { // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19 res.w[0] = __bid_ten2k64[ind - x0]; // res.w[1] stays 0 } else { // 20 <= ind - x0 <= 37 res.w[0] = __bid_ten2k128[ind - x0 - 20].w[0]; res.w[1] = __bid_ten2k128[ind - x0 - 20].w[1]; } } } // avoid a double rounding error if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && is_midpoint_lt_even) { // double rounding error upward // res = res - 1 res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) res.w[1]--; // Note: a double rounding error upward is not possible; for this // the result after the first rounding would have to be 99...95 // (35 digits in all), possibly followed by a number of zeros; this // not possible in Cases (2)-(6) which may get here is_midpoint_lt_even = 0; is_inexact_lt_midpoint = 1; } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && is_midpoint_gt_even) { // double rounding error downward // res = res + 1 res.w[0]++; if (res.w[0] == 0) res.w[1]++; is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { is_inexact_gt_midpoint = 1; } if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else { ; } // adjust exponent e3 = e3 + x0; if (!is_midpoint_lt_even && !is_midpoint_gt_even && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { is_inexact_lt_midpoint = 1; } } } // check for inexact result if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || is_midpoint_lt_even || is_midpoint_gt_even) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; if (is_tiny) *pfpsf |= UNDERFLOW_EXCEPTION; } // now check for significand = 10^34 (may have resulted from going // back to case2_repeat) if (res.w[1] == 0x0001ed09bead87c0ull && res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34 res.w[1] = 0x0000314dc6448d93ull; // res = 10^33 res.w[0] = 0x38c15b0a00000000ull; e3 = e3 + 1; } res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; // check for overflow if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) { res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf res.w[0] = 0x0000000000000000ull; *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); } if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e3, &res, pfpsf); } BID_RETURN (res) } else { // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and // the signs of x*y and z are opposite; in these cases massive // cancellation can occur, so it is better to scale either C3 or C4 and // to perform the subtraction before rounding; rounding is performed // next, depending on the number of decimal digits in the result and on // the exponent value // Note: overlow is not possible in this case // this is similar to Cases (15), (16), and (17) if (delta + q4 < q3) { // from Case (6) // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign) // and call add_and_round; delta stays positive // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 P128.w[1] = C3.w[1]; P128.w[0] = C3.w[0]; C3.w[1] = C4.w[1]; C3.w[0] = C4.w[0]; C4.w[1] = P128.w[1]; C4.w[0] = P128.w[0]; ind = q3; q3 = q4; q4 = ind; ind = e3; e3 = e4; e4 = ind; tmp64 = z_sign; z_sign = p_sign; p_sign = tmp64; } else { // from Cases (2), (3), (4), (5) // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be // scaled up by q4 + delta - q3; this is the same as in Cases (15), // (16), and (17) if we just change the sign of delta delta = -delta; } add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, rnd_mode, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, pfpsf, &res); BID_RETURN (res) } } else { // if delta < 0 delta = -delta; if (p34 < q4 && q4 <= delta) { // Case (7) // truncate C4 to p34 digits into res // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 x0 = q4 - p34; if (q4 <= 38) { P128.w[1] = C4.w[1]; P128.w[0] = C4.w[0]; __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); } else if (q4 <= 57) { // 35 <= q4 <= 57 P192.w[2] = C4.w[2]; P192.w[1] = C4.w[1]; P192.w[0] = C4.w[0]; __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); res.w[0] = R192.w[0]; res.w[1] = R192.w[1]; } else { // if (q4 <= 68) __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); res.w[0] = R256.w[0]; res.w[1] = R256.w[1]; } e4 = e4 + x0; if (incr_exp) { e4 = e4 + 1; } if (!is_midpoint_lt_even && !is_midpoint_gt_even && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // if C4 rounded to p34 digits is exact then the result is inexact, // in a way that depends on the signs of x * y and z if (p_sign == z_sign) { is_inexact_lt_midpoint = 1; } else { // if (p_sign != z_sign) if (res.w[1] != 0x0000314dc6448d93ull || res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33 is_inexact_gt_midpoint = 1; } else { // res = 10^33 and exact is a special case // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1 // Note: ulp is really ulp/10 (after borrow which propagates to msd) if (delta > p34 + 1) { // C3 < 1/2 // res = 10^33, unchanged is_inexact_gt_midpoint = 1; } else { // if (delta == p34 + 1) if (q3 <= 19) { if (C3.w[0] < __bid_midpoint64[q3 - 1]) { // C3 < 1/2 ulp // res = 10^33, unchanged is_inexact_gt_midpoint = 1; } else if (C3.w[0] == __bid_midpoint64[q3 - 1]) { // C3 = 1/2 ulp // res = 10^33, unchanged is_midpoint_lt_even = 1; } else { // if (C3.w[0] > __bid_midpoint64[q3-1]), C3 > 1/2 ulp res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 res.w[0] = 0x378d8e63ffffffffull; e4 = e4 - 1; is_inexact_lt_midpoint = 1; } } else { // if (20 <= q3 <=34) if (C3.w[1] < __bid_midpoint128[q3 - 20].w[1] || (C3.w[1] == __bid_midpoint128[q3 - 20].w[1] && C3.w[0] < __bid_midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp // res = 10^33, unchanged is_inexact_gt_midpoint = 1; } else if (C3.w[1] == __bid_midpoint128[q3 - 20].w[1] && C3.w[0] == __bid_midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp // res = 10^33, unchanged is_midpoint_lt_even = 1; } else { // if (C3 > __bid_midpoint128[q3-20]), C3 > 1/2 ulp res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 res.w[0] = 0x378d8e63ffffffffull; e4 = e4 - 1; is_inexact_lt_midpoint = 1; } } } } } } else if (is_midpoint_lt_even) { if (z_sign != p_sign) { // needs correction: res = res - 1 res.w[0] = res.w[0] - 1; if (res.w[0] == 0xffffffffffffffffull) res.w[1]--; // if it is (10^33-1)*10^e4 then the corect result is // (10^34-1)*10(e4-1) if (res.w[1] == 0x0000314dc6448d93ull && res.w[0] == 0x38c15b09ffffffffull) { res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 res.w[0] = 0x378d8e63ffffffffull; e4 = e4 - 1; } is_midpoint_lt_even = 0; is_inexact_lt_midpoint = 1; } else { // if (z_sign == p_sign) is_midpoint_lt_even = 0; is_inexact_gt_midpoint = 1; } } else if (is_midpoint_gt_even) { if (z_sign == p_sign) { // needs correction: res = res + 1 (cannot cross in the next binade) res.w[0] = res.w[0] + 1; if (res.w[0] == 0x0000000000000000ull) res.w[1]++; is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else { // if (z_sign != p_sign) is_midpoint_gt_even = 0; is_inexact_lt_midpoint = 1; } } else { ; // the rounded result is already correct } // check for overflow if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) { res.w[1] = p_sign | 0x7800000000000000ull; res.w[0] = 0x0000000000000000ull; *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION); } else { // no overflow or not RN p_exp = ((UINT64) (e4 + 6176) << 49); res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; } if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e4, &res, pfpsf); } if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || is_midpoint_lt_even || is_midpoint_gt_even) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } BID_RETURN (res) } else if ((q4 <= p34 && p34 <= delta) || // Case (8) (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9) (q4 <= delta && delta + q3 <= p34) || // Case (10) (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13) (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14) (delta + q3 < q4 && q4 <= p34)) { // Case (18) // Case (8) is similar to Case (1), with C3 and C4 swapped // Case (9) is similar to Case (2), with C3 and C4 swapped // Case (10) is similar to Case (3), with C3 and C4 swapped // Case (13) is similar to Case (4), with C3 and C4 swapped // Case (14) is similar to Case (5), with C3 and C4 swapped // Case (18) is similar to Case (6), with C3 and C4 swapped // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp) // and go back to delta_ge_zero // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 P128.w[1] = C3.w[1]; P128.w[0] = C3.w[0]; C3.w[1] = C4.w[1]; C3.w[0] = C4.w[0]; C4.w[1] = P128.w[1]; C4.w[0] = P128.w[0]; ind = q3; q3 = q4; q4 = ind; ind = e3; e3 = e4; e4 = ind; tmp64 = z_sign; z_sign = p_sign; p_sign = tmp64; tmp64 = z_exp; z_exp = p_exp; p_exp = tmp64; goto delta_ge_zero; } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11) (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12) // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3, // 1 <= x0 <= q3 - 1 <= p34 - 1 x0 = e4 - e3; // or x0 = delta + q3 - q4 if (q3 <= 18) { // 2 <= q3 <= 18 __bid_round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); // C3.w[1] = 0; C3.w[0] = R64; } else if (q3 <= 38) { __bid_round128_19_38 (q3, x0, C3, &R128, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); C3.w[1] = R128.w[1]; C3.w[0] = R128.w[0]; } // the rounded result has q3 - x0 digits // we want the exponent to be e4, so if incr_exp = 1 then // multiply the rounded result by 10 - it will still fit in 113 bits if (incr_exp) { // 64 x 128 -> 128 P128.w[1] = C3.w[1]; P128.w[0] = C3.w[0]; __mul_64x128_to_128 (C3, __bid_ten2k64[1], P128); } e3 = e3 + x0; // this is e4 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; // the result will have the sign of x * y; the exponent is e4 R256.w[3] = 0; R256.w[2] = 0; R256.w[1] = C3.w[1]; R256.w[0] = C3.w[0]; if (p_sign == z_sign) { // R256 = C4 + R256 add256 (C4, R256, &R256); } else { // if (p_sign != z_sign) { // R256 = C4 - R256 sub256 (C4, R256, &R256); // the result cannot be pure zero // because the result has opposite sign to that of R256 which was // rounded, need to change the rounding indicators lsb = C4.w[0] & 0x01; if (is_inexact_lt_midpoint) { is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; } else if (is_inexact_gt_midpoint) { is_inexact_gt_midpoint = 0; is_inexact_lt_midpoint = 1; } else if (lsb == 0) { if (is_midpoint_lt_even) { is_midpoint_lt_even = 0; is_midpoint_gt_even = 1; } else if (is_midpoint_gt_even) { is_midpoint_gt_even = 0; is_midpoint_lt_even = 1; } else { ; } } else if (lsb == 1) { if (is_midpoint_lt_even) { // res = res + 1 R256.w[0]++; if (R256.w[0] == 0x0) { R256.w[1]++; if (R256.w[1] == 0x0) { R256.w[2]++; if (R256.w[2] == 0x0) { R256.w[3]++; } } } // no check for rounding overflow - R256 was a difference } else if (is_midpoint_gt_even) { // res = res - 1 R256.w[0]--; if (R256.w[0] == 0xffffffffffffffffull) { R256.w[1]--; if (R256.w[1] == 0xffffffffffffffffull) { R256.w[2]--; if (R256.w[2] == 0xffffffffffffffffull) { R256.w[3]--; } } } } else { ; } } else { ; } } // determine the number of decimal digits in R256 ind = __bid_nr_digits256 (R256); // ind >= p34 // if R256 is sum, then ind > p34; if R256 is a difference, then // ind >= p34; this means that we can calculate the result rounded to // the destination precision, with unbounded exponent, starting from R256 // and using the indicators from the rounding of C3 to avoid a double // rounding error if (ind < p34) { ; } else if (ind == p34) { // the result rounded to the destination precision with // unbounded exponent // is (-1)^p_sign * R256 * 10^e4 res.w[1] = R256.w[1]; res.w[0] = R256.w[0]; } else { // if (ind > p34) // if more than P digits, round to nearest to P digits // round R256 to p34 digits x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 // save C3 rounding indicators to help avoid double rounding error is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; is_midpoint_lt_even0 = is_midpoint_lt_even; is_midpoint_gt_even0 = is_midpoint_gt_even; // initialize rounding indicators is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; // round to p34 digits; the result fits in 113 bits if (ind <= 38) { P128.w[1] = R256.w[1]; P128.w[0] = R256.w[0]; __bid_round128_19_38 (ind, x0, P128, &R128, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); } else if (ind <= 57) { P192.w[2] = R256.w[2]; P192.w[1] = R256.w[1]; P192.w[0] = R256.w[0]; __bid_round192_39_57 (ind, x0, P192, &R192, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R128.w[1] = R192.w[1]; R128.w[0] = R192.w[0]; } else { // if (ind <= 68) __bid_round256_58_76 (ind, x0, R256, &R256, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R128.w[1] = R256.w[1]; R128.w[0] = R256.w[0]; } // the rounded result has p34 = 34 digits e4 = e4 + x0 + incr_exp; res.w[1] = R128.w[1]; res.w[0] = R128.w[0]; // avoid a double rounding error if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && is_midpoint_lt_even) { // double rounding error upward // res = res - 1 res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) res.w[1]--; is_midpoint_lt_even = 0; is_inexact_lt_midpoint = 1; // Note: a double rounding error upward is not possible; for this // the result after the first rounding would have to be 99...95 // (35 digits in all), possibly followed by a number of zeros; this // not possible in Cases (2)-(6) or (15)-(17) which may get here // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent if (res.w[1] == 0x0000314dc6448d93ull && res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 res.w[0] = 0x378d8e63ffffffffull; e4--; } } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && is_midpoint_gt_even) { // double rounding error downward // res = res + 1 res.w[0]++; if (res.w[0] == 0) res.w[1]++; is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { is_inexact_gt_midpoint = 1; } if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else { ; } } // determine tininess if (rnd_mode == ROUNDING_TO_NEAREST) { if (e4 < expmin) { is_tiny = 1; // for other rounding modes apply correction } } else { // for RM, RP, RZ, RA apply correction in order to determine tininess // but do not save the result; apply the correction to // (-1)^p_sign * res * 10^0 P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1]; P128.w[0] = res.w[0]; rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, 0, &P128, pfpsf); scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 // the number of digits in the significand is p34 = 34 if (e4 + scale < expmin) { is_tiny = 1; } } // the result rounded to the destination precision with unbounded exponent // is (-1)^p_sign * res * 10^e4 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN // res.w[0] unchanged; // Note: res is correct only if expmin <= e4 <= expmax ind = p34; // the number of decimal digits in the signifcand of res // at this point we have the result rounded with unbounded exponent in // res and we know its tininess: // res = (-1)^p_sign * significand * 10^e4, // where q (significand) = ind = p34 // Note: res is correct only if expmin <= e4 <= expmax // check for overflow if RN if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) { res.w[1] = p_sign | 0x7800000000000000ull; res.w[0] = 0x0000000000000000ull; *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); BID_RETURN (res) } // else not overflow or not RN, so continue // from this point on this is similar to the last part of the computation // for Cases (15), (16), (17) // if (e4 >= expmin) we have the result rounded with bounded exponent if (e4 < expmin) { x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res // where the result rounded [at most] once is // (-1)^p_sign * significand_res * 10^e4 // avoid double rounding error is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; is_midpoint_lt_even0 = is_midpoint_lt_even; is_midpoint_gt_even0 = is_midpoint_gt_even; is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; if (x0 > ind) { // nothing is left of res when moving the decimal point left x0 digits is_inexact_lt_midpoint = 1; res.w[1] = p_sign | 0x0000000000000000ull; res.w[0] = 0x0000000000000000ull; e4 = expmin; } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 // this is <, =, or > 1/2 ulp // compare the ind-digit value in the significand of res with // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is // less than, equal to, or greater than 1/2 ulp (significand of res) R128.w[1] = res.w[1] & MASK_COEFF; R128.w[0] = res.w[0]; if (ind <= 19) { if (R128.w[0] < __bid_midpoint64[ind - 1]) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; } else if (R128.w[0] == __bid_midpoint64[ind - 1]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp gt_half_ulp = 1; is_inexact_gt_midpoint = 1; } } else { // if (ind <= 38) if (R128.w[1] < __bid_midpoint128[ind - 20].w[1] || (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && R128.w[0] < __bid_midpoint128[ind - 20].w[0])) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; } else if (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && R128.w[0] == __bid_midpoint128[ind - 20].w[0]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp gt_half_ulp = 1; is_inexact_gt_midpoint = 1; } } if (lt_half_ulp || eq_half_ulp) { // res = +0.0 * 10^expmin res.w[1] = 0x0000000000000000ull; res.w[0] = 0x0000000000000000ull; } else { // if (gt_half_ulp) // res = +1 * 10^expmin res.w[1] = 0x0000000000000000ull; res.w[0] = 0x0000000000000001ull; } res.w[1] = p_sign | res.w[1]; e4 = expmin; } else { // if (1 <= x0 <= ind - 1 <= 33) // round the ind-digit result to ind - x0 digits if (ind <= 18) { // 2 <= ind <= 18 __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); res.w[1] = 0x0; res.w[0] = R64; } else if (ind <= 38) { P128.w[1] = res.w[1] & MASK_COEFF; P128.w[0] = res.w[0]; __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); } e4 = e4 + x0; // expmin // we want the exponent to be expmin, so if incr_exp = 1 then // multiply the rounded result by 10 - it will still fit in 113 bits if (incr_exp) { // 64 x 128 -> 128 P128.w[1] = res.w[1] & MASK_COEFF; P128.w[0] = res.w[0]; __mul_64x128_to_128 (res, __bid_ten2k64[1], P128); } res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF); // avoid a double rounding error if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && is_midpoint_lt_even) { // double rounding error upward // res = res - 1 res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) res.w[1]--; // Note: a double rounding error upward is not possible; for this // the result after the first rounding would have to be 99...95 // (35 digits in all), possibly followed by a number of zeros; this // not possible in this underflow case is_midpoint_lt_even = 0; is_inexact_lt_midpoint = 1; } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && is_midpoint_gt_even) { // double rounding error downward // res = res + 1 res.w[0]++; if (res.w[0] == 0) res.w[1]++; is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { is_inexact_gt_midpoint = 1; } if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else { ; } } } // res contains the correct result // apply correction if not rounding to nearest if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, is_inexact_lt_midpoint, is_inexact_gt_midpoint, is_midpoint_lt_even, is_midpoint_gt_even, e4, &res, pfpsf); } if (is_midpoint_lt_even || is_midpoint_gt_even || is_inexact_lt_midpoint || is_inexact_gt_midpoint) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; if (is_tiny) *pfpsf |= UNDERFLOW_EXCEPTION; } BID_RETURN (res) } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15) (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16) (delta + q3 <= p34 && p34 < q4)) { // Case (17) // calculate first the result rounded to the destination precision, with // unbounded exponent add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, rnd_mode, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, pfpsf, &res); BID_RETURN (res) } else { ; } } // end if delta < 0 BID_RETURN (res) }