/* Copyright (C) 2007  Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/*****************************************************************************
 * 
 *  BID128 fma   x * y + z
 * 
 ****************************************************************************/

#include "bid_conf.h"
#include "bid_functions.h"
#include "bid_internal.h"

static void
rounding_correction (
    unsigned int rnd_mode, 
    unsigned int is_inexact_lt_midpoint, 
    unsigned int is_inexact_gt_midpoint, 
    unsigned int is_midpoint_lt_even, 
    unsigned int is_midpoint_gt_even, 
    int unbexp, 
    UINT128 * ptrres, 
    _IDEC_flags * ptrfpsf) {
    // unbiased true exponent unbexp may be larger than emax

  UINT128 res = *ptrres; // expected to have the correct sign and coefficient
      // (the exponent field is ignored, as unbexp is used instead)
  UINT64 sign, exp;
  UINT64 C_hi, C_lo;

  // general correction from RN to RA, RM, RP, RZ
  // Note: if the result is negative, then is_inexact_lt_midpoint, 
  // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even 
  // have to be considered as if determined for the absolute value of the 
  // result (so they seem to be reversed)

  if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
      is_midpoint_lt_even || is_midpoint_gt_even) {
    *ptrfpsf |= INEXACT_EXCEPTION;
  }
  // apply correction to result calculated with unbounded exponent
  sign = res.w[1] & MASK_SIGN;
  exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
  C_hi = res.w[1] & MASK_COEFF;
  C_lo = res.w[0];
  if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || 
      ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) && 
      is_midpoint_gt_even))) ||
      (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || 
      ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) && 
      is_midpoint_gt_even)))) {
    // C = C + 1
    C_lo = C_lo + 1;
    if (C_lo == 0)
      C_hi = C_hi + 1;
    if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
      // C = 10^34 => rounding overflow
      C_hi = 0x0000314dc6448d93ull;
      C_lo = 0x38c15b0a00000000ull; // 10^33
      // exp = exp + EXP_P1;
      unbexp = unbexp + 1;
      exp = (UINT64) (unbexp + 6176) << 49;
    }
  } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
      ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) || 
      (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
    // C = C - 1
    C_lo = C_lo - 1;
    if (C_lo == 0xffffffffffffffffull)
      C_hi--;
    // check if we crossed into the lower decade
    if (exp > 0 && C_hi == 0x0000314dc6448d93ull && 
        C_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
      C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
      C_lo = 0x378d8e63ffffffffull;
      // exp = exp - EXP_P1;
      unbexp = unbexp - 1;
      exp = (UINT64) (unbexp + 6176) << 49;
    }
  } else {
    ; // the result is already correct
  }
  if (unbexp > expmax) { // 6111
    *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
    exp = 0;
    if (!sign) { // result is positive
      if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
        C_hi = 0x7800000000000000ull;
        C_lo = 0x0000000000000000ull;
      } else { // res = +MAXFP = (10^34-1) * 10^emax
        C_hi = 0x5fffed09bead87c0ull;
        C_lo = 0x378d8e63ffffffffull;
      }
    } else { // result is negative
      if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
        C_hi = 0xf800000000000000ull;
        C_lo = 0x0000000000000000ull;
      } else { // res = -MAXFP = -(10^34-1) * 10^emax
        C_hi = 0xdfffed09bead87c0ull;
        C_lo = 0x378d8e63ffffffffull;
      }
    }
  }
  // assemble the result
  res.w[1] = sign | exp | C_hi;
  res.w[0] = C_lo;
  *ptrres = res;
}

static void
add256 (UINT256 x, UINT256 y, UINT256 * pz) {
  // *z = x + yl assume the sum fits in 256 bits
  UINT256 z;
  z.w[0] = x.w[0] + y.w[0];
  if (z.w[0] < x.w[0]) {
    x.w[1]++;
    if (x.w[1] == 0x0000000000000000ull) {
      x.w[2]++;
      if (x.w[2] == 0x0000000000000000ull) {
        x.w[3]++;
      }
    }
  }
  z.w[1] = x.w[1] + y.w[1];
  if (z.w[1] < x.w[1]) {
    x.w[2]++;
    if (x.w[2] == 0x0000000000000000ull) {
      x.w[3]++;
    }
  }
  z.w[2] = x.w[2] + y.w[2];
  if (z.w[2] < x.w[2]) {
    x.w[3]++;
  }
  z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
  *pz = z;
}

static void
sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
  // *z = x - y; assume x >= y
  UINT256 z;
  z.w[0] = x.w[0] - y.w[0];
  if (z.w[0] > x.w[0]) {
    x.w[1]--;
    if (x.w[1] == 0xffffffffffffffffull) {
      x.w[2]--;
      if (x.w[2] == 0xffffffffffffffffull) {
        x.w[3]--;
      }
    }
  }
  z.w[1] = x.w[1] - y.w[1];
  if (z.w[1] > x.w[1]) {
    x.w[2]--;
    if (x.w[2] == 0xffffffffffffffffull) {
      x.w[3]--;
    }
  }
  z.w[2] = x.w[2] - y.w[2];
  if (z.w[2] > x.w[2]) {
    x.w[3]--;
  }
  z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
  *pz = z;
}


static int
__bid_nr_digits256 (UINT256 R256) {
  int ind;
  // determine the number of decimal digits in R256
  if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
    // between 1 and 19 digits
    for (ind = 1; ind <= 19; ind++) {
      if (R256.w[0] < __bid_ten2k64[ind]) {
        break;
      }
    }
    // ind digits
  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
             (R256.w[1] < __bid_ten2k128[0].w[1]
              || (R256.w[1] == __bid_ten2k128[0].w[1]
              && R256.w[0] < __bid_ten2k128[0].w[0]))) {
    // 20 digits
    ind = 20;
  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
    // between 21 and 38 digits
    for (ind = 1; ind <= 18; ind++) {
      if (R256.w[1] < __bid_ten2k128[ind].w[1] ||
          (R256.w[1] == __bid_ten2k128[ind].w[1] &&
          R256.w[0] < __bid_ten2k128[ind].w[0])) {
        break;
      }
    }
    // ind + 20 digits
    ind = ind + 20;
  } else if (((R256.w[3] == 0x0 &&
             (R256.w[2] < __bid_ten2k256[0].w[2])) ||
              (R256.w[2] == __bid_ten2k256[0].w[2] &&
              R256.w[1] < __bid_ten2k256[0].w[1]) ||
              (R256.w[2] == __bid_ten2k256[0].w[2] &&
              R256.w[1] == __bid_ten2k256[0].w[1] &&
              R256.w[0] < __bid_ten2k256[0].w[0]))) {
    // 39 digits
    ind = 39;
  } else {
    // between 40 and 68 digits
    for (ind = 1; ind <= 29; ind++) {
      if (R256.w[3] < __bid_ten2k256[ind].w[3] ||
          (R256.w[3] == __bid_ten2k256[ind].w[3] &&
          R256.w[2] < __bid_ten2k256[ind].w[2]) ||
          (R256.w[3] == __bid_ten2k256[ind].w[3] &&
          R256.w[2] == __bid_ten2k256[ind].w[2] &&
          R256.w[1] < __bid_ten2k256[ind].w[1]) ||
          (R256.w[3] == __bid_ten2k256[ind].w[3] &&
          R256.w[2] == __bid_ten2k256[ind].w[2] &&
          R256.w[1] == __bid_ten2k256[ind].w[1] &&
          R256.w[0] < __bid_ten2k256[ind].w[0])) {
        break;
      }
    }
    // ind + 39 digits
    ind = ind + 39;
  }
  return (ind);
}

// add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
// use the rounding information from ptr_is_* to avoid a double rounding error
static void
add_and_round (int q3,
               int q4,
               int e4,
               int delta,
               int p34,
               UINT64 z_sign,
               UINT64 p_sign,
               UINT128 C3,
               UINT256 C4,
               int rnd_mode,
               int *ptr_is_midpoint_lt_even,
               int *ptr_is_midpoint_gt_even,
               int *ptr_is_inexact_lt_midpoint,
               int *ptr_is_inexact_gt_midpoint,
               _IDEC_flags * ptrfpsf, UINT128 * ptrres) {

  int scale;
  int x0;
  int ind;
  UINT64 R64;
  UINT128 P128, R128;
  UINT192 P192, R192;
  UINT256 R256;
  int is_midpoint_lt_even = 0;
  int is_midpoint_gt_even = 0;
  int is_inexact_lt_midpoint = 0;
  int is_inexact_gt_midpoint = 0;
  int is_midpoint_lt_even0 = 0;
  int is_midpoint_gt_even0 = 0;
  int is_inexact_lt_midpoint0 = 0;
  int is_inexact_gt_midpoint0 = 0;
  int incr_exp = 0;
  int is_tiny = 0;
  int lt_half_ulp = 0;
  int eq_half_ulp = 0;
  // int gt_half_ulp = 0;
  UINT128 res = *ptrres;

  // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
  scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
  // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1

  // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
  // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
  if (scale == 0) {
    R256.w[3] = 0x0ull;
    R256.w[2] = 0x0ull;
    R256.w[1] = C3.w[1];
    R256.w[0] = C3.w[0];
  } else if (scale <= 19) { // 10^scale fits in 64 bits
    P128.w[1] = 0;
    P128.w[0] = __bid_ten2k64[scale];
    __mul_128x128_to_256 (R256, P128, C3);
  } else if (scale <= 38) { // 10^scale fits in 128 bits
    __mul_128x128_to_256 (R256, __bid_ten2k128[scale - 20], C3);
  } else if (scale <= 57) { // 39 <= scale <= 57 
    // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
    // (10^67 has 223 bits; 10^69 has 230 bits);  
    // must split the computation:  
    // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
    // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
    __mul_64x128_to_128 (R128, __bid_ten2k64[scale - 38], C3);
    // now multiply R128 by 10^38
    __mul_128x128_to_256 (R256, R128, __bid_ten2k128[18]);
  } else { // 58 <= scale <= 66
    // 10^scale takes between 193 and 220 bits,
    // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
    // must split the computation: 
    // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty 
    // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
    // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
    // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
    __mul_64x128_to_128 (R128, C3.w[0], __bid_ten2k128[scale - 58]);
    // now calculate 10*38 * 10^(scale-38) * C3 
    __mul_128x128_to_256 (R256, R128, __bid_ten2k128[18]);
  }
  // C3 * 10^scale is now in R256 

  // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least 
  // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is 
  // possible 
  // add/subtract C4 and C3 * 10^scale; the exponent is e4
  if (p_sign == z_sign) { // R256 = C4 + R256
    // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
    // but may require rounding
    add256 (C4, R256, &R256);
  } else { // if (p_sign != z_sign) { // R256 = C4 - R256
    // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
    // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
    // but may require rounding

    // compare first R256 = C3 * 10^scale and C4 
    if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) || 
        (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
        (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
        R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
      // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
      // but may require rounding 
      sub256 (R256, C4, &R256);
      // flip p_sign too, because the result has the sign of z 
      p_sign = z_sign;
    } else { // if C4 > C3 * 10^scale
      // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
      // but may require rounding  
      sub256 (C4, R256, &R256);
    }
    // if the result is pure zero, the sign depends on the rounding mode
    // (x*y and z had opposite signs) 
    if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
        R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
      if (rnd_mode != ROUNDING_DOWN)
        p_sign = 0x0000000000000000ull;
      else
        p_sign = 0x8000000000000000ull;
      // the exponent is max (e4, expmin)
      if (e4 < -6176)
        e4 = expmin;
      // assemble result 
      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
      res.w[0] = 0x0;
      *ptrres = res;
      return;
    }
  }

  // determine the number of decimal digits in R256
  ind = __bid_nr_digits256 (R256);

  // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
  // round to the destination precision, with unbounded exponent

  if (ind <= p34) {
    // result rounded to the destination precision with unbounded exponent
    // is exact
    if (ind + e4 < p34 + expmin) {
      is_tiny = 1; // applies to all rounding modes
    }
    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
    res.w[0] = R256.w[0];
    // Note: res is correct only if expmin <= e4 <= expmax
  } else { // if (ind > p34)
    // if more than P digits, round to nearest to P digits
    // round R256 to p34 digits
    x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
    if (ind <= 38) {
      P128.w[1] = R256.w[1];
      P128.w[0] = R256.w[0];
      __bid_round128_19_38 (ind, x0, P128, &R128, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
    } else if (ind <= 57) {
      P192.w[2] = R256.w[2];
      P192.w[1] = R256.w[1];
      P192.w[0] = R256.w[0];
      __bid_round192_39_57 (ind, x0, P192, &R192, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      R128.w[1] = R192.w[1];
      R128.w[0] = R192.w[0];
    } else { // if (ind <= 68)
      __bid_round256_58_76 (ind, x0, R256, &R256, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      R128.w[1] = R256.w[1];
      R128.w[0] = R256.w[0];
    }
    // the rounded result has p34 = 34 digits
    e4 = e4 + x0 + incr_exp;
    if (rnd_mode == ROUNDING_TO_NEAREST) {
      if (e4 < expmin) {
        is_tiny = 1; // for other rounding modes apply correction
      }
    } else {
      // for RM, RP, RZ, RA apply correction in order to determine tininess
      // but do not save the result; apply the correction to 
      // (-1)^p_sign * significand * 10^0
      P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
      P128.w[0] = R128.w[0];
      rounding_correction (rnd_mode,
                           is_inexact_lt_midpoint,
                           is_inexact_gt_midpoint, is_midpoint_lt_even,
                           is_midpoint_gt_even, 0, &P128, ptrfpsf);
      scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
      // the number of digits in the significand is p34 = 34
      if (e4 + scale < expmin) {
        is_tiny = 1;
      }
    }
    ind = p34; // the number of decimal digits in the signifcand of res
    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
    res.w[0] = R128.w[0];
    // Note: res is correct only if expmin <= e4 <= expmax
    // set the inexact flag after rounding with bounded exponent, if any
  }
  // at this point we have the result rounded with unbounded exponent in
  // res and we know its tininess:
  // res = (-1)^p_sign * significand * 10^e4, 
  // where q (significand) = ind <= p34
  // Note: res is correct only if expmin <= e4 <= expmax

  // check for overflow if RN
  if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
    res.w[1] = p_sign | 0x7800000000000000ull;
    res.w[0] = 0x0000000000000000ull;
    *ptrres = res;
    *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
    return; // BID_RETURN (res)
  } // else not overflow or not RN, so continue

  // if (e4 >= expmin) we have the result rounded with bounded exponent
  if (e4 < expmin) {
    x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
    // where the result rounded [at most] once is
    //   (-1)^p_sign * significand_res * 10^e4

    // avoid double rounding error
    is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
    is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
    is_midpoint_lt_even0 = is_midpoint_lt_even;
    is_midpoint_gt_even0 = is_midpoint_gt_even;
    is_inexact_lt_midpoint = 0;
    is_inexact_gt_midpoint = 0;
    is_midpoint_lt_even = 0;
    is_midpoint_gt_even = 0;

    if (x0 > ind) {
      // nothing is left of res when moving the decimal point left x0 digits
      is_inexact_lt_midpoint = 1;
      res.w[1] = p_sign | 0x0000000000000000ull;
      res.w[0] = 0x0000000000000000ull;
      e4 = expmin;
    } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
      // this is <, =, or > 1/2 ulp
      // compare the ind-digit value in the significand of res with
      // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is 
      // less than, equal to, or greater than 1/2 ulp (significand of res)
      R128.w[1] = res.w[1] & MASK_COEFF;
      R128.w[0] = res.w[0];
      if (ind <= 19) {
        if (R128.w[0] < __bid_midpoint64[ind - 1]) { // < 1/2 ulp
          lt_half_ulp = 1;
          is_inexact_lt_midpoint = 1;
        } else if (R128.w[0] == __bid_midpoint64[ind - 1]) { // = 1/2 ulp
          eq_half_ulp = 1;
          is_midpoint_gt_even = 1;
        } else { // > 1/2 ulp
          // gt_half_ulp = 1;
          is_inexact_gt_midpoint = 1;
        }
      } else { // if (ind <= 38) {
        if (R128.w[1] < __bid_midpoint128[ind - 20].w[1] || 
            (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && 
            R128.w[0] < __bid_midpoint128[ind - 20].w[0])) { // < 1/2 ulp
          lt_half_ulp = 1;
          is_inexact_lt_midpoint = 1;
        } else if (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && 
            R128.w[0] == __bid_midpoint128[ind - 20].w[0]) { // = 1/2 ulp
          eq_half_ulp = 1;
          is_midpoint_gt_even = 1;
        } else { // > 1/2 ulp
          // gt_half_ulp = 1;
          is_inexact_gt_midpoint = 1;
        }
      }
      if (lt_half_ulp || eq_half_ulp) {
        // res = +0.0 * 10^expmin
        res.w[1] = 0x0000000000000000ull;
        res.w[0] = 0x0000000000000000ull;
      } else { // if (gt_half_ulp)
        // res = +1 * 10^expmin
        res.w[1] = 0x0000000000000000ull;
        res.w[0] = 0x0000000000000001ull;
      }
      res.w[1] = p_sign | res.w[1];
      e4 = expmin;
    } else { // if (1 <= x0 <= ind - 1 <= 33)
      // round the ind-digit result to ind - x0 digits

      if (ind <= 18) { // 2 <= ind <= 18
        __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
        res.w[1] = 0x0;
        res.w[0] = R64;
      } else if (ind <= 38) {
        P128.w[1] = res.w[1] & MASK_COEFF;
        P128.w[0] = res.w[0];
        __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
      }
      e4 = e4 + x0; // expmin
      // we want the exponent to be expmin, so if incr_exp = 1 then
      // multiply the rounded result by 10 - it will still fit in 113 bits
      if (incr_exp) {
        // 64 x 128 -> 128
        P128.w[1] = res.w[1] & MASK_COEFF;
        P128.w[0] = res.w[0];
        __mul_64x128_to_128 (res, __bid_ten2k64[1], P128);
      }
      res.w[1] =
        p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
      // avoid a double rounding error
      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 
          is_midpoint_lt_even) { // double rounding error upward
        // res = res - 1
        res.w[0]--;
        if (res.w[0] == 0xffffffffffffffffull)
          res.w[1]--;
        // Note: a double rounding error upward is not possible; for this
        // the result after the first rounding would have to be 99...95
        // (35 digits in all), possibly followed by a number of zeros; this
        // is not possible in Cases (2)-(6) or (15)-(17) which may get here
        is_midpoint_lt_even = 0;
        is_inexact_lt_midpoint = 1;
      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 
          is_midpoint_gt_even) { // double rounding error downward
        // res = res + 1
        res.w[0]++;
        if (res.w[0] == 0)
          res.w[1]++;
        is_midpoint_gt_even = 0;
        is_inexact_gt_midpoint = 1;
      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
          !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
        // if this second rounding was exact the result may still be 
        // inexact because of the first rounding
        if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
          is_inexact_gt_midpoint = 1;
        }
        if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
          is_inexact_lt_midpoint = 1;
        }
      } else if (is_midpoint_gt_even &&
                 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
        // pulled up to a midpoint
        is_inexact_lt_midpoint = 1;
        is_inexact_gt_midpoint = 0;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;
      } else if (is_midpoint_lt_even &&
                 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
        // pulled down to a midpoint
        is_inexact_lt_midpoint = 0;
        is_inexact_gt_midpoint = 1;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;
      } else {
        ;
      }
    }
  }
  // res contains the correct result
  // apply correction if not rounding to nearest
  if (rnd_mode != ROUNDING_TO_NEAREST) {
    rounding_correction (rnd_mode,
                         is_inexact_lt_midpoint, is_inexact_gt_midpoint,
                         is_midpoint_lt_even, is_midpoint_gt_even,
                         e4, &res, ptrfpsf);
  }
  if (is_midpoint_lt_even || is_midpoint_gt_even ||
      is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
    // set the inexact flag
    *ptrfpsf |= INEXACT_EXCEPTION;
    if (is_tiny)
      *ptrfpsf |= UNDERFLOW_EXCEPTION;
  }

  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
  *ptrres = res;
  return;
}


#if DECIMAL_CALL_BY_REFERENCE
void
__bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
            _EXC_INFO_PARAM) {
  UINT128 x = *px, y = *py, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
__bid128_fma (UINT128 x, UINT128 y, UINT128 z
            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
            _EXC_INFO_PARAM) {
#endif

  UINT128 res = {{ 0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull }};
  UINT64 x_sign, y_sign, z_sign, p_sign;
  UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
  int true_p_exp;
  UINT128 C1, C2, C3;
  UINT256 C4;
  int q1 = 0, q2 = 0, q3 = 0, q4;
  int e1, e2, e3, e4;
  int scale, ind, delta, x0;
  int p34 = P34; // used to modify the limit on the number of digits
  BID_UI64DOUBLE tmp;
  int x_nr_bits, y_nr_bits, z_nr_bits;
  unsigned int save_fpsf;
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
  int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
  int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
  int incr_exp = 0;
  int lsb;
  int lt_half_ulp = 0;
  int eq_half_ulp = 0;
  int gt_half_ulp = 0;
  int is_tiny = 0;
  UINT64 R64, tmp64;
  UINT128 P128, R128;
  UINT192 P192, R192;
  UINT256 R256;

  // the following are based on the table of special cases for fma; the NaN
  // behavior is similar to that of the IPF fma 

  // identify cases where at least one operand is NaN

  if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
    // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
    // check first for non-canonical NaN payload
    if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
        (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
         (y.w[0] > 0x38c15b09ffffffffull))) {
      y.w[1] = y.w[1] & 0xffffc00000000000ull;
      y.w[0] = 0x0ull;
    }
    if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
      // set invalid flag
      *pfpsf |= INVALID_EXCEPTION;
      // return quiet (y)
      res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
      res.w[0] = y.w[0];
    } else { // y is QNaN
      // return y
      res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
      res.w[0] = y.w[0];
      // if z = SNaN or x = SNaN signal invalid exception
      if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
          (x.w[1] & MASK_SNAN) == MASK_SNAN) {
        // set invalid flag
        *pfpsf |= INVALID_EXCEPTION;
      }
    }
    BID_RETURN (res)
  } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
    // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
    // check first for non-canonical NaN payload
    if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
        (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
         (z.w[0] > 0x38c15b09ffffffffull))) {
      z.w[1] = z.w[1] & 0xffffc00000000000ull;
      z.w[0] = 0x0ull;
    }
    if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN 
      // set invalid flag 
      *pfpsf |= INVALID_EXCEPTION;
      // return quiet (z) 
      res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
      res.w[0] = z.w[0];
    } else { // z is QNaN 
      // return z  
      res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
      res.w[0] = z.w[0];
      // if x = SNaN signal invalid exception
      if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
        // set invalid flag
        *pfpsf |= INVALID_EXCEPTION;
      }
    }
    BID_RETURN (res)
  } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
    // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
    // check first for non-canonical NaN payload
    if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
        (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
         (x.w[0] > 0x38c15b09ffffffffull))) {
      x.w[1] = x.w[1] & 0xffffc00000000000ull;
      x.w[0] = 0x0ull;
    }
    if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN 
      // set invalid flag 
      *pfpsf |= INVALID_EXCEPTION;
      // return quiet (x) 
      res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
      res.w[0] = x.w[0];
    } else { // x is QNaN 
      // return x  
      res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
      res.w[0] = x.w[0];
    }
    BID_RETURN (res)
  }
  // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
  // for non-canonical values

  x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
  C1.w[1] = x.w[1] & MASK_COEFF;
  C1.w[0] = x.w[0];
  if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
    // if x is not infinity check for non-canonical values - treated as zero
    if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
      // non-canonical
      x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
      C1.w[1] = 0; // significand high
      C1.w[0] = 0; // significand low
    } else { // G0_G1 != 11
      x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
      if (C1.w[1] > 0x0001ed09bead87c0ull ||
          (C1.w[1] == 0x0001ed09bead87c0ull
          && C1.w[0] > 0x378d8e63ffffffffull)) {
        // x is non-canonical if coefficient is larger than 10^34 -1
        C1.w[1] = 0;
        C1.w[0] = 0;
      } else { // canonical          
        ;
      }
    }
  }
  y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
  C2.w[1] = y.w[1] & MASK_COEFF;
  C2.w[0] = y.w[0];
  if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
    // if y is not infinity check for non-canonical values - treated as zero
    if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
      // non-canonical
      y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
      C2.w[1] = 0; // significand high
      C2.w[0] = 0; // significand low 
    } else { // G0_G1 != 11
      y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
      if (C2.w[1] > 0x0001ed09bead87c0ull ||
          (C2.w[1] == 0x0001ed09bead87c0ull
          && C2.w[0] > 0x378d8e63ffffffffull)) {
        // y is non-canonical if coefficient is larger than 10^34 -1
        C2.w[1] = 0;
        C2.w[0] = 0;
      } else { // canonical
        ;
      }
    }
  }
  z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
  C3.w[1] = z.w[1] & MASK_COEFF;
  C3.w[0] = z.w[0];
  if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
    // if z is not infinity check for non-canonical values - treated as zero
    if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
      // non-canonical
      z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
      C3.w[1] = 0; // significand high
      C3.w[0] = 0; // significand low 
    } else { // G0_G1 != 11
      z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
      if (C3.w[1] > 0x0001ed09bead87c0ull ||
          (C3.w[1] == 0x0001ed09bead87c0ull
          && C3.w[0] > 0x378d8e63ffffffffull)) {
        // z is non-canonical if coefficient is larger than 10^34 -1
        C3.w[1] = 0;
        C3.w[0] = 0;
      } else { // canonical
        ;
      }
    }
  }

  p_sign = x_sign ^ y_sign; // sign of the product

  // identify cases where at least one operand is infinity

  if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
    if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
      if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
        if (p_sign == z_sign) {
          res.w[1] = z_sign | MASK_INF;
          res.w[0] = 0x0;
        } else {
          // return QNaN Indefinite
          res.w[1] = 0x7c00000000000000ull;
          res.w[0] = 0x0000000000000000ull;
          // set invalid flag
          *pfpsf |= INVALID_EXCEPTION;
        }
      } else { // z = 0 or z = f
        res.w[1] = p_sign | MASK_INF;
        res.w[0] = 0x0;
      }
    } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
      if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
        if (p_sign == z_sign) {
          res.w[1] = z_sign | MASK_INF;
          res.w[0] = 0x0;
        } else {
          // return QNaN Indefinite 
          res.w[1] = 0x7c00000000000000ull;
          res.w[0] = 0x0000000000000000ull;
          // set invalid flag
          *pfpsf |= INVALID_EXCEPTION;
        }
      } else { // z = 0 or z = f
        res.w[1] = p_sign | MASK_INF;
        res.w[0] = 0x0;
      }
    } else { // y = 0
      // return QNaN Indefinite
      res.w[1] = 0x7c00000000000000ull;
      res.w[0] = 0x0000000000000000ull;
      // set invalid flag
      *pfpsf |= INVALID_EXCEPTION;
    }
    BID_RETURN (res)
  } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
    if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
      // x = f, necessarily
      if ((p_sign != z_sign)
          || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
        // return QNaN Indefinite
        res.w[1] = 0x7c00000000000000ull;
        res.w[0] = 0x0000000000000000ull;
        // set invalid flag
        *pfpsf |= INVALID_EXCEPTION;
      } else {
        res.w[1] = z.w[1];
        res.w[0] = z.w[0];
      }
    } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
      // z = 0, f, inf
      // return QNaN Indefinite
      res.w[1] = 0x7c00000000000000ull;
      res.w[0] = 0x0000000000000000ull;
      // set invalid flag
      *pfpsf |= INVALID_EXCEPTION;
    } else {
      // x = f and z = 0, f, necessarily
      res.w[1] = p_sign | MASK_INF;
      res.w[0] = 0x0;
    }
    BID_RETURN (res)
  } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
    // x = 0, f and y = 0, f, necessarily
    res.w[1] = z_sign | MASK_INF;
    res.w[0] = 0x0;
    BID_RETURN (res)
  }

  true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
  if (true_p_exp < -6176)
    p_exp = 0; // cannot be less than EXP_MIN
  // MACH DEBUG else if (true_p_exp > 6111)
  //   p_exp = (UINT64) (6111 + 6176) << 49; // cannot be more than EXP_MAX
  else
    p_exp = (UINT64) (true_p_exp + 6176) << 49;

  if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || 
      (C2.w[1] == 0x0 && C2.w[0] == 0x0)) &&
      C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
    // the result is 0
    if (p_exp < z_exp)
      res.w[1] = p_exp; // preferred exponent
    else
      res.w[1] = z_exp; // preferred exponent
    if (p_sign == z_sign) {
      res.w[1] |= z_sign;
      res.w[0] = 0x0;
    } else { // x * y and z have opposite signs
      if (rnd_mode == ROUNDING_DOWN) {
        // res = -0.0
        res.w[1] |= MASK_SIGN;
        res.w[0] = 0x0;
      } else {
        // res = +0.0
        // res.w[1] |= 0x0;
        res.w[0] = 0x0;
      }
    }
    BID_RETURN (res)
  }
  // from this point on, we may need to know the number of decimal digits
  // in the significands of x, y, z when x, y, z != 0

  if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
    // q1 = nr. of decimal digits in x
    // determine first the nr. of bits in x
    if (C1.w[1] == 0) {
      if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
        // split the 64-bit value in two 32-bit halves to avoid rounding errors
        if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
          tmp.d = (double) (C1.w[0] >> 32); // exact conversion
          x_nr_bits =
            33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
        } else { // x < 2^32
          tmp.d = (double) (C1.w[0]); // exact conversion
          x_nr_bits =
            1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
        }
      } else { // if x < 2^53
        tmp.d = (double) C1.w[0]; // exact conversion
        x_nr_bits =
          1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
    } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
      tmp.d = (double) C1.w[1]; // exact conversion
      x_nr_bits =
        65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
    }
    q1 = __bid_nr_digits[x_nr_bits - 1].digits;
    if (q1 == 0) {
      q1 = __bid_nr_digits[x_nr_bits - 1].digits1;
      if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi ||
          (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi &&
          C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo))
        q1++;
    }
  }

  if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
    if (C2.w[1] == 0) {
      if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
        // split the 64-bit value in two 32-bit halves to avoid rounding errors
        if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
          tmp.d = (double) (C2.w[0] >> 32); // exact conversion
          y_nr_bits =
            32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
        } else { // y < 2^32
          tmp.d = (double) C2.w[0]; // exact conversion
          y_nr_bits =
            ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
        }
      } else { // if y < 2^53
        tmp.d = (double) C2.w[0]; // exact conversion
        y_nr_bits =
          ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
    } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
      tmp.d = (double) C2.w[1]; // exact conversion
      y_nr_bits =
        64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
    }

    q2 = __bid_nr_digits[y_nr_bits].digits;
    if (q2 == 0) {
      q2 = __bid_nr_digits[y_nr_bits].digits1;
      if (C2.w[1] > __bid_nr_digits[y_nr_bits].threshold_hi ||
          (C2.w[1] == __bid_nr_digits[y_nr_bits].threshold_hi &&
          C2.w[0] >= __bid_nr_digits[y_nr_bits].threshold_lo))
        q2++;
    }
  }

  if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
    if (C3.w[1] == 0) {
      if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
        // split the 64-bit value in two 32-bit halves to avoid rounding errors
        if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
          tmp.d = (double) (C3.w[0] >> 32); // exact conversion
          z_nr_bits =
            32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
        } else { // z < 2^32
          tmp.d = (double) C3.w[0]; // exact conversion
          z_nr_bits =
            ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
        }
      } else { // if z < 2^53
        tmp.d = (double) C3.w[0]; // exact conversion
        z_nr_bits =
          ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
    } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
      tmp.d = (double) C3.w[1]; // exact conversion
      z_nr_bits =
        64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
    }

    q3 = __bid_nr_digits[z_nr_bits].digits;
    if (q3 == 0) {
      q3 = __bid_nr_digits[z_nr_bits].digits1;
      if (C3.w[1] > __bid_nr_digits[z_nr_bits].threshold_hi ||
          (C3.w[1] == __bid_nr_digits[z_nr_bits].threshold_hi &&
          C3.w[0] >= __bid_nr_digits[z_nr_bits].threshold_lo))
        q3++;
    }
  }

  if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || 
      (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
      // x = 0 or y = 0
    // z = f, necessarily; for 0 + z return z, with the preferred exponent
    // the result is z, but need to get the preferred exponent
    if (z_exp <= p_exp) { // the preferred exponent is z_exp
      res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
      res.w[0] = C3.w[0];
    } else { // if (p_exp < z_exp) the preferred exponent is p_exp
      // return (C3 * 10^scale) * 10^(z_exp - scale)
      // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
      scale = p34 - q3;
      ind = (z_exp - p_exp) >> 49;
      if (ind < scale)
        scale = ind;
      if (scale == 0) {
        res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
        res.w[0] = z.w[0];
      } else if (q3 <= 19) { // z fits in 64 bits 
        if (scale <= 19) { // 10^scale fits in 64 bits
          // 64 x 64 C3.w[0] * __bid_ten2k64[scale]
          __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
        } else { // 10^scale fits in 128 bits
          // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
          __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]);
        }
      } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 
        // 64 x 128 __bid_ten2k64[scale] * C3
        __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
      }
      // subtract scale from the exponent
      z_exp = z_exp - ((UINT64) scale << 49);
      res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
    }
    BID_RETURN (res)
  } else {
    ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
  }

  e1 = (x_exp >> 49) - 6176; // unbiased exponent of x 
  e2 = (y_exp >> 49) - 6176; // unbiased exponent of y 
  e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
  e4 = e1 + e2; // unbiased exponent of the exact x * y

  // calculate C1 * C2 and its number of decimal digits, q4

  // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
  // where 2 <= q1 + q2 <= 68
  // calculate C4 = C1 * C2 and determine q
  C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
  if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
    C4.w[0] = C1.w[0] * C2.w[0];
    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
    if (C4.w[0] < __bid_ten2k64[q1 + q2 - 1])
      q4 = q1 + q2 - 1; // q4 in [1, 18]
    else
      q4 = q1 + q2; // q4 in [2, 19]
    // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
  } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
    // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
    __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
    // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
    if (C4.w[1] == 0 && C4.w[0] < __bid_ten2k64[19]) { // 19 = q1+q2-1
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
      q4 = 19; // 19 = q1 + q2 - 1
    } else {
      // if (C4.w[1] == 0)
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
      // else
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
      q4 = 20; // 20 = q1 + q2
    }
  } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
    // C4 = C1 * C2 fits in 64 or 128 bits
    // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
    // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
    if (q1 <= 19) {
      __mul_128x64_to_128 (C4, C1.w[0], C2);
    } else { // q2 <= 19
      __mul_128x64_to_128 (C4, C2.w[0], C1);
    }
    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
    if (C4.w[1] < __bid_ten2k128[q1 + q2 - 21].w[1] ||
        (C4.w[1] == __bid_ten2k128[q1 + q2 - 21].w[1] &&
        C4.w[0] < __bid_ten2k128[q1 + q2 - 21].w[0])) {
      // if (C4.w[1] == 0) // q4 = 20, necessarily
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
      // else
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
      q4 = q1 + q2 - 1; // q4 in [20, 37]
    } else {
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
      q4 = q1 + q2; // q4 in [21, 38]
    }
  } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
    // both C1 and C2 fit in 128 bits (actually in 113 bits)
    // may replace this by 128x128_to192
    __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
    // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
    if (C4.w[2] == 0 && (C4.w[1] < __bid_ten2k128[18].w[1] || 
    (C4.w[1] == __bid_ten2k128[18].w[1] && C4.w[0] < __bid_ten2k128[18].w[0]))) { 
      // 18 = 38 - 20 = q1+q2-1 - 20
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
      q4 = 38; // 38 = q1 + q2 - 1
    } else {
      // if (C4.w[2] == 0)
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
      // else
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
      q4 = 39; // 39 = q1 + q2
    }
  } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
    // C4 = C1 * C2 fits in 128 or 192 bits
    // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
    // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
    // may fit in 64 bits
    if (C1.w[1] == 0) { // C1 fits in 64 bits
      // __mul_64x128_full (REShi64, RESlo128, A64, B128)
      __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
    } else if (C2.w[1] == 0) { // C2 fits in 64 bits
      // __mul_64x128_full (REShi64, RESlo128, A64, B128)
      __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
    } else { // both C1 and C2 require 128 bits
      // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
      __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
    }
    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
    if (C4.w[2] < __bid_ten2k256[q1 + q2 - 40].w[2] ||
        (C4.w[2] == __bid_ten2k256[q1 + q2 - 40].w[2] &&
        (C4.w[1] < __bid_ten2k256[q1 + q2 - 40].w[1] ||
         (C4.w[1] == __bid_ten2k256[q1 + q2 - 40].w[1] &&
         C4.w[0] < __bid_ten2k256[q1 + q2 - 40].w[0])))) {
      // if (C4.w[2] == 0) // q4 = 39, necessarily
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
      // else
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
      q4 = q1 + q2 - 1; // q4 in [39, 56]
    } else {
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
      q4 = q1 + q2; // q4 in [40, 57]
    }
  } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
    // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
    // may fit in 64 bits
    if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
      __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
    } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
      __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
    } else { // C1 * C2 will fit in 192 bits or in 256 bits
      __mul_128x128_to_256 (C4, C1, C2);
    }
    // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
    if (C4.w[3] == 0 && (C4.w[2] < __bid_ten2k256[18].w[2] || 
        (C4.w[2] == __bid_ten2k256[18].w[2] && (C4.w[1] < __bid_ten2k256[18].w[1] || 
        (C4.w[1] == __bid_ten2k256[18].w[1] && C4.w[0] < __bid_ten2k256[18].w[0]))))) { 
        // 18 = 57 - 39 = q1+q2-1 - 39
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
      q4 = 57; // 57 = q1 + q2 - 1
    } else {
      // if (C4.w[3] == 0)
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
      // else
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
      q4 = 58; // 58 = q1 + q2
    }
  } else { // if 59 <= q1 + q2 <= 68
    // C4 = C1 * C2 fits in 192 or 256 bits
    // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
    // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
    // 64 bits
    // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
    __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
    if (C4.w[3] < __bid_ten2k256[q1 + q2 - 40].w[3] ||
        (C4.w[3] == __bid_ten2k256[q1 + q2 - 40].w[3] &&
        (C4.w[2] < __bid_ten2k256[q1 + q2 - 40].w[2] ||
         (C4.w[2] == __bid_ten2k256[q1 + q2 - 40].w[2] &&
         (C4.w[1] < __bid_ten2k256[q1 + q2 - 40].w[1] ||
          (C4.w[1] == __bid_ten2k256[q1 + q2 - 40].w[1] &&
          C4.w[0] < __bid_ten2k256[q1 + q2 - 40].w[0])))))) {
      // if (C4.w[3] == 0) // q4 = 58, necessarily
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
      // else
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
      q4 = q1 + q2 - 1; // q4 in [58, 67]
    } else {
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
      q4 = q1 + q2; // q4 in [59, 68]
    }
  }

  if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
    save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
    *pfpsf = 0;

    if (q4 > p34) {

      // truncate C4 to p34 digits into res
      // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
      x0 = q4 - p34;
      if (q4 <= 38) {
        P128.w[1] = C4.w[1];
        P128.w[0] = C4.w[0];
        __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
      } else if (q4 <= 57) { // 35 <= q4 <= 57
        P192.w[2] = C4.w[2];
        P192.w[1] = C4.w[1];
        P192.w[0] = C4.w[0];
        __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
        res.w[0] = R192.w[0];
        res.w[1] = R192.w[1];
      } else { // if (q4 <= 68)
        __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
        res.w[0] = R256.w[0];
        res.w[1] = R256.w[1];
      }
      e4 = e4 + x0;
      if (incr_exp) {
        e4 = e4 + 1;
      }
      q4 = p34;
      // res is now the coefficient of the result rounded to the destination 
      // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
    } else { // if (q4 <= p34)
      // C4 * 10^e4 is the result rounded to the destination precision, with  
      // unbounded exponent (which is exact)

      if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
        // e4 is too large, but can be brought within range by scaling up C4
        scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
        // res = (C4 * 10^scale) * 10^expmax
        if (q4 <= 19) { // C4 fits in 64 bits
          if (scale <= 19) { // 10^scale fits in 64 bits
            // 64 x 64 C4.w[0] * __bid_ten2k64[scale]
            __mul_64x64_to_128MACH (res, C4.w[0], __bid_ten2k64[scale]);
          } else { // 10^scale fits in 128 bits
            // 64 x 128 C4.w[0] * __bid_ten2k128[scale - 20]
            __mul_128x64_to_128 (res, C4.w[0], __bid_ten2k128[scale - 20]);
          }
        } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
          // 64 x 128 __bid_ten2k64[scale] * CC43
          __mul_128x64_to_128 (res, __bid_ten2k64[scale], C4);
        }
        e4 = e4 - scale; // expmax
        q4 = q4 + scale;
      } else {
        res.w[1] = C4.w[1];
        res.w[0] = C4.w[0];
      }
      // res is the coefficient of the result rounded to the destination 
      // precision, with unbounded exponent (it has q4 digits); the exponent 
      // is e4 (exact result)
    }

    // check for overflow
    if (q4 + e4 > p34 + expmax) {
      if (rnd_mode == ROUNDING_TO_NEAREST) {
        res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
        res.w[0] = 0x0000000000000000ull;
        *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
      } else {
        res.w[1] = p_sign | res.w[1];
        rounding_correction (rnd_mode,
                             is_inexact_lt_midpoint,
                             is_inexact_gt_midpoint,
                             is_midpoint_lt_even, is_midpoint_gt_even,
                             e4, &res, pfpsf);
      }
      *pfpsf |= save_fpsf;
      BID_RETURN (res)
    }
    // check for underflow
    if (q4 + e4 < expmin + P34) {
      is_tiny = 1; // the result is tiny
      if (e4 < expmin) {
        // if e4 < expmin, we must truncate more of res
        x0 = expmin - e4; // x0 >= 1
        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
        is_midpoint_lt_even0 = is_midpoint_lt_even;
        is_midpoint_gt_even0 = is_midpoint_gt_even;
        is_inexact_lt_midpoint = 0;
        is_inexact_gt_midpoint = 0;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;
        // the number of decimal digits in res is q4
        if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
          if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
            __bid_round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
                          &is_inexact_lt_midpoint,
                          &is_inexact_gt_midpoint);
            if (incr_exp) {
              // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
              R64 = __bid_ten2k64[q4 - x0];
            }
            // res.w[1] = 0; (from above)
            res.w[0] = R64;
          } else { // if (q4 <= 34)
            // 19 <= q4 <= 38
            P128.w[1] = res.w[1];
            P128.w[0] = res.w[0];
            __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp,
                            &is_midpoint_lt_even, &is_midpoint_gt_even,
                            &is_inexact_lt_midpoint,
                            &is_inexact_gt_midpoint);
            if (incr_exp) {
              // increase coefficient by a factor of 10; this will be <= 10^33
              // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
              if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
                // res.w[1] = 0;
                res.w[0] = __bid_ten2k64[q4 - x0];
              } else { // 20 <= q4 - x0 <= 37
                res.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0];
                res.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1];
              }
            }
          }
          e4 = e4 + x0; // expmin 
        } else if (x0 == q4) {
          // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
          // determine relationship with 1/2 ulp
          if (q4 <= 19) {
            if (res.w[0] < __bid_midpoint64[q4 - 1]) { // < 1/2 ulp
              lt_half_ulp = 1;
              is_inexact_lt_midpoint = 1;
            } else if (res.w[0] == __bid_midpoint64[q4 - 1]) { // = 1/2 ulp
              eq_half_ulp = 1;
              is_midpoint_gt_even = 1;
            } else { // > 1/2 ulp
              // gt_half_ulp = 1;
              is_inexact_gt_midpoint = 1;
            }
          } else { // if (q4 <= 34)
            if (res.w[1] < __bid_midpoint128[q4 - 20].w[1] ||
                (res.w[1] == __bid_midpoint128[q4 - 20].w[1] &&
                res.w[0] < __bid_midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
              lt_half_ulp = 1;
              is_inexact_lt_midpoint = 1;
            } else if (res.w[1] == __bid_midpoint128[q4 - 20].w[1] &&
                res.w[0] == __bid_midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
              eq_half_ulp = 1;
              is_midpoint_gt_even = 1;
            } else { // > 1/2 ulp
              // gt_half_ulp = 1;
              is_inexact_gt_midpoint = 1;
            }
          }
          if (lt_half_ulp || eq_half_ulp) {
            // res = +0.0 * 10^expmin
            res.w[1] = 0x0000000000000000ull;
            res.w[0] = 0x0000000000000000ull;
          } else { // if (gt_half_ulp)
            // res = +1 * 10^expmin
            res.w[1] = 0x0000000000000000ull;
            res.w[0] = 0x0000000000000001ull;
          }
          e4 = expmin;
        } else { // if (x0 > q4)
          // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
          res.w[1] = 0;
          res.w[0] = 0;
          e4 = expmin;
          is_inexact_lt_midpoint = 1;
        }
        // avoid a double rounding error
        if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 
			is_midpoint_lt_even) { // double rounding error upward
          // res = res - 1
          res.w[0]--;
          if (res.w[0] == 0xffffffffffffffffull)
            res.w[1]--;
          // Note: a double rounding error upward is not possible; for this
          // the result after the first rounding would have to be 99...95
          // (35 digits in all), possibly followed by a number of zeros; this
          // not possible for f * f + 0
          is_midpoint_lt_even = 0;
          is_inexact_lt_midpoint = 1;
        } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 
            is_midpoint_gt_even) { // double rounding error downward
          // res = res + 1
          res.w[0]++;
          if (res.w[0] == 0)
            res.w[1]++;
          is_midpoint_gt_even = 0;
          is_inexact_gt_midpoint = 1;
        } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
                   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
          // if this second rounding was exact the result may still be 
          // inexact because of the first rounding
          if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
            is_inexact_gt_midpoint = 1;
          }
          if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
            is_inexact_lt_midpoint = 1;
          }
        } else if (is_midpoint_gt_even &&
                   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
          // pulled up to a midpoint
          is_inexact_lt_midpoint = 1;
          is_inexact_gt_midpoint = 0;
          is_midpoint_lt_even = 0;
          is_midpoint_gt_even = 0;
        } else if (is_midpoint_lt_even &&
                   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
          // pulled down to a midpoint
          is_inexact_lt_midpoint = 0;
          is_inexact_gt_midpoint = 1;
          is_midpoint_lt_even = 0;
          is_midpoint_gt_even = 0;
        } else {
          ;
        }
      } else { // if e4 >= emin then q4 < P and the result is tiny and exact
        if (e3 < e4) {
          // if (e3 < e4) the preferred exponent is e3
          // return (C4 * 10^scale) * 10^(e4 - scale)
          // where scale = min (p34-q4, (e4 - e3))
          scale = p34 - q4;
          ind = e4 - e3;
          if (ind < scale)
            scale = ind;
          if (scale == 0) {
            ; // res and e4 are unchanged
          } else if (q4 <= 19) { // C4 fits in 64 bits
            if (scale <= 19) { // 10^scale fits in 64 bits
              // 64 x 64 res.w[0] * __bid_ten2k64[scale]
              __mul_64x64_to_128MACH (res, res.w[0], __bid_ten2k64[scale]);
            } else { // 10^scale fits in 128 bits
              // 64 x 128 res.w[0] * __bid_ten2k128[scale - 20]
              __mul_128x64_to_128 (res, res.w[0], __bid_ten2k128[scale - 20]);
            }
          } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
            // 64 x 128 __bid_ten2k64[scale] * C3
            __mul_128x64_to_128 (res, __bid_ten2k64[scale], res);
          }
          // subtract scale from the exponent
          e4 = e4 - scale;
        }
      }
      // check for inexact result
      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
          is_midpoint_lt_even || is_midpoint_gt_even) {
        // set the inexact flag and the underflow flag
        *pfpsf |= INEXACT_EXCEPTION;
        *pfpsf |= UNDERFLOW_EXCEPTION;
      }
      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
      if (rnd_mode != ROUNDING_TO_NEAREST) {
        rounding_correction (rnd_mode,
                             is_inexact_lt_midpoint,
                             is_inexact_gt_midpoint,
                             is_midpoint_lt_even, is_midpoint_gt_even,
                             e4, &res, pfpsf);
      }
      *pfpsf |= save_fpsf;
      BID_RETURN (res)
    }

    // no overflow and no underflow
    if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
        is_midpoint_lt_even || is_midpoint_gt_even) {
      // set the inexact flag
      *pfpsf |= INEXACT_EXCEPTION;
    }
    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];

    if (rnd_mode != ROUNDING_TO_NEAREST) {
      rounding_correction (rnd_mode,
                           is_inexact_lt_midpoint,
                           is_inexact_gt_midpoint,
                           is_midpoint_lt_even, is_midpoint_gt_even,
                           e4, &res, pfpsf);
    }

    if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
      // need to ensure that the result has the preferred exponent
      p_exp = res.w[1] & MASK_EXP;
      if (z_exp < p_exp) { // the preferred exponent is z_exp
        // signficand of res in C3
        C3.w[1] = res.w[1] & MASK_COEFF;
        C3.w[0] = res.w[0];
        // the number of decimal digits of x * y is q4 <= 34
        // Note: the coefficient fits in 128 bits

        // return (C3 * 10^scale) * 10^(p_exp - scale)
        // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
        scale = p34 - q4;
        ind = (p_exp - z_exp) >> 49;
        if (ind < scale)
          scale = ind;
        // subtract scale from the exponent
        p_exp = p_exp - ((UINT64) scale << 49);
        if (scale == 0) {
          ; // leave res unchanged
        } else if (q4 <= 19) { // x * y fits in 64 bits
          if (scale <= 19) { // 10^scale fits in 64 bits
            // 64 x 64 C3.w[0] * __bid_ten2k64[scale] 
            __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
          } else { // 10^scale fits in 128 bits 
            // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
            __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]);
          }
          res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
        } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
          // 64 x 128 __bid_ten2k64[scale] * C3 
          __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
          res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
        }
      } // else leave the result as it is, because p_exp <= z_exp
    }
    *pfpsf |= save_fpsf;
    BID_RETURN (res)
  } // else we have f * f + f

  // continue with x = f, y = f, z = f

  delta = q3 + e3 - q4 - e4;
delta_ge_zero:
  if (delta >= 0) {

    if (p34 <= delta - 1 || // Case (1')
        (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
      // check for overflow, which can occur only in Case (1')
      if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
        // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
        // condition for (q3 + e3) > (p34 + expmax)
        if (rnd_mode == ROUNDING_TO_NEAREST) {
          res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
          res.w[0] = 0x0000000000000000ull;
          *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
        } else {
          if (p_sign == z_sign) {
            is_inexact_lt_midpoint = 1;
          } else {
            is_inexact_gt_midpoint = 1;
          }
          // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
          scale = p34 - q3;
          if (scale == 0) {
            res.w[1] = z_sign | C3.w[1];
            res.w[0] = C3.w[0];
          } else {
            if (q3 <= 19) { // C3 fits in 64 bits
              if (scale <= 19) { // 10^scale fits in 64 bits
                // 64 x 64 C3.w[0] * __bid_ten2k64[scale]
                __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
              } else { // 10^scale fits in 128 bits
                // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
                __mul_128x64_to_128 (res, C3.w[0],
                                     __bid_ten2k128[scale - 20]);
              }
            } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
              // 64 x 128 __bid_ten2k64[scale] * C3
              __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
            }
            // the coefficient in res has q3 + scale = p34 digits
          }
          e3 = e3 - scale;
          res.w[1] = z_sign | res.w[1];
          rounding_correction (rnd_mode,
                               is_inexact_lt_midpoint,
                               is_inexact_gt_midpoint,
                               is_midpoint_lt_even, is_midpoint_gt_even,
                               e3, &res, pfpsf);
        }
        BID_RETURN (res)
      }
      // res = z
      if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
        // return (C3 * 10^scale) * 10^(z_exp - scale)
        // where scale = min (p34-q3, z_exp-EMIN)
        scale = p34 - q3;
        ind = e3 + 6176;
        if (ind < scale)
          scale = ind;
        if (scale == 0) {
          res.w[1] = C3.w[1];
          res.w[0] = C3.w[0];
        } else if (q3 <= 19) { // z fits in 64 bits
          if (scale <= 19) { // 10^scale fits in 64 bits
            // 64 x 64 C3.w[0] * __bid_ten2k64[scale]
            __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
          } else { // 10^scale fits in 128 bits
            // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
            __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]);
          }
        } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
          // 64 x 128 __bid_ten2k64[scale] * C3
          __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
        }
        // the coefficient in res has q3 + scale digits
        // subtract scale from the exponent
        z_exp = z_exp - ((UINT64) scale << 49);
        e3 = e3 - scale;
        res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
        if (scale + q3 < p34)
          *pfpsf |= UNDERFLOW_EXCEPTION;
      } else {
        scale = 0;
        res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
        res.w[0] = C3.w[0];
      }

      // use the following to avoid double rounding errors when operating on
      // mixed formats in rounding to nearest, and for correcting the result
      // if not rounding to nearest
      if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
        // there is a gap of exactly one digit between the scaled C3 and C4
        // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
        if ((q3 <= 19 && C3.w[0] != __bid_ten2k64[q3 - 1]) || 
            (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != __bid_ten2k64[19])) || 
            (q3 >= 21 && (C3.w[1] != __bid_ten2k128[q3 - 21].w[1] || 
            C3.w[0] != __bid_ten2k128[q3 - 21].w[0]))) { 
            // C3 * 10^ scale != 10^(q3-1)
          // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
          // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
          is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
        } else { // if C3 * 10^scale = 10^(q3+scale-1)
          // ok from above e3 = (z_exp >> 49) - 6176;
          // the result is always inexact
          if (q4 == 1) {
            R64 = C4.w[0];
          } else {
            // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 
            // x = q4-1, 1 <= x <= 67 and check if this operation is exact
            if (q4 <= 18) { // 2 <= q4 <= 18
              __bid_round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
                            &is_midpoint_lt_even, &is_midpoint_gt_even,
                            &is_inexact_lt_midpoint,
                            &is_inexact_gt_midpoint);
            } else if (q4 <= 38) {
              P128.w[1] = C4.w[1];
              P128.w[0] = C4.w[0];
              __bid_round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
                              &is_midpoint_lt_even,
                              &is_midpoint_gt_even,
                              &is_inexact_lt_midpoint,
                              &is_inexact_gt_midpoint);
              R64 = R128.w[0]; // one decimal digit
            } else if (q4 <= 57) {
              P192.w[2] = C4.w[2];
              P192.w[1] = C4.w[1];
              P192.w[0] = C4.w[0];
              __bid_round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
                              &is_midpoint_lt_even,
                              &is_midpoint_gt_even,
                              &is_inexact_lt_midpoint,
                              &is_inexact_gt_midpoint);
              R64 = R192.w[0]; // one decimal digit
            } else { // if (q4 <= 68)
              __bid_round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
                              &is_midpoint_lt_even,
                              &is_midpoint_gt_even,
                              &is_inexact_lt_midpoint,
                              &is_inexact_gt_midpoint);
              R64 = R256.w[0]; // one decimal digit
            }
            if (incr_exp) {
              R64 = 10;
            }
          }
          if (q4 == 1 && C4.w[0] == 5) {
            is_inexact_lt_midpoint = 0;
            is_inexact_gt_midpoint = 0;
            is_midpoint_lt_even = 1;
            is_midpoint_gt_even = 0;
          } else if ((e3 == expmin) ||
                     R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
            // result does not change
            is_inexact_lt_midpoint = 0;
            is_inexact_gt_midpoint = 1;
            is_midpoint_lt_even = 0;
            is_midpoint_gt_even = 0;
          } else {
            is_inexact_lt_midpoint = 1;
            is_inexact_gt_midpoint = 0;
            is_midpoint_lt_even = 0;
            is_midpoint_gt_even = 0;
            // result decremented is 10^(q3+scale) - 1
            if ((q3 + scale) <= 19) {
              res.w[1] = 0;
              res.w[0] = __bid_ten2k64[q3 + scale];
            } else { // if ((q3 + scale + 1) <= 35)
              res.w[1] = __bid_ten2k128[q3 + scale - 20].w[1];
              res.w[0] = __bid_ten2k128[q3 + scale - 20].w[0];
            }
            res.w[0] = res.w[0] - 1; // borrow never occurs
            z_exp = z_exp - EXP_P1;
            e3 = e3 - 1;
            res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
          }
          if (e3 == expmin) {
            if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
              ; // result not tiny (in round-to-nearest mode)
            } else {
              *pfpsf |= UNDERFLOW_EXCEPTION;
            }
          }
        }        // end 10^(q3+scale-1)
        // set the inexact flag
        *pfpsf |= INEXACT_EXCEPTION;
      } else {
        if (p_sign == z_sign) {
          // if (z_sign), set as if for absolute value
          is_inexact_lt_midpoint = 1;
        } else { // if (p_sign != z_sign)
          // if (z_sign), set as if for absolute value
          is_inexact_gt_midpoint = 1;
        }
        *pfpsf |= INEXACT_EXCEPTION;
      }
      // the result is always inexact => set the inexact flag
      // Determine tininess:
      //    if (exp > expmin)
      //      the result is not tiny
      //    else // if exp = emin
      //      if (q3 + scale < p34)
      //        the result is tiny
      //      else // if (q3 + scale = p34)
      //        if (C3 * 10^scale > 10^33)
      //          the result is not tiny
      //        else // if C3 * 10^scale = 10^33
      //          if (xy * z > 0)
      //            the result is not tiny
      //          else // if (xy * z < 0)
      //            if (z > 0)
      //              if rnd_mode != RP
      //                the result is tiny
      //              else // if RP
      //                the result is not tiny
      //            else // if (z < 0)
      //              if rnd_mode != RM
      //                the result is tiny
      //              else // if RM
      //                the result is not tiny
      //              endif
      //            endif
      //          endif
      //        endif
      //      endif
      //    endif 
      if (((e3 == expmin) && ((q3 + scale) < p34)) || 
          ((e3 == expmin) && ((q3 + scale) == p34) && 
          ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull) && // 10^33_high
          (res.w[0] == 0x38c15b0a00000000ull) && // 10^33_low
          (z_sign != p_sign) &&
          ((!z_sign && rnd_mode != ROUNDING_UP) || 
          (z_sign && rnd_mode != ROUNDING_DOWN)))) {
        *pfpsf |= UNDERFLOW_EXCEPTION;
      }
      if (rnd_mode != ROUNDING_TO_NEAREST) {
        rounding_correction (rnd_mode,
                             is_inexact_lt_midpoint,
                             is_inexact_gt_midpoint,
                             is_midpoint_lt_even, is_midpoint_gt_even,
                             e3, &res, pfpsf);
      }
      BID_RETURN (res)

    } else if (p34 == delta) { // Case (1''B)

      // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
      // and C3 can be scaled up to p34 digits if needed

      // scale C3 to p34 digits if needed
      scale = p34 - q3; // 0 <= scale <= p34 - 1
      if (scale == 0) {
        res.w[1] = C3.w[1];
        res.w[0] = C3.w[0];
      } else if (q3 <= 19) { // z fits in 64 bits
        if (scale <= 19) { // 10^scale fits in 64 bits
          // 64 x 64 C3.w[0] * __bid_ten2k64[scale]
          __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
        } else { // 10^scale fits in 128 bits
          // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
          __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]);
        }
      } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
        // 64 x 128 __bid_ten2k64[scale] * C3
        __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
      }
      // subtract scale from the exponent
      z_exp = z_exp - ((UINT64) scale << 49);
      e3 = e3 - scale;
      // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits

      // determine whether x * y is less than, equal to, or greater than 
      // 1/2 ulp (z)
      if (q4 <= 19) {
        if (C4.w[0] < __bid_midpoint64[q4 - 1]) { // < 1/2 ulp
          lt_half_ulp = 1;
        } else if (C4.w[0] == __bid_midpoint64[q4 - 1]) { // = 1/2 ulp
          eq_half_ulp = 1;
        } else { // > 1/2 ulp
          gt_half_ulp = 1;
        }
      } else if (q4 <= 38) {
        if (C4.w[2] == 0 && (C4.w[1] < __bid_midpoint128[q4 - 20].w[1] || 
            (C4.w[1] == __bid_midpoint128[q4 - 20].w[1] && 
            C4.w[0] < __bid_midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
          lt_half_ulp = 1;
        } else if (C4.w[2] == 0 && C4.w[1] == __bid_midpoint128[q4 - 20].w[1] && 
            C4.w[0] == __bid_midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
          eq_half_ulp = 1;
        } else { // > 1/2 ulp
          gt_half_ulp = 1;
        }
      } else if (q4 <= 58) {
        if (C4.w[3] == 0 && (C4.w[2] < __bid_midpoint192[q4 - 39].w[2] || 
            (C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && 
            C4.w[1] < __bid_midpoint192[q4 - 39].w[1]) || 
            (C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && 
            C4.w[1] == __bid_midpoint192[q4 - 39].w[1] && 
            C4.w[0] < __bid_midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
          lt_half_ulp = 1;
        } else if (C4.w[3] == 0 && C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && 
            C4.w[1] == __bid_midpoint192[q4 - 39].w[1] && 
            C4.w[0] == __bid_midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
          eq_half_ulp = 1;
        } else { // > 1/2 ulp
          gt_half_ulp = 1;
        }
      } else {
        if (C4.w[3] < __bid_midpoint256[q4 - 59].w[3] || 
            (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && 
            C4.w[2] < __bid_midpoint256[q4 - 59].w[2]) || 
            (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && 
            C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && 
            C4.w[1] < __bid_midpoint256[q4 - 59].w[1]) || 
            (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && 
            C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && 
            C4.w[1] == __bid_midpoint256[q4 - 59].w[1] && 
            C4.w[0] < __bid_midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
          lt_half_ulp = 1;
        } else if (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && 
            C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && 
            C4.w[1] == __bid_midpoint256[q4 - 59].w[1] && 
            C4.w[0] == __bid_midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
          eq_half_ulp = 1;
        } else { // > 1/2 ulp
          gt_half_ulp = 1;
        }
      }

      if (p_sign == z_sign) {
        if (lt_half_ulp) {
          res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
          // use the following to avoid double rounding errors when operating on
          // mixed formats in rounding to nearest
          is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
        } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
          // add 1 ulp to the significand
          res.w[0]++;
          if (res.w[0] == 0x0ull)
            res.w[1]++;
          // check for rounding overflow, when coeff == 10^34
          if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull && 
              res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
            e3 = e3 + 1;
            // coeff = 10^33
            z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
            res.w[1] = 0x0000314dc6448d93ull;
            res.w[0] = 0x38c15b0a00000000ull;
          }
          // end add 1 ulp
          res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
          if (eq_half_ulp) {
            is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
          } else {
            is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
          }
        } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
          // leave unchanged 
          res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
          is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
        }
        // the result is always inexact, and never tiny
        // set the inexact flag
        *pfpsf |= INEXACT_EXCEPTION;
        // check for overflow
        if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
          res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
          res.w[0] = 0x0000000000000000ull;
          *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
          BID_RETURN (res)
        }
        if (rnd_mode != ROUNDING_TO_NEAREST) {
          rounding_correction (rnd_mode,
                               is_inexact_lt_midpoint,
                               is_inexact_gt_midpoint,
                               is_midpoint_lt_even, is_midpoint_gt_even,
                               e3, &res, pfpsf);
        }
      } else { // if (p_sign != z_sign)
        // consider two cases, because C3 * 10^scale = 10^33 is a special case
        if (res.w[1] != 0x0000314dc6448d93ull || 
            res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
          if (lt_half_ulp) {
            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
            // use the following to avoid double rounding errors when operating
            // on mixed formats in rounding to nearest
            is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
          } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
            // subtract 1 ulp from the significand
            res.w[0]--;
            if (res.w[0] == 0xffffffffffffffffull)
              res.w[1]--;
            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
            if (eq_half_ulp) {
              is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
            } else {
              is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
            }
          } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
            // leave unchanged
            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
            is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
          }
          // the result is always inexact, and never tiny
          // check for overflow for RN
          if (e3 > expmax) {
            if (rnd_mode == ROUNDING_TO_NEAREST) {
              res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
              res.w[0] = 0x0000000000000000ull;
              *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
            } else {
              rounding_correction (rnd_mode,
                                   is_inexact_lt_midpoint,
                                   is_inexact_gt_midpoint,
                                   is_midpoint_lt_even,
                                   is_midpoint_gt_even, e3, &res,
                                   pfpsf);
            }
            BID_RETURN (res)
          }
          // set the inexact flag
          *pfpsf |= INEXACT_EXCEPTION;
          if (rnd_mode != ROUNDING_TO_NEAREST) {
            rounding_correction (rnd_mode,
                                 is_inexact_lt_midpoint,
                                 is_inexact_gt_midpoint,
                                 is_midpoint_lt_even,
                                 is_midpoint_gt_even, e3, &res, pfpsf);
          }
        } else { // if C3 * 10^scale = 10^33
          e3 = (z_exp >> 49) - 6176;
          if (e3 > expmin) {
            // the result is exact if exp > expmin and C4 = d*10^(q4-1), 
            // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
            if (q4 == 1) {
              // if q4 = 1 the result is exact
              // result coefficient = 10^34 - C4
              res.w[1] = 0x0001ed09bead87c0ull;
              res.w[0] = 0x378d8e6400000000ull - C4.w[0];
              z_exp = z_exp - EXP_P1;
              e3 = e3 - 1;
              res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
            } else {
              // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 
              // x = q4-1, 1 <= x <= 67 and check if this operation is exact
              if (q4 <= 18) { // 2 <= q4 <= 18
                __bid_round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
                              &is_midpoint_lt_even,
                              &is_midpoint_gt_even,
                              &is_inexact_lt_midpoint,
                              &is_inexact_gt_midpoint);
              } else if (q4 <= 38) {
                P128.w[1] = C4.w[1];
                P128.w[0] = C4.w[0];
                __bid_round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
                                &is_midpoint_lt_even,
                                &is_midpoint_gt_even,
                                &is_inexact_lt_midpoint,
                                &is_inexact_gt_midpoint);
                R64 = R128.w[0]; // one decimal digit
              } else if (q4 <= 57) {
                P192.w[2] = C4.w[2];
                P192.w[1] = C4.w[1];
                P192.w[0] = C4.w[0];
                __bid_round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
                                &is_midpoint_lt_even,
                                &is_midpoint_gt_even,
                                &is_inexact_lt_midpoint,
                                &is_inexact_gt_midpoint);
                R64 = R192.w[0]; // one decimal digit
              } else { // if (q4 <= 68)
                __bid_round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
                                &is_midpoint_lt_even,
                                &is_midpoint_gt_even,
                                &is_inexact_lt_midpoint,
                                &is_inexact_gt_midpoint);
                R64 = R256.w[0]; // one decimal digit
              }
              if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
                  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
                // the result is exact: 10^34 - R64
                // incr_exp = 0 with certainty
                z_exp = z_exp - EXP_P1;
                e3 = e3 - 1;
                res.w[1] =
                  z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
                res.w[0] = 0x378d8e6400000000ull - R64;
              } else {
                // We want R64 to be the top digit of C4, but we actually 
                // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
                // because the top digit is (C4 * 10^(-q4+1))RZ
                // however, if incr_exp = 1 then R64 = 10 with certainty
                if (incr_exp) {
                  R64 = 10;
                }
                // the result is inexact as C4 has more than 1 significant digit
                // and C3 * 10^scale = 10^33
                // example of case that is treated here:
                // 100...0 * 10^e3 - 0.41 * 10^e3 =
                // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
                // note that (e3 > expmin}
                // in order to round, subtract R64 from 10^34 and then compare
                // C4 - R64 * 10^(q4-1) with 1/2 ulp
                // calculate 10^34 - R64
                res.w[1] = 0x0001ed09bead87c0ull;
                res.w[0] = 0x378d8e6400000000ull - R64;
                z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
                // calculate C4 - R64 * 10^(q4-1); this is a rare case and
                // R64 is small, 1 <= R64 <= 9
                e3 = e3 - 1;
                if (is_inexact_lt_midpoint) {
                  is_inexact_lt_midpoint = 0;
                  is_inexact_gt_midpoint = 1;
                } else if (is_inexact_gt_midpoint) {
                  is_inexact_gt_midpoint = 0;
                  is_inexact_lt_midpoint = 1;
                } else if (is_midpoint_lt_even) {
                  is_midpoint_lt_even = 0;
                  is_midpoint_gt_even = 1;
                } else if (is_midpoint_gt_even) {
                  is_midpoint_gt_even = 0;
                  is_midpoint_lt_even = 1;
                } else {
                  ;
                }
                // the result is always inexact, and never tiny
                // check for overflow for RN
                if (e3 > expmax) {
                  if (rnd_mode == ROUNDING_TO_NEAREST) {
                    res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
                    res.w[0] = 0x0000000000000000ull;
                    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
                  } else {
                    rounding_correction (rnd_mode,
                                         is_inexact_lt_midpoint,
                                         is_inexact_gt_midpoint,
                                         is_midpoint_lt_even,
                                         is_midpoint_gt_even, e3, &res,
                                         pfpsf);
                  }
                  BID_RETURN (res)
                }
                // set the inexact flag
                *pfpsf |= INEXACT_EXCEPTION;
                res.w[1] =
                  z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
                if (rnd_mode != ROUNDING_TO_NEAREST) {
                  rounding_correction (rnd_mode,
                                       is_inexact_lt_midpoint,
                                       is_inexact_gt_midpoint,
                                       is_midpoint_lt_even,
                                       is_midpoint_gt_even, e3, &res,
                                       pfpsf);
                }
                z_exp = res.w[1] & MASK_EXP;
                e3 = (z_exp >> 49) - 6176;
              }        // end result is inexact
            }        // end q4 > 1
          } else { // if (e3 = emin)
            // if e3 = expmin the result is also tiny (the condition for
            // tininess is C4 > 050...0 [q4 digits] which is met because
            // the msd of C4 is not zero)
            // the result is tiny and inexact in all rounding modes;
            // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp, 
            // gt_half_ulp to calculate)
            // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged

            // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
            if (gt_half_ulp) { // res = 10^33 - 1
              res.w[1] = 0x0000314dc6448d93ull;
              res.w[0] = 0x38c15b09ffffffffull;
            } else {
              res.w[1] = 0x0000314dc6448d93ull;
              res.w[0] = 0x38c15b0a00000000ull;
            }
            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
            *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later

            if (eq_half_ulp) {
              is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
            } else if (lt_half_ulp) {
              is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
            } else { // if (gt_half_ulp)
              is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
            }

            if (rnd_mode != ROUNDING_TO_NEAREST) {
              rounding_correction (rnd_mode,
                                   is_inexact_lt_midpoint,
                                   is_inexact_gt_midpoint,
                                   is_midpoint_lt_even,
                                   is_midpoint_gt_even, e3, &res,
                                   pfpsf);
            }
          }        // end e3 = emin
          // set the inexact flag (if the result was not exact)
          if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
              is_midpoint_lt_even || is_midpoint_gt_even)
            *pfpsf |= INEXACT_EXCEPTION;
        }        // end 10^33
      }        // end if (p_sign != z_sign)
      res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
      BID_RETURN (res)

    } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
        (q3 <= delta && delta + q4 <= p34) || // Case (3)
        (delta < q3 && p34 < delta + q4) || // Case (4)
        (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
        (delta + q4 < q3)) &&// Case (6)
        !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)

      // the result has the sign of z

      if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
          (delta < q3 && p34 < delta + q4)) { // Case (4)
        // round first the sum x * y + z with unbounded exponent
        // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1, 
        // 1 <= scale <= 33
        // calculate res = C3 * 10^scale
        scale = p34 - q3;
        x0 = delta + q4 - p34;
      } else if (delta + q4 < q3) { // Case (6)
        // make Case (6) look like Case (3) or Case (5) with scale = 0
        // by scaling up C4 by 10^(q3 - delta - q4) 
        scale = q3 - delta - q4; // 1 <= scale <= 33
        if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
          if (scale <= 19) { // 10^scale fits in 64 bits
            // 64 x 64 C4.w[0] * __bid_ten2k64[scale]
            __mul_64x64_to_128MACH (P128, C4.w[0], __bid_ten2k64[scale]);
          } else { // 10^scale fits in 128 bits
            // 64 x 128 C4.w[0] * __bid_ten2k128[scale - 20]
            __mul_128x64_to_128 (P128, C4.w[0], __bid_ten2k128[scale - 20]);
          }
        } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
          // 64 x 128 __bid_ten2k64[scale] * C4
          __mul_128x64_to_128 (P128, __bid_ten2k64[scale], C4);
        }
        C4.w[0] = P128.w[0];
        C4.w[1] = P128.w[1];
        // e4 does not need adjustment, as it is not used from this point on
        scale = 0;
        x0 = 0;
        // now Case (6) looks like Case (3) or Case (5) with scale = 0 
      } else { // if Case (3) or Case (5)
        // Note: Case (3) is similar to Case (2), but scale differs and the
        // result is exact, unless it is tiny (so x0 = 0 when calculating the
        // result with unbounded exponent)

        // calculate first the sum x * y + z with unbounded exponent (exact)
        // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
        // 1 <= scale <= 33
        // calculate res = C3 * 10^scale
        scale = delta + q4 - q3;
        x0 = 0;
        // Note: the comments which follow refer [mainly] to Case (2)]
      }

    case2_repeat:
      if (scale == 0) { // this could happen e.g. if we return to case2_repeat
        // or in Case (4)
        res.w[1] = C3.w[1];
        res.w[0] = C3.w[0];
      } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
        if (scale <= 19) { // 10^scale fits in 64 bits
          // 64 x 64 C3.w[0] * __bid_ten2k64[scale]
          __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
        } else { // 10^scale fits in 128 bits
          // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
          __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]);
        }
      } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
        // 64 x 128 __bid_ten2k64[scale] * C3
        __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
      }
      // e3 is already calculated
      e3 = e3 - scale;
      // now res = C3 * 10^scale and e3 = e3 - scale
      // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
      // because the result was too small

      // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
      // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
      // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
      // the rounding fits in 128 bits!)
      // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
      // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
      if (x0 == 0) { // this could happen only if we return to case2_repeat, or
        // for Case (6)
        R128.w[1] = C4.w[1];
        R128.w[0] = C4.w[0];
      } else if (q4 <= 18) {
        // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
        __bid_round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
        if (incr_exp) {
          // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
          R64 = __bid_ten2k64[q4 - x0];
        }
        R128.w[1] = 0;
        R128.w[0] = R64;
      } else if (q4 <= 38) {
        // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
        P128.w[1] = C4.w[1];
        P128.w[0] = C4.w[0];
        __bid_round128_19_38 (q4, x0, P128, &R128, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
        if (incr_exp) {
          // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
          if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
            R128.w[0] = __bid_ten2k64[q4 - x0];
            // R128.w[1] stays 0
          } else { // 20 <= q4 - x0 <= 37
            R128.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0];
            R128.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1];
          }
        }
      } else if (q4 <= 57) {
        // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
        P192.w[2] = C4.w[2];
        P192.w[1] = C4.w[1];
        P192.w[0] = C4.w[0];
        __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
        // R192.w[2] is always 0
        if (incr_exp) {
          // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
          if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
            R192.w[0] = __bid_ten2k64[q4 - x0];
            // R192.w[1] stays 0
            // R192.w[2] stays 0
          } else { // 20 <= q4 - x0 <= 33
            R192.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0];
            R192.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1];
            // R192.w[2] stays 0
          }
        }
        R128.w[1] = R192.w[1];
        R128.w[0] = R192.w[0];
      } else {
        // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
        __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
        // R256.w[3] and R256.w[2] are always 0
        if (incr_exp) {
          // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
          if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19  
            R256.w[0] = __bid_ten2k64[q4 - x0];
            // R256.w[1] stays 0
            // R256.w[2] stays 0
            // R256.w[3] stays 0
          } else { // 20 <= q4 - x0 <= 33 
            R256.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0];
            R256.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1];
            // R256.w[2] stays 0
            // R256.w[3] stays 0
          }
        }
        R128.w[1] = R256.w[1];
        R128.w[0] = R256.w[0];
      }
      // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
      // rounded to nearest, which were copied into R128
      if (z_sign == p_sign) {
        lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
        // the sum can result in [up to] p34 or p34 + 1 digits
        res.w[0] = res.w[0] + R128.w[0];
        res.w[1] = res.w[1] + R128.w[1];
        if (res.w[0] < R128.w[0])
          res.w[1]++; // carry
        // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
        if (res.w[1] > 0x0001ed09bead87c0ull ||
            (res.w[1] == 0x0001ed09bead87c0ull &&
            res.w[0] > 0x378d8e63ffffffffull)) {
          // avoid double rounding error
          is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
          is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
          is_midpoint_lt_even0 = is_midpoint_lt_even;
          is_midpoint_gt_even0 = is_midpoint_gt_even;
          is_inexact_lt_midpoint = 0;
          is_inexact_gt_midpoint = 0;
          is_midpoint_lt_even = 0;
          is_midpoint_gt_even = 0;
          P128.w[1] = res.w[1];
          P128.w[0] = res.w[0];
          __bid_round128_19_38 (35, 1, P128, &res, &incr_exp,
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
                          &is_inexact_lt_midpoint,
                          &is_inexact_gt_midpoint);
          // incr_exp is 0 with certainty in this case
          // avoid a double rounding error
          if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 
              is_midpoint_lt_even) { // double rounding error upward
            // res = res - 1
            res.w[0]--;
            if (res.w[0] == 0xffffffffffffffffull)
              res.w[1]--;
            // Note: a double rounding error upward is not possible; for this
            // the result after the first rounding would have to be 99...95
            // (35 digits in all), possibly followed by a number of zeros; this
            // not possible in Cases (2)-(6) or (15)-(17) which may get here
            is_midpoint_lt_even = 0;
            is_inexact_lt_midpoint = 1;
          } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 
              is_midpoint_gt_even) { // double rounding error downward
            // res = res + 1
            res.w[0]++;
            if (res.w[0] == 0)
              res.w[1]++;
            is_midpoint_gt_even = 0;
            is_inexact_gt_midpoint = 1;
          } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
                     !is_inexact_lt_midpoint
                     && !is_inexact_gt_midpoint) {
            // if this second rounding was exact the result may still be 
            // inexact because of the first rounding
            if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
              is_inexact_gt_midpoint = 1;
            }
            if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
              is_inexact_lt_midpoint = 1;
            }
          } else if (is_midpoint_gt_even &&
              (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
            // pulled up to a midpoint
            is_inexact_lt_midpoint = 1;
            is_inexact_gt_midpoint = 0;
            is_midpoint_lt_even = 0;
            is_midpoint_gt_even = 0;
          } else if (is_midpoint_lt_even &&
              (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
            // pulled down to a midpoint
            is_inexact_lt_midpoint = 0;
            is_inexact_gt_midpoint = 1;
            is_midpoint_lt_even = 0;
            is_midpoint_gt_even = 0;
          } else {
            ;
          }
          // adjust exponent
          e3 = e3 + 1;
          if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
              !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
            if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
                is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
              is_inexact_lt_midpoint = 1;
            }
          }
        } else {
          // this is the result rounded with unbounded exponent, unless a
          // correction is needed
          res.w[1] = res.w[1] & MASK_COEFF;
          if (lsb == 1) {
            if (is_midpoint_gt_even) {
              // res = res + 1
              is_midpoint_gt_even = 0;
              is_midpoint_lt_even = 1;
              res.w[0]++;
              if (res.w[0] == 0x0)
                res.w[1]++;
              // check for rounding overflow
              if (res.w[1] == 0x0001ed09bead87c0ull &&
                  res.w[0] == 0x378d8e6400000000ull) {
                // res = 10^34 => rounding overflow
                res.w[1] = 0x0000314dc6448d93ull;
                res.w[0] = 0x38c15b0a00000000ull; // 10^33
                e3++;
              }
            } else if (is_midpoint_lt_even) {
              // res = res - 1
              is_midpoint_lt_even = 0;
              is_midpoint_gt_even = 1;
              res.w[0]--;
              if (res.w[0] == 0xffffffffffffffffull)
                res.w[1]--;
              // if the result is pure zero, the sign depends on the rounding 
              // mode (x*y and z had opposite signs)
              if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
                if (rnd_mode != ROUNDING_DOWN)
                  z_sign = 0x0000000000000000ull;
                else
                  z_sign = 0x8000000000000000ull;
                // the exponent is max (e3, expmin)
                res.w[1] = 0x0;
                res.w[0] = 0x0;
                BID_RETURN (res)
              }
            } else {
              ;
            }
          }
        }
      } else { // if (z_sign != p_sign)
        lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
        // used to swap rounding indicators if p_sign != z_sign
        // the sum can result in [up to] p34 or p34 - 1 digits
        tmp64 = res.w[0];
        res.w[0] = res.w[0] - R128.w[0];
        res.w[1] = res.w[1] - R128.w[1];
        if (res.w[0] > tmp64)
          res.w[1]--; // borrow
        // if res < 10^33 and exp > expmin need to decrease x0 and 
        // increase scale by 1
        if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || 
            (res.w[1] == 0x0000314dc6448d93ull && 
            res.w[0] < 0x38c15b0a00000000ull)) || 
            (is_inexact_lt_midpoint && res.w[1] == 0x0000314dc6448d93ull && 
            res.w[0] == 0x38c15b0a00000000ull)) && x0 >= 1) {
          x0 = x0 - 1;
          // first restore e3, otherwise it will be too small
          e3 = e3 + scale;
          scale = scale + 1;
          is_inexact_lt_midpoint = 0;
          is_inexact_gt_midpoint = 0;
          is_midpoint_lt_even = 0;
          is_midpoint_gt_even = 0;
          incr_exp = 0;
          goto case2_repeat;
        }
        // else this is the result rounded with unbounded exponent
        // because the result has opposite sign to that of C4 which was 
        // rounded, need to change the rounding indicators
        if (is_inexact_lt_midpoint) {
          is_inexact_lt_midpoint = 0;
          is_inexact_gt_midpoint = 1;
        } else if (is_inexact_gt_midpoint) {
          is_inexact_gt_midpoint = 0;
          is_inexact_lt_midpoint = 1;
        } else if (lsb == 0) {
          if (is_midpoint_lt_even) {
            is_midpoint_lt_even = 0;
            is_midpoint_gt_even = 1;
          } else if (is_midpoint_gt_even) {
            is_midpoint_gt_even = 0;
            is_midpoint_lt_even = 1;
          } else {
            ;
          }
        } else if (lsb == 1) {
          if (is_midpoint_lt_even) {
            // res = res + 1
            res.w[0]++;
            if (res.w[0] == 0x0)
              res.w[1]++;
            // check for rounding overflow
            if (res.w[1] == 0x0001ed09bead87c0ull &&
                res.w[0] == 0x378d8e6400000000ull) {
              // res = 10^34 => rounding overflow
              res.w[1] = 0x0000314dc6448d93ull;
              res.w[0] = 0x38c15b0a00000000ull; // 10^33
              e3++;
            }
          } else if (is_midpoint_gt_even) {
            // res = res - 1
            res.w[0]--;
            if (res.w[0] == 0xffffffffffffffffull)
              res.w[1]--;
            // if the result is pure zero, the sign depends on the rounding 
            // mode (x*y and z had opposite signs)
            if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
              if (rnd_mode != ROUNDING_DOWN)
                z_sign = 0x0000000000000000ull;
              else
                z_sign = 0x8000000000000000ull;
              // the exponent is max (e3, expmin)
              res.w[1] = 0x0;
              res.w[0] = 0x0;
              BID_RETURN (res)
            }
          } else {
            ;
          }
        } else {
          ;
        }
      }
      // check for underflow
      if (e3 < expmin) {
        // the result is tiny, so we must truncate more of res
        is_tiny = 1;
        x0 = expmin - e3;
        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
        is_midpoint_lt_even0 = is_midpoint_lt_even;
        is_midpoint_gt_even0 = is_midpoint_gt_even;
        is_inexact_lt_midpoint = 0;
        is_inexact_gt_midpoint = 0;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;
        // determine the number of decimal digits in res
        if (res.w[1] == 0x0) {
          // between 1 and 19 digits
          for (ind = 1; ind <= 19; ind++) {
            if (res.w[0] < __bid_ten2k64[ind]) {
              break;
            }
          }
          // ind digits
        } else if ((res.w[1] < __bid_ten2k128[0].w[1] ||
                    (res.w[1] == __bid_ten2k128[0].w[1]
                    && res.w[0] < __bid_ten2k128[0].w[0]))) {
          // 20 digits
          ind = 20;
        } else { // between 21 and 38 digits
          for (ind = 1; ind <= 18; ind++) {
            if (res.w[1] < __bid_ten2k128[ind].w[1] ||
                (res.w[1] == __bid_ten2k128[ind].w[1] &&
                res.w[0] < __bid_ten2k128[ind].w[0])) {
              break;
            }
          }
          // ind + 20 digits
          ind = ind + 20;
        }

        // at this point ind >= x0; because delta >= 2 on this path, the case
        // ind = x0 cab occur only in Case (2) or case (3), when C3 has one
        // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), 
        // the signs of x * y and z are opposite, and through cancellation 
        // the most significant decimal digit in res has the weight
        // 10^(emin-1); however, it is clear that in this case the most
        // significant digit is 9, so the result before rounding is
        // 0.9... * 10^emin
        // Otherwise, ind > x0 because there are non-zero decimal digits in the
        // result with weight of at least 10^emin, and correction for underflow
        //  can be carried out using the round*_*_2_* () routines
        if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
          res.w[1] = 0x0;
          res.w[0] = 0x1;
          is_inexact_gt_midpoint = 1;
        } else if (ind <= 18) { // check that 2 <= ind
          // 2 <= ind <= 18, 1 <= x0 <= 17
          __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
          if (incr_exp) {
            // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
            R64 = __bid_ten2k64[q4 - x0];
          }
          res.w[1] = 0;
          res.w[0] = R64;
        } else if (ind <= 38) {
          // 19 <= ind <= 38
          P128.w[1] = res.w[1];
          P128.w[0] = res.w[0];
          __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp,
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
                          &is_inexact_lt_midpoint,
                          &is_inexact_gt_midpoint);
          if (incr_exp) {
            // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
            if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
              res.w[0] = __bid_ten2k64[ind - x0];
              // res.w[1] stays 0
            } else { // 20 <= ind - x0 <= 37
              res.w[0] = __bid_ten2k128[ind - x0 - 20].w[0];
              res.w[1] = __bid_ten2k128[ind - x0 - 20].w[1];
            }
          }
        }
        // avoid a double rounding error
        if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 
            is_midpoint_lt_even) { // double rounding error upward
          // res = res - 1
          res.w[0]--;
          if (res.w[0] == 0xffffffffffffffffull)
            res.w[1]--;
          // Note: a double rounding error upward is not possible; for this
          // the result after the first rounding would have to be 99...95
          // (35 digits in all), possibly followed by a number of zeros; this
          // not possible in Cases (2)-(6) which may get here
          is_midpoint_lt_even = 0;
          is_inexact_lt_midpoint = 1;
        } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 
            is_midpoint_gt_even) { // double rounding error downward
          // res = res + 1
          res.w[0]++;
          if (res.w[0] == 0)
            res.w[1]++;
          is_midpoint_gt_even = 0;
          is_inexact_gt_midpoint = 1;
        } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
                   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
          // if this second rounding was exact the result may still be 
          // inexact because of the first rounding
          if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
            is_inexact_gt_midpoint = 1;
          }
          if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
            is_inexact_lt_midpoint = 1;
          }
        } else if (is_midpoint_gt_even &&
                   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
          // pulled up to a midpoint
          is_inexact_lt_midpoint = 1;
          is_inexact_gt_midpoint = 0;
          is_midpoint_lt_even = 0;
          is_midpoint_gt_even = 0;
        } else if (is_midpoint_lt_even &&
                   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
          // pulled down to a midpoint
          is_inexact_lt_midpoint = 0;
          is_inexact_gt_midpoint = 1;
          is_midpoint_lt_even = 0;
          is_midpoint_gt_even = 0;
        } else {
          ;
        }
        // adjust exponent
        e3 = e3 + x0;
        if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
            !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
          if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
              is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
            is_inexact_lt_midpoint = 1;
          }
        }
      }
      // check for inexact result
      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
          is_midpoint_lt_even || is_midpoint_gt_even) {
        // set the inexact flag
        *pfpsf |= INEXACT_EXCEPTION;
        if (is_tiny)
          *pfpsf |= UNDERFLOW_EXCEPTION;
      }
      // now check for significand = 10^34 (may have resulted from going
      // back to case2_repeat)
      if (res.w[1] == 0x0001ed09bead87c0ull && 
          res.w[0] == 0x378d8e6400000000ull) { // if  res = 10^34
        res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
        res.w[0] = 0x38c15b0a00000000ull;
        e3 = e3 + 1;
      }
      res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
      // check for overflow
      if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
        res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
        res.w[0] = 0x0000000000000000ull;
        *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
      }
      if (rnd_mode != ROUNDING_TO_NEAREST) {
        rounding_correction (rnd_mode,
                             is_inexact_lt_midpoint,
                             is_inexact_gt_midpoint,
                             is_midpoint_lt_even, is_midpoint_gt_even,
                             e3, &res, pfpsf);
      }
      BID_RETURN (res)

    } else {

      // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
      // the signs of x*y and z are opposite; in these cases massive
      // cancellation can occur, so it is better to scale either C3 or C4 and 
      // to perform the subtraction before rounding; rounding is performed 
      // next, depending on the number of decimal digits in the result and on 
      // the exponent value
      // Note: overlow is not possible in this case
      // this is similar to Cases (15), (16), and (17)

      if (delta + q4 < q3) { // from Case (6) 
        // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and 
        // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
        // and call add_and_round; delta stays positive
        // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
        P128.w[1] = C3.w[1];
        P128.w[0] = C3.w[0];
        C3.w[1] = C4.w[1];
        C3.w[0] = C4.w[0];
        C4.w[1] = P128.w[1];
        C4.w[0] = P128.w[0];
        ind = q3;
        q3 = q4;
        q4 = ind;
        ind = e3;
        e3 = e4;
        e4 = ind;
        tmp64 = z_sign;
        z_sign = p_sign;
        p_sign = tmp64;
      } else { // from Cases (2), (3), (4), (5)
        // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be 
        // scaled up by q4 + delta - q3; this is the same as in Cases (15), 
        // (16), and (17) if we just change the sign of delta
        delta = -delta;
      }
      add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
                     rnd_mode, &is_midpoint_lt_even,
                     &is_midpoint_gt_even, &is_inexact_lt_midpoint,
                     &is_inexact_gt_midpoint, pfpsf, &res);
      BID_RETURN (res)

    }

  } else { // if delta < 0

    delta = -delta;

    if (p34 < q4 && q4 <= delta) { // Case (7)

      // truncate C4 to p34 digits into res
      // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
      x0 = q4 - p34;
      if (q4 <= 38) {
        P128.w[1] = C4.w[1];
        P128.w[0] = C4.w[0];
        __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
      } else if (q4 <= 57) { // 35 <= q4 <= 57
        P192.w[2] = C4.w[2];
        P192.w[1] = C4.w[1];
        P192.w[0] = C4.w[0];
        __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
        res.w[0] = R192.w[0];
        res.w[1] = R192.w[1];
      } else { // if (q4 <= 68)
        __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
        res.w[0] = R256.w[0];
        res.w[1] = R256.w[1];
      }
      e4 = e4 + x0;
      if (incr_exp) {
        e4 = e4 + 1;
      }
      if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
          !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
        // if C4 rounded to p34 digits is exact then the result is inexact,
        // in a way that depends on the signs of x * y and z
        if (p_sign == z_sign) {
          is_inexact_lt_midpoint = 1;
        } else { // if (p_sign != z_sign)
          if (res.w[1] != 0x0000314dc6448d93ull || 
              res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
            is_inexact_gt_midpoint = 1;
          } else { // res = 10^33 and exact is a special case
            // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
            // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
            // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
            // Note: ulp is really ulp/10 (after borrow which propagates to msd)
            if (delta > p34 + 1) { // C3 < 1/2
              // res = 10^33, unchanged
              is_inexact_gt_midpoint = 1;
            } else { // if (delta == p34 + 1)
              if (q3 <= 19) {
                if (C3.w[0] < __bid_midpoint64[q3 - 1]) { // C3 < 1/2 ulp
                  // res = 10^33, unchanged
                  is_inexact_gt_midpoint = 1;
                } else if (C3.w[0] == __bid_midpoint64[q3 - 1]) { // C3 = 1/2 ulp
                  // res = 10^33, unchanged
                  is_midpoint_lt_even = 1;
                } else { // if (C3.w[0] > __bid_midpoint64[q3-1]), C3 > 1/2 ulp
                  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
                  res.w[0] = 0x378d8e63ffffffffull;
                  e4 = e4 - 1;
                  is_inexact_lt_midpoint = 1;
                }
              } else { // if (20 <= q3 <=34)
                if (C3.w[1] < __bid_midpoint128[q3 - 20].w[1] || 
                    (C3.w[1] == __bid_midpoint128[q3 - 20].w[1] && 
                    C3.w[0] < __bid_midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
                  // res = 10^33, unchanged
                  is_inexact_gt_midpoint = 1;
                } else if (C3.w[1] == __bid_midpoint128[q3 - 20].w[1] && 
                    C3.w[0] == __bid_midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
                  // res = 10^33, unchanged
                  is_midpoint_lt_even = 1;
                } else { // if (C3 > __bid_midpoint128[q3-20]), C3 > 1/2 ulp
                  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
                  res.w[0] = 0x378d8e63ffffffffull;
                  e4 = e4 - 1;
                  is_inexact_lt_midpoint = 1;
                }
              }
            }
          }
        }
      } else if (is_midpoint_lt_even) {
        if (z_sign != p_sign) {
          // needs correction: res = res - 1
          res.w[0] = res.w[0] - 1;
          if (res.w[0] == 0xffffffffffffffffull)
            res.w[1]--;
          // if it is (10^33-1)*10^e4 then the corect result is 
          // (10^34-1)*10(e4-1)
          if (res.w[1] == 0x0000314dc6448d93ull &&
              res.w[0] == 0x38c15b09ffffffffull) {
            res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
            res.w[0] = 0x378d8e63ffffffffull;
            e4 = e4 - 1;
          }
          is_midpoint_lt_even = 0;
          is_inexact_lt_midpoint = 1;
        } else { // if (z_sign == p_sign)
          is_midpoint_lt_even = 0;
          is_inexact_gt_midpoint = 1;
        }
      } else if (is_midpoint_gt_even) {
        if (z_sign == p_sign) {
          // needs correction: res = res + 1 (cannot cross in the next binade)
          res.w[0] = res.w[0] + 1;
          if (res.w[0] == 0x0000000000000000ull)
            res.w[1]++;
          is_midpoint_gt_even = 0;
          is_inexact_gt_midpoint = 1;
        } else { // if (z_sign != p_sign)
          is_midpoint_gt_even = 0;
          is_inexact_lt_midpoint = 1;
        }
      } else {
        ; // the rounded result is already correct
      }
      // check for overflow
      if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
        res.w[1] = p_sign | 0x7800000000000000ull;
        res.w[0] = 0x0000000000000000ull;
        *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
      } else { // no overflow or not RN
        p_exp = ((UINT64) (e4 + 6176) << 49);
        res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
      }
      if (rnd_mode != ROUNDING_TO_NEAREST) {
        rounding_correction (rnd_mode,
                             is_inexact_lt_midpoint,
                             is_inexact_gt_midpoint,
                             is_midpoint_lt_even, is_midpoint_gt_even,
                             e4, &res, pfpsf);
      }
      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
          is_midpoint_lt_even || is_midpoint_gt_even) {
        // set the inexact flag
        *pfpsf |= INEXACT_EXCEPTION;
      }
      BID_RETURN (res)

    } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
        (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
        (q4 <= delta && delta + q3 <= p34) || // Case (10)
        (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
        (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
        (delta + q3 < q4 && q4 <= p34)) { // Case (18)

      // Case (8) is similar to Case (1), with C3 and C4 swapped
      // Case (9) is similar to Case (2), with C3 and C4 swapped
      // Case (10) is similar to Case (3), with C3 and C4 swapped
      // Case (13) is similar to Case (4), with C3 and C4 swapped
      // Case (14) is similar to Case (5), with C3 and C4 swapped
      // Case (18) is similar to Case (6), with C3 and C4 swapped

      // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
      // and go back to delta_ge_zero
      // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
      P128.w[1] = C3.w[1];
      P128.w[0] = C3.w[0];
      C3.w[1] = C4.w[1];
      C3.w[0] = C4.w[0];
      C4.w[1] = P128.w[1];
      C4.w[0] = P128.w[0];
      ind = q3;
      q3 = q4;
      q4 = ind;
      ind = e3;
      e3 = e4;
      e4 = ind;
      tmp64 = z_sign;
      z_sign = p_sign;
      p_sign = tmp64;
      tmp64 = z_exp;
      z_exp = p_exp;
      p_exp = tmp64;
      goto delta_ge_zero;

    } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
               (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)

      // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
      // 1 <= x0 <= q3 - 1 <= p34 - 1 
      x0 = e4 - e3; // or x0 = delta + q3 - q4
      if (q3 <= 18) { // 2 <= q3 <= 18
        __bid_round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
        // C3.w[1] = 0;
        C3.w[0] = R64;
      } else if (q3 <= 38) {
        __bid_round128_19_38 (q3, x0, C3, &R128, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
        C3.w[1] = R128.w[1];
        C3.w[0] = R128.w[0];
      }
      // the rounded result has q3 - x0 digits
      // we want the exponent to be e4, so if incr_exp = 1 then
      // multiply the rounded result by 10 - it will still fit in 113 bits
      if (incr_exp) {
        // 64 x 128 -> 128
        P128.w[1] = C3.w[1];
        P128.w[0] = C3.w[0];
        __mul_64x128_to_128 (C3, __bid_ten2k64[1], P128);
      }
      e3 = e3 + x0; // this is e4
      // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; 
      // the result will have the sign of x * y; the exponent is e4
      R256.w[3] = 0;
      R256.w[2] = 0;
      R256.w[1] = C3.w[1];
      R256.w[0] = C3.w[0];
      if (p_sign == z_sign) { // R256 = C4 + R256
        add256 (C4, R256, &R256);
      } else { // if (p_sign != z_sign) { // R256 = C4 - R256
        sub256 (C4, R256, &R256); // the result cannot be pure zero
        // because the result has opposite sign to that of R256 which was 
        // rounded, need to change the rounding indicators
        lsb = C4.w[0] & 0x01;
        if (is_inexact_lt_midpoint) {
          is_inexact_lt_midpoint = 0;
          is_inexact_gt_midpoint = 1;
        } else if (is_inexact_gt_midpoint) {
          is_inexact_gt_midpoint = 0;
          is_inexact_lt_midpoint = 1;
        } else if (lsb == 0) {
          if (is_midpoint_lt_even) {
            is_midpoint_lt_even = 0;
            is_midpoint_gt_even = 1;
          } else if (is_midpoint_gt_even) {
            is_midpoint_gt_even = 0;
            is_midpoint_lt_even = 1;
          } else {
            ;
          }
        } else if (lsb == 1) {
          if (is_midpoint_lt_even) {
            // res = res + 1
            R256.w[0]++;
            if (R256.w[0] == 0x0) {
              R256.w[1]++;
              if (R256.w[1] == 0x0) {
                R256.w[2]++;
                if (R256.w[2] == 0x0) {
                  R256.w[3]++;
                }
              }
            }
            // no check for rounding overflow - R256 was a difference
          } else if (is_midpoint_gt_even) {
            // res = res - 1
            R256.w[0]--;
            if (R256.w[0] == 0xffffffffffffffffull) {
              R256.w[1]--;
              if (R256.w[1] == 0xffffffffffffffffull) {
                R256.w[2]--;
                if (R256.w[2] == 0xffffffffffffffffull) {
                  R256.w[3]--;
                }
              }
            }
          } else {
            ;
          }
        } else {
          ;
        }
      }
      // determine the number of decimal digits in R256
      ind = __bid_nr_digits256 (R256); // ind >= p34
      // if R256 is sum, then ind > p34; if R256 is a difference, then 
      // ind >= p34; this means that we can calculate the result rounded to
      // the destination precision, with unbounded exponent, starting from R256
      // and using the indicators from the rounding of C3 to avoid a double
      // rounding error 

      if (ind < p34) {
        ;
      } else if (ind == p34) {
        // the result rounded to the destination precision with 
        // unbounded exponent
        // is (-1)^p_sign * R256 * 10^e4
        res.w[1] = R256.w[1];
        res.w[0] = R256.w[0];
      } else { // if (ind > p34)
        // if more than P digits, round to nearest to P digits
        // round R256 to p34 digits
        x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
        // save C3 rounding indicators to help avoid double rounding error
        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
        is_midpoint_lt_even0 = is_midpoint_lt_even;
        is_midpoint_gt_even0 = is_midpoint_gt_even;
        // initialize rounding indicators
        is_inexact_lt_midpoint = 0;
        is_inexact_gt_midpoint = 0;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;
        // round to p34 digits; the result fits in 113 bits
        if (ind <= 38) {
          P128.w[1] = R256.w[1];
          P128.w[0] = R256.w[0];
          __bid_round128_19_38 (ind, x0, P128, &R128, &incr_exp,
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
                          &is_inexact_lt_midpoint,
                          &is_inexact_gt_midpoint);
        } else if (ind <= 57) {
          P192.w[2] = R256.w[2];
          P192.w[1] = R256.w[1];
          P192.w[0] = R256.w[0];
          __bid_round192_39_57 (ind, x0, P192, &R192, &incr_exp,
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
                          &is_inexact_lt_midpoint,
                          &is_inexact_gt_midpoint);
          R128.w[1] = R192.w[1];
          R128.w[0] = R192.w[0];
        } else { // if (ind <= 68)
          __bid_round256_58_76 (ind, x0, R256, &R256, &incr_exp,
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
                          &is_inexact_lt_midpoint,
                          &is_inexact_gt_midpoint);
          R128.w[1] = R256.w[1];
          R128.w[0] = R256.w[0];
        }
        // the rounded result has p34 = 34 digits
        e4 = e4 + x0 + incr_exp;

        res.w[1] = R128.w[1];
        res.w[0] = R128.w[0];

        // avoid a double rounding error
        if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 
            is_midpoint_lt_even) { // double rounding error upward
          // res = res - 1
          res.w[0]--;
          if (res.w[0] == 0xffffffffffffffffull)
            res.w[1]--;
          is_midpoint_lt_even = 0;
          is_inexact_lt_midpoint = 1;
          // Note: a double rounding error upward is not possible; for this
          // the result after the first rounding would have to be 99...95
          // (35 digits in all), possibly followed by a number of zeros; this
          // not possible in Cases (2)-(6) or (15)-(17) which may get here
          // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
          if (res.w[1] == 0x0000314dc6448d93ull && 
              res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
            res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
            res.w[0] = 0x378d8e63ffffffffull;
            e4--;
          }
        } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 
            is_midpoint_gt_even) { // double rounding error downward
          // res = res + 1 
          res.w[0]++;
          if (res.w[0] == 0)
            res.w[1]++;
          is_midpoint_gt_even = 0;
          is_inexact_gt_midpoint = 1;
        } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
                   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
          // if this second rounding was exact the result may still be
          // inexact because of the first rounding
          if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
            is_inexact_gt_midpoint = 1;
          }
          if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
            is_inexact_lt_midpoint = 1;
          }
        } else if (is_midpoint_gt_even &&
                   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
          // pulled up to a midpoint
          is_inexact_lt_midpoint = 1;
          is_inexact_gt_midpoint = 0;
          is_midpoint_lt_even = 0;
          is_midpoint_gt_even = 0;
        } else if (is_midpoint_lt_even &&
                   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
          // pulled down to a midpoint
          is_inexact_lt_midpoint = 0;
          is_inexact_gt_midpoint = 1;
          is_midpoint_lt_even = 0;
          is_midpoint_gt_even = 0;
        } else {
          ;
        }
      }

      // determine tininess
      if (rnd_mode == ROUNDING_TO_NEAREST) {
        if (e4 < expmin) {
          is_tiny = 1; // for other rounding modes apply correction
        }
      } else {
        // for RM, RP, RZ, RA apply correction in order to determine tininess
        // but do not save the result; apply the correction to 
        // (-1)^p_sign * res * 10^0
        P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
        P128.w[0] = res.w[0];
        rounding_correction (rnd_mode,
                             is_inexact_lt_midpoint,
                             is_inexact_gt_midpoint,
                             is_midpoint_lt_even, is_midpoint_gt_even,
                             0, &P128, pfpsf);
        scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
        // the number of digits in the significand is p34 = 34
        if (e4 + scale < expmin) {
          is_tiny = 1;
        }
      }

      // the result rounded to the destination precision with unbounded exponent
      // is (-1)^p_sign * res * 10^e4
      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
      // res.w[0] unchanged;
      // Note: res is correct only if expmin <= e4 <= expmax
      ind = p34; // the number of decimal digits in the signifcand of res

      // at this point we have the result rounded with unbounded exponent in
      // res and we know its tininess:
      // res = (-1)^p_sign * significand * 10^e4, 
      // where q (significand) = ind = p34
      // Note: res is correct only if expmin <= e4 <= expmax

      // check for overflow if RN
      if (rnd_mode == ROUNDING_TO_NEAREST
          && (ind + e4) > (p34 + expmax)) {
        res.w[1] = p_sign | 0x7800000000000000ull;
        res.w[0] = 0x0000000000000000ull;
        *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
        BID_RETURN (res)
      }        // else not overflow or not RN, so continue

      // from this point on this is similar to the last part of the computation
      // for Cases (15), (16), (17)

      // if (e4 >= expmin) we have the result rounded with bounded exponent
      if (e4 < expmin) {
        x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
        // where the result rounded [at most] once is
        //   (-1)^p_sign * significand_res * 10^e4

        // avoid double rounding error
        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
        is_midpoint_lt_even0 = is_midpoint_lt_even;
        is_midpoint_gt_even0 = is_midpoint_gt_even;
        is_inexact_lt_midpoint = 0;
        is_inexact_gt_midpoint = 0;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;

        if (x0 > ind) {
          // nothing is left of res when moving the decimal point left x0 digits
          is_inexact_lt_midpoint = 1;
          res.w[1] = p_sign | 0x0000000000000000ull;
          res.w[0] = 0x0000000000000000ull;
          e4 = expmin;
        } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
          // this is <, =, or > 1/2 ulp
          // compare the ind-digit value in the significand of res with
          // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is 
          // less than, equal to, or greater than 1/2 ulp (significand of res)
          R128.w[1] = res.w[1] & MASK_COEFF;
          R128.w[0] = res.w[0];
          if (ind <= 19) {
            if (R128.w[0] < __bid_midpoint64[ind - 1]) { // < 1/2 ulp
              lt_half_ulp = 1;
              is_inexact_lt_midpoint = 1;
            } else if (R128.w[0] == __bid_midpoint64[ind - 1]) { // = 1/2 ulp
              eq_half_ulp = 1;
              is_midpoint_gt_even = 1;
            } else { // > 1/2 ulp
              gt_half_ulp = 1;
              is_inexact_gt_midpoint = 1;
            }
          } else { // if (ind <= 38)
            if (R128.w[1] < __bid_midpoint128[ind - 20].w[1] || 
                (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && 
                R128.w[0] < __bid_midpoint128[ind - 20].w[0])) { // < 1/2 ulp
              lt_half_ulp = 1;
              is_inexact_lt_midpoint = 1;
            } else if (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && 
                R128.w[0] == __bid_midpoint128[ind - 20].w[0]) { // = 1/2 ulp
              eq_half_ulp = 1;
              is_midpoint_gt_even = 1;
            } else { // > 1/2 ulp
              gt_half_ulp = 1;
              is_inexact_gt_midpoint = 1;
            }
          }
          if (lt_half_ulp || eq_half_ulp) {
            // res = +0.0 * 10^expmin
            res.w[1] = 0x0000000000000000ull;
            res.w[0] = 0x0000000000000000ull;
          } else { // if (gt_half_ulp)
            // res = +1 * 10^expmin
            res.w[1] = 0x0000000000000000ull;
            res.w[0] = 0x0000000000000001ull;
          }
          res.w[1] = p_sign | res.w[1];
          e4 = expmin;
        } else { // if (1 <= x0 <= ind - 1 <= 33)
          // round the ind-digit result to ind - x0 digits

          if (ind <= 18) { // 2 <= ind <= 18
            __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
                          &is_inexact_lt_midpoint,
                          &is_inexact_gt_midpoint);
            res.w[1] = 0x0;
            res.w[0] = R64;
          } else if (ind <= 38) {
            P128.w[1] = res.w[1] & MASK_COEFF;
            P128.w[0] = res.w[0];
            __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp,
                            &is_midpoint_lt_even, &is_midpoint_gt_even,
                            &is_inexact_lt_midpoint,
                            &is_inexact_gt_midpoint);
          }
          e4 = e4 + x0; // expmin
          // we want the exponent to be expmin, so if incr_exp = 1 then
          // multiply the rounded result by 10 - it will still fit in 113 bits
          if (incr_exp) {
            // 64 x 128 -> 128
            P128.w[1] = res.w[1] & MASK_COEFF;
            P128.w[0] = res.w[0];
            __mul_64x128_to_128 (res, __bid_ten2k64[1], P128);
          }
          res.w[1] =
            p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
          // avoid a double rounding error
          if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 
              is_midpoint_lt_even) { // double rounding error upward
            // res = res - 1
            res.w[0]--;
            if (res.w[0] == 0xffffffffffffffffull)
              res.w[1]--;
            // Note: a double rounding error upward is not possible; for this
            // the result after the first rounding would have to be 99...95
            // (35 digits in all), possibly followed by a number of zeros; this
            // not possible in this underflow case
            is_midpoint_lt_even = 0;
            is_inexact_lt_midpoint = 1;
          } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 
              is_midpoint_gt_even) { // double rounding error downward
            // res = res + 1
            res.w[0]++;
            if (res.w[0] == 0)
              res.w[1]++;
            is_midpoint_gt_even = 0;
            is_inexact_gt_midpoint = 1;
          } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
              !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
            // if this second rounding was exact the result may still be 
            // inexact because of the first rounding
            if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
              is_inexact_gt_midpoint = 1;
            }
            if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
              is_inexact_lt_midpoint = 1;
            }
          } else if (is_midpoint_gt_even &&
                     (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
            // pulled up to a midpoint
            is_inexact_lt_midpoint = 1;
            is_inexact_gt_midpoint = 0;
            is_midpoint_lt_even = 0;
            is_midpoint_gt_even = 0;
          } else if (is_midpoint_lt_even &&
                     (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
            // pulled down to a midpoint
            is_inexact_lt_midpoint = 0;
            is_inexact_gt_midpoint = 1;
            is_midpoint_lt_even = 0;
            is_midpoint_gt_even = 0;
          } else {
            ;
          }
        }
      }
      // res contains the correct result
      // apply correction if not rounding to nearest
      if (rnd_mode != ROUNDING_TO_NEAREST) {
        rounding_correction (rnd_mode,
                             is_inexact_lt_midpoint,
                             is_inexact_gt_midpoint,
                             is_midpoint_lt_even, is_midpoint_gt_even,
                             e4, &res, pfpsf);
      }
      if (is_midpoint_lt_even || is_midpoint_gt_even ||
          is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
        // set the inexact flag
        *pfpsf |= INEXACT_EXCEPTION;
        if (is_tiny)
          *pfpsf |= UNDERFLOW_EXCEPTION;
      }

      BID_RETURN (res)

    } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
        (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
        (delta + q3 <= p34 && p34 < q4)) { // Case (17)

      // calculate first the result rounded to the destination precision, with
      // unbounded exponent

      add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
                     rnd_mode, &is_midpoint_lt_even,
                     &is_midpoint_gt_even, &is_inexact_lt_midpoint,
                     &is_inexact_gt_midpoint, pfpsf, &res);

      BID_RETURN (res)

    } else {
      ;
    }

  }        // end if delta < 0

  BID_RETURN (res)

}