gcc/libgcc/config/libbid/bid128_round_integral.c
libgcc/

2007-09-27  H.J. Lu  <hongjiu.lu@intel.com>

	* Makefile.in (dfp-filenames): Replace decimal_globals,
	decimal_data, binarydecimal and convert_data with
	bid_decimal_globals, bid_decimal_data, bid_binarydecimal
	and bid_convert_data, respectively.

libgcc/config/libbid/

2007-09-27  H.J. Lu  <hongjiu.lu@intel.com>

	* bid128_fromstring.c: Removed.

	* bid_dpd.c: New from libbid 2007-09-26.
	* bid128_to_int16.c: Likewise.
	* bid128_to_int8.c: Likewise.
	* bid128_to_uint8.c: Likewise.
	* bid128_to_uint16.c: Likewise.
	* bid64_to_int16.c: Likewise.
	* bid64_to_int8.c: Likewise.
	* bid64_to_uint16.c: Likewise.
	* bid64_to_uint8.c: Likewise.

	* bid128_2_str.h: Updated from libbid 2007-09-26.
	* bid128_2_str_macros.h: Likewise.
	* bid128_2_str_tables.c: Likewise.
	* bid128_add.c: Likewise.
	* bid128.c: Likewise.
	* bid128_compare.c: Likewise.
	* bid128_div.c: Likewise.
	* bid128_fma.c: Likewise.
	* bid128_logb.c: Likewise.
	* bid128_minmax.c: Likewise.
	* bid128_mul.c: Likewise.
	* bid128_next.c: Likewise.
	* bid128_noncomp.c: Likewise.
	* bid128_quantize.c: Likewise.
	* bid128_rem.c: Likewise.
	* bid128_round_integral.c: Likewise.
	* bid128_scalb.c: Likewise.
	* bid128_sqrt.c: Likewise.
	* bid128_string.c: Likewise.
	* bid128_to_int32.c: Likewise.
	* bid128_to_int64.c: Likewise.
	* bid128_to_uint32.c: Likewise.
	* bid128_to_uint64.c: Likewise.
	* bid32_to_bid128.c: Likewise.
	* bid32_to_bid64.c: Likewise.
	* bid64_add.c: Likewise.
	* bid64_compare.c: Likewise.
	* bid64_div.c: Likewise.
	* bid64_fma.c: Likewise.
	* bid64_logb.c: Likewise.
	* bid64_minmax.c: Likewise.
	* bid64_mul.c: Likewise.
	* bid64_next.c: Likewise.
	* bid64_noncomp.c: Likewise.
	* bid64_quantize.c: Likewise.
	* bid64_rem.c: Likewise.
	* bid64_round_integral.c: Likewise.
	* bid64_scalb.c: Likewise.
	* bid64_sqrt.c: Likewise.
	* bid64_string.c: Likewise.
	* bid64_to_bid128.c: Likewise.
	* bid64_to_int32.c: Likewise.
	* bid64_to_int64.c: Likewise.
	* bid64_to_uint32.c: Likewise.
	* bid64_to_uint64.c: Likewise.
	* bid_b2d.h: Likewise.
	* bid_binarydecimal.c: Likewise.
	* bid_conf.h: Likewise.
	* bid_convert_data.c: Likewise.
	* bid_decimal_data.c: Likewise.
	* bid_decimal_globals.c: Likewise.
	* bid_div_macros.h: Likewise.
	* bid_flag_operations.c: Likewise.
	* bid_from_int.c: Likewise.
	* bid_functions.h: Likewise.
	* bid_gcc_intrinsics.h: Likewise.
	* bid_inline_add.h: Likewise.
	* bid_internal.h: Likewise.
	* bid_round.c: Likewise.
	* bid_sqrt_macros.h: Likewise.
	* _addsub_dd.c: Likewise.
	* _addsub_sd.c: Likewise.
	* _addsub_td.c: Likewise.
	* _dd_to_df.c: Likewise.
	* _dd_to_di.c: Likewise.
	* _dd_to_sd.c: Likewise.
	* _dd_to_sf.c: Likewise.
	* _dd_to_si.c: Likewise.
	* _dd_to_td.c: Likewise.
	* _dd_to_tf.c: Likewise.
	* _dd_to_udi.c: Likewise.
	* _dd_to_usi.c: Likewise.
	* _dd_to_xf.c: Likewise.
	* _df_to_dd.c: Likewise.
	* _df_to_sd.c: Likewise.
	* _df_to_td.c: Likewise.
	* _di_to_dd.c: Likewise.
	* _di_to_sd.c: Likewise.
	* _di_to_td.c: Likewise.
	* _div_dd.c: Likewise.
	* _div_sd.c: Likewise.
	* _div_td.c: Likewise.
	* _eq_dd.c: Likewise.
	* _eq_sd.c: Likewise.
	* _eq_td.c: Likewise.
	* _ge_dd.c: Likewise.
	* _ge_sd.c: Likewise.
	* _ge_td.c: Likewise.
	* _gt_dd.c: Likewise.
	* _gt_sd.c: Likewise.
	* _gt_td.c: Likewise.
	* _isinfd128.c: Likewise.
	* _isinfd32.c: Likewise.
	* _isinfd64.c: Likewise.
	* _le_dd.c: Likewise.
	* _le_sd.c: Likewise.
	* _le_td.c: Likewise.
	* _lt_dd.c: Likewise.
	* _lt_sd.c: Likewise.
	* _lt_td.c: Likewise.
	* _mul_dd.c: Likewise.
	* _mul_sd.c: Likewise.
	* _mul_td.c: Likewise.
	* _ne_dd.c: Likewise.
	* _ne_sd.c: Likewise.
	* _ne_td.c: Likewise.
	* _sd_to_dd.c: Likewise.
	* _sd_to_df.c: Likewise.
	* _sd_to_di.c: Likewise.
	* _sd_to_sf.c: Likewise.
	* _sd_to_si.c: Likewise.
	* _sd_to_td.c: Likewise.
	* _sd_to_tf.c: Likewise.
	* _sd_to_udi.c: Likewise.
	* _sd_to_usi.c: Likewise.
	* _sd_to_xf.c: Likewise.
	* _sf_to_dd.c: Likewise.
	* _sf_to_sd.c: Likewise.
	* _sf_to_td.c: Likewise.
	* _si_to_dd.c: Likewise.
	* _si_to_sd.c: Likewise.
	* _si_to_td.c: Likewise.
	* _td_to_dd.c: Likewise.
	* _td_to_df.c: Likewise.
	* _td_to_di.c: Likewise.
	* _td_to_sd.c: Likewise.
	* _td_to_sf.c: Likewise.
	* _td_to_si.c: Likewise.
	* _td_to_tf.c: Likewise.
	* _td_to_udi.c: Likewise.
	* _td_to_usi.c: Likewise.
	* _td_to_xf.c: Likewise.
	* _tf_to_dd.c: Likewise.
	* _tf_to_sd.c: Likewise.
	* _tf_to_td.c: Likewise.
	* _udi_to_dd.c: Likewise.
	* _udi_to_sd.c: Likewise.
	* _udi_to_td.c: Likewise.
	* _unord_dd.c: Likewise.
	* _unord_sd.c: Likewise.
	* _unord_td.c: Likewise.
	* _usi_to_dd.c: Likewise.
	* _usi_to_sd.c: Likewise.
	* _usi_to_td.c: Likewise.
	* _xf_to_dd.c: Likewise.
	* _xf_to_sd.c: Likewise.
	* _xf_to_td.c: Likewise.

2007-09-27  H.J. Lu  <hongjiu.lu@intel.com>

	* b2d.h: Renamed to ...
	* bid_b2d.h: This.

	* bid128_to_string.c: Renamed to ...
	* bid128_string.c: This.

	* bid_intrinsics.h: Renamed to ...
	* bid_gcc_intrinsics.h: This.

	* bid_string.c: Renamed to ...
	* bid64_string.c: This.

	* binarydecimal.c: Renamed to ...
	* bid_binarydecimal.c: This.

	* convert_data.c: Renamed to ...
	* bid_convert_data.c: This.

	* decimal_data.c: Renamed to ...
	* bid_decimal_data.c: This.

	* decimal_globals.c: Renamed to ...
	* bid_decimal_globals.c: This.

	* div_macros.h: Renamed to ...
	* bid_div_macros.h: This.

	* inline_bid_add.h: Renamed to ...
	* bid_inline_add.h: This.

	* sqrt_macros.h: Renamed to ...
	* bid_sqrt_macros.h: This.

From-SVN: r128841
2007-09-27 10:47:23 -07:00


/* Copyright (C) 2007 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file. (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
#define BID_128RES
#include "bid_internal.h"
/*****************************************************************************
* BID128_round_integral_exact
****************************************************************************/
BID128_FUNCTION_ARG1 (bid128_round_integral_exact, x)
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
};
UINT64 x_sign;
UINT64 x_exp;
int exp; // unbiased exponent
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
UINT64 tmp64;
BID_UI64DOUBLE tmp1;
unsigned int x_nr_bits;
int q, ind, shift;
UINT128 C1;
UINT256 fstar;
UINT256 P256;
// check for NaN or Infinity
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
// x is special
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
// if x = NaN, then res = Q (x)
// check first for non-canonical NaN payload
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
(x.w[0] > 0x38c15b09ffffffffull))) {
x.w[1] = x.w[1] & 0xffffc00000000000ull;
x.w[0] = 0x0ull;
}
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (x)
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
res.w[0] = x.w[0];
} else { // x is QNaN
// return x
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
res.w[0] = x.w[0];
}
BID_RETURN (res)
} else { // x is not a NaN, so it must be infinity
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
// return +inf
res.w[1] = 0x7800000000000000ull;
res.w[0] = 0x0000000000000000ull;
} else { // x is -inf
// return -inf
res.w[1] = 0xf800000000000000ull;
res.w[0] = 0x0000000000000000ull;
}
BID_RETURN (res);
}
}
// unpack x
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
C1.w[1] = x.w[1] & MASK_COEFF;
C1.w[0] = x.w[0];
// check for non-canonical values (treated as zero)
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
// non-canonical
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
C1.w[1] = 0; // significand high
C1.w[0] = 0; // significand low
} else { // G0_G1 != 11
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
if (C1.w[1] > 0x0001ed09bead87c0ull ||
(C1.w[1] == 0x0001ed09bead87c0ull
&& C1.w[0] > 0x378d8e63ffffffffull)) {
// x is non-canonical if coefficient is larger than 10^34 -1
C1.w[1] = 0;
C1.w[0] = 0;
} else { // canonical
;
}
}
// test for input equal to zero
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
// x is 0
// return 0 preserving the sign bit and the preferred exponent
// of MAX(Q(x), 0)
if (x_exp <= (0x1820ull << 49)) {
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
} else {
res.w[1] = x_sign | x_exp;
}
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
// x is not special and is not zero
switch (rnd_mode) {
case ROUNDING_TO_NEAREST:
case ROUNDING_TIES_AWAY:
// if (exp <= -(p+1)) return 0.0
if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
res.w[1] = x_sign | 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_DOWN:
// if (exp <= -p) return -1.0 or +0.0
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
if (x_sign) {
// if negative, return negative 1, because we know coefficient
// is non-zero (would have been caught above)
res.w[1] = 0xb040000000000000ull;
res.w[0] = 0x0000000000000001ull;
} else {
// if positive, return positive 0, because we know coefficient is
// non-zero (would have been caught above)
res.w[1] = 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
}
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_UP:
// if (exp <= -p) return -0.0 or +1.0
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
if (x_sign) {
// if negative, return negative 0, because we know the coefficient
// is non-zero (would have been caught above)
res.w[1] = 0xb040000000000000ull;
res.w[0] = 0x0000000000000000ull;
} else {
// if positive, return positive 1, because we know coefficient is
// non-zero (would have been caught above)
res.w[1] = 0x3040000000000000ull;
res.w[0] = 0x0000000000000001ull;
}
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_TO_ZERO:
// if (exp <= -p) return -0.0 or +0.0
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
res.w[1] = x_sign | 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
}
// q = nr. of decimal digits in x
// determine first the nr. of bits in x
if (C1.w[1] == 0) {
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
// split the 64-bit value in two 32-bit halves to avoid rounding errors
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
x_nr_bits =
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
} else { // x < 2^32
tmp1.d = (double) (C1.w[0]); // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // if x < 2^53
tmp1.d = (double) C1.w[0]; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
tmp1.d = (double) C1.w[1]; // exact conversion
x_nr_bits =
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
(C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
q++;
}
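// For instance, a coefficient with x_nr_bits = 10 lies in [512, 1023],
// which straddles the 3-digit/4-digit boundary, so the bit count alone
// cannot give q; nr_digits[9].digits is 0 and the threshold comparison
// (against 10^3) decides whether q is 3 or 4.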
exp = (x_exp >> 49) - 6176;
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res.w[1] = x.w[1];
res.w[0] = x.w[0];
BID_RETURN (res);
}
// exp < 0
switch (rnd_mode) {
case ROUNDING_TO_NEAREST:
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
// need to shift right -exp digits from the coefficient; exp will be 0
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
tmp64 = C1.w[0];
if (ind <= 19) {
C1.w[0] = C1.w[0] + midpoint64[ind - 1];
} else {
C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
}
if (C1.w[0] < tmp64)
C1.w[1]++;
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 34
// kx = 10^(-x) = ten2mk128[ind - 1]
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
// the approximation of 10^(-x) was rounded up to 118 bits
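// Worked example for this mode: for x = 2.5 (C1 = 25, exp = -1) we have
// ind = 1 and midpoint64[0] = 5, so C1 becomes 30; 30 * ten2mk128[0] is
// 3 plus a tiny fraction f*.  Since f* < 10^(-1) the input was an exact
// midpoint, and because the truncated result 3 is odd it is decremented
// below, giving 2 as required by rounding to nearest-even.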
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
// determine the value of res and fstar
// determine inexactness of the rounding of C*
// if (0 < f* - 1/2 < 10^(-x)) then
// the result is exact
// else // if (f* - 1/2 > T*) then
// the result is inexact
// Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
// redundant shift = shiftright128[ind - 1]; // shift = 0
res.w[1] = P256.w[3];
res.w[0] = P256.w[2];
// redundant fstar.w[3] = 0;
// redundant fstar.w[2] = 0;
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* < 10^(-x) <=> midpoint
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
// if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
((fstar.w[1] < (ten2mk128[ind - 1].w[1]))
|| ((fstar.w[1] == ten2mk128[ind - 1].w[1])
&& (fstar.w[0] < ten2mk128[ind - 1].w[0])))) {
// subtract 1 to make even
if (res.w[0]-- == 0) {
res.w[1]--;
}
}
if (fstar.w[1] > 0x8000000000000000ull ||
(fstar.w[1] == 0x8000000000000000ull
&& fstar.w[0] > 0x0ull)) {
// f* > 1/2 and the result may be exact
tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2
if (tmp64 > ten2mk128[ind - 1].w[1] ||
(tmp64 == ten2mk128[ind - 1].w[1] &&
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
} // else the result is exact
} else { // the result is inexact; f* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
}
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res.w[1] = (P256.w[3] >> shift);
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
// redundant fstar.w[3] = 0;
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* < 10^(-x) <=> midpoint
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
// subtract 1 to make even
if (res.w[0]-- == 0) {
res.w[1]--;
}
}
if (fstar.w[2] > onehalf128[ind - 1] ||
(fstar.w[2] == onehalf128[ind - 1]
&& (fstar.w[1] || fstar.w[0]))) {
// f2* > 1/2 and the result may be exact
// Calculate f2* - 1/2
tmp64 = fstar.w[2] - onehalf128[ind - 1];
if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
}
} else { // 22 <= ind - 1 <= 33
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
res.w[1] = 0;
res.w[0] = P256.w[3] >> shift;
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
fstar.w[2] = P256.w[2];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* < 10^(-x) <=> midpoint
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
fstar.w[3] == 0 && fstar.w[2] == 0 &&
(fstar.w[1] < ten2mk128[ind - 1].w[1] ||
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
// subtract 1 to make even
if (res.w[0]-- == 0) {
res.w[1]--;
}
}
if (fstar.w[3] > onehalf128[ind - 1] ||
(fstar.w[3] == onehalf128[ind - 1] &&
(fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
// f2* > 1/2 and the result may be exact
// Calculate f2* - 1/2
tmp64 = fstar.w[3] - onehalf128[ind - 1];
if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
}
}
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
BID_RETURN (res);
} else { // if ((q + exp) < 0) <=> q < -exp
// the result is +0 or -0
res.w[1] = x_sign | 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_TIES_AWAY:
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
// need to shift right -exp digits from the coefficient; exp will be 0
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
tmp64 = C1.w[0];
if (ind <= 19) {
C1.w[0] = C1.w[0] + midpoint64[ind - 1];
} else {
C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
}
if (C1.w[0] < tmp64)
C1.w[1]++;
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 34
// kx = 10^(-x) = ten2mk128[ind - 1]
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
// the approximation of 10^(-x) was rounded up to 118 bits
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
// the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind - 1], e.g.
// if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
// if (0 < f* < 10^(-x)) then the result is a midpoint; C1 was already
// incremented by 1/2 * 10^x above, so the truncation below rounds the
// midpoint away from zero, as required by this rounding mode
// else
// C* = floor(C*) (logical right shift; C* has p decimal digits,
// correct by Property 1)
// n = C* * 10^(e+x)
// determine also the inexactness of the rounding of C*
// if (0 < f* - 1/2 < 10^(-x)) then
// the result is exact
// else // if (f* - 1/2 > T*) then
// the result is inexact
// Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
// shift right C* by Ex-128 = shiftright128[ind]
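// Worked example for this mode: for x = 2.5 (C1 = 25, exp = -1) we have
// ind = 1, C1 becomes 30, and floor(30 * 10^(-1)) = 3; no even/odd
// adjustment is applied here, so the midpoint 2.5 rounds away from zero
// to 3.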
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
// redundant shift = shiftright128[ind - 1]; // shift = 0
res.w[1] = P256.w[3];
res.w[0] = P256.w[2];
// redundant fstar.w[3] = 0;
// redundant fstar.w[2] = 0;
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
if (fstar.w[1] > 0x8000000000000000ull ||
(fstar.w[1] == 0x8000000000000000ull
&& fstar.w[0] > 0x0ull)) {
// f* > 1/2 and the result may be exact
tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2
if ((tmp64 > ten2mk128[ind - 1].w[1] ||
(tmp64 == ten2mk128[ind - 1].w[1] &&
fstar.w[0] >= ten2mk128[ind - 1].w[0]))) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
} // else the result is exact
} else { // the result is inexact; f* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
}
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res.w[1] = (P256.w[3] >> shift);
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
// redundant fstar.w[3] = 0;
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
if (fstar.w[2] > onehalf128[ind - 1] ||
(fstar.w[2] == onehalf128[ind - 1]
&& (fstar.w[1] || fstar.w[0]))) {
// f2* > 1/2 and the result may be exact
// Calculate f2* - 1/2
tmp64 = fstar.w[2] - onehalf128[ind - 1];
if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
}
} else { // 22 <= ind - 1 <= 33
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
res.w[1] = 0;
res.w[0] = P256.w[3] >> shift;
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
fstar.w[2] = P256.w[2];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
if (fstar.w[3] > onehalf128[ind - 1] ||
(fstar.w[3] == onehalf128[ind - 1] &&
(fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
// f2* > 1/2 and the result may be exact
// Calculate f2* - 1/2
tmp64 = fstar.w[3] - onehalf128[ind - 1];
if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
}
}
// if the result was a midpoint, it was already rounded away from zero
res.w[1] |= x_sign | 0x3040000000000000ull;
BID_RETURN (res);
} else { // if ((q + exp) < 0) <=> q < -exp
// the result is +0 or -0
res.w[1] = x_sign | 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_DOWN:
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
// need to shift right -exp digits from the coefficient; exp will be 0
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
// (number of digits to be chopped off)
// chop off ind digits from the lower part of C1
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
// tmp64 = C1.w[0];
// if (ind <= 19) {
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
// } else {
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
// }
// if (C1.w[0] < tmp64) C1.w[1]++;
// if carry-out from C1.w[0], increment C1.w[1]
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 34
// kx = 10^(-x) = ten2mk128[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 118 bits
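// Worked example for this mode: for x = -2.1 (C1 = 21, exp = -1) we have
// ind = 1 and floor(21 * 10^(-1)) = 2 with a non-zero fraction f*, so
// the result is inexact; because x is negative the magnitude is
// incremented below, yielding -3.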
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res.w[1] = P256.w[3];
res.w[0] = P256.w[2];
// redundant fstar.w[3] = 0;
// redundant fstar.w[2] = 0;
// redundant fstar.w[1] = P256.w[1];
// redundant fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if ((P256.w[1] > ten2mk128[ind - 1].w[1])
|| (P256.w[1] == ten2mk128[ind - 1].w[1]
&& (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
*pfpsf |= INEXACT_EXCEPTION;
// if positive, the truncated value is already the correct result
if (x_sign) { // if negative
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res.w[1] = (P256.w[3] >> shift);
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
// redundant fstar.w[3] = 0;
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
*pfpsf |= INEXACT_EXCEPTION;
// if positive, the truncated value is already the correct result
if (x_sign) { // if negative
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
} else { // 22 <= ind - 1 <= 33
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
res.w[1] = 0;
res.w[0] = P256.w[3] >> shift;
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
fstar.w[2] = P256.w[2];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if (fstar.w[3] || fstar.w[2]
|| fstar.w[1] > ten2mk128[ind - 1].w[1]
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
*pfpsf |= INEXACT_EXCEPTION;
// if positive, the truncated value is already the correct result
if (x_sign) { // if negative
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
}
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
BID_RETURN (res);
} else { // if exp < 0 and q + exp <= 0
if (x_sign) { // negative rounds down to -1.0
res.w[1] = 0xb040000000000000ull;
res.w[0] = 0x0000000000000001ull;
} else { // positive rounds down to +0.0
res.w[1] = 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
}
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_UP:
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
// need to shift right -exp digits from the coefficient; exp will be 0
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
// (number of digits to be chopped off)
// chop off ind digits from the lower part of C1
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
// tmp64 = C1.w[0];
// if (ind <= 19) {
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
// } else {
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
// }
// if (C1.w[0] < tmp64) C1.w[1]++;
// if carry-out from C1.w[0], increment C1.w[1]
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 34
// kx = 10^(-x) = ten2mk128[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 118 bits
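// Worked example for this mode: for x = 2.1 (C1 = 21, exp = -1) we have
// ind = 1 and floor(21 * 10^(-1)) = 2 with a non-zero fraction f*, so
// the result is inexact; because x is positive the magnitude is
// incremented below, yielding 3.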
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res.w[1] = P256.w[3];
res.w[0] = P256.w[2];
// redundant fstar.w[3] = 0;
// redundant fstar.w[2] = 0;
// redundant fstar.w[1] = P256.w[1];
// redundant fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if ((P256.w[1] > ten2mk128[ind - 1].w[1])
|| (P256.w[1] == ten2mk128[ind - 1].w[1]
&& (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
*pfpsf |= INEXACT_EXCEPTION;
// if negative, the truncated value is already the correct result
if (!x_sign) { // if positive
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res.w[1] = (P256.w[3] >> shift);
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
// redundant fstar.w[3] = 0;
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
*pfpsf |= INEXACT_EXCEPTION;
// if negative, the truncated value is already the correct result
if (!x_sign) { // if positive
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
} else { // 22 <= ind - 1 <= 33
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
res.w[1] = 0;
res.w[0] = P256.w[3] >> shift;
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
fstar.w[2] = P256.w[2];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if (fstar.w[3] || fstar.w[2]
|| fstar.w[1] > ten2mk128[ind - 1].w[1]
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
*pfpsf |= INEXACT_EXCEPTION;
// if negative, the truncated value is already the correct result
if (!x_sign) { // if positive
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
}
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
BID_RETURN (res);
} else { // if exp < 0 and q + exp <= 0
if (x_sign) { // negative rounds up to -0.0
res.w[1] = 0xb040000000000000ull;
res.w[0] = 0x0000000000000000ull;
} else { // positive rounds up to +1.0
res.w[1] = 0x3040000000000000ull;
res.w[0] = 0x0000000000000001ull;
}
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_TO_ZERO:
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
// need to shift right -exp digits from the coefficient; exp will be 0
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
// (number of digits to be chopped off)
// chop off ind digits from the lower part of C1
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
//tmp64 = C1.w[0];
// if (ind <= 19) {
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
// } else {
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
// }
// if (C1.w[0] < tmp64) C1.w[1]++;
// if carry-out from C1.w[0], increment C1.w[1]
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 34
// kx = 10^(-x) = ten2mk128[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 118 bits
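// Worked example for this mode: for x = -2.7 (C1 = 27, exp = -1) we have
// ind = 1 and floor(27 * 10^(-1)) = 2; the non-zero fraction only raises
// the inexact flag, so the result is -2.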
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res.w[1] = P256.w[3];
res.w[0] = P256.w[2];
// redundant fstar.w[3] = 0;
// redundant fstar.w[2] = 0;
// redundant fstar.w[1] = P256.w[1];
// redundant fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if ((P256.w[1] > ten2mk128[ind - 1].w[1])
|| (P256.w[1] == ten2mk128[ind - 1].w[1]
&& (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
*pfpsf |= INEXACT_EXCEPTION;
}
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res.w[1] = (P256.w[3] >> shift);
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
// redundant fstar.w[3] = 0;
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
*pfpsf |= INEXACT_EXCEPTION;
}
} else { // 22 <= ind - 1 <= 33
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
res.w[1] = 0;
res.w[0] = P256.w[3] >> shift;
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
fstar.w[2] = P256.w[2];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if (fstar.w[3] || fstar.w[2]
|| fstar.w[1] > ten2mk128[ind - 1].w[1]
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
*pfpsf |= INEXACT_EXCEPTION;
}
}
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
BID_RETURN (res);
} else { // if exp < 0 and q + exp <= 0 the result is +0 or -0
res.w[1] = x_sign | 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
}
BID_RETURN (res);
}
/*****************************************************************************
* BID128_round_integral_nearest_even
****************************************************************************/
BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even, x)
UINT128 res;
UINT64 x_sign;
UINT64 x_exp;
int exp; // unbiased exponent
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
UINT64 tmp64;
BID_UI64DOUBLE tmp1;
unsigned int x_nr_bits;
int q, ind, shift;
UINT128 C1;
// UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits
UINT256 fstar;
UINT256 P256;
// check for NaN or Infinity
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
// x is special
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
// if x = NaN, then res = Q (x)
// check first for non-canonical NaN payload
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
(x.w[0] > 0x38c15b09ffffffffull))) {
x.w[1] = x.w[1] & 0xffffc00000000000ull;
x.w[0] = 0x0ull;
}
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (x)
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
res.w[0] = x.w[0];
} else { // x is QNaN
// return x
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
res.w[0] = x.w[0];
}
BID_RETURN (res)
} else { // x is not a NaN, so it must be infinity
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
// return +inf
res.w[1] = 0x7800000000000000ull;
res.w[0] = 0x0000000000000000ull;
} else { // x is -inf
// return -inf
res.w[1] = 0xf800000000000000ull;
res.w[0] = 0x0000000000000000ull;
}
BID_RETURN (res);
}
}
// unpack x
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
C1.w[1] = x.w[1] & MASK_COEFF;
C1.w[0] = x.w[0];
// check for non-canonical values (treated as zero)
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
// non-canonical
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
C1.w[1] = 0; // significand high
C1.w[0] = 0; // significand low
} else { // G0_G1 != 11
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
if (C1.w[1] > 0x0001ed09bead87c0ull ||
(C1.w[1] == 0x0001ed09bead87c0ull
&& C1.w[0] > 0x378d8e63ffffffffull)) {
// x is non-canonical if coefficient is larger than 10^34 -1
C1.w[1] = 0;
C1.w[0] = 0;
} else { // canonical
;
}
}
// test for input equal to zero
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
// x is 0
// return 0 preserving the sign bit and the preferred exponent
// of MAX(Q(x), 0)
if (x_exp <= (0x1820ull << 49)) {
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
} else {
res.w[1] = x_sign | x_exp;
}
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
// x is not special and is not zero
// if (exp <= -(p+1)) return 0
if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
res.w[1] = x_sign | 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
// q = nr. of decimal digits in x
// determine first the nr. of bits in x
if (C1.w[1] == 0) {
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
// split the 64-bit value in two 32-bit halves to avoid rounding errors
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
x_nr_bits =
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
} else { // x < 2^32
tmp1.d = (double) (C1.w[0]); // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // if x < 2^53
tmp1.d = (double) C1.w[0]; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
tmp1.d = (double) C1.w[1]; // exact conversion
x_nr_bits =
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi
|| (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
q++;
}
exp = (x_exp >> 49) - 6176;
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res.w[1] = x.w[1];
res.w[0] = x.w[0];
BID_RETURN (res);
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
// need to shift right -exp digits from the coefficient; the exp will be 0
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
tmp64 = C1.w[0];
if (ind <= 19) {
C1.w[0] = C1.w[0] + midpoint64[ind - 1];
} else {
C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
}
if (C1.w[0] < tmp64)
C1.w[1]++;
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 34
// kx = 10^(-x) = ten2mk128[ind - 1]
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
// the approximation of 10^(-x) was rounded up to 118 bits
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
// determine the value of res and fstar
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
// redundant shift = shiftright128[ind - 1]; // shift = 0
res.w[1] = P256.w[3];
res.w[0] = P256.w[2];
// redundant fstar.w[3] = 0;
// redundant fstar.w[2] = 0;
// redundant fstar.w[1] = P256.w[1];
// redundant fstar.w[0] = P256.w[0];
// fraction f* < 10^(-x) <=> midpoint
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
// if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
((P256.w[1] < (ten2mk128[ind - 1].w[1]))
|| ((P256.w[1] == ten2mk128[ind - 1].w[1])
&& (P256.w[0] < ten2mk128[ind - 1].w[0])))) {
// subtract 1 to make even
if (res.w[0]-- == 0) {
res.w[1]--;
}
}
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res.w[1] = (P256.w[3] >> shift);
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
// redundant fstar.w[3] = 0;
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* < 10^(-x) <=> midpoint
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
// subtract 1 to make even
if (res.w[0]-- == 0) {
res.w[1]--;
}
}
} else { // 22 <= ind - 1 <= 33
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
res.w[1] = 0;
res.w[0] = P256.w[3] >> shift;
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
fstar.w[2] = P256.w[2];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* < 10^(-x) <=> midpoint
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
fstar.w[3] == 0 && fstar.w[2] == 0
&& (fstar.w[1] < ten2mk128[ind - 1].w[1]
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
&& fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
// subtract 1 to make even
if (res.w[0]-- == 0) {
res.w[1]--;
}
}
}
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
BID_RETURN (res);
} else { // if ((q + exp) < 0) <=> q < -exp
// the result is +0 or -0
res.w[1] = x_sign | 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
}
/*****************************************************************************
* BID128_round_integral_negative
****************************************************************************/
BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative, x)
UINT128 res;
UINT64 x_sign;
UINT64 x_exp;
int exp; // unbiased exponent
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
// (all are UINT64)
BID_UI64DOUBLE tmp1;
unsigned int x_nr_bits;
int q, ind, shift;
UINT128 C1;
// UINT128 res is C* at first - represents up to 34 decimal digits ~
// 113 bits
UINT256 fstar;
UINT256 P256;
// check for NaN or Infinity
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
// x is special
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
// if x = NaN, then res = Q (x)
// check first for non-canonical NaN payload
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
(x.w[0] > 0x38c15b09ffffffffull))) {
x.w[1] = x.w[1] & 0xffffc00000000000ull;
x.w[0] = 0x0ull;
}
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (x)
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
res.w[0] = x.w[0];
} else { // x is QNaN
// return x
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
res.w[0] = x.w[0];
}
BID_RETURN (res)
} else { // x is not a NaN, so it must be infinity
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
// return +inf
res.w[1] = 0x7800000000000000ull;
res.w[0] = 0x0000000000000000ull;
} else { // x is -inf
// return -inf
res.w[1] = 0xf800000000000000ull;
res.w[0] = 0x0000000000000000ull;
}
BID_RETURN (res);
}
}
// unpack x
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
C1.w[1] = x.w[1] & MASK_COEFF;
C1.w[0] = x.w[0];
// check for non-canonical values (treated as zero)
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
// non-canonical
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
C1.w[1] = 0; // significand high
C1.w[0] = 0; // significand low
} else { // G0_G1 != 11
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
if (C1.w[1] > 0x0001ed09bead87c0ull ||
(C1.w[1] == 0x0001ed09bead87c0ull
&& C1.w[0] > 0x378d8e63ffffffffull)) {
// x is non-canonical if coefficient is larger than 10^34 -1
C1.w[1] = 0;
C1.w[0] = 0;
} else { // canonical
;
}
}
// test for input equal to zero
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
// x is 0
// return 0 preserving the sign bit and the preferred exponent
// of MAX(Q(x), 0)
if (x_exp <= (0x1820ull << 49)) {
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
} else {
res.w[1] = x_sign | x_exp;
}
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
// x is not special and is not zero
// if (exp <= -p) return -1.0 or +0.0
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
if (x_sign) {
// if negative, return negative 1, because we know the coefficient
// is non-zero (would have been caught above)
res.w[1] = 0xb040000000000000ull;
res.w[0] = 0x0000000000000001ull;
} else {
// if positive, return positive 0, because we know coefficient is
// non-zero (would have been caught above)
res.w[1] = 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
}
BID_RETURN (res);
}
// q = nr. of decimal digits in x
// determine first the nr. of bits in x
if (C1.w[1] == 0) {
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
// split the 64-bit value in two 32-bit halves to avoid rounding errors
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
x_nr_bits =
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
} else { // x < 2^32
tmp1.d = (double) (C1.w[0]); // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // if x < 2^53
tmp1.d = (double) C1.w[0]; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
tmp1.d = (double) C1.w[1]; // exact conversion
x_nr_bits =
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
(C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
q++;
}
exp = (x_exp >> 49) - 6176;
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res.w[1] = x.w[1];
res.w[0] = x.w[0];
BID_RETURN (res);
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
// need to shift right -exp digits from the coefficient; the exp will be 0
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
// (number of digits to be chopped off)
// chop off ind digits from the lower part of C1
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
//tmp64 = C1.w[0];
// if (ind <= 19) {
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
// } else {
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
// }
// if (C1.w[0] < tmp64) C1.w[1]++;
// if carry-out from C1.w[0], increment C1.w[1]
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 34
// kx = 10^(-x) = ten2mk128[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 118 bits
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res.w[1] = P256.w[3];
res.w[0] = P256.w[2];
// if positive, the truncated value is already the correct result
if (x_sign) { // if negative
// redundant fstar.w[3] = 0;
// redundant fstar.w[2] = 0;
// redundant fstar.w[1] = P256.w[1];
// redundant fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if ((P256.w[1] > ten2mk128[ind - 1].w[1])
|| (P256.w[1] == ten2mk128[ind - 1].w[1]
&& (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res.w[1] = (P256.w[3] >> shift);
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
// if positive, the truncated value is already the correct result
if (x_sign) { // if negative
// redundant fstar.w[3] = 0;
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
} else { // 22 <= ind - 1 <= 33
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
res.w[1] = 0;
res.w[0] = P256.w[3] >> shift;
// if positive, the truncated value is already the correct result
if (x_sign) { // if negative
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
fstar.w[2] = P256.w[2];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if (fstar.w[3] || fstar.w[2]
|| fstar.w[1] > ten2mk128[ind - 1].w[1]
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
}
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
BID_RETURN (res);
} else { // if exp < 0 and q + exp <= 0
if (x_sign) { // negative rounds down to -1.0
res.w[1] = 0xb040000000000000ull;
res.w[0] = 0x0000000000000001ull;
} else { // positive rounds down to +0.0
res.w[1] = 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
}
BID_RETURN (res);
}
}
/*****************************************************************************
* BID128_round_integral_positive
****************************************************************************/
BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive, x)
UINT128 res;
UINT64 x_sign;
UINT64 x_exp;
int exp; // unbiased exponent
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
// (all are UINT64)
BID_UI64DOUBLE tmp1;
unsigned int x_nr_bits;
int q, ind, shift;
UINT128 C1;
// UINT128 res is C* at first - represents up to 34 decimal digits ~
// 113 bits
UINT256 fstar;
UINT256 P256;
// check for NaN or Infinity
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
// x is special
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
// if x = NaN, then res = Q (x)
// check first for non-canonical NaN payload
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
(x.w[0] > 0x38c15b09ffffffffull))) {
x.w[1] = x.w[1] & 0xffffc00000000000ull;
x.w[0] = 0x0ull;
}
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (x)
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
res.w[0] = x.w[0];
} else { // x is QNaN
// return x
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
res.w[0] = x.w[0];
}
BID_RETURN (res)
} else { // x is not a NaN, so it must be infinity
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
// return +inf
res.w[1] = 0x7800000000000000ull;
res.w[0] = 0x0000000000000000ull;
} else { // x is -inf
// return -inf
res.w[1] = 0xf800000000000000ull;
res.w[0] = 0x0000000000000000ull;
}
BID_RETURN (res);
}
}
// unpack x
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
C1.w[1] = x.w[1] & MASK_COEFF;
C1.w[0] = x.w[0];
// check for non-canonical values (treated as zero)
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
// non-canonical
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
C1.w[1] = 0; // significand high
C1.w[0] = 0; // significand low
} else { // G0_G1 != 11
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
if (C1.w[1] > 0x0001ed09bead87c0ull ||
(C1.w[1] == 0x0001ed09bead87c0ull
&& C1.w[0] > 0x378d8e63ffffffffull)) {
// x is non-canonical if coefficient is larger than 10^34 -1
C1.w[1] = 0;
C1.w[0] = 0;
} else { // canonical
;
}
}
// test for input equal to zero
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
// x is 0
// return 0 preserving the sign bit and the preferred exponent
// of MAX(Q(x), 0)
if (x_exp <= (0x1820ull << 49)) {
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
} else {
res.w[1] = x_sign | x_exp;
}
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
// x is not special and is not zero
// if (exp <= -p) return -0.0 or +1.0
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
if (x_sign) {
// if negative, return negative 0, because we know the coefficient
// is non-zero (would have been caught above)
res.w[1] = 0xb040000000000000ull;
res.w[0] = 0x0000000000000000ull;
} else {
// if positive, return positive 1, because we know coefficient is
// non-zero (would have been caught above)
res.w[1] = 0x3040000000000000ull;
res.w[0] = 0x0000000000000001ull;
}
BID_RETURN (res);
}
// q = nr. of decimal digits in x
// determine first the nr. of bits in x
if (C1.w[1] == 0) {
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
// split 64-bit value in two 32-bit halves to avoid rounding errors
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
x_nr_bits =
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
} else { // x < 2^32
tmp1.d = (double) (C1.w[0]); // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // if x < 2^53
tmp1.d = (double) C1.w[0]; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
tmp1.d = (double) C1.w[1]; // exact conversion
x_nr_bits =
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
(C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
q++;
}
exp = (x_exp >> 49) - 6176;
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res.w[1] = x.w[1];
res.w[0] = x.w[0];
BID_RETURN (res);
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
// need to shift right -exp digits from the coefficient; exp will be 0
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
// (number of digits to be chopped off)
// chop off ind digits from the lower part of C1
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
// tmp64 = C1.w[0];
// if (ind <= 19) {
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
// } else {
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
// }
// if (C1.w[0] < tmp64) C1.w[1]++;
// if carry-out from C1.w[0], increment C1.w[1]
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 34
// kx = 10^(-x) = ten2mk128[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 118 bits
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res.w[1] = P256.w[3];
res.w[0] = P256.w[2];
// if negative, the truncated value is already the correct result
if (!x_sign) { // if positive
// redundant fstar.w[3] = 0;
// redundant fstar.w[2] = 0;
// redundant fstar.w[1] = P256.w[1];
// redundant fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if ((P256.w[1] > ten2mk128[ind - 1].w[1])
|| (P256.w[1] == ten2mk128[ind - 1].w[1]
&& (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res.w[1] = (P256.w[3] >> shift);
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
// if negative, the truncated value is already the correct result
if (!x_sign) { // if positive
// redundant fstar.w[3] = 0;
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
} else { // 22 <= ind - 1 <= 33
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
res.w[1] = 0;
res.w[0] = P256.w[3] >> shift;
// if negative, the truncated value is already the correct result
if (!x_sign) { // if positive
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
fstar.w[2] = P256.w[2];
fstar.w[1] = P256.w[1];
fstar.w[0] = P256.w[0];
// fraction f* > 10^(-x) <=> inexact
// f* is in the right position to be compared with
// 10^(-x) from ten2mk128[]
if (fstar.w[3] || fstar.w[2]
|| fstar.w[1] > ten2mk128[ind - 1].w[1]
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
if (++res.w[0] == 0) {
res.w[1]++;
}
}
}
}
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
BID_RETURN (res);
} else { // if exp < 0 and q + exp <= 0
if (x_sign) { // negative rounds up to -0.0
res.w[1] = 0xb040000000000000ull;
res.w[0] = 0x0000000000000000ull;
} else { // positive rounds up to +1.0
res.w[1] = 0x3040000000000000ull;
res.w[0] = 0x0000000000000001ull;
}
BID_RETURN (res);
}
}
/*****************************************************************************
* BID128_round_integral_zero
****************************************************************************/
BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero, x)
UINT128 res;
UINT64 x_sign;
UINT64 x_exp;
int exp; // unbiased exponent
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
// (all are UINT64)
BID_UI64DOUBLE tmp1;
unsigned int x_nr_bits;
int q, ind, shift;
UINT128 C1;
// UINT128 res is C* at first - represents up to 34 decimal digits ~
// 113 bits
UINT256 P256;
// check for NaN or Infinity
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
// x is special
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
// if x = NaN, then res = Q (x)
// check first for non-canonical NaN payload
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
(x.w[0] > 0x38c15b09ffffffffull))) {
x.w[1] = x.w[1] & 0xffffc00000000000ull;
x.w[0] = 0x0ull;
}
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (x)
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
res.w[0] = x.w[0];
} else { // x is QNaN
// return x
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
res.w[0] = x.w[0];
}
BID_RETURN (res)
} else { // x is not a NaN, so it must be infinity
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
// return +inf
res.w[1] = 0x7800000000000000ull;
res.w[0] = 0x0000000000000000ull;
} else { // x is -inf
// return -inf
res.w[1] = 0xf800000000000000ull;
res.w[0] = 0x0000000000000000ull;
}
BID_RETURN (res);
}
}
// unpack x
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
C1.w[1] = x.w[1] & MASK_COEFF;
C1.w[0] = x.w[0];
// check for non-canonical values (treated as zero)
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
// non-canonical
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
C1.w[1] = 0; // significand high
C1.w[0] = 0; // significand low
} else { // G0_G1 != 11
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
if (C1.w[1] > 0x0001ed09bead87c0ull ||
(C1.w[1] == 0x0001ed09bead87c0ull
&& C1.w[0] > 0x378d8e63ffffffffull)) {
// x is non-canonical if coefficient is larger than 10^34 -1
C1.w[1] = 0;
C1.w[0] = 0;
} else { // canonical
;
}
}
// test for input equal to zero
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
// x is 0
// return 0 preserving the sign bit and the preferred exponent
// of MAX(Q(x), 0)
if (x_exp <= (0x1820ull << 49)) {
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
} else {
res.w[1] = x_sign | x_exp;
}
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
// x is not special and is not zero
// if (exp <= -p) return -0.0 or +0.0
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
res.w[1] = x_sign | 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
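// (decoding the constant above: 0x2ffc000000000000ull >> 49 = 0x17fe = 6142,
// and 6142 - 6176 = -34; with at most 34 coefficient digits this means
// |x| < 1, so truncation toward zero gives a zero of the same sign)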
// q = nr. of decimal digits in x
// determine first the nr. of bits in x
if (C1.w[1] == 0) {
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
// split the 64-bit value in two 32-bit halves to avoid rounding errors
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
x_nr_bits =
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
} else { // x < 2^32
tmp1.d = (double) (C1.w[0]); // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // if x < 2^53
tmp1.d = (double) C1.w[0]; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
tmp1.d = (double) C1.w[1]; // exact conversion
x_nr_bits =
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
(C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
q++;
}
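// Worked example (the nr_digits[] layout is inferred from its use above):
// for C1 = 10^6 the conversion gives x_nr_bits = 20; 20-bit integers run
// from 524288 to 1048575 and so have either 6 or 7 decimal digits, hence
// nr_digits[19].digits is 0 and the comparison against the threshold 10^6
// bumps q from digits1 = 6 to the correct value q = 7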
exp = (x_exp >> 49) - 6176;
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res.w[1] = x.w[1];
res.w[0] = x.w[0];
BID_RETURN (res);
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
// need to shift right -exp digits from the coefficient; the exp will be 0
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
// (number of digits to be chopped off)
// chop off ind digits from the lower part of C1
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
//tmp64 = C1.w[0];
// if (ind <= 19) {
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
// } else {
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
// }
// if (C1.w[0] < tmp64) C1.w[1]++;
// if carry-out from C1.w[0], increment C1.w[1]
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 34
// kx = 10^(-x) = ten2mk128[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 118 bits
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res.w[1] = P256.w[3];
res.w[0] = P256.w[2];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res.w[1] = (P256.w[3] >> shift);
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
} else { // 22 <= ind - 1 <= 33
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
res.w[1] = 0;
res.w[0] = P256.w[3] >> shift;
}
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
BID_RETURN (res);
} else { // if exp < 0 and q + exp <= 0 the result is +0 or -0
res.w[1] = x_sign | 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
}
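// Illustrative sketch (not part of libbid): the function that follows,
// bid128_round_integral_nearest_away, first adds half an ulp
// (midpoint64[] / midpoint128[], i.e. 1/2 * 10^x) to the coefficient and
// then truncates exactly as above, so that a tie is carried away from
// zero.  A minimal 64-bit analogue of that step, for one discarded digit,
// assuming c + 5 does not overflow (the real code works in 128 bits,
// where C1 + 1/2 * 10^x fits in 127 bits); the helper name is
// hypothetical and the block is kept out of the build with #if 0.
#if 0
#include <stdint.h>

// Round c / 10 to the nearest integer, ties away from zero.
static uint64_t
round_one_digit_nearest_away (uint64_t c)
{
  const uint64_t recip10 = 0xCCCCCCCCCCCCCCCDULL; // ceil(2^67/10)
  c += 5;                                         // add the midpoint, 1/2 * 10^1
  return (uint64_t) (((unsigned __int128) c * recip10) >> 67);
}
#endif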
/*****************************************************************************
* BID128_round_integral_nearest_away
****************************************************************************/
BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away, x)
UINT128 res;
UINT64 x_sign;
UINT64 x_exp;
int exp; // unbiased exponent
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
// (all are UINT64)
UINT64 tmp64;
BID_UI64DOUBLE tmp1;
unsigned int x_nr_bits;
int q, ind, shift;
UINT128 C1;
// UINT128 res is C* at first - represents up to 34 decimal digits ~
// 113 bits
// UINT256 fstar;
UINT256 P256;
// check for NaN or Infinity
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
// x is special
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
// if x = NaN, then res = Q (x)
// check first for non-canonical NaN payload
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
(x.w[0] > 0x38c15b09ffffffffull))) {
x.w[1] = x.w[1] & 0xffffc00000000000ull;
x.w[0] = 0x0ull;
}
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (x)
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
res.w[0] = x.w[0];
} else { // x is QNaN
// return x
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
res.w[0] = x.w[0];
}
BID_RETURN (res)
} else { // x is not a NaN, so it must be infinity
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
// return +inf
res.w[1] = 0x7800000000000000ull;
res.w[0] = 0x0000000000000000ull;
} else { // x is -inf
// return -inf
res.w[1] = 0xf800000000000000ull;
res.w[0] = 0x0000000000000000ull;
}
BID_RETURN (res);
}
}
// unpack x
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
C1.w[1] = x.w[1] & MASK_COEFF;
C1.w[0] = x.w[0];
// check for non-canonical values (treated as zero)
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
// non-canonical
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
C1.w[1] = 0; // significand high
C1.w[0] = 0; // significand low
} else { // G0_G1 != 11
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
if (C1.w[1] > 0x0001ed09bead87c0ull ||
(C1.w[1] == 0x0001ed09bead87c0ull
&& C1.w[0] > 0x378d8e63ffffffffull)) {
// x is non-canonical if coefficient is larger than 10^34 -1
C1.w[1] = 0;
C1.w[0] = 0;
} else { // canonical
;
}
}
// test for input equal to zero
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
// x is 0
// return 0 preserving the sign bit and the preferred exponent
// of MAX(Q(x), 0)
if (x_exp <= (0x1820ull << 49)) {
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
} else {
res.w[1] = x_sign | x_exp;
}
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
// x is not special and is not zero
// if (exp <= -(p+1)) return -0.0 or +0.0
if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
res.w[1] = x_sign | 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
// q = nr. of decimal digits in x
// determine first the nr. of bits in x
if (C1.w[1] == 0) {
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
// split the 64-bit value in two 32-bit halves to avoid rounding errors
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
x_nr_bits =
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
} else { // x < 2^32
tmp1.d = (double) (C1.w[0]); // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // if x < 2^53
tmp1.d = (double) C1.w[0]; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
tmp1.d = (double) C1.w[1]; // exact conversion
x_nr_bits =
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
(C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
q++;
}
exp = (x_exp >> 49) - 6176;
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res.w[1] = x.w[1];
res.w[0] = x.w[0];
BID_RETURN (res);
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
// need to shift right -exp digits from the coefficient; the exp will be 0
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
tmp64 = C1.w[0];
if (ind <= 19) {
C1.w[0] = C1.w[0] + midpoint64[ind - 1];
} else {
C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
}
if (C1.w[0] < tmp64)
C1.w[1]++;
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 34
// kx = 10^(-x) = ten2mk128[ind - 1]
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
// the approximation of 10^(-x) was rounded up to 118 bits
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
// the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind - 1], e.g.
// if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
// if (0 < f* < 10^(-x)) then the result is a midpoint; because
// 1/2 ulp was already added to C1 above, a midpoint has already been
// rounded away from zero, so in every case
// C* = floor(C*) (logical right shift; C* has p decimal digits,
// correct by Property 1)
// n = C* * 10^(e+x)
// shift right C* by Ex-128 = shiftright128[ind - 1]
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res.w[1] = P256.w[3];
res.w[0] = P256.w[2];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
res.w[1] = (P256.w[3] >> shift);
} else { // 22 <= ind - 1 <= 33
shift = shiftright128[ind - 1]; // 66 <= shift <= 102
res.w[1] = 0;
res.w[0] = (P256.w[3] >> (shift - 64)); // 2 <= shift - 64 <= 38
}
// if the result was a midpoint, it was already rounded away from zero
res.w[1] |= x_sign | 0x3040000000000000ull;
BID_RETURN (res);
} else { // if ((q + exp) < 0) <=> q < -exp
// the result is +0 or -0
res.w[1] = x_sign | 0x3040000000000000ull;
res.w[0] = 0x0000000000000000ull;
BID_RETURN (res);
}
}
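// Illustrative sketch (not part of libbid): unpacking the results
// assembled above.  Field positions are taken from the masks and shifts
// used in this file (sign in bit 63 of the high word, 14-bit biased
// exponent in bits 62-49 when the combination field does not start
// with 11, bias 6176, coefficient high bits under MASK_COEFF); every
// result returned by the round_integral functions has this form.  The
// helper name is hypothetical and the block is kept out of the build.
#if 0
#include <stdint.h>

static void
unpack_bid128_finite (uint64_t w1, uint64_t w0,
                      int *sign, int *exponent,
                      uint64_t *coeff_hi, uint64_t *coeff_lo)
{
  *sign = (int) (w1 >> 63);                       // 0 = +, 1 = -
  *exponent = (int) ((w1 >> 49) & 0x3fff) - 6176; // unbiased exponent
  *coeff_hi = w1 & 0x0001ffffffffffffULL;         // MASK_COEFF
  *coeff_lo = w0;
}
#endif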