b2a00c8984
libgcc/ 2007-09-27 H.J. Lu <hongjiu.lu@intel.com> * Makefile.in (dfp-filenames): Replace decimal_globals, decimal_data, binarydecimal and convert_data with bid_decimal_globals, bid_decimal_data, bid_binarydecimal and bid_convert_data, respectively. libgcc/config/libbid/ 2007-09-27 H.J. Lu <hongjiu.lu@intel.com> * bid128_fromstring.c: Removed. * bid_dpd.c: New from libbid 2007-09-26. * bid128_to_int16.c: Likewise. * bid128_to_int8.c: Likewise. * bid128_to_uint8.c: Likewise. * bid128_to_uint16.c: Likewise. * bid64_to_int16.c: Likewise. * bid64_to_int8.c: Likewise. * bid64_to_uint16.c: Likewise. * bid64_to_uint8.c: Likewise. * bid128_2_str.h: Updated from libbid 2007-09-26. * bid128_2_str_macros.h: Likewise. * bid128_2_str_tables.c: Likewise. * bid128_add.c: Likewise. * bid128.c: Likewise. * bid128_compare.c: Likewise. * bid128_div.c: Likewise. * bid128_fma.c: Likewise. * bid128_logb.c: Likewise. * bid128_minmax.c: Likewise. * bid128_mul.c: Likewise. * bid128_next.c: Likewise. * bid128_noncomp.c: Likewise. * bid128_quantize.c: Likewise. * bid128_rem.c: Likewise. * bid128_round_integral.c: Likewise. * bid128_scalb.c: Likewise. * bid128_sqrt.c: Likewise. * bid128_string.c: Likewise. * bid128_to_int32.c: Likewise. * bid128_to_int64.c: Likewise. * bid128_to_uint32.c: Likewise. * bid128_to_uint64.c: Likewise. * bid32_to_bid128.c: Likewise. * bid32_to_bid64.c: Likewise. * bid64_add.c: Likewise. * bid64_compare.c: Likewise. * bid64_div.c: Likewise. * bid64_fma.c: Likewise. * bid64_logb.c: Likewise. * bid64_minmax.c: Likewise. * bid64_mul.c: Likewise. * bid64_next.c: Likewise. * bid64_noncomp.c: Likewise. * bid64_quantize.c: Likewise. * bid64_rem.c: Likewise. * bid64_round_integral.c: Likewise. * bid64_scalb.c: Likewise. * bid64_sqrt.c: Likewise. * bid64_string.c: Likewise. * bid64_to_bid128.c: Likewise. * bid64_to_int32.c: Likewise. * bid64_to_int64.c: Likewise. * bid64_to_uint32.c: Likewise. * bid64_to_uint64.c: Likewise. * bid_b2d.h: Likewise. 
* bid_binarydecimal.c: Likewise. * bid_conf.h: Likewise. * bid_convert_data.c: Likewise. * bid_decimal_data.c: Likewise. * bid_decimal_globals.c: Likewise. * bid_div_macros.h: Likewise. * bid_flag_operations.c: Likewise. * bid_from_int.c: Likewise. * bid_functions.h: Likewise. * bid_gcc_intrinsics.h: Likewise. * bid_inline_add.h: Likewise. * bid_internal.h: Likewise. * bid_round.c: Likewise. * bid_sqrt_macros.h: Likewise. * _addsub_dd.c: Likewise. * _addsub_sd.c: Likewise. * _addsub_td.c: Likewise. * _dd_to_df.c: Likewise. * _dd_to_di.c: Likewise. * _dd_to_sd.c: Likewise. * _dd_to_sf.c: Likewise. * _dd_to_si.c: Likewise. * _dd_to_td.c: Likewise. * _dd_to_tf.c: Likewise. * _dd_to_udi.c: Likewise. * _dd_to_usi.c: Likewise. * _dd_to_xf.c: Likewise. * _df_to_dd.c: Likewise. * _df_to_sd.c: Likewise. * _df_to_td.c: Likewise. * _di_to_dd.c: Likewise. * _di_to_sd.c: Likewise. * _di_to_td.c: Likewise. * _div_dd.c: Likewise. * _div_sd.c: Likewise. * _div_td.c: Likewise. * _eq_dd.c: Likewise. * _eq_sd.c: Likewise. * _eq_td.c: Likewise. * _ge_dd.c: Likewise. * _ge_sd.c: Likewise. * _ge_td.c: Likewise. * _gt_dd.c: Likewise. * _gt_sd.c: Likewise. * _gt_td.c: Likewise. * _isinfd128.c: Likewise. * _isinfd32.c: Likewise. * _isinfd64.c: Likewise. * _le_dd.c: Likewise. * _le_sd.c: Likewise. * _le_td.c: Likewise. * _lt_dd.c: Likewise. * _lt_sd.c: Likewise. * _lt_td.c: Likewise. * _mul_dd.c: Likewise. * _mul_sd.c: Likewise. * _mul_td.c: Likewise. * _ne_dd.c: Likewise. * _ne_sd.c: Likewise. * _ne_td.c: Likewise. * _sd_to_dd.c: Likewise. * _sd_to_df.c: Likewise. * _sd_to_di.c: Likewise. * _sd_to_sf.c: Likewise. * _sd_to_si.c: Likewise. * _sd_to_td.c: Likewise. * _sd_to_tf.c: Likewise. * _sd_to_udi.c: Likewise. * _sd_to_usi.c: Likewise. * _sd_to_xf.c: Likewise. * _sf_to_dd.c: Likewise. * _sf_to_sd.c: Likewise. * _sf_to_td.c: Likewise. * _si_to_dd.c: Likewise. * _si_to_sd.c: Likewise. * _si_to_td.c: Likewise. * _td_to_dd.c: Likewise. * _td_to_df.c: Likewise. * _td_to_di.c: Likewise. 
* _td_to_sd.c: Likewise. * _td_to_sf.c: Likewise. * _td_to_si.c: Likewise. * _td_to_tf.c: Likewise. * _td_to_udi.c: Likewise. * _td_to_usi.c: Likewise. * _td_to_xf.c: Likewise. * _tf_to_dd.c: Likewise. * _tf_to_sd.c: Likewise. * _tf_to_td.c: Likewise. * _udi_to_dd.c: Likewise. * _udi_to_sd.c: Likewise. * _udi_to_td.c: Likewise. * _unord_dd.c: Likewise. * _unord_sd.c: Likewise. * _unord_td.c: Likewise. * _usi_to_dd.c: Likewise. * _usi_to_sd.c: Likewise. * _usi_to_td.c: Likewise. * _xf_to_dd.c: Likewise. * _xf_to_sd.c: Likewise. * _xf_to_td.c: Likewise. 2007-09-27 H.J. Lu <hongjiu.lu@intel.com> * b2d.h: Renamed to ... * bid_b2d.h: This. * bid128_to_string.c: Renamed to ... * bid128_string.c: This. * bid_intrinsics.h: Renamed to ... * bid_gcc_intrinsics.h: This. * bid_string.c: Renamed to ... * bid64_string.c: This. * binarydecimal.c: Renamed to ... * bid_binarydecimal.c: This. * convert_data.c: Renamed to ... * bid_convert_data.c: This. * decimal_data.c: Renamed to ... * bid_decimal_data.c: This. * decimal_globals.c: Renamed to ... * bid_decimal_globals.c: This. * div_macros.h: Renamed to ... * bid_div_macros.h: This. * inline_bid_add.h: Renamed to ... * bid_inline_add.h: This. * sqrt_macros.h: Renamed to ... * bid_sqrt_macros.h: This. From-SVN: r128841
1227 lines
40 KiB
C
1227 lines
40 KiB
C
/* Copyright (C) 2007 Free Software Foundation, Inc.
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free
|
|
Software Foundation; either version 2, or (at your option) any later
|
|
version.
|
|
|
|
In addition to the permissions in the GNU General Public License, the
|
|
Free Software Foundation gives you unlimited permission to link the
|
|
compiled version of this file into combinations with other programs,
|
|
and to distribute those combinations without any restriction coming
|
|
from the use of this file. (The General Public License restrictions
|
|
do apply in other respects; for example, they cover modification of
|
|
the file, and distribution when not linked into a combine
|
|
executable.)
|
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GCC; see the file COPYING. If not, write to the Free
|
|
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
|
02110-1301, USA. */
|
|
|
|
#include "bid_internal.h"
|
|
|
|
/*****************************************************************************
|
|
* BID64_round_integral_exact
|
|
****************************************************************************/
|
|
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
|
void
|
|
bid64_round_integral_exact (UINT64 * pres,
|
|
UINT64 *
|
|
px _RND_MODE_PARAM _EXC_FLAGS_PARAM
|
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
|
UINT64 x = *px;
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
|
unsigned int rnd_mode = *prnd_mode;
|
|
#endif
|
|
#else
|
|
UINT64
|
|
bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM
|
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
|
#endif
|
|
|
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
|
UINT64 x_sign;
|
|
int exp; // unbiased exponent
|
|
// Note: C1 represents the significand (UINT64)
|
|
BID_UI64DOUBLE tmp1;
|
|
int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT64 C1;
|
|
// UINT64 res is C* at first - represents up to 16 decimal digits <= 54 bits
|
|
UINT128 fstar = { {0x0ull, 0x0ull} };
|
|
UINT128 P128;
|
|
|
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
|
|
// check for NaNs and infinities
|
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
|
else
|
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (SNaN)
|
|
res = x & 0xfdffffffffffffffull;
|
|
} else { // QNaN
|
|
res = x;
|
|
}
|
|
BID_RETURN (res);
|
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
|
res = x_sign | 0x7800000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// unpack x
|
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
|
// if the steering bits are 11 (condition will be 0), then
|
|
// the exponent is G[0:w+1]
|
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
|
if (C1 > 9999999999999999ull) { // non-canonical
|
|
C1 = 0;
|
|
}
|
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
|
C1 = (x & MASK_BINARY_SIG1);
|
|
}
|
|
|
|
// if x is 0 or non-canonical return 0 preserving the sign bit and
|
|
// the preferred exponent of MAX(Q(x), 0)
|
|
if (C1 == 0) {
|
|
if (exp < 0)
|
|
exp = 0;
|
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
|
BID_RETURN (res);
|
|
}
|
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
|
|
|
switch (rnd_mode) {
|
|
case ROUNDING_TO_NEAREST:
|
|
case ROUNDING_TIES_AWAY:
|
|
// return 0 if (exp <= -(p+1))
|
|
if (exp <= -17) {
|
|
res = x_sign | 0x31c0000000000000ull;
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_DOWN:
|
|
// return 0 if (exp <= -p)
|
|
if (exp <= -16) {
|
|
if (x_sign) {
|
|
res = 0xb1c0000000000001ull;
|
|
} else {
|
|
res = 0x31c0000000000000ull;
|
|
}
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_UP:
|
|
// return 0 if (exp <= -p)
|
|
if (exp <= -16) {
|
|
if (x_sign) {
|
|
res = 0xb1c0000000000000ull;
|
|
} else {
|
|
res = 0x31c0000000000001ull;
|
|
}
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_TO_ZERO:
|
|
// return 0 if (exp <= -p)
|
|
if (exp <= -16) {
|
|
res = x_sign | 0x31c0000000000000ull;
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
} // end switch ()
|
|
|
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
|
// determine first the nr. of bits in x
|
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
|
q = 16;
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
|
q++;
|
|
}
|
|
}
|
|
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res = x;
|
|
BID_RETURN (res);
|
|
}
|
|
|
|
switch (rnd_mode) {
|
|
case ROUNDING_TO_NEAREST:
|
|
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
|
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
|
C1 = C1 + midpoint64[ind - 1];
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 16
|
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
|
|
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
|
// if floor(C*) is even then C* = floor(C*) - logical right
|
|
// shift; C* has p decimal digits, correct by Prop. 1)
|
|
// else if floor(C*) is odd C* = floor(C*)-1 (logical right
|
|
// shift; C* has p decimal digits, correct by Pr. 1)
|
|
// else
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// n = C* * 10^(e+x)
|
|
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res = P128.w[1];
|
|
fstar.w[1] = 0;
|
|
fstar.w[0] = P128.w[0];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res = (P128.w[1] >> shift);
|
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
|
fstar.w[0] = P128.w[0];
|
|
}
|
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
|
// since round_to_even, subtract 1 if current result is odd
|
|
if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0)
|
|
&& (fstar.w[0] < ten2mk64[ind - 1])) {
|
|
res--;
|
|
}
|
|
// determine inexactness of the rounding of C*
|
|
// if (0 < f* - 1/2 < 10^(-x)) then
|
|
// the result is exact
|
|
// else // if (f* - 1/2 > T*) then
|
|
// the result is inexact
|
|
if (ind - 1 <= 2) {
|
|
if (fstar.w[0] > 0x8000000000000000ull) {
|
|
// f* > 1/2 and the result may be exact
|
|
// fstar.w[0] - 0x8000000000000000ull is f* - 1/2
|
|
if ((fstar.w[0] - 0x8000000000000000ull) > ten2mk64[ind - 1]) {
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
} // else the result is exact
|
|
} else { // the result is inexact; f2* <= 1/2
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
} else { // if 3 <= ind - 1 <= 21
|
|
if (fstar.w[1] > onehalf128[ind - 1] ||
|
|
(fstar.w[1] == onehalf128[ind - 1] && fstar.w[0])) {
|
|
// f2* > 1/2 and the result may be exact
|
|
// Calculate f2* - 1/2
|
|
if (fstar.w[1] > onehalf128[ind - 1]
|
|
|| fstar.w[0] > ten2mk64[ind - 1]) {
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
} // else the result is exact
|
|
} else { // the result is inexact; f2* <= 1/2
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
}
|
|
// set exponent to zero as it was negative before.
|
|
res = x_sign | 0x31c0000000000000ull | res;
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp < 0
|
|
// the result is +0 or -0
|
|
res = x_sign | 0x31c0000000000000ull;
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_TIES_AWAY:
|
|
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
|
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
|
C1 = C1 + midpoint64[ind - 1];
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 16
|
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
|
|
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
|
// C* = floor(C*) - logical right shift; C* has p decimal digits,
|
|
// correct by Prop. 1)
|
|
// else
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// n = C* * 10^(e+x)
|
|
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res = P128.w[1];
|
|
fstar.w[1] = 0;
|
|
fstar.w[0] = P128.w[0];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res = (P128.w[1] >> shift);
|
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
|
fstar.w[0] = P128.w[0];
|
|
}
|
|
// midpoints are already rounded correctly
|
|
// determine inexactness of the rounding of C*
|
|
// if (0 < f* - 1/2 < 10^(-x)) then
|
|
// the result is exact
|
|
// else // if (f* - 1/2 > T*) then
|
|
// the result is inexact
|
|
if (ind - 1 <= 2) {
|
|
if (fstar.w[0] > 0x8000000000000000ull) {
|
|
// f* > 1/2 and the result may be exact
|
|
// fstar.w[0] - 0x8000000000000000ull is f* - 1/2
|
|
if ((fstar.w[0] - 0x8000000000000000ull) > ten2mk64[ind - 1]) {
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
} // else the result is exact
|
|
} else { // the result is inexact; f2* <= 1/2
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
} else { // if 3 <= ind - 1 <= 21
|
|
if (fstar.w[1] > onehalf128[ind - 1] ||
|
|
(fstar.w[1] == onehalf128[ind - 1] && fstar.w[0])) {
|
|
// f2* > 1/2 and the result may be exact
|
|
// Calculate f2* - 1/2
|
|
if (fstar.w[1] > onehalf128[ind - 1]
|
|
|| fstar.w[0] > ten2mk64[ind - 1]) {
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
} // else the result is exact
|
|
} else { // the result is inexact; f2* <= 1/2
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
}
|
|
// set exponent to zero as it was negative before.
|
|
res = x_sign | 0x31c0000000000000ull | res;
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp < 0
|
|
// the result is +0 or -0
|
|
res = x_sign | 0x31c0000000000000ull;
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_DOWN:
|
|
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 fits in 64 bits
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 16
|
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
|
// C* = C1 * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
|
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// if (0 < f* < 10^(-x)) then the result is exact
|
|
// n = C* * 10^(e+x)
|
|
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res = P128.w[1];
|
|
fstar.w[1] = 0;
|
|
fstar.w[0] = P128.w[0];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res = (P128.w[1] >> shift);
|
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
|
fstar.w[0] = P128.w[0];
|
|
}
|
|
// if (f* > 10^(-x)) then the result is inexact
|
|
if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) {
|
|
if (x_sign) {
|
|
// if negative and not exact, increment magnitude
|
|
res++;
|
|
}
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
// set exponent to zero as it was negative before.
|
|
res = x_sign | 0x31c0000000000000ull | res;
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp <= 0
|
|
// the result is +0 or -1
|
|
if (x_sign) {
|
|
res = 0xb1c0000000000001ull;
|
|
} else {
|
|
res = 0x31c0000000000000ull;
|
|
}
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_UP:
|
|
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 fits in 64 bits
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 16
|
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
|
// C* = C1 * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
|
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// if (0 < f* < 10^(-x)) then the result is exact
|
|
// n = C* * 10^(e+x)
|
|
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res = P128.w[1];
|
|
fstar.w[1] = 0;
|
|
fstar.w[0] = P128.w[0];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res = (P128.w[1] >> shift);
|
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
|
fstar.w[0] = P128.w[0];
|
|
}
|
|
// if (f* > 10^(-x)) then the result is inexact
|
|
if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) {
|
|
if (!x_sign) {
|
|
// if positive and not exact, increment magnitude
|
|
res++;
|
|
}
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
// set exponent to zero as it was negative before.
|
|
res = x_sign | 0x31c0000000000000ull | res;
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp <= 0
|
|
// the result is -0 or +1
|
|
if (x_sign) {
|
|
res = 0xb1c0000000000000ull;
|
|
} else {
|
|
res = 0x31c0000000000001ull;
|
|
}
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_TO_ZERO:
|
|
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 fits in 127 bits
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 16
|
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
|
// C* = C1 * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
|
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// if (0 < f* < 10^(-x)) then the result is exact
|
|
// n = C* * 10^(e+x)
|
|
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res = P128.w[1];
|
|
fstar.w[1] = 0;
|
|
fstar.w[0] = P128.w[0];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res = (P128.w[1] >> shift);
|
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
|
fstar.w[0] = P128.w[0];
|
|
}
|
|
// if (f* > 10^(-x)) then the result is inexact
|
|
if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) {
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
// set exponent to zero as it was negative before.
|
|
res = x_sign | 0x31c0000000000000ull | res;
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp < 0
|
|
// the result is +0 or -0
|
|
res = x_sign | 0x31c0000000000000ull;
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
} // end switch ()
|
|
BID_RETURN (res);
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* BID64_round_integral_nearest_even
|
|
****************************************************************************/
|
|
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
|
void
|
|
bid64_round_integral_nearest_even (UINT64 * pres,
|
|
UINT64 *
|
|
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
|
_EXC_INFO_PARAM) {
|
|
UINT64 x = *px;
|
|
#else
|
|
UINT64
|
|
bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM
|
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
|
#endif
|
|
|
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
|
UINT64 x_sign;
|
|
int exp; // unbiased exponent
|
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
|
BID_UI64DOUBLE tmp1;
|
|
int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT64 C1;
|
|
UINT128 fstar;
|
|
UINT128 P128;
|
|
|
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
|
|
// check for NaNs and infinities
|
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
|
else
|
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (SNaN)
|
|
res = x & 0xfdffffffffffffffull;
|
|
} else { // QNaN
|
|
res = x;
|
|
}
|
|
BID_RETURN (res);
|
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
|
res = x_sign | 0x7800000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// unpack x
|
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
|
// if the steering bits are 11 (condition will be 0), then
|
|
// the exponent is G[0:w+1]
|
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
|
if (C1 > 9999999999999999ull) { // non-canonical
|
|
C1 = 0;
|
|
}
|
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
|
C1 = (x & MASK_BINARY_SIG1);
|
|
}
|
|
|
|
// if x is 0 or non-canonical
|
|
if (C1 == 0) {
|
|
if (exp < 0)
|
|
exp = 0;
|
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
|
BID_RETURN (res);
|
|
}
|
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
|
|
|
// return 0 if (exp <= -(p+1))
|
|
if (exp <= -17) {
|
|
res = x_sign | 0x31c0000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
|
// determine first the nr. of bits in x
|
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
|
q = 16;
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
|
q++;
|
|
}
|
|
}
|
|
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res = x;
|
|
BID_RETURN (res);
|
|
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
|
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
|
C1 = C1 + midpoint64[ind - 1];
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 16
|
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
|
|
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
|
// if floor(C*) is even then C* = floor(C*) - logical right
|
|
// shift; C* has p decimal digits, correct by Prop. 1)
|
|
// else if floor(C*) is odd C* = floor(C*)-1 (logical right
|
|
// shift; C* has p decimal digits, correct by Pr. 1)
|
|
// else
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// n = C* * 10^(e+x)
|
|
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res = P128.w[1];
|
|
fstar.w[1] = 0;
|
|
fstar.w[0] = P128.w[0];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res = (P128.w[1] >> shift);
|
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
|
fstar.w[0] = P128.w[0];
|
|
}
|
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
|
// since round_to_even, subtract 1 if current result is odd
|
|
if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0)
|
|
&& (fstar.w[0] < ten2mk64[ind - 1])) {
|
|
res--;
|
|
}
|
|
// set exponent to zero as it was negative before.
|
|
res = x_sign | 0x31c0000000000000ull | res;
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp < 0
|
|
// the result is +0 or -0
|
|
res = x_sign | 0x31c0000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* BID64_round_integral_negative
|
|
*****************************************************************************/
|
|
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
|
void
|
|
bid64_round_integral_negative (UINT64 * pres,
|
|
UINT64 *
|
|
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
|
_EXC_INFO_PARAM) {
|
|
UINT64 x = *px;
|
|
#else
|
|
UINT64
|
|
bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM
|
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
|
#endif
|
|
|
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
|
UINT64 x_sign;
|
|
int exp; // unbiased exponent
|
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
|
BID_UI64DOUBLE tmp1;
|
|
int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT64 C1;
|
|
// UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
|
|
UINT128 fstar;
|
|
UINT128 P128;
|
|
|
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
|
|
// check for NaNs and infinities
|
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
|
else
|
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (SNaN)
|
|
res = x & 0xfdffffffffffffffull;
|
|
} else { // QNaN
|
|
res = x;
|
|
}
|
|
BID_RETURN (res);
|
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
|
res = x_sign | 0x7800000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// unpack x
|
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
|
// if the steering bits are 11 (condition will be 0), then
|
|
// the exponent is G[0:w+1]
|
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
|
if (C1 > 9999999999999999ull) { // non-canonical
|
|
C1 = 0;
|
|
}
|
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
|
C1 = (x & MASK_BINARY_SIG1);
|
|
}
|
|
|
|
// if x is 0 or non-canonical
|
|
if (C1 == 0) {
|
|
if (exp < 0)
|
|
exp = 0;
|
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
|
BID_RETURN (res);
|
|
}
|
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
|
|
|
// return 0 if (exp <= -p)
|
|
if (exp <= -16) {
|
|
if (x_sign) {
|
|
res = 0xb1c0000000000001ull;
|
|
} else {
|
|
res = 0x31c0000000000000ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
|
// determine first the nr. of bits in x
|
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
|
q = 16;
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
|
q++;
|
|
}
|
|
}
|
|
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res = x;
|
|
BID_RETURN (res);
|
|
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 fits in 64 bits
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 16
|
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
|
// C* = C1 * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
|
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// if (0 < f* < 10^(-x)) then the result is exact
|
|
// n = C* * 10^(e+x)
|
|
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res = P128.w[1];
|
|
fstar.w[1] = 0;
|
|
fstar.w[0] = P128.w[0];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res = (P128.w[1] >> shift);
|
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
|
fstar.w[0] = P128.w[0];
|
|
}
|
|
// if (f* > 10^(-x)) then the result is inexact
|
|
if (x_sign
|
|
&& ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1]))) {
|
|
// if negative and not exact, increment magnitude
|
|
res++;
|
|
}
|
|
// set exponent to zero as it was negative before.
|
|
res = x_sign | 0x31c0000000000000ull | res;
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp <= 0
|
|
// the result is +0 or -1
|
|
if (x_sign) {
|
|
res = 0xb1c0000000000001ull;
|
|
} else {
|
|
res = 0x31c0000000000000ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* BID64_round_integral_positive
|
|
****************************************************************************/
|
|
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
|
void
|
|
bid64_round_integral_positive (UINT64 * pres,
|
|
UINT64 *
|
|
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
|
_EXC_INFO_PARAM) {
|
|
UINT64 x = *px;
|
|
#else
|
|
UINT64
|
|
bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM
|
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
|
#endif
|
|
|
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
|
UINT64 x_sign;
|
|
int exp; // unbiased exponent
|
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
|
BID_UI64DOUBLE tmp1;
|
|
int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT64 C1;
|
|
// UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
|
|
UINT128 fstar;
|
|
UINT128 P128;
|
|
|
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
|
|
// check for NaNs and infinities
|
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
|
else
|
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (SNaN)
|
|
res = x & 0xfdffffffffffffffull;
|
|
} else { // QNaN
|
|
res = x;
|
|
}
|
|
BID_RETURN (res);
|
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
|
res = x_sign | 0x7800000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// unpack x
|
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
|
// if the steering bits are 11 (condition will be 0), then
|
|
// the exponent is G[0:w+1]
|
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
|
if (C1 > 9999999999999999ull) { // non-canonical
|
|
C1 = 0;
|
|
}
|
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
|
C1 = (x & MASK_BINARY_SIG1);
|
|
}
|
|
|
|
// if x is 0 or non-canonical
|
|
if (C1 == 0) {
|
|
if (exp < 0)
|
|
exp = 0;
|
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
|
BID_RETURN (res);
|
|
}
|
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
|
|
|
// return 0 if (exp <= -p)
|
|
if (exp <= -16) {
|
|
if (x_sign) {
|
|
res = 0xb1c0000000000000ull;
|
|
} else {
|
|
res = 0x31c0000000000001ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
|
// determine first the nr. of bits in x
|
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
|
q = 16;
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
|
q++;
|
|
}
|
|
}
|
|
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res = x;
|
|
BID_RETURN (res);
|
|
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 fits in 64 bits
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 16
|
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
|
// C* = C1 * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
|
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// if (0 < f* < 10^(-x)) then the result is exact
|
|
// n = C* * 10^(e+x)
|
|
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res = P128.w[1];
|
|
fstar.w[1] = 0;
|
|
fstar.w[0] = P128.w[0];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res = (P128.w[1] >> shift);
|
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
|
fstar.w[0] = P128.w[0];
|
|
}
|
|
// if (f* > 10^(-x)) then the result is inexact
|
|
if (!x_sign
|
|
&& ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1]))) {
|
|
// if positive and not exact, increment magnitude
|
|
res++;
|
|
}
|
|
// set exponent to zero as it was negative before.
|
|
res = x_sign | 0x31c0000000000000ull | res;
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp <= 0
|
|
// the result is -0 or +1
|
|
if (x_sign) {
|
|
res = 0xb1c0000000000000ull;
|
|
} else {
|
|
res = 0x31c0000000000001ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* BID64_round_integral_zero
|
|
****************************************************************************/
|
|
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
|
void
|
|
bid64_round_integral_zero (UINT64 * pres,
|
|
UINT64 *
|
|
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
|
_EXC_INFO_PARAM) {
|
|
UINT64 x = *px;
|
|
#else
|
|
UINT64
|
|
bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
|
_EXC_INFO_PARAM) {
|
|
#endif
|
|
|
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
|
UINT64 x_sign;
|
|
int exp; // unbiased exponent
|
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
|
BID_UI64DOUBLE tmp1;
|
|
int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT64 C1;
|
|
// UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
|
|
UINT128 P128;
|
|
|
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
|
|
// check for NaNs and infinities
|
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
|
else
|
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (SNaN)
|
|
res = x & 0xfdffffffffffffffull;
|
|
} else { // QNaN
|
|
res = x;
|
|
}
|
|
BID_RETURN (res);
|
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
|
res = x_sign | 0x7800000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// unpack x
|
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
|
// if the steering bits are 11 (condition will be 0), then
|
|
// the exponent is G[0:w+1]
|
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
|
if (C1 > 9999999999999999ull) { // non-canonical
|
|
C1 = 0;
|
|
}
|
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
|
C1 = (x & MASK_BINARY_SIG1);
|
|
}
|
|
|
|
// if x is 0 or non-canonical
|
|
if (C1 == 0) {
|
|
if (exp < 0)
|
|
exp = 0;
|
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
|
BID_RETURN (res);
|
|
}
|
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
|
|
|
// return 0 if (exp <= -p)
|
|
if (exp <= -16) {
|
|
res = x_sign | 0x31c0000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
|
// determine first the nr. of bits in x
|
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
|
q = 16;
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
|
q++;
|
|
}
|
|
}
|
|
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res = x;
|
|
BID_RETURN (res);
|
|
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 fits in 127 bits
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 16
|
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
|
// C* = C1 * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
|
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// if (0 < f* < 10^(-x)) then the result is exact
|
|
// n = C* * 10^(e+x)
|
|
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res = P128.w[1];
|
|
// redundant fstar.w[1] = 0;
|
|
// redundant fstar.w[0] = P128.w[0];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res = (P128.w[1] >> shift);
|
|
// redundant fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
|
// redundant fstar.w[0] = P128.w[0];
|
|
}
|
|
// if (f* > 10^(-x)) then the result is inexact
|
|
// if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind-1])){
|
|
// // redundant
|
|
// }
|
|
// set exponent to zero as it was negative before.
|
|
res = x_sign | 0x31c0000000000000ull | res;
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp < 0
|
|
// the result is +0 or -0
|
|
res = x_sign | 0x31c0000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* BID64_round_integral_nearest_away
|
|
****************************************************************************/
|
|
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
|
void
|
|
bid64_round_integral_nearest_away (UINT64 * pres,
|
|
UINT64 *
|
|
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
|
_EXC_INFO_PARAM) {
|
|
UINT64 x = *px;
|
|
#else
|
|
UINT64
|
|
bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM
|
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
|
#endif
|
|
|
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
|
UINT64 x_sign;
|
|
int exp; // unbiased exponent
|
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
|
BID_UI64DOUBLE tmp1;
|
|
int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT64 C1;
|
|
UINT128 P128;
|
|
|
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
|
|
// check for NaNs and infinities
|
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
|
else
|
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (SNaN)
|
|
res = x & 0xfdffffffffffffffull;
|
|
} else { // QNaN
|
|
res = x;
|
|
}
|
|
BID_RETURN (res);
|
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
|
res = x_sign | 0x7800000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// unpack x
|
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
|
// if the steering bits are 11 (condition will be 0), then
|
|
// the exponent is G[0:w+1]
|
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
|
if (C1 > 9999999999999999ull) { // non-canonical
|
|
C1 = 0;
|
|
}
|
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
|
C1 = (x & MASK_BINARY_SIG1);
|
|
}
|
|
|
|
// if x is 0 or non-canonical
|
|
if (C1 == 0) {
|
|
if (exp < 0)
|
|
exp = 0;
|
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
|
BID_RETURN (res);
|
|
}
|
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
|
|
|
// return 0 if (exp <= -(p+1))
|
|
if (exp <= -17) {
|
|
res = x_sign | 0x31c0000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
|
// determine first the nr. of bits in x
|
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
|
q = 16;
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
|
q++;
|
|
}
|
|
}
|
|
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res = x;
|
|
BID_RETURN (res);
|
|
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
|
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
|
C1 = C1 + midpoint64[ind - 1];
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 16
|
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
|
|
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
|
// C* = floor(C*) - logical right shift; C* has p decimal digits,
|
|
// correct by Prop. 1)
|
|
// else
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// n = C* * 10^(e+x)
|
|
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res = P128.w[1];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res = (P128.w[1] >> shift);
|
|
}
|
|
// midpoints are already rounded correctly
|
|
// set exponent to zero as it was negative before.
|
|
res = x_sign | 0x31c0000000000000ull | res;
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp < 0
|
|
// the result is +0 or -0
|
|
res = x_sign | 0x31c0000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
}
|