Use associated constants in core::num::dec2flt

This commit is contained in:
Robin Kruppe 2017-04-14 17:14:20 +02:00
parent 4f32e0dfb2
commit e9c74bc42d
4 changed files with 99 additions and 152 deletions

View File

@ -70,6 +70,7 @@
#![feature(allow_internal_unstable)]
#![feature(asm)]
#![feature(associated_type_defaults)]
#![feature(associated_consts)]
#![feature(cfg_target_feature)]
#![feature(cfg_target_has_atomic)]
#![feature(concat_idents)]

View File

@ -106,17 +106,17 @@ mod fpu_precision {
/// a bignum.
pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Option<T> {
let num_digits = integral.len() + fractional.len();
// log_10(f64::max_sig) ~ 15.95. We compare the exact value to max_sig near the end,
// log_10(f64::MAX_SIG) ~ 15.95. We compare the exact value to MAX_SIG near the end,
// this is just a quick, cheap rejection (and also frees the rest of the code from
// worrying about underflow).
if num_digits > 16 {
return None;
}
if e.abs() >= T::ceil_log5_of_max_sig() as i64 {
if e.abs() >= T::CEIL_LOG5_OF_MAX_SIG as i64 {
return None;
}
let f = num::from_str_unchecked(integral.iter().chain(fractional.iter()));
if f > T::max_sig() {
if f > T::MAX_SIG {
return None;
}
@ -154,14 +154,14 @@ pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Opt
/// > the best possible approximation that uses p bits of significand.)
pub fn bellerophon<T: RawFloat>(f: &Big, e: i16) -> T {
let slop;
if f <= &Big::from_u64(T::max_sig()) {
if f <= &Big::from_u64(T::MAX_SIG) {
// The cases abs(e) < log5(2^N) are in fast_path()
slop = if e >= 0 { 0 } else { 3 };
} else {
slop = if e >= 0 { 1 } else { 4 };
}
let z = rawfp::big_to_fp(f).mul(&power_of_ten(e)).normalize();
let exp_p_n = 1 << (P - T::sig_bits() as u32);
let exp_p_n = 1 << (P - T::SIG_BITS as u32);
let lowbits: i64 = (z.f % exp_p_n) as i64;
// Is the slop large enough to make a difference when
// rounding to n bits?
@ -210,14 +210,14 @@ fn algorithm_r<T: RawFloat>(f: &Big, e: i16, z0: T) -> T {
if d2 < y {
let mut d2_double = d2;
d2_double.mul_pow2(1);
if m == T::min_sig() && d_negative && d2_double > y {
if m == T::MIN_SIG && d_negative && d2_double > y {
z = prev_float(z);
} else {
return z;
}
} else if d2 == y {
if m % 2 == 0 {
if m == T::min_sig() && d_negative {
if m == T::MIN_SIG && d_negative {
z = prev_float(z);
} else {
return z;
@ -303,12 +303,12 @@ pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
quick_start::<T>(&mut u, &mut v, &mut k);
let mut rem = Big::from_small(0);
let mut x = Big::from_small(0);
let min_sig = Big::from_u64(T::min_sig());
let max_sig = Big::from_u64(T::max_sig());
let min_sig = Big::from_u64(T::MIN_SIG);
let max_sig = Big::from_u64(T::MAX_SIG);
loop {
u.div_rem(&v, &mut x, &mut rem);
if k == T::min_exp_int() {
// We have to stop at the minimum exponent; if we wait until `k < T::min_exp_int()`,
if k == T::MIN_EXP_INT {
// We have to stop at the minimum exponent; if we wait until `k < T::MIN_EXP_INT`,
// then we'd be off by a factor of two. Unfortunately this means we have to special-
// case normal numbers with the minimum exponent.
// FIXME find a more elegant formulation, but run the `tiny-pow10` test to make sure
@ -318,8 +318,8 @@ pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
}
return underflow(x, v, rem);
}
if k > T::max_exp_int() {
return T::infinity2();
if k > T::MAX_EXP_INT {
return T::INFINITY;
}
if x < min_sig {
u.mul_pow2(1);
@ -345,18 +345,18 @@ fn quick_start<T: RawFloat>(u: &mut Big, v: &mut Big, k: &mut i16) {
// The target ratio is one where u/v is in an in-range significand. Thus our termination
// condition is log2(u / v) being the significand bits, plus/minus one.
// FIXME Looking at the second bit could improve the estimate and avoid some more divisions.
let target_ratio = T::sig_bits() as i16;
let target_ratio = T::SIG_BITS as i16;
let log2_u = u.bit_length() as i16;
let log2_v = v.bit_length() as i16;
let mut u_shift: i16 = 0;
let mut v_shift: i16 = 0;
assert!(*k == 0);
loop {
if *k == T::min_exp_int() {
if *k == T::MIN_EXP_INT {
// Underflow or subnormal. Leave it to the main function.
break;
}
if *k == T::max_exp_int() {
if *k == T::MAX_EXP_INT {
// Overflow. Leave it to the main function.
break;
}
@ -376,7 +376,7 @@ fn quick_start<T: RawFloat>(u: &mut Big, v: &mut Big, k: &mut i16) {
}
fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T {
if x < Big::from_u64(T::min_sig()) {
if x < Big::from_u64(T::MIN_SIG) {
let q = num::to_u64(&x);
let z = rawfp::encode_subnormal(q);
return round_by_remainder(v, rem, q, z);
@ -395,9 +395,9 @@ fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T {
// needs to be rounded up. Only when the rounded off bits are 1/2 and the remainder
// is zero, we have a half-to-even situation.
let bits = x.bit_length();
let lsb = bits - T::sig_bits() as usize;
let lsb = bits - T::SIG_BITS as usize;
let q = num::get_bits(&x, lsb, bits);
let k = T::min_exp_int() + lsb as i16;
let k = T::MIN_EXP_INT + lsb as i16;
let z = rawfp::encode_normal(Unpacked::new(q, k));
let q_even = q % 2 == 0;
match num::compare_with_half_ulp(&x, lsb) {

View File

@ -214,11 +214,11 @@ fn dec2flt<T: RawFloat>(s: &str) -> Result<T, ParseFloatError> {
let (sign, s) = extract_sign(s);
let flt = match parse_decimal(s) {
ParseResult::Valid(decimal) => convert(decimal)?,
ParseResult::ShortcutToInf => T::infinity2(),
ParseResult::ShortcutToZero => T::zero2(),
ParseResult::ShortcutToInf => T::INFINITY,
ParseResult::ShortcutToZero => T::ZERO,
ParseResult::Invalid => match s {
"inf" => T::infinity2(),
"NaN" => T::nan2(),
"inf" => T::INFINITY,
"NaN" => T::NAN,
_ => { return Err(pfe_invalid()); }
}
};
@ -254,7 +254,7 @@ fn convert<T: RawFloat>(mut decimal: Decimal) -> Result<T, ParseFloatError> {
// FIXME These bounds are rather conservative. A more careful analysis of the failure modes
// of Bellerophon could allow using it in more cases for a massive speed up.
let exponent_in_range = table::MIN_E <= e && e <= table::MAX_E;
let value_in_range = upper_bound <= T::max_normal_digits() as u64;
let value_in_range = upper_bound <= T::MAX_NORMAL_DIGITS as u64;
if exponent_in_range && value_in_range {
Ok(algorithm::bellerophon(&f, e))
} else {
@ -315,17 +315,17 @@ fn bound_intermediate_digits(decimal: &Decimal, e: i64) -> u64 {
fn trivial_cases<T: RawFloat>(decimal: &Decimal) -> Option<T> {
// There were zeros but they were stripped by simplify()
if decimal.integral.is_empty() && decimal.fractional.is_empty() {
return Some(T::zero2());
return Some(T::ZERO);
}
// This is a crude approximation of ceil(log10(the real value)). We don't need to worry too
// much about overflow here because the input length is tiny (at least compared to 2^64) and
// the parser already handles exponents whose absolute value is greater than 10^18
// (which is still 10^19 short of 2^64).
let max_place = decimal.exp + decimal.integral.len() as i64;
if max_place > T::inf_cutoff() {
return Some(T::infinity2());
} else if max_place < T::zero_cutoff() {
return Some(T::zero2());
if max_place > T::INF_CUTOFF {
return Some(T::INFINITY);
} else if max_place < T::ZERO_CUTOFF {
return Some(T::ZERO);
}
None
}

View File

@ -56,24 +56,12 @@ impl Unpacked {
///
/// Should **never ever** be implemented for other types or be used outside the dec2flt module.
/// Inherits from `Float` because there is some overlap, but all the reused methods are trivial.
/// The "methods" (pseudo-constants) with default implementation should not be overridden.
pub trait RawFloat : Float + Copy + Debug + LowerExp
+ Mul<Output=Self> + Div<Output=Self> + Neg<Output=Self>
{
// suffix of "2" because Float::infinity is deprecated
#[allow(deprecated)]
fn infinity2() -> Self {
Float::infinity()
}
// suffix of "2" because Float::nan is deprecated
#[allow(deprecated)]
fn nan2() -> Self {
Float::nan()
}
// suffix of "2" because Float::zero is deprecated
fn zero2() -> Self;
const INFINITY: Self;
const NAN: Self;
const ZERO: Self;
// suffix of "2" because Float::integer_decode is deprecated
#[allow(deprecated)]
@ -94,94 +82,83 @@ pub trait RawFloat : Float + Copy + Debug + LowerExp
/// represented, the other code in this module makes sure to never let that happen.
fn from_int(x: u64) -> Self;
/// Get the value 10<sup>e</sup> from a pre-computed table. Panics for e >=
/// ceil_log5_of_max_sig().
/// Get the value 10<sup>e</sup> from a pre-computed table.
/// Panics for `e >= CEIL_LOG5_OF_MAX_SIG`.
fn short_fast_pow10(e: usize) -> Self;
// FIXME Everything that follows should be associated constants, but taking the value of an
// associated constant from a type parameter does not work (yet?)
// A possible workaround is having a `FloatInfo` struct for all the constants, but so far
// the methods aren't painful enough to rewrite.
/// What the name says. It's easier to hard code than juggling intrinsics and
/// hoping LLVM constant folds it.
fn ceil_log5_of_max_sig() -> i16;
const CEIL_LOG5_OF_MAX_SIG: i16;
/// A conservative bound on the decimal digits of inputs that can't produce overflow or zero or
/// subnormals. Probably the decimal exponent of the maximum normal value, hence the name.
fn max_normal_digits() -> usize;
const MAX_NORMAL_DIGITS: usize;
/// When the most significant decimal digit has a place value greater than this, the number
/// is certainly rounded to infinity.
fn inf_cutoff() -> i64;
const INF_CUTOFF: i64;
/// When the most significant decimal digit has a place value less than this, the number
/// is certainly rounded to zero.
fn zero_cutoff() -> i64;
const ZERO_CUTOFF: i64;
/// The number of bits in the exponent.
fn exp_bits() -> u8;
const EXP_BITS: u8;
/// The number of bits in the significand, *including* the hidden bit.
fn sig_bits() -> u8;
const SIG_BITS: u8;
/// The number of bits in the significand, *excluding* the hidden bit.
fn explicit_sig_bits() -> u8 {
Self::sig_bits() - 1
}
const EXPLICIT_SIG_BITS: u8;
/// The maximum legal exponent in fractional representation.
fn max_exp() -> i16 {
(1 << (Self::exp_bits() - 1)) - 1
}
const MAX_EXP: i16;
/// The minimum legal exponent in fractional representation, excluding subnormals.
fn min_exp() -> i16 {
-Self::max_exp() + 1
}
const MIN_EXP: i16;
/// `MAX_EXP` for integral representation, i.e., with the shift applied.
fn max_exp_int() -> i16 {
Self::max_exp() - (Self::sig_bits() as i16 - 1)
}
const MAX_EXP_INT: i16;
/// `MAX_EXP` encoded (i.e., with offset bias)
fn max_encoded_exp() -> i16 {
(1 << Self::exp_bits()) - 1
}
const MAX_ENCODED_EXP: i16;
/// `MIN_EXP` for integral representation, i.e., with the shift applied.
fn min_exp_int() -> i16 {
Self::min_exp() - (Self::sig_bits() as i16 - 1)
}
const MIN_EXP_INT: i16;
/// The maximum normalized significand in integral representation.
fn max_sig() -> u64 {
(1 << Self::sig_bits()) - 1
}
const MAX_SIG: u64;
/// The minimal normalized significand in integral representation.
fn min_sig() -> u64 {
1 << (Self::sig_bits() - 1)
const MIN_SIG: u64;
}
// Mostly a workaround for #34344.
//
// Invoked as `other_constants!(f32)` / `other_constants!(f64)` from inside the matching
// `impl RawFloat for ...` block. It expands to the associated constants that are either
// computed from `Self::SIG_BITS` and `Self::EXP_BITS` or forwarded from the `$crate::$type`
// module, so the surrounding impl only has to spell out the per-type literals itself.
macro_rules! other_constants {
($type: ident) => {
// Significand width without the hidden bit.
const EXPLICIT_SIG_BITS: u8 = Self::SIG_BITS - 1;
// Maximum legal exponent in fractional representation.
const MAX_EXP: i16 = (1 << (Self::EXP_BITS - 1)) - 1;
// Minimum legal exponent in fractional representation, excluding subnormals.
const MIN_EXP: i16 = -Self::MAX_EXP + 1;
// `MAX_EXP` for integral representation, i.e. shifted down by `SIG_BITS - 1`.
const MAX_EXP_INT: i16 = Self::MAX_EXP - (Self::SIG_BITS as i16 - 1);
// All `EXP_BITS` bits set.
const MAX_ENCODED_EXP: i16 = (1 << Self::EXP_BITS) - 1;
// `MIN_EXP` for integral representation, i.e. shifted down by `SIG_BITS - 1`.
const MIN_EXP_INT: i16 = Self::MIN_EXP - (Self::SIG_BITS as i16 - 1);
// Largest normalized significand: the low `SIG_BITS` bits all set.
const MAX_SIG: u64 = (1 << Self::SIG_BITS) - 1;
// Smallest normalized significand: only bit `SIG_BITS - 1` (the hidden bit) set.
const MIN_SIG: u64 = 1 << (Self::SIG_BITS - 1);
// Special values are forwarded from the constants in the float type's own module.
const INFINITY: Self = $crate::$type::INFINITY;
const NAN: Self = $crate::$type::NAN;
const ZERO: Self = 0.0;
}
}
impl RawFloat for f32 {
fn zero2() -> Self {
0.0
}
fn sig_bits() -> u8 {
24
}
fn exp_bits() -> u8 {
8
}
fn ceil_log5_of_max_sig() -> i16 {
11
}
const SIG_BITS: u8 = 24;
const EXP_BITS: u8 = 8;
const CEIL_LOG5_OF_MAX_SIG: i16 = 11;
const MAX_NORMAL_DIGITS: usize = 35;
const INF_CUTOFF: i64 = 40;
const ZERO_CUTOFF: i64 = -48;
other_constants!(f32);
fn transmute(self) -> u64 {
let bits: u32 = unsafe { transmute(self) };
@ -207,37 +184,17 @@ impl RawFloat for f32 {
fn short_fast_pow10(e: usize) -> Self {
table::F32_SHORT_POWERS[e]
}
fn max_normal_digits() -> usize {
35
}
fn inf_cutoff() -> i64 {
40
}
fn zero_cutoff() -> i64 {
-48
}
}
impl RawFloat for f64 {
fn zero2() -> Self {
0.0
}
fn sig_bits() -> u8 {
53
}
fn exp_bits() -> u8 {
11
}
fn ceil_log5_of_max_sig() -> i16 {
23
}
const SIG_BITS: u8 = 53;
const EXP_BITS: u8 = 11;
const CEIL_LOG5_OF_MAX_SIG: i16 = 23;
const MAX_NORMAL_DIGITS: usize = 305;
const INF_CUTOFF: i64 = 310;
const ZERO_CUTOFF: i64 = -326;
other_constants!(f64);
fn transmute(self) -> u64 {
let bits: u64 = unsafe { transmute(self) };
@ -262,38 +219,27 @@ impl RawFloat for f64 {
fn short_fast_pow10(e: usize) -> Self {
table::F64_SHORT_POWERS[e]
}
fn max_normal_digits() -> usize {
305
}
fn inf_cutoff() -> i64 {
310
}
fn zero_cutoff() -> i64 {
-326
}
}
/// Convert an Fp to the closest f64. Only handles numbers that fit into a normalized f64.
/// Convert an Fp to the closest machine float type.
/// Does not handle subnormal results.
pub fn fp_to_float<T: RawFloat>(x: Fp) -> T {
let x = x.normalize();
// x.f is 64 bit, so x.e has a mantissa shift of 63
let e = x.e + 63;
if e > T::max_exp() {
if e > T::MAX_EXP {
panic!("fp_to_float: exponent {} too large", e)
} else if e > T::min_exp() {
} else if e > T::MIN_EXP {
encode_normal(round_normal::<T>(x))
} else {
panic!("fp_to_float: exponent {} too small", e)
}
}
/// Round the 64-bit significand to 53 bit with half-to-even. Does not handle exponent overflow.
/// Round the 64-bit significand to T::SIG_BITS bits with half-to-even.
/// Does not handle exponent overflow.
pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked {
let excess = 64 - T::sig_bits() as i16;
let excess = 64 - T::SIG_BITS as i16;
let half: u64 = 1 << (excess - 1);
let (q, rem) = (x.f >> excess, x.f & ((1 << excess) - 1));
assert_eq!(q << excess | rem, x.f);
@ -303,8 +249,8 @@ pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked {
Unpacked::new(q, k)
} else if rem == half && (q % 2) == 0 {
Unpacked::new(q, k)
} else if q == T::max_sig() {
Unpacked::new(T::min_sig(), k + 1)
} else if q == T::MAX_SIG {
Unpacked::new(T::MIN_SIG, k + 1)
} else {
Unpacked::new(q + 1, k)
}
@ -313,22 +259,22 @@ pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked {
/// Inverse of `RawFloat::unpack()` for normalized numbers.
/// Panics if the significand or exponent are not valid for normalized numbers.
pub fn encode_normal<T: RawFloat>(x: Unpacked) -> T {
debug_assert!(T::min_sig() <= x.sig && x.sig <= T::max_sig(),
debug_assert!(T::MIN_SIG <= x.sig && x.sig <= T::MAX_SIG,
"encode_normal: significand not normalized");
// Remove the hidden bit
let sig_enc = x.sig & !(1 << T::explicit_sig_bits());
let sig_enc = x.sig & !(1 << T::EXPLICIT_SIG_BITS);
// Adjust the exponent for exponent bias and mantissa shift
let k_enc = x.k + T::max_exp() + T::explicit_sig_bits() as i16;
debug_assert!(k_enc != 0 && k_enc < T::max_encoded_exp(),
let k_enc = x.k + T::MAX_EXP + T::EXPLICIT_SIG_BITS as i16;
debug_assert!(k_enc != 0 && k_enc < T::MAX_ENCODED_EXP,
"encode_normal: exponent out of range");
// Leave sign bit at 0 ("+"), our numbers are all positive
let bits = (k_enc as u64) << T::explicit_sig_bits() | sig_enc;
let bits = (k_enc as u64) << T::EXPLICIT_SIG_BITS | sig_enc;
T::from_bits(bits)
}
/// Construct the subnormal. A mantissa of 0 is allowed and constructs zero.
/// Construct a subnormal. A mantissa of 0 is allowed and constructs zero.
pub fn encode_subnormal<T: RawFloat>(significand: u64) -> T {
assert!(significand < T::min_sig(), "encode_subnormal: not actually subnormal");
assert!(significand < T::MIN_SIG, "encode_subnormal: not actually subnormal");
// Encoded exponent is 0, the sign bit is 0, so we just have to reinterpret the bits.
T::from_bits(significand)
}
@ -364,8 +310,8 @@ pub fn prev_float<T: RawFloat>(x: T) -> T {
Zero => panic!("prev_float: argument is zero"),
Normal => {
let Unpacked { sig, k } = x.unpack();
if sig == T::min_sig() {
encode_normal(Unpacked::new(T::max_sig(), k - 1))
if sig == T::MIN_SIG {
encode_normal(Unpacked::new(T::MAX_SIG, k - 1))
} else {
encode_normal(Unpacked::new(sig - 1, k))
}
@ -380,7 +326,7 @@ pub fn prev_float<T: RawFloat>(x: T) -> T {
pub fn next_float<T: RawFloat>(x: T) -> T {
match x.classify() {
Nan => panic!("next_float: argument is NaN"),
Infinite => T::infinity2(),
Infinite => T::INFINITY,
// This seems too good to be true, but it works.
// 0.0 is encoded as the all-zero word. Subnormals are 0x000m...m where m is the mantissa.
// In particular, the smallest subnormal is 0x0...01 and the largest is 0x000F...F.