Fix `sum()` accuracy

`[1e20, 1.0, -1e20].sum()` returns `0.0`. This happens because during the summation, `1.0` is too small relative to `1e20`, making it negligible. I have tried Kahan summation but it hasn't fixed the problem. Therefore, I've used Python's `fsum()` implementation with some help from Jason Fager and Huon Wilson. For more details, read: www.cs.cmu.edu/~quake-papers/robust-arithmetic.ps Moreover, benchmark and unit tests were added. Note: `Status.sum` is still not fully fixed. It doesn't handle NaNs, infinities and overflow correctly. See issue 11059: https://github.com/mozilla/rust/issues/11059
2013-12-12 01:57:13 +02:00 · 2013-12-12 01:57:13 +02:00 · 05395cba88
parent 4e0cb316fc
commit 05395cba88
1 changed files with 67 additions and 2 deletions
--- a/src/libextra/stats.rs
+++ b/src/libextra/stats.rs
@ -15,6 +15,7 @@ use std::cmp;
 use std::hashmap;
 use std::io;
 use std::num;
 use std::util;
 // NB: this can probably be rewritten in terms of num::Num
 // to be less f64-specific.
@ -23,6 +24,12 @@ use std::num;
 pub trait Stats {
    /// Sum of the samples.
    ///
    /// Note: this method sacrifices performance at the altar of accuracy
    /// Depends on IEEE-754 arithmetic guarantees. See proof of correctness at:
    /// ["Adaptive Precision Floating-Point Arithmetic and Fast Robust Geometric Predicates"]
    /// (http://www.cs.cmu.edu/~quake-papers/robust-arithmetic.ps)
    /// *Discrete & Computational Geometry 18*, 3 (Oct 1997), 305-363, Shewchuk J.R.
    fn sum(self) -> f64;
    /// Minimum value of the samples.
@ -147,8 +154,37 @@ impl Summary {
 impl<'self> Stats for &'self [f64] {
    // FIXME #11059 handle NaN, inf and overflow
    fn sum(self) -> f64 {
-        self.iter().fold(0.0, |p,q| p + *q)
+        let mut partials : ~[f64] = ~[];
        for &mut x in self.iter() {
            let mut j = 0;
            // This inner loop applies `hi`/`lo` summation to each
            // partial so that the list of partial sums remains exact.
            for i in range(0, partials.len()) {
                let mut y = partials[i];
                if num::abs(x) < num::abs(y) {
                    util::swap(&mut x, &mut y);
                }
                // Rounded `x+y` is stored in `hi` with round-off stored in
                // `lo`. Together `hi+lo` are exactly equal to `x+y`.
                let hi = x + y;
                let lo = y - (hi - x);
                if lo != 0f64 {
                    partials[j] = lo;
                    j += 1;
                }
                x = hi;
            }
            if j >= partials.len() {
                partials.push(x);
            } else {
                partials[j] = x;
                partials.truncate(j+1);
            }
        }
        partials.iter().fold(0.0, |p, q| p + *q)
    }
    fn min(self) -> f64 {
@ -955,5 +991,34 @@ mod tests {
        t(&Summary::new([-2.0, 0.0]), ~"-2 |[------******#******---]| 0");
    }
-
+    #[test]
    fn test_sum_f64s() {
        assert_eq!([0.5, 3.2321, 1.5678].sum(), 5.2999);
    }
    #[test]
    fn test_sum_f64_between_ints_that_sum_to_0() {
        assert_eq!([1e30, 1.2, -1e30].sum(), 1.2);
    }
 }
 #[cfg(test)]
 mod bench {
    use extra::test::BenchHarness;
    use std::vec;
    #[bench]
    fn sum_three_items(bh: &mut BenchHarness) {
        bh.iter(|| {
            [1e20, 1.5, -1e20].sum();
        })
    }
    #[bench]
    fn sum_many_f64(bh: &mut BenchHarness) {
        let nums = [-1e30, 1e60, 1e30, 1.0, -1e60];
        let v = vec::from_fn(500, |i| nums[i%5]);
        bh.iter(|| {
            v.sum();
        })
    }
 }