std::rand: optimise & document ziggurat.

Before:

    test rand::distributions::bench::rand_exp ... bench: 1399 ns/iter (+/- 124) = 571 MB/s
    test rand::distributions::bench::rand_normal ... bench: 1611 ns/iter (+/- 123) = 496 MB/s

After:

    test rand::distributions::bench::rand_exp ... bench: 712 ns/iter (+/- 43) = 1123 MB/s
    test rand::distributions::bench::rand_normal ... bench: 1007 ns/iter (+/- 81) = 794 MB/s
This commit is contained in:
Huon Wilson 2013-10-11 19:38:32 +11:00
parent e0eb128086
commit ed5f2d7c7c
1 changed files with 72 additions and 8 deletions

View File

@ -63,21 +63,48 @@ impl<Sup: Rand> IndependentSample<Sup> for RandSample<Sup> {
mod ziggurat_tables;
// inlining should mean there is no performance penalty for this
#[inline]
/// Sample a random number using the Ziggurat method (specifically the
/// ZIGNOR variant from Doornik 2005). Most of the arguments are
/// directly from the paper:
///
/// * `rng`: source of randomness
/// * `symmetric`: whether this is a symmetric distribution, or one-sided with P(x < 0) = 0.
/// * `X`: the $x_i$ abscissae.
/// * `F`: precomputed values of the PDF at the $x_i$, (i.e. $f(x_i)$)
/// * `F_DIFF`: precomputed values of $f(x_i) - f(x_{i+1})$
/// * `pdf`: the probability density function
/// * `zero_case`: manual sampling from the tail when we chose the
/// bottom box (i.e. i == 0)
// the perf improvement (25-50%) is definitely worth the extra code
// size from force-inlining.
#[inline(always)]
fn ziggurat<R:Rng>(rng: &mut R,
center_u: bool,
symmetric: bool,
X: ziggurat_tables::ZigTable,
F: ziggurat_tables::ZigTable,
F_DIFF: ziggurat_tables::ZigTable,
pdf: &'static fn(f64) -> f64, // probability density function
pdf: &'static fn(f64) -> f64,
zero_case: &'static fn(&mut R, f64) -> f64) -> f64 {
static SCALE: f64 = (1u64 << 53) as f64;
loop {
let u = if center_u {2.0 * rng.gen() - 1.0} else {rng.gen()};
let i: uint = rng.gen::<uint>() & 0xff;
// reimplement the f64 generation as an optimisation suggested
// by the Doornik paper: we have a lot of precision-space
// (i.e. there are 11 bits of the 64 of a u64 to use after
// creating a f64), so we might as well reuse some to save
// generating a whole extra random number. (Seems to be 15%
// faster.)
let bits: u64 = rng.gen();
let i = (bits & 0xff) as uint;
let f = (bits >> 11) as f64 / SCALE;
// u is either U(-1, 1) or U(0, 1) depending on if this is a
// symmetric distribution or not.
let u = if symmetric {2.0 * f - 1.0} else {f};
let x = u * X[i];
let test_x = if center_u {num::abs(x)} else {x};
let test_x = if symmetric {num::abs(x)} else {x};
// algebraically equivalent to |u| < X[i+1]/X[i] (or u < X[i+1]/X[i])
if test_x < X[i + 1] {
@ -87,7 +114,7 @@ fn ziggurat<R:Rng>(rng: &mut R,
return zero_case(rng, u);
}
// algebraically equivalent to f1 + DRanU()*(f0 - f1) < 1
if F[i+1] + F_DIFF[i+1] * rng.gen() < pdf(x) {
if F[i + 1] + F_DIFF[i + 1] * rng.gen() < pdf(x) {
return x;
}
}
@ -318,3 +345,40 @@ mod tests {
Exp::new(-10.0);
}
}
#[cfg(test)]
mod bench {
use extra::test::BenchHarness;
use rand::*;
use super::*;
use iter::range;
use option::{Some, None};
use mem::size_of;
static N: u64 = 100;
#[bench]
fn rand_normal(bh: &mut BenchHarness) {
let mut rng = XorShiftRng::new();
let mut normal = Normal::new(-2.71828, 3.14159);
do bh.iter {
for _ in range(0, N) {
normal.sample(&mut rng);
}
}
bh.bytes = size_of::<f64>() as u64 * N;
}
#[bench]
fn rand_exp(bh: &mut BenchHarness) {
let mut rng = XorShiftRng::new();
let mut exp = Exp::new(2.71828 * 3.14159);
do bh.iter {
for _ in range(0, N) {
exp.sample(&mut rng);
}
}
bh.bytes = size_of::<f64>() as u64 * N;
}
}