librustc: use LLVM intrinsics for several floating point operations.

Achieves at least 5x speed up for some functions!

Also, reorganise the delegation code so that the delegated function wrappers
have the #[inline(always)] annotation, and reduce the repetition of
delegate!(..).
This commit is contained in:
Huon Wilson 2013-04-04 03:08:53 +11:00
parent 93c0888b6c
commit d9c54f8387
3 changed files with 166 additions and 153 deletions

View File

@ -10,12 +10,9 @@
//! Operations and constants for `f32`
use cmath;
use libc::{c_float, c_int};
use num::strconv;
use num;
use option::Option;
use unstable::intrinsics::floorf32;
use from_str;
use to_str;
@ -24,79 +21,93 @@ use to_str;
pub use cmath::c_float_targ_consts::*;
// An inner module is required to get the #[inline(always)] attribute on the
// functions.
pub use self::delegated::*;
macro_rules! delegate(
(
fn $name:ident(
$(
$arg:ident : $arg_ty:ty
),*
) -> $rv:ty = $bound_name:path
$(
fn $name:ident(
$(
$arg:ident : $arg_ty:ty
),*
) -> $rv:ty = $bound_name:path
),*
) => (
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
mod delegated {
use cmath::c_float_utils;
use libc::{c_float, c_int};
use unstable::intrinsics;
$(
#[inline(always)]
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
}
)*
}
)
)
delegate!(fn acos(n: c_float) -> c_float = cmath::c_float_utils::acos)
delegate!(fn asin(n: c_float) -> c_float = cmath::c_float_utils::asin)
delegate!(fn atan(n: c_float) -> c_float = cmath::c_float_utils::atan)
delegate!(fn atan2(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::atan2)
delegate!(fn cbrt(n: c_float) -> c_float = cmath::c_float_utils::cbrt)
delegate!(fn ceil(n: c_float) -> c_float = cmath::c_float_utils::ceil)
delegate!(fn copysign(x: c_float, y: c_float) -> c_float =
cmath::c_float_utils::copysign)
delegate!(fn cos(n: c_float) -> c_float = cmath::c_float_utils::cos)
delegate!(fn cosh(n: c_float) -> c_float = cmath::c_float_utils::cosh)
delegate!(fn erf(n: c_float) -> c_float = cmath::c_float_utils::erf)
delegate!(fn erfc(n: c_float) -> c_float = cmath::c_float_utils::erfc)
delegate!(fn exp(n: c_float) -> c_float = cmath::c_float_utils::exp)
delegate!(fn expm1(n: c_float) -> c_float = cmath::c_float_utils::expm1)
delegate!(fn exp2(n: c_float) -> c_float = cmath::c_float_utils::exp2)
delegate!(fn abs(n: c_float) -> c_float = cmath::c_float_utils::abs)
delegate!(fn abs_sub(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::abs_sub)
delegate!(fn mul_add(a: c_float, b: c_float, c: c_float) -> c_float =
cmath::c_float_utils::mul_add)
delegate!(fn fmax(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::fmax)
delegate!(fn fmin(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::fmin)
delegate!(fn nextafter(x: c_float, y: c_float) -> c_float =
cmath::c_float_utils::nextafter)
delegate!(fn frexp(n: c_float, value: &mut c_int) -> c_float =
cmath::c_float_utils::frexp)
delegate!(fn hypot(x: c_float, y: c_float) -> c_float =
cmath::c_float_utils::hypot)
delegate!(fn ldexp(x: c_float, n: c_int) -> c_float =
cmath::c_float_utils::ldexp)
delegate!(fn lgamma(n: c_float, sign: &mut c_int) -> c_float =
cmath::c_float_utils::lgamma)
delegate!(fn ln(n: c_float) -> c_float = cmath::c_float_utils::ln)
delegate!(fn log_radix(n: c_float) -> c_float =
cmath::c_float_utils::log_radix)
delegate!(fn ln1p(n: c_float) -> c_float = cmath::c_float_utils::ln1p)
delegate!(fn log10(n: c_float) -> c_float = cmath::c_float_utils::log10)
delegate!(fn log2(n: c_float) -> c_float = cmath::c_float_utils::log2)
delegate!(fn ilog_radix(n: c_float) -> c_int =
cmath::c_float_utils::ilog_radix)
delegate!(fn modf(n: c_float, iptr: &mut c_float) -> c_float =
cmath::c_float_utils::modf)
delegate!(fn pow(n: c_float, e: c_float) -> c_float =
cmath::c_float_utils::pow)
delegate!(fn round(n: c_float) -> c_float = cmath::c_float_utils::round)
delegate!(fn ldexp_radix(n: c_float, i: c_int) -> c_float =
cmath::c_float_utils::ldexp_radix)
delegate!(fn sin(n: c_float) -> c_float = cmath::c_float_utils::sin)
delegate!(fn sinh(n: c_float) -> c_float = cmath::c_float_utils::sinh)
delegate!(fn sqrt(n: c_float) -> c_float = cmath::c_float_utils::sqrt)
delegate!(fn tan(n: c_float) -> c_float = cmath::c_float_utils::tan)
delegate!(fn tanh(n: c_float) -> c_float = cmath::c_float_utils::tanh)
delegate!(fn tgamma(n: c_float) -> c_float = cmath::c_float_utils::tgamma)
delegate!(fn trunc(n: c_float) -> c_float = cmath::c_float_utils::trunc)
delegate!(
// intrinsics
fn abs(n: f32) -> f32 = intrinsics::fabsf32,
fn cos(n: f32) -> f32 = intrinsics::cosf32,
fn exp(n: f32) -> f32 = intrinsics::expf32,
fn exp2(n: f32) -> f32 = intrinsics::exp2f32,
fn floor(x: f32) -> f32 = intrinsics::floorf32,
fn ln(n: f32) -> f32 = intrinsics::logf32,
fn log10(n: f32) -> f32 = intrinsics::log10f32,
fn log2(n: f32) -> f32 = intrinsics::log2f32,
fn mul_add(a: f32, b: f32, c: f32) -> f32 = intrinsics::fmaf32,
fn pow(n: f32, e: f32) -> f32 = intrinsics::powf32,
fn powi(n: f32, e: c_int) -> f32 = intrinsics::powif32,
fn sin(n: f32) -> f32 = intrinsics::sinf32,
fn sqrt(n: f32) -> f32 = intrinsics::sqrtf32,
// LLVM 3.3 required to use intrinsics for these four
fn ceil(n: c_float) -> c_float = c_float_utils::ceil,
fn trunc(n: c_float) -> c_float = c_float_utils::trunc,
/*
fn ceil(n: f32) -> f32 = intrinsics::ceilf32,
fn trunc(n: f32) -> f32 = intrinsics::truncf32,
fn rint(n: f32) -> f32 = intrinsics::rintf32,
fn nearbyint(n: f32) -> f32 = intrinsics::nearbyintf32,
*/
// cmath
fn acos(n: c_float) -> c_float = c_float_utils::acos,
fn asin(n: c_float) -> c_float = c_float_utils::asin,
fn atan(n: c_float) -> c_float = c_float_utils::atan,
fn atan2(a: c_float, b: c_float) -> c_float = c_float_utils::atan2,
fn cbrt(n: c_float) -> c_float = c_float_utils::cbrt,
fn copysign(x: c_float, y: c_float) -> c_float = c_float_utils::copysign,
fn cosh(n: c_float) -> c_float = c_float_utils::cosh,
fn erf(n: c_float) -> c_float = c_float_utils::erf,
fn erfc(n: c_float) -> c_float = c_float_utils::erfc,
fn expm1(n: c_float) -> c_float = c_float_utils::expm1,
fn abs_sub(a: c_float, b: c_float) -> c_float = c_float_utils::abs_sub,
fn fmax(a: c_float, b: c_float) -> c_float = c_float_utils::fmax,
fn fmin(a: c_float, b: c_float) -> c_float = c_float_utils::fmin,
fn nextafter(x: c_float, y: c_float) -> c_float = c_float_utils::nextafter,
fn frexp(n: c_float, value: &mut c_int) -> c_float = c_float_utils::frexp,
fn hypot(x: c_float, y: c_float) -> c_float = c_float_utils::hypot,
fn ldexp(x: c_float, n: c_int) -> c_float = c_float_utils::ldexp,
fn lgamma(n: c_float, sign: &mut c_int) -> c_float = c_float_utils::lgamma,
fn log_radix(n: c_float) -> c_float = c_float_utils::log_radix,
fn ln1p(n: c_float) -> c_float = c_float_utils::ln1p,
fn ilog_radix(n: c_float) -> c_int = c_float_utils::ilog_radix,
fn modf(n: c_float, iptr: &mut c_float) -> c_float = c_float_utils::modf,
fn round(n: c_float) -> c_float = c_float_utils::round,
fn ldexp_radix(n: c_float, i: c_int) -> c_float = c_float_utils::ldexp_radix,
fn sinh(n: c_float) -> c_float = c_float_utils::sinh,
fn tan(n: c_float) -> c_float = c_float_utils::tan,
fn tanh(n: c_float) -> c_float = c_float_utils::tanh,
fn tgamma(n: c_float) -> c_float = c_float_utils::tgamma)
// These are not defined inside consts:: for consistency with
// the integer types
@ -143,9 +154,6 @@ pub fn ge(x: f32, y: f32) -> bool { return x >= y; }
#[inline(always)]
pub fn gt(x: f32, y: f32) -> bool { return x > y; }
/// Returns `x` rounded down
#[inline(always)]
pub fn floor(x: f32) -> f32 { unsafe { floorf32(x) } }
// FIXME (#1999): replace the predicates below with llvm intrinsics or
// calls to the libmath macros in the rust runtime for performance.

View File

@ -10,12 +10,9 @@
//! Operations and constants for `f64`
use cmath;
use libc::{c_double, c_int};
use num::strconv;
use num;
use option::Option;
use unstable::intrinsics::floorf64;
use to_str;
use from_str;
@ -25,87 +22,98 @@ use from_str;
pub use cmath::c_double_targ_consts::*;
pub use cmp::{min, max};
// An inner module is required to get the #[inline(always)] attribute on the
// functions.
pub use self::delegated::*;
macro_rules! delegate(
(
fn $name:ident(
$(
$arg:ident : $arg_ty:ty
),*
) -> $rv:ty = $bound_name:path
$(
fn $name:ident(
$(
$arg:ident : $arg_ty:ty
),*
) -> $rv:ty = $bound_name:path
),*
) => (
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
mod delegated {
use cmath::c_double_utils;
use libc::{c_double, c_int};
use unstable::intrinsics;
$(
#[inline(always)]
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
}
)*
}
)
)
delegate!(fn acos(n: c_double) -> c_double = cmath::c_double_utils::acos)
delegate!(fn asin(n: c_double) -> c_double = cmath::c_double_utils::asin)
delegate!(fn atan(n: c_double) -> c_double = cmath::c_double_utils::atan)
delegate!(fn atan2(a: c_double, b: c_double) -> c_double =
cmath::c_double_utils::atan2)
delegate!(fn cbrt(n: c_double) -> c_double = cmath::c_double_utils::cbrt)
delegate!(fn ceil(n: c_double) -> c_double = cmath::c_double_utils::ceil)
delegate!(fn copysign(x: c_double, y: c_double) -> c_double =
cmath::c_double_utils::copysign)
delegate!(fn cos(n: c_double) -> c_double = cmath::c_double_utils::cos)
delegate!(fn cosh(n: c_double) -> c_double = cmath::c_double_utils::cosh)
delegate!(fn erf(n: c_double) -> c_double = cmath::c_double_utils::erf)
delegate!(fn erfc(n: c_double) -> c_double = cmath::c_double_utils::erfc)
delegate!(fn exp(n: c_double) -> c_double = cmath::c_double_utils::exp)
delegate!(fn expm1(n: c_double) -> c_double = cmath::c_double_utils::expm1)
delegate!(fn exp2(n: c_double) -> c_double = cmath::c_double_utils::exp2)
delegate!(fn abs(n: c_double) -> c_double = cmath::c_double_utils::abs)
delegate!(fn abs_sub(a: c_double, b: c_double) -> c_double =
cmath::c_double_utils::abs_sub)
delegate!(fn mul_add(a: c_double, b: c_double, c: c_double) -> c_double =
cmath::c_double_utils::mul_add)
delegate!(fn fmax(a: c_double, b: c_double) -> c_double =
cmath::c_double_utils::fmax)
delegate!(fn fmin(a: c_double, b: c_double) -> c_double =
cmath::c_double_utils::fmin)
delegate!(fn nextafter(x: c_double, y: c_double) -> c_double =
cmath::c_double_utils::nextafter)
delegate!(fn frexp(n: c_double, value: &mut c_int) -> c_double =
cmath::c_double_utils::frexp)
delegate!(fn hypot(x: c_double, y: c_double) -> c_double =
cmath::c_double_utils::hypot)
delegate!(fn ldexp(x: c_double, n: c_int) -> c_double =
cmath::c_double_utils::ldexp)
delegate!(fn lgamma(n: c_double, sign: &mut c_int) -> c_double =
cmath::c_double_utils::lgamma)
delegate!(fn ln(n: c_double) -> c_double = cmath::c_double_utils::ln)
delegate!(fn log_radix(n: c_double) -> c_double =
cmath::c_double_utils::log_radix)
delegate!(fn ln1p(n: c_double) -> c_double = cmath::c_double_utils::ln1p)
delegate!(fn log10(n: c_double) -> c_double = cmath::c_double_utils::log10)
delegate!(fn log2(n: c_double) -> c_double = cmath::c_double_utils::log2)
delegate!(fn ilog_radix(n: c_double) -> c_int =
cmath::c_double_utils::ilog_radix)
delegate!(fn modf(n: c_double, iptr: &mut c_double) -> c_double =
cmath::c_double_utils::modf)
delegate!(fn pow(n: c_double, e: c_double) -> c_double =
cmath::c_double_utils::pow)
delegate!(fn round(n: c_double) -> c_double = cmath::c_double_utils::round)
delegate!(fn ldexp_radix(n: c_double, i: c_int) -> c_double =
cmath::c_double_utils::ldexp_radix)
delegate!(fn sin(n: c_double) -> c_double = cmath::c_double_utils::sin)
delegate!(fn sinh(n: c_double) -> c_double = cmath::c_double_utils::sinh)
delegate!(fn sqrt(n: c_double) -> c_double = cmath::c_double_utils::sqrt)
delegate!(fn tan(n: c_double) -> c_double = cmath::c_double_utils::tan)
delegate!(fn tanh(n: c_double) -> c_double = cmath::c_double_utils::tanh)
delegate!(fn tgamma(n: c_double) -> c_double = cmath::c_double_utils::tgamma)
delegate!(fn trunc(n: c_double) -> c_double = cmath::c_double_utils::trunc)
delegate!(fn j0(n: c_double) -> c_double = cmath::c_double_utils::j0)
delegate!(fn j1(n: c_double) -> c_double = cmath::c_double_utils::j1)
delegate!(fn jn(i: c_int, n: c_double) -> c_double =
cmath::c_double_utils::jn)
delegate!(fn y0(n: c_double) -> c_double = cmath::c_double_utils::y0)
delegate!(fn y1(n: c_double) -> c_double = cmath::c_double_utils::y1)
delegate!(fn yn(i: c_int, n: c_double) -> c_double =
cmath::c_double_utils::yn)
delegate!(
// intrinsics
fn abs(n: f64) -> f64 = intrinsics::fabsf64,
fn cos(n: f64) -> f64 = intrinsics::cosf64,
fn exp(n: f64) -> f64 = intrinsics::expf64,
fn exp2(n: f64) -> f64 = intrinsics::exp2f64,
fn floor(x: f64) -> f64 = intrinsics::floorf64,
fn ln(n: f64) -> f64 = intrinsics::logf64,
fn log10(n: f64) -> f64 = intrinsics::log10f64,
fn log2(n: f64) -> f64 = intrinsics::log2f64,
fn mul_add(a: f64, b: f64, c: f64) -> f64 = intrinsics::fmaf64,
fn pow(n: f64, e: f64) -> f64 = intrinsics::powf64,
fn powi(n: f64, e: c_int) -> f64 = intrinsics::powif64,
fn sin(n: f64) -> f64 = intrinsics::sinf64,
fn sqrt(n: f64) -> f64 = intrinsics::sqrtf64,
// LLVM 3.3 required to use intrinsics for these four
fn ceil(n: c_double) -> c_double = c_double_utils::ceil,
fn trunc(n: c_double) -> c_double = c_double_utils::trunc,
/*
fn ceil(n: f64) -> f64 = intrinsics::ceilf64,
fn trunc(n: f64) -> f64 = intrinsics::truncf64,
fn rint(n: c_double) -> c_double = intrinsics::rintf64,
fn nearbyint(n: c_double) -> c_double = intrinsics::nearbyintf64,
*/
// cmath
fn acos(n: c_double) -> c_double = c_double_utils::acos,
fn asin(n: c_double) -> c_double = c_double_utils::asin,
fn atan(n: c_double) -> c_double = c_double_utils::atan,
fn atan2(a: c_double, b: c_double) -> c_double = c_double_utils::atan2,
fn cbrt(n: c_double) -> c_double = c_double_utils::cbrt,
fn copysign(x: c_double, y: c_double) -> c_double = c_double_utils::copysign,
fn cosh(n: c_double) -> c_double = c_double_utils::cosh,
fn erf(n: c_double) -> c_double = c_double_utils::erf,
fn erfc(n: c_double) -> c_double = c_double_utils::erfc,
fn expm1(n: c_double) -> c_double = c_double_utils::expm1,
fn abs_sub(a: c_double, b: c_double) -> c_double = c_double_utils::abs_sub,
fn fmax(a: c_double, b: c_double) -> c_double = c_double_utils::fmax,
fn fmin(a: c_double, b: c_double) -> c_double = c_double_utils::fmin,
fn nextafter(x: c_double, y: c_double) -> c_double = c_double_utils::nextafter,
fn frexp(n: c_double, value: &mut c_int) -> c_double = c_double_utils::frexp,
fn hypot(x: c_double, y: c_double) -> c_double = c_double_utils::hypot,
fn ldexp(x: c_double, n: c_int) -> c_double = c_double_utils::ldexp,
fn lgamma(n: c_double, sign: &mut c_int) -> c_double = c_double_utils::lgamma,
fn log_radix(n: c_double) -> c_double = c_double_utils::log_radix,
fn ln1p(n: c_double) -> c_double = c_double_utils::ln1p,
fn ilog_radix(n: c_double) -> c_int = c_double_utils::ilog_radix,
fn modf(n: c_double, iptr: &mut c_double) -> c_double = c_double_utils::modf,
fn round(n: c_double) -> c_double = c_double_utils::round,
fn ldexp_radix(n: c_double, i: c_int) -> c_double = c_double_utils::ldexp_radix,
fn sinh(n: c_double) -> c_double = c_double_utils::sinh,
fn tan(n: c_double) -> c_double = c_double_utils::tan,
fn tanh(n: c_double) -> c_double = c_double_utils::tanh,
fn tgamma(n: c_double) -> c_double = c_double_utils::tgamma,
fn j0(n: c_double) -> c_double = c_double_utils::j0,
fn j1(n: c_double) -> c_double = c_double_utils::j1,
fn jn(i: c_int, n: c_double) -> c_double = c_double_utils::jn,
fn y0(n: c_double) -> c_double = c_double_utils::y0,
fn y1(n: c_double) -> c_double = c_double_utils::y1,
fn yn(i: c_int, n: c_double) -> c_double = c_double_utils::yn)
// FIXME (#1433): obtain these in a different way
@ -218,9 +226,6 @@ pub fn is_finite(x: f64) -> bool {
return !(is_NaN(x) || is_infinite(x));
}
/// Returns `x` rounded down
#[inline(always)]
pub fn floor(x: f64) -> f64 { unsafe { floorf64(x) } }
// FIXME (#1999): add is_normal, is_subnormal, and fpclassify

View File

@ -36,7 +36,7 @@ pub use f64::{acos, asin, atan2, cbrt, ceil, copysign, cosh, floor};
pub use f64::{erf, erfc, exp, expm1, exp2, abs_sub};
pub use f64::{mul_add, fmax, fmin, nextafter, frexp, hypot, ldexp};
pub use f64::{lgamma, ln, log_radix, ln1p, log10, log2, ilog_radix};
pub use f64::{modf, pow, round, sinh, tanh, tgamma, trunc};
pub use f64::{modf, pow, powi, round, sinh, tanh, tgamma, trunc};
pub use f64::signbit;
pub use f64::{j0, j1, jn, y0, y1, yn};