librustc: use LLVM intrinsics for several floating point operations.

huonw · huonw · commit d9c54f83877c · 2013-04-21T01:40:48.000+10:00
Achieves at least 5x speed up for some functions!

Also, reorganise the delegation code so that the delegated function wrappers
have the #[inline(always)] annotation, and reduce the repetition of
delegate!(..).
diff --git a/src/libcore/num/f32.rs b/src/libcore/num/f32.rs
@@ -10,12 +10,9 @@
 
 //! Operations and constants for `f32`
 
-use cmath;
-use libc::{c_float, c_int};
 use num::strconv;
 use num;
 use option::Option;
-use unstable::intrinsics::floorf32;
 use from_str;
 use to_str;
 
@@ -24,79 +21,93 @@ use to_str;
 
 pub use cmath::c_float_targ_consts::*;
 
+// An inner module is required to get the #[inline(always)] attribute on the
+// functions.
+pub use self::delegated::*;
+
 macro_rules! delegate(
     (
-        fn $name:ident(
-            $(
-                $arg:ident : $arg_ty:ty
-            ),*
-        ) -> $rv:ty = $bound_name:path
+        $(
+            fn $name:ident(
+                $(
+                    $arg:ident : $arg_ty:ty
+                ),*
+            ) -> $rv:ty = $bound_name:path
+        ),*
     ) => (
-        pub fn $name($( $arg : $arg_ty ),*) -> $rv {
-            unsafe {
-                $bound_name($( $arg ),*)
-            }
+        mod delegated {
+            use cmath::c_float_utils;
+            use libc::{c_float, c_int};
+            use unstable::intrinsics;
+
+            $(
+                #[inline(always)]
+                pub fn $name($( $arg : $arg_ty ),*) -> $rv {
+                    unsafe {
+                        $bound_name($( $arg ),*)
+                    }
+                }
+            )*
         }
     )
 )
 
-delegate!(fn acos(n: c_float) -> c_float = cmath::c_float_utils::acos)
-delegate!(fn asin(n: c_float) -> c_float = cmath::c_float_utils::asin)
-delegate!(fn atan(n: c_float) -> c_float = cmath::c_float_utils::atan)
-delegate!(fn atan2(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::atan2)
-delegate!(fn cbrt(n: c_float) -> c_float = cmath::c_float_utils::cbrt)
-delegate!(fn ceil(n: c_float) -> c_float = cmath::c_float_utils::ceil)
-delegate!(fn copysign(x: c_float, y: c_float) -> c_float =
-    cmath::c_float_utils::copysign)
-delegate!(fn cos(n: c_float) -> c_float = cmath::c_float_utils::cos)
-delegate!(fn cosh(n: c_float) -> c_float = cmath::c_float_utils::cosh)
-delegate!(fn erf(n: c_float) -> c_float = cmath::c_float_utils::erf)
-delegate!(fn erfc(n: c_float) -> c_float = cmath::c_float_utils::erfc)
-delegate!(fn exp(n: c_float) -> c_float = cmath::c_float_utils::exp)
-delegate!(fn expm1(n: c_float) -> c_float = cmath::c_float_utils::expm1)
-delegate!(fn exp2(n: c_float) -> c_float = cmath::c_float_utils::exp2)
-delegate!(fn abs(n: c_float) -> c_float = cmath::c_float_utils::abs)
-delegate!(fn abs_sub(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::abs_sub)
-delegate!(fn mul_add(a: c_float, b: c_float, c: c_float) -> c_float =
-    cmath::c_float_utils::mul_add)
-delegate!(fn fmax(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::fmax)
-delegate!(fn fmin(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::fmin)
-delegate!(fn nextafter(x: c_float, y: c_float) -> c_float =
-    cmath::c_float_utils::nextafter)
-delegate!(fn frexp(n: c_float, value: &mut c_int) -> c_float =
-    cmath::c_float_utils::frexp)
-delegate!(fn hypot(x: c_float, y: c_float) -> c_float =
-    cmath::c_float_utils::hypot)
-delegate!(fn ldexp(x: c_float, n: c_int) -> c_float =
-    cmath::c_float_utils::ldexp)
-delegate!(fn lgamma(n: c_float, sign: &mut c_int) -> c_float =
-    cmath::c_float_utils::lgamma)
-delegate!(fn ln(n: c_float) -> c_float = cmath::c_float_utils::ln)
-delegate!(fn log_radix(n: c_float) -> c_float =
-    cmath::c_float_utils::log_radix)
-delegate!(fn ln1p(n: c_float) -> c_float = cmath::c_float_utils::ln1p)
-delegate!(fn log10(n: c_float) -> c_float = cmath::c_float_utils::log10)
-delegate!(fn log2(n: c_float) -> c_float = cmath::c_float_utils::log2)
-delegate!(fn ilog_radix(n: c_float) -> c_int =
-    cmath::c_float_utils::ilog_radix)
-delegate!(fn modf(n: c_float, iptr: &mut c_float) -> c_float =
-    cmath::c_float_utils::modf)
-delegate!(fn pow(n: c_float, e: c_float) -> c_float =
-    cmath::c_float_utils::pow)
-delegate!(fn round(n: c_float) -> c_float = cmath::c_float_utils::round)
-delegate!(fn ldexp_radix(n: c_float, i: c_int) -> c_float =
-    cmath::c_float_utils::ldexp_radix)
-delegate!(fn sin(n: c_float) -> c_float = cmath::c_float_utils::sin)
-delegate!(fn sinh(n: c_float) -> c_float = cmath::c_float_utils::sinh)
-delegate!(fn sqrt(n: c_float) -> c_float = cmath::c_float_utils::sqrt)
-delegate!(fn tan(n: c_float) -> c_float = cmath::c_float_utils::tan)
-delegate!(fn tanh(n: c_float) -> c_float = cmath::c_float_utils::tanh)
-delegate!(fn tgamma(n: c_float) -> c_float = cmath::c_float_utils::tgamma)
-delegate!(fn trunc(n: c_float) -> c_float = cmath::c_float_utils::trunc)
+delegate!(
+    // intrinsics
+    fn abs(n: f32) -> f32 = intrinsics::fabsf32,
+    fn cos(n: f32) -> f32 = intrinsics::cosf32,
+    fn exp(n: f32) -> f32 = intrinsics::expf32,
+    fn exp2(n: f32) -> f32 = intrinsics::exp2f32,
+    fn floor(x: f32) -> f32 = intrinsics::floorf32,
+    fn ln(n: f32) -> f32 = intrinsics::logf32,
+    fn log10(n: f32) -> f32 = intrinsics::log10f32,
+    fn log2(n: f32) -> f32 = intrinsics::log2f32,
+    fn mul_add(a: f32, b: f32, c: f32) -> f32 = intrinsics::fmaf32,
+    fn pow(n: f32, e: f32) -> f32 = intrinsics::powf32,
+    fn powi(n: f32, e: c_int) -> f32 = intrinsics::powif32,
+    fn sin(n: f32) -> f32 = intrinsics::sinf32,
+    fn sqrt(n: f32) -> f32 = intrinsics::sqrtf32,
+
+    // LLVM 3.3 required to use intrinsics for these four
+    fn ceil(n: c_float) -> c_float = c_float_utils::ceil,
+    fn trunc(n: c_float) -> c_float = c_float_utils::trunc,
+    /*
+    fn ceil(n: f32) -> f32 = intrinsics::ceilf32,
+    fn trunc(n: f32) -> f32 = intrinsics::truncf32,
+    fn rint(n: f32) -> f32 = intrinsics::rintf32,
+    fn nearbyint(n: f32) -> f32 = intrinsics::nearbyintf32,
+    */
+
+    // cmath
+    fn acos(n: c_float) -> c_float = c_float_utils::acos,
+    fn asin(n: c_float) -> c_float = c_float_utils::asin,
+    fn atan(n: c_float) -> c_float = c_float_utils::atan,
+    fn atan2(a: c_float, b: c_float) -> c_float = c_float_utils::atan2,
+    fn cbrt(n: c_float) -> c_float = c_float_utils::cbrt,
+    fn copysign(x: c_float, y: c_float) -> c_float = c_float_utils::copysign,
+    fn cosh(n: c_float) -> c_float = c_float_utils::cosh,
+    fn erf(n: c_float) -> c_float = c_float_utils::erf,
+    fn erfc(n: c_float) -> c_float = c_float_utils::erfc,
+    fn expm1(n: c_float) -> c_float = c_float_utils::expm1,
+    fn abs_sub(a: c_float, b: c_float) -> c_float = c_float_utils::abs_sub,
+    fn fmax(a: c_float, b: c_float) -> c_float = c_float_utils::fmax,
+    fn fmin(a: c_float, b: c_float) -> c_float = c_float_utils::fmin,
+    fn nextafter(x: c_float, y: c_float) -> c_float = c_float_utils::nextafter,
+    fn frexp(n: c_float, value: &mut c_int) -> c_float = c_float_utils::frexp,
+    fn hypot(x: c_float, y: c_float) -> c_float = c_float_utils::hypot,
+    fn ldexp(x: c_float, n: c_int) -> c_float = c_float_utils::ldexp,
+    fn lgamma(n: c_float, sign: &mut c_int) -> c_float = c_float_utils::lgamma,
+    fn log_radix(n: c_float) -> c_float = c_float_utils::log_radix,
+    fn ln1p(n: c_float) -> c_float = c_float_utils::ln1p,
+    fn ilog_radix(n: c_float) -> c_int = c_float_utils::ilog_radix,
+    fn modf(n: c_float, iptr: &mut c_float) -> c_float = c_float_utils::modf,
+    fn round(n: c_float) -> c_float = c_float_utils::round,
+    fn ldexp_radix(n: c_float, i: c_int) -> c_float = c_float_utils::ldexp_radix,
+    fn sinh(n: c_float) -> c_float = c_float_utils::sinh,
+    fn tan(n: c_float) -> c_float = c_float_utils::tan,
+    fn tanh(n: c_float) -> c_float = c_float_utils::tanh,
+    fn tgamma(n: c_float) -> c_float = c_float_utils::tgamma)
+
 
 // These are not defined inside consts:: for consistency with
 // the integer types
@@ -143,9 +154,6 @@ pub fn ge(x: f32, y: f32) -> bool { return x >= y; }
 #[inline(always)]
 pub fn gt(x: f32, y: f32) -> bool { return x > y; }
 
-/// Returns `x` rounded down
-#[inline(always)]
-pub fn floor(x: f32) -> f32 { unsafe { floorf32(x) } }
 
 // FIXME (#1999): replace the predicates below with llvm intrinsics or
 // calls to the libmath macros in the rust runtime for performance.
diff --git a/src/libcore/num/f64.rs b/src/libcore/num/f64.rs
@@ -10,12 +10,9 @@
 
 //! Operations and constants for `f64`
 
-use cmath;
-use libc::{c_double, c_int};
 use num::strconv;
 use num;
 use option::Option;
-use unstable::intrinsics::floorf64;
 use to_str;
 use from_str;
 
@@ -25,87 +22,98 @@ use from_str;
 pub use cmath::c_double_targ_consts::*;
 pub use cmp::{min, max};
 
+// An inner module is required to get the #[inline(always)] attribute on the
+// functions.
+pub use self::delegated::*;
+
 macro_rules! delegate(
     (
-        fn $name:ident(
-            $(
-                $arg:ident : $arg_ty:ty
-            ),*
-        ) -> $rv:ty = $bound_name:path
+        $(
+            fn $name:ident(
+                $(
+                    $arg:ident : $arg_ty:ty
+                ),*
+            ) -> $rv:ty = $bound_name:path
+        ),*
     ) => (
-        pub fn $name($( $arg : $arg_ty ),*) -> $rv {
-            unsafe {
-                $bound_name($( $arg ),*)
-            }
+        mod delegated {
+            use cmath::c_double_utils;
+            use libc::{c_double, c_int};
+            use unstable::intrinsics;
+
+            $(
+                #[inline(always)]
+                pub fn $name($( $arg : $arg_ty ),*) -> $rv {
+                    unsafe {
+                        $bound_name($( $arg ),*)
+                    }
+                }
+            )*
         }
     )
 )
 
-delegate!(fn acos(n: c_double) -> c_double = cmath::c_double_utils::acos)
-delegate!(fn asin(n: c_double) -> c_double = cmath::c_double_utils::asin)
-delegate!(fn atan(n: c_double) -> c_double = cmath::c_double_utils::atan)
-delegate!(fn atan2(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::atan2)
-delegate!(fn cbrt(n: c_double) -> c_double = cmath::c_double_utils::cbrt)
-delegate!(fn ceil(n: c_double) -> c_double = cmath::c_double_utils::ceil)
-delegate!(fn copysign(x: c_double, y: c_double) -> c_double =
-    cmath::c_double_utils::copysign)
-delegate!(fn cos(n: c_double) -> c_double = cmath::c_double_utils::cos)
-delegate!(fn cosh(n: c_double) -> c_double = cmath::c_double_utils::cosh)
-delegate!(fn erf(n: c_double) -> c_double = cmath::c_double_utils::erf)
-delegate!(fn erfc(n: c_double) -> c_double = cmath::c_double_utils::erfc)
-delegate!(fn exp(n: c_double) -> c_double = cmath::c_double_utils::exp)
-delegate!(fn expm1(n: c_double) -> c_double = cmath::c_double_utils::expm1)
-delegate!(fn exp2(n: c_double) -> c_double = cmath::c_double_utils::exp2)
-delegate!(fn abs(n: c_double) -> c_double = cmath::c_double_utils::abs)
-delegate!(fn abs_sub(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::abs_sub)
-delegate!(fn mul_add(a: c_double, b: c_double, c: c_double) -> c_double =
-    cmath::c_double_utils::mul_add)
-delegate!(fn fmax(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::fmax)
-delegate!(fn fmin(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::fmin)
-delegate!(fn nextafter(x: c_double, y: c_double) -> c_double =
-    cmath::c_double_utils::nextafter)
-delegate!(fn frexp(n: c_double, value: &mut c_int) -> c_double =
-    cmath::c_double_utils::frexp)
-delegate!(fn hypot(x: c_double, y: c_double) -> c_double =
-    cmath::c_double_utils::hypot)
-delegate!(fn ldexp(x: c_double, n: c_int) -> c_double =
-    cmath::c_double_utils::ldexp)
-delegate!(fn lgamma(n: c_double, sign: &mut c_int) -> c_double =
-    cmath::c_double_utils::lgamma)
-delegate!(fn ln(n: c_double) -> c_double = cmath::c_double_utils::ln)
-delegate!(fn log_radix(n: c_double) -> c_double =
-    cmath::c_double_utils::log_radix)
-delegate!(fn ln1p(n: c_double) -> c_double = cmath::c_double_utils::ln1p)
-delegate!(fn log10(n: c_double) -> c_double = cmath::c_double_utils::log10)
-delegate!(fn log2(n: c_double) -> c_double = cmath::c_double_utils::log2)
-delegate!(fn ilog_radix(n: c_double) -> c_int =
-    cmath::c_double_utils::ilog_radix)
-delegate!(fn modf(n: c_double, iptr: &mut c_double) -> c_double =
-    cmath::c_double_utils::modf)
-delegate!(fn pow(n: c_double, e: c_double) -> c_double =
-    cmath::c_double_utils::pow)
-delegate!(fn round(n: c_double) -> c_double = cmath::c_double_utils::round)
-delegate!(fn ldexp_radix(n: c_double, i: c_int) -> c_double =
-    cmath::c_double_utils::ldexp_radix)
-delegate!(fn sin(n: c_double) -> c_double = cmath::c_double_utils::sin)
-delegate!(fn sinh(n: c_double) -> c_double = cmath::c_double_utils::sinh)
-delegate!(fn sqrt(n: c_double) -> c_double = cmath::c_double_utils::sqrt)
-delegate!(fn tan(n: c_double) -> c_double = cmath::c_double_utils::tan)
-delegate!(fn tanh(n: c_double) -> c_double = cmath::c_double_utils::tanh)
-delegate!(fn tgamma(n: c_double) -> c_double = cmath::c_double_utils::tgamma)
-delegate!(fn trunc(n: c_double) -> c_double = cmath::c_double_utils::trunc)
-delegate!(fn j0(n: c_double) -> c_double = cmath::c_double_utils::j0)
-delegate!(fn j1(n: c_double) -> c_double = cmath::c_double_utils::j1)
-delegate!(fn jn(i: c_int, n: c_double) -> c_double =
-    cmath::c_double_utils::jn)
-delegate!(fn y0(n: c_double) -> c_double = cmath::c_double_utils::y0)
-delegate!(fn y1(n: c_double) -> c_double = cmath::c_double_utils::y1)
-delegate!(fn yn(i: c_int, n: c_double) -> c_double =
-    cmath::c_double_utils::yn)
+delegate!(
+    // intrinsics
+    fn abs(n: f64) -> f64 = intrinsics::fabsf64,
+    fn cos(n: f64) -> f64 = intrinsics::cosf64,
+    fn exp(n: f64) -> f64 = intrinsics::expf64,
+    fn exp2(n: f64) -> f64 = intrinsics::exp2f64,
+    fn floor(x: f64) -> f64 = intrinsics::floorf64,
+    fn ln(n: f64) -> f64 = intrinsics::logf64,
+    fn log10(n: f64) -> f64 = intrinsics::log10f64,
+    fn log2(n: f64) -> f64 = intrinsics::log2f64,
+    fn mul_add(a: f64, b: f64, c: f64) -> f64 = intrinsics::fmaf64,
+    fn pow(n: f64, e: f64) -> f64 = intrinsics::powf64,
+    fn powi(n: f64, e: c_int) -> f64 = intrinsics::powif64,
+    fn sin(n: f64) -> f64 = intrinsics::sinf64,
+    fn sqrt(n: f64) -> f64 = intrinsics::sqrtf64,
+
+    // LLVM 3.3 required to use intrinsics for these four
+    fn ceil(n: c_double) -> c_double = c_double_utils::ceil,
+    fn trunc(n: c_double) -> c_double = c_double_utils::trunc,
+    /*
+    fn ceil(n: f64) -> f64 = intrinsics::ceilf64,
+    fn trunc(n: f64) -> f64 = intrinsics::truncf64,
+    fn rint(n: c_double) -> c_double = intrinsics::rintf64,
+    fn nearbyint(n: c_double) -> c_double = intrinsics::nearbyintf64,
+    */
+
+    // cmath
+    fn acos(n: c_double) -> c_double = c_double_utils::acos,
+    fn asin(n: c_double) -> c_double = c_double_utils::asin,
+    fn atan(n: c_double) -> c_double = c_double_utils::atan,
+    fn atan2(a: c_double, b: c_double) -> c_double = c_double_utils::atan2,
+    fn cbrt(n: c_double) -> c_double = c_double_utils::cbrt,
+    fn copysign(x: c_double, y: c_double) -> c_double = c_double_utils::copysign,
+    fn cosh(n: c_double) -> c_double = c_double_utils::cosh,
+    fn erf(n: c_double) -> c_double = c_double_utils::erf,
+    fn erfc(n: c_double) -> c_double = c_double_utils::erfc,
+    fn expm1(n: c_double) -> c_double = c_double_utils::expm1,
+    fn abs_sub(a: c_double, b: c_double) -> c_double = c_double_utils::abs_sub,
+    fn fmax(a: c_double, b: c_double) -> c_double = c_double_utils::fmax,
+    fn fmin(a: c_double, b: c_double) -> c_double = c_double_utils::fmin,
+    fn nextafter(x: c_double, y: c_double) -> c_double = c_double_utils::nextafter,
+    fn frexp(n: c_double, value: &mut c_int) -> c_double = c_double_utils::frexp,
+    fn hypot(x: c_double, y: c_double) -> c_double = c_double_utils::hypot,
+    fn ldexp(x: c_double, n: c_int) -> c_double = c_double_utils::ldexp,
+    fn lgamma(n: c_double, sign: &mut c_int) -> c_double = c_double_utils::lgamma,
+    fn log_radix(n: c_double) -> c_double = c_double_utils::log_radix,
+    fn ln1p(n: c_double) -> c_double = c_double_utils::ln1p,
+    fn ilog_radix(n: c_double) -> c_int = c_double_utils::ilog_radix,
+    fn modf(n: c_double, iptr: &mut c_double) -> c_double = c_double_utils::modf,
+    fn round(n: c_double) -> c_double = c_double_utils::round,
+    fn ldexp_radix(n: c_double, i: c_int) -> c_double = c_double_utils::ldexp_radix,
+    fn sinh(n: c_double) -> c_double = c_double_utils::sinh,
+    fn tan(n: c_double) -> c_double = c_double_utils::tan,
+    fn tanh(n: c_double) -> c_double = c_double_utils::tanh,
+    fn tgamma(n: c_double) -> c_double = c_double_utils::tgamma,
+    fn j0(n: c_double) -> c_double = c_double_utils::j0,
+    fn j1(n: c_double) -> c_double = c_double_utils::j1,
+    fn jn(i: c_int, n: c_double) -> c_double = c_double_utils::jn,
+    fn y0(n: c_double) -> c_double = c_double_utils::y0,
+    fn y1(n: c_double) -> c_double = c_double_utils::y1,
+    fn yn(i: c_int, n: c_double) -> c_double = c_double_utils::yn)
 
 // FIXME (#1433): obtain these in a different way
 
@@ -218,9 +226,6 @@ pub fn is_finite(x: f64) -> bool {
     return !(is_NaN(x) || is_infinite(x));
 }
 
-/// Returns `x` rounded down
-#[inline(always)]
-pub fn floor(x: f64) -> f64 { unsafe { floorf64(x) } }
 
 // FIXME (#1999): add is_normal, is_subnormal, and fpclassify
 
diff --git a/src/libcore/num/float.rs b/src/libcore/num/float.rs
@@ -36,7 +36,7 @@ pub use f64::{acos, asin, atan2, cbrt, ceil, copysign, cosh, floor};
 pub use f64::{erf, erfc, exp, expm1, exp2, abs_sub};
 pub use f64::{mul_add, fmax, fmin, nextafter, frexp, hypot, ldexp};
 pub use f64::{lgamma, ln, log_radix, ln1p, log10, log2, ilog_radix};
-pub use f64::{modf, pow, round, sinh, tanh, tgamma, trunc};
+pub use f64::{modf, pow, powi, round, sinh, tanh, tgamma, trunc};
 pub use f64::signbit;
 pub use f64::{j0, j1, jn, y0, y1, yn};