Skip to content

Commit d9c54f8

Browse files
committed
librustc: use LLVM intrinsics for several floating point operations.
Achieves at least a 5x speedup for some functions! Also, reorganise the delegation code so that the delegated function wrappers have the #[inline(always)] annotation, and reduce the repetition of delegate!(..).
1 parent 93c0888 commit d9c54f8

File tree

3 files changed

+166
-153
lines changed

3 files changed

+166
-153
lines changed

src/libcore/num/f32.rs

+80-72
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,9 @@
1010

1111
//! Operations and constants for `f32`
1212
13-
use cmath;
14-
use libc::{c_float, c_int};
1513
use num::strconv;
1614
use num;
1715
use option::Option;
18-
use unstable::intrinsics::floorf32;
1916
use from_str;
2017
use to_str;
2118

@@ -24,79 +21,93 @@ use to_str;
2421

2522
pub use cmath::c_float_targ_consts::*;
2623

24+
// An inner module is required to get the #[inline(always)] attribute on the
25+
// functions.
26+
pub use self::delegated::*;
27+
2728
macro_rules! delegate(
2829
(
29-
fn $name:ident(
30-
$(
31-
$arg:ident : $arg_ty:ty
32-
),*
33-
) -> $rv:ty = $bound_name:path
30+
$(
31+
fn $name:ident(
32+
$(
33+
$arg:ident : $arg_ty:ty
34+
),*
35+
) -> $rv:ty = $bound_name:path
36+
),*
3437
) => (
35-
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
36-
unsafe {
37-
$bound_name($( $arg ),*)
38-
}
38+
mod delegated {
39+
use cmath::c_float_utils;
40+
use libc::{c_float, c_int};
41+
use unstable::intrinsics;
42+
43+
$(
44+
#[inline(always)]
45+
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
46+
unsafe {
47+
$bound_name($( $arg ),*)
48+
}
49+
}
50+
)*
3951
}
4052
)
4153
)
4254

43-
delegate!(fn acos(n: c_float) -> c_float = cmath::c_float_utils::acos)
44-
delegate!(fn asin(n: c_float) -> c_float = cmath::c_float_utils::asin)
45-
delegate!(fn atan(n: c_float) -> c_float = cmath::c_float_utils::atan)
46-
delegate!(fn atan2(a: c_float, b: c_float) -> c_float =
47-
cmath::c_float_utils::atan2)
48-
delegate!(fn cbrt(n: c_float) -> c_float = cmath::c_float_utils::cbrt)
49-
delegate!(fn ceil(n: c_float) -> c_float = cmath::c_float_utils::ceil)
50-
delegate!(fn copysign(x: c_float, y: c_float) -> c_float =
51-
cmath::c_float_utils::copysign)
52-
delegate!(fn cos(n: c_float) -> c_float = cmath::c_float_utils::cos)
53-
delegate!(fn cosh(n: c_float) -> c_float = cmath::c_float_utils::cosh)
54-
delegate!(fn erf(n: c_float) -> c_float = cmath::c_float_utils::erf)
55-
delegate!(fn erfc(n: c_float) -> c_float = cmath::c_float_utils::erfc)
56-
delegate!(fn exp(n: c_float) -> c_float = cmath::c_float_utils::exp)
57-
delegate!(fn expm1(n: c_float) -> c_float = cmath::c_float_utils::expm1)
58-
delegate!(fn exp2(n: c_float) -> c_float = cmath::c_float_utils::exp2)
59-
delegate!(fn abs(n: c_float) -> c_float = cmath::c_float_utils::abs)
60-
delegate!(fn abs_sub(a: c_float, b: c_float) -> c_float =
61-
cmath::c_float_utils::abs_sub)
62-
delegate!(fn mul_add(a: c_float, b: c_float, c: c_float) -> c_float =
63-
cmath::c_float_utils::mul_add)
64-
delegate!(fn fmax(a: c_float, b: c_float) -> c_float =
65-
cmath::c_float_utils::fmax)
66-
delegate!(fn fmin(a: c_float, b: c_float) -> c_float =
67-
cmath::c_float_utils::fmin)
68-
delegate!(fn nextafter(x: c_float, y: c_float) -> c_float =
69-
cmath::c_float_utils::nextafter)
70-
delegate!(fn frexp(n: c_float, value: &mut c_int) -> c_float =
71-
cmath::c_float_utils::frexp)
72-
delegate!(fn hypot(x: c_float, y: c_float) -> c_float =
73-
cmath::c_float_utils::hypot)
74-
delegate!(fn ldexp(x: c_float, n: c_int) -> c_float =
75-
cmath::c_float_utils::ldexp)
76-
delegate!(fn lgamma(n: c_float, sign: &mut c_int) -> c_float =
77-
cmath::c_float_utils::lgamma)
78-
delegate!(fn ln(n: c_float) -> c_float = cmath::c_float_utils::ln)
79-
delegate!(fn log_radix(n: c_float) -> c_float =
80-
cmath::c_float_utils::log_radix)
81-
delegate!(fn ln1p(n: c_float) -> c_float = cmath::c_float_utils::ln1p)
82-
delegate!(fn log10(n: c_float) -> c_float = cmath::c_float_utils::log10)
83-
delegate!(fn log2(n: c_float) -> c_float = cmath::c_float_utils::log2)
84-
delegate!(fn ilog_radix(n: c_float) -> c_int =
85-
cmath::c_float_utils::ilog_radix)
86-
delegate!(fn modf(n: c_float, iptr: &mut c_float) -> c_float =
87-
cmath::c_float_utils::modf)
88-
delegate!(fn pow(n: c_float, e: c_float) -> c_float =
89-
cmath::c_float_utils::pow)
90-
delegate!(fn round(n: c_float) -> c_float = cmath::c_float_utils::round)
91-
delegate!(fn ldexp_radix(n: c_float, i: c_int) -> c_float =
92-
cmath::c_float_utils::ldexp_radix)
93-
delegate!(fn sin(n: c_float) -> c_float = cmath::c_float_utils::sin)
94-
delegate!(fn sinh(n: c_float) -> c_float = cmath::c_float_utils::sinh)
95-
delegate!(fn sqrt(n: c_float) -> c_float = cmath::c_float_utils::sqrt)
96-
delegate!(fn tan(n: c_float) -> c_float = cmath::c_float_utils::tan)
97-
delegate!(fn tanh(n: c_float) -> c_float = cmath::c_float_utils::tanh)
98-
delegate!(fn tgamma(n: c_float) -> c_float = cmath::c_float_utils::tgamma)
99-
delegate!(fn trunc(n: c_float) -> c_float = cmath::c_float_utils::trunc)
55+
delegate!(
56+
// intrinsics
57+
fn abs(n: f32) -> f32 = intrinsics::fabsf32,
58+
fn cos(n: f32) -> f32 = intrinsics::cosf32,
59+
fn exp(n: f32) -> f32 = intrinsics::expf32,
60+
fn exp2(n: f32) -> f32 = intrinsics::exp2f32,
61+
fn floor(x: f32) -> f32 = intrinsics::floorf32,
62+
fn ln(n: f32) -> f32 = intrinsics::logf32,
63+
fn log10(n: f32) -> f32 = intrinsics::log10f32,
64+
fn log2(n: f32) -> f32 = intrinsics::log2f32,
65+
fn mul_add(a: f32, b: f32, c: f32) -> f32 = intrinsics::fmaf32,
66+
fn pow(n: f32, e: f32) -> f32 = intrinsics::powf32,
67+
fn powi(n: f32, e: c_int) -> f32 = intrinsics::powif32,
68+
fn sin(n: f32) -> f32 = intrinsics::sinf32,
69+
fn sqrt(n: f32) -> f32 = intrinsics::sqrtf32,
70+
71+
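// LLVM 3.3 is required to use intrinsics for ceil, trunc, rint and nearbyint, so ceil and trunc delegate to cmath for now and the intrinsic versions are commented out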
72+
fn ceil(n: c_float) -> c_float = c_float_utils::ceil,
73+
fn trunc(n: c_float) -> c_float = c_float_utils::trunc,
74+
/*
75+
fn ceil(n: f32) -> f32 = intrinsics::ceilf32,
76+
fn trunc(n: f32) -> f32 = intrinsics::truncf32,
77+
fn rint(n: f32) -> f32 = intrinsics::rintf32,
78+
fn nearbyint(n: f32) -> f32 = intrinsics::nearbyintf32,
79+
*/
80+
81+
// cmath
82+
fn acos(n: c_float) -> c_float = c_float_utils::acos,
83+
fn asin(n: c_float) -> c_float = c_float_utils::asin,
84+
fn atan(n: c_float) -> c_float = c_float_utils::atan,
85+
fn atan2(a: c_float, b: c_float) -> c_float = c_float_utils::atan2,
86+
fn cbrt(n: c_float) -> c_float = c_float_utils::cbrt,
87+
fn copysign(x: c_float, y: c_float) -> c_float = c_float_utils::copysign,
88+
fn cosh(n: c_float) -> c_float = c_float_utils::cosh,
89+
fn erf(n: c_float) -> c_float = c_float_utils::erf,
90+
fn erfc(n: c_float) -> c_float = c_float_utils::erfc,
91+
fn expm1(n: c_float) -> c_float = c_float_utils::expm1,
92+
fn abs_sub(a: c_float, b: c_float) -> c_float = c_float_utils::abs_sub,
93+
fn fmax(a: c_float, b: c_float) -> c_float = c_float_utils::fmax,
94+
fn fmin(a: c_float, b: c_float) -> c_float = c_float_utils::fmin,
95+
fn nextafter(x: c_float, y: c_float) -> c_float = c_float_utils::nextafter,
96+
fn frexp(n: c_float, value: &mut c_int) -> c_float = c_float_utils::frexp,
97+
fn hypot(x: c_float, y: c_float) -> c_float = c_float_utils::hypot,
98+
fn ldexp(x: c_float, n: c_int) -> c_float = c_float_utils::ldexp,
99+
fn lgamma(n: c_float, sign: &mut c_int) -> c_float = c_float_utils::lgamma,
100+
fn log_radix(n: c_float) -> c_float = c_float_utils::log_radix,
101+
fn ln1p(n: c_float) -> c_float = c_float_utils::ln1p,
102+
fn ilog_radix(n: c_float) -> c_int = c_float_utils::ilog_radix,
103+
fn modf(n: c_float, iptr: &mut c_float) -> c_float = c_float_utils::modf,
104+
fn round(n: c_float) -> c_float = c_float_utils::round,
105+
fn ldexp_radix(n: c_float, i: c_int) -> c_float = c_float_utils::ldexp_radix,
106+
fn sinh(n: c_float) -> c_float = c_float_utils::sinh,
107+
fn tan(n: c_float) -> c_float = c_float_utils::tan,
108+
fn tanh(n: c_float) -> c_float = c_float_utils::tanh,
109+
fn tgamma(n: c_float) -> c_float = c_float_utils::tgamma)
110+
100111

101112
// These are not defined inside consts:: for consistency with
102113
// the integer types
@@ -143,9 +154,6 @@ pub fn ge(x: f32, y: f32) -> bool { return x >= y; }
143154
#[inline(always)]
144155
pub fn gt(x: f32, y: f32) -> bool { return x > y; }
145156

146-
/// Returns `x` rounded down
147-
#[inline(always)]
148-
pub fn floor(x: f32) -> f32 { unsafe { floorf32(x) } }
149157

150158
// FIXME (#1999): replace the predicates below with llvm intrinsics or
151159
// calls to the libmath macros in the rust runtime for performance.

src/libcore/num/f64.rs

+85-80
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,9 @@
1010

1111
//! Operations and constants for `f64`
1212
13-
use cmath;
14-
use libc::{c_double, c_int};
1513
use num::strconv;
1614
use num;
1715
use option::Option;
18-
use unstable::intrinsics::floorf64;
1916
use to_str;
2017
use from_str;
2118

@@ -25,87 +22,98 @@ use from_str;
2522
pub use cmath::c_double_targ_consts::*;
2623
pub use cmp::{min, max};
2724

25+
// An inner module is required to get the #[inline(always)] attribute on the
26+
// functions.
27+
pub use self::delegated::*;
28+
2829
macro_rules! delegate(
2930
(
30-
fn $name:ident(
31-
$(
32-
$arg:ident : $arg_ty:ty
33-
),*
34-
) -> $rv:ty = $bound_name:path
31+
$(
32+
fn $name:ident(
33+
$(
34+
$arg:ident : $arg_ty:ty
35+
),*
36+
) -> $rv:ty = $bound_name:path
37+
),*
3538
) => (
36-
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
37-
unsafe {
38-
$bound_name($( $arg ),*)
39-
}
39+
mod delegated {
40+
use cmath::c_double_utils;
41+
use libc::{c_double, c_int};
42+
use unstable::intrinsics;
43+
44+
$(
45+
#[inline(always)]
46+
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
47+
unsafe {
48+
$bound_name($( $arg ),*)
49+
}
50+
}
51+
)*
4052
}
4153
)
4254
)
4355

44-
delegate!(fn acos(n: c_double) -> c_double = cmath::c_double_utils::acos)
45-
delegate!(fn asin(n: c_double) -> c_double = cmath::c_double_utils::asin)
46-
delegate!(fn atan(n: c_double) -> c_double = cmath::c_double_utils::atan)
47-
delegate!(fn atan2(a: c_double, b: c_double) -> c_double =
48-
cmath::c_double_utils::atan2)
49-
delegate!(fn cbrt(n: c_double) -> c_double = cmath::c_double_utils::cbrt)
50-
delegate!(fn ceil(n: c_double) -> c_double = cmath::c_double_utils::ceil)
51-
delegate!(fn copysign(x: c_double, y: c_double) -> c_double =
52-
cmath::c_double_utils::copysign)
53-
delegate!(fn cos(n: c_double) -> c_double = cmath::c_double_utils::cos)
54-
delegate!(fn cosh(n: c_double) -> c_double = cmath::c_double_utils::cosh)
55-
delegate!(fn erf(n: c_double) -> c_double = cmath::c_double_utils::erf)
56-
delegate!(fn erfc(n: c_double) -> c_double = cmath::c_double_utils::erfc)
57-
delegate!(fn exp(n: c_double) -> c_double = cmath::c_double_utils::exp)
58-
delegate!(fn expm1(n: c_double) -> c_double = cmath::c_double_utils::expm1)
59-
delegate!(fn exp2(n: c_double) -> c_double = cmath::c_double_utils::exp2)
60-
delegate!(fn abs(n: c_double) -> c_double = cmath::c_double_utils::abs)
61-
delegate!(fn abs_sub(a: c_double, b: c_double) -> c_double =
62-
cmath::c_double_utils::abs_sub)
63-
delegate!(fn mul_add(a: c_double, b: c_double, c: c_double) -> c_double =
64-
cmath::c_double_utils::mul_add)
65-
delegate!(fn fmax(a: c_double, b: c_double) -> c_double =
66-
cmath::c_double_utils::fmax)
67-
delegate!(fn fmin(a: c_double, b: c_double) -> c_double =
68-
cmath::c_double_utils::fmin)
69-
delegate!(fn nextafter(x: c_double, y: c_double) -> c_double =
70-
cmath::c_double_utils::nextafter)
71-
delegate!(fn frexp(n: c_double, value: &mut c_int) -> c_double =
72-
cmath::c_double_utils::frexp)
73-
delegate!(fn hypot(x: c_double, y: c_double) -> c_double =
74-
cmath::c_double_utils::hypot)
75-
delegate!(fn ldexp(x: c_double, n: c_int) -> c_double =
76-
cmath::c_double_utils::ldexp)
77-
delegate!(fn lgamma(n: c_double, sign: &mut c_int) -> c_double =
78-
cmath::c_double_utils::lgamma)
79-
delegate!(fn ln(n: c_double) -> c_double = cmath::c_double_utils::ln)
80-
delegate!(fn log_radix(n: c_double) -> c_double =
81-
cmath::c_double_utils::log_radix)
82-
delegate!(fn ln1p(n: c_double) -> c_double = cmath::c_double_utils::ln1p)
83-
delegate!(fn log10(n: c_double) -> c_double = cmath::c_double_utils::log10)
84-
delegate!(fn log2(n: c_double) -> c_double = cmath::c_double_utils::log2)
85-
delegate!(fn ilog_radix(n: c_double) -> c_int =
86-
cmath::c_double_utils::ilog_radix)
87-
delegate!(fn modf(n: c_double, iptr: &mut c_double) -> c_double =
88-
cmath::c_double_utils::modf)
89-
delegate!(fn pow(n: c_double, e: c_double) -> c_double =
90-
cmath::c_double_utils::pow)
91-
delegate!(fn round(n: c_double) -> c_double = cmath::c_double_utils::round)
92-
delegate!(fn ldexp_radix(n: c_double, i: c_int) -> c_double =
93-
cmath::c_double_utils::ldexp_radix)
94-
delegate!(fn sin(n: c_double) -> c_double = cmath::c_double_utils::sin)
95-
delegate!(fn sinh(n: c_double) -> c_double = cmath::c_double_utils::sinh)
96-
delegate!(fn sqrt(n: c_double) -> c_double = cmath::c_double_utils::sqrt)
97-
delegate!(fn tan(n: c_double) -> c_double = cmath::c_double_utils::tan)
98-
delegate!(fn tanh(n: c_double) -> c_double = cmath::c_double_utils::tanh)
99-
delegate!(fn tgamma(n: c_double) -> c_double = cmath::c_double_utils::tgamma)
100-
delegate!(fn trunc(n: c_double) -> c_double = cmath::c_double_utils::trunc)
101-
delegate!(fn j0(n: c_double) -> c_double = cmath::c_double_utils::j0)
102-
delegate!(fn j1(n: c_double) -> c_double = cmath::c_double_utils::j1)
103-
delegate!(fn jn(i: c_int, n: c_double) -> c_double =
104-
cmath::c_double_utils::jn)
105-
delegate!(fn y0(n: c_double) -> c_double = cmath::c_double_utils::y0)
106-
delegate!(fn y1(n: c_double) -> c_double = cmath::c_double_utils::y1)
107-
delegate!(fn yn(i: c_int, n: c_double) -> c_double =
108-
cmath::c_double_utils::yn)
56+
delegate!(
57+
// intrinsics
58+
fn abs(n: f64) -> f64 = intrinsics::fabsf64,
59+
fn cos(n: f64) -> f64 = intrinsics::cosf64,
60+
fn exp(n: f64) -> f64 = intrinsics::expf64,
61+
fn exp2(n: f64) -> f64 = intrinsics::exp2f64,
62+
fn floor(x: f64) -> f64 = intrinsics::floorf64,
63+
fn ln(n: f64) -> f64 = intrinsics::logf64,
64+
fn log10(n: f64) -> f64 = intrinsics::log10f64,
65+
fn log2(n: f64) -> f64 = intrinsics::log2f64,
66+
fn mul_add(a: f64, b: f64, c: f64) -> f64 = intrinsics::fmaf64,
67+
fn pow(n: f64, e: f64) -> f64 = intrinsics::powf64,
68+
fn powi(n: f64, e: c_int) -> f64 = intrinsics::powif64,
69+
fn sin(n: f64) -> f64 = intrinsics::sinf64,
70+
fn sqrt(n: f64) -> f64 = intrinsics::sqrtf64,
71+
72+
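// LLVM 3.3 is required to use intrinsics for ceil, trunc, rint and nearbyint, so ceil and trunc delegate to cmath for now and the intrinsic versions are commented out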
73+
fn ceil(n: c_double) -> c_double = c_double_utils::ceil,
74+
fn trunc(n: c_double) -> c_double = c_double_utils::trunc,
75+
/*
76+
fn ceil(n: f64) -> f64 = intrinsics::ceilf64,
77+
fn trunc(n: f64) -> f64 = intrinsics::truncf64,
78+
fn rint(n: f64) -> f64 = intrinsics::rintf64,
79+
fn nearbyint(n: f64) -> f64 = intrinsics::nearbyintf64,
80+
*/
81+
82+
// cmath
83+
fn acos(n: c_double) -> c_double = c_double_utils::acos,
84+
fn asin(n: c_double) -> c_double = c_double_utils::asin,
85+
fn atan(n: c_double) -> c_double = c_double_utils::atan,
86+
fn atan2(a: c_double, b: c_double) -> c_double = c_double_utils::atan2,
87+
fn cbrt(n: c_double) -> c_double = c_double_utils::cbrt,
88+
fn copysign(x: c_double, y: c_double) -> c_double = c_double_utils::copysign,
89+
fn cosh(n: c_double) -> c_double = c_double_utils::cosh,
90+
fn erf(n: c_double) -> c_double = c_double_utils::erf,
91+
fn erfc(n: c_double) -> c_double = c_double_utils::erfc,
92+
fn expm1(n: c_double) -> c_double = c_double_utils::expm1,
93+
fn abs_sub(a: c_double, b: c_double) -> c_double = c_double_utils::abs_sub,
94+
fn fmax(a: c_double, b: c_double) -> c_double = c_double_utils::fmax,
95+
fn fmin(a: c_double, b: c_double) -> c_double = c_double_utils::fmin,
96+
fn nextafter(x: c_double, y: c_double) -> c_double = c_double_utils::nextafter,
97+
fn frexp(n: c_double, value: &mut c_int) -> c_double = c_double_utils::frexp,
98+
fn hypot(x: c_double, y: c_double) -> c_double = c_double_utils::hypot,
99+
fn ldexp(x: c_double, n: c_int) -> c_double = c_double_utils::ldexp,
100+
fn lgamma(n: c_double, sign: &mut c_int) -> c_double = c_double_utils::lgamma,
101+
fn log_radix(n: c_double) -> c_double = c_double_utils::log_radix,
102+
fn ln1p(n: c_double) -> c_double = c_double_utils::ln1p,
103+
fn ilog_radix(n: c_double) -> c_int = c_double_utils::ilog_radix,
104+
fn modf(n: c_double, iptr: &mut c_double) -> c_double = c_double_utils::modf,
105+
fn round(n: c_double) -> c_double = c_double_utils::round,
106+
fn ldexp_radix(n: c_double, i: c_int) -> c_double = c_double_utils::ldexp_radix,
107+
fn sinh(n: c_double) -> c_double = c_double_utils::sinh,
108+
fn tan(n: c_double) -> c_double = c_double_utils::tan,
109+
fn tanh(n: c_double) -> c_double = c_double_utils::tanh,
110+
fn tgamma(n: c_double) -> c_double = c_double_utils::tgamma,
111+
fn j0(n: c_double) -> c_double = c_double_utils::j0,
112+
fn j1(n: c_double) -> c_double = c_double_utils::j1,
113+
fn jn(i: c_int, n: c_double) -> c_double = c_double_utils::jn,
114+
fn y0(n: c_double) -> c_double = c_double_utils::y0,
115+
fn y1(n: c_double) -> c_double = c_double_utils::y1,
116+
fn yn(i: c_int, n: c_double) -> c_double = c_double_utils::yn)
109117

110118
// FIXME (#1433): obtain these in a different way
111119

@@ -218,9 +226,6 @@ pub fn is_finite(x: f64) -> bool {
218226
return !(is_NaN(x) || is_infinite(x));
219227
}
220228

221-
/// Returns `x` rounded down
222-
#[inline(always)]
223-
pub fn floor(x: f64) -> f64 { unsafe { floorf64(x) } }
224229

225230
// FIXME (#1999): add is_normal, is_subnormal, and fpclassify
226231

src/libcore/num/float.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ pub use f64::{acos, asin, atan2, cbrt, ceil, copysign, cosh, floor};
3636
pub use f64::{erf, erfc, exp, expm1, exp2, abs_sub};
3737
pub use f64::{mul_add, fmax, fmin, nextafter, frexp, hypot, ldexp};
3838
pub use f64::{lgamma, ln, log_radix, ln1p, log10, log2, ilog_radix};
39-
pub use f64::{modf, pow, round, sinh, tanh, tgamma, trunc};
39+
pub use f64::{modf, pow, powi, round, sinh, tanh, tgamma, trunc};
4040
pub use f64::signbit;
4141
pub use f64::{j0, j1, jn, y0, y1, yn};
4242

0 commit comments

Comments
 (0)