Skip to content

Commit f6a47fa

Browse files
author
andy-thomason
committed
Rebase
1 parent 78a18c3 commit f6a47fa

File tree

3 files changed

+504
-0
lines changed

3 files changed

+504
-0
lines changed

crates/std_float/src/lib.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ use core_simd::simd;
1111

1212
use simd::{LaneCount, Simd, SupportedLaneCount};
1313

14+
mod libm32;
15+
#[cfg(test)]
16+
mod test_libm32;
17+
1418
#[cfg(feature = "as_crate")]
1519
mod experimental {
1620
pub trait Sealed {}
@@ -117,6 +121,31 @@ pub trait StdFloat: Sealed + Sized {
117121
fn fract(self) -> Self;
118122
}
119123

124+
pub trait StdLibm : StdFloat {
125+
type IntType;
126+
type UintType;
127+
128+
fn sin(self) -> Self;
129+
130+
fn cos(self) -> Self;
131+
132+
fn tan(self) -> Self;
133+
134+
fn asin(self) -> Self;
135+
136+
fn acos(self) -> Self;
137+
138+
fn atan(self) -> Self;
139+
140+
fn atan2(self, x: Self) -> Self;
141+
142+
fn exp2(self) -> Self;
143+
144+
fn exp(self) -> Self;
145+
146+
fn log2(self) -> Self;
147+
}
148+
120149
impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
121150
impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
122151

@@ -161,5 +190,6 @@ mod tests {
161190
let _xfma = x.mul_add(x, x);
162191
let _xsqrt = x.sqrt();
163192
let _ = x2.abs() * x2;
193+
let _ = x.sin();
164194
}
165195
}

crates/std_float/src/libm32.rs

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
#![allow(non_snake_case)]
2+
#![doc = "This code is automatically generated, do not edit."]
3+
use super::StdLibm;
4+
5+
use super::StdFloat;
6+
7+
use super::simd::{LaneCount, Simd, SupportedLaneCount};
8+
9+
impl<const N: usize> StdLibm for Simd<f32, N>
10+
where
11+
LaneCount<N>: SupportedLaneCount,
12+
{
13+
type IntType = Simd<i32, N>;
14+
type UintType = Simd<u32, N>;
15+
#[inline]
16+
fn asin(self) -> Self {
17+
let PI_BY_2 = Self::splat(1.57079632679489661923);
18+
let arg = self;
19+
let LIM: Self = Self::splat(0.70710678118654752440);
20+
let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
21+
let s: Self =
22+
((arg).lanes_lt(Self::splat(0.0))).select(-Self::splat(1.0), Self::splat(1.0));
23+
let x: Self =
24+
((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
25+
let y: Self = (Self::splat(0.11644821f32))
26+
.mul_add(x * x, Self::splat(0.04343228f32))
27+
.mul_add(x * x, Self::splat(0.17078044f32))
28+
.mul_add(x * x, Self::splat(0.99991643f32))
29+
* x;
30+
((arg * arg).lanes_lt(LIM * LIM)).select(y, c - y * s)
31+
}
32+
#[inline]
33+
fn acos(self) -> Self {
34+
let PI_BY_2 = Self::splat(1.57079632679489661923);
35+
let PI = Self::splat(3.14159265358979323846);
36+
let arg = self;
37+
let LIM: Self = Self::splat(0.9);
38+
let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(PI, Self::splat(0.0));
39+
let s: Self =
40+
((arg).lanes_lt(Self::splat(0.0))).select(Self::splat(1.0), -Self::splat(1.0));
41+
let x: Self =
42+
((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
43+
let y: Self = (Self::splat(1.3740137f32))
44+
.mul_add(x * x, -Self::splat(3.1993167f32))
45+
.mul_add(x * x, Self::splat(3.103398f32))
46+
.mul_add(x * x, -Self::splat(1.4533828f32))
47+
.mul_add(x * x, Self::splat(0.41395915f32))
48+
.mul_add(x * x, Self::splat(0.03113007f32))
49+
.mul_add(x * x, Self::splat(0.16861732f32))
50+
.mul_add(x * x, Self::splat(0.99998593f32))
51+
* x;
52+
((arg * arg).lanes_lt(LIM * LIM)).select(PI_BY_2 - y, c - y * s)
53+
}
54+
#[inline]
55+
fn atan(self) -> Self {
56+
let PI_BY_2 = Self::splat(1.57079632679489661923);
57+
let arg = self;
58+
let LIM: Self = Self::splat(1.0);
59+
let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
60+
let x: Self = ((arg.abs()).lanes_lt(LIM)).select(arg, arg.recip());
61+
let y: Self = (-Self::splat(0.0039602574f32))
62+
.mul_add(x * x, Self::splat(0.021659138f32))
63+
.mul_add(x * x, -Self::splat(0.05587457f32))
64+
.mul_add(x * x, Self::splat(0.09664151f32))
65+
.mul_add(x * x, -Self::splat(0.13930209f32))
66+
.mul_add(x * x, Self::splat(0.19954468f32))
67+
.mul_add(x * x, -Self::splat(0.33331004f32))
68+
.mul_add(x * x, Self::splat(0.9999998f32))
69+
* x;
70+
((arg.abs()).lanes_lt(LIM)).select(y, c - y)
71+
}
72+
#[inline]
73+
fn atan2(self, x: Self) -> Self {
74+
let PI_BY_2 = Self::splat(1.57079632679489661923);
75+
let PI = Self::splat(3.14159265358979323846);
76+
let y = self;
77+
let offset180: Self = ((y).lanes_lt(Self::splat(0.0))).select(-PI, PI);
78+
let x1: Self = ((x).lanes_lt(Self::splat(0.0))).select(-x, x);
79+
let y1: Self = ((x).lanes_lt(Self::splat(0.0))).select(-y, y);
80+
let offset1: Self = ((x).lanes_lt(Self::splat(0.0))).select(offset180, Self::splat(0.0));
81+
let offset90: Self = ((y).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
82+
let x2: Self = ((y1.abs()).lanes_gt(x1)).select(y1, x1);
83+
let y2: Self = ((y1.abs()).lanes_gt(x1)).select(-x1, y1);
84+
let offset2: Self = ((y1.abs()).lanes_gt(x1)).select(offset1 + offset90, offset1);
85+
let x3: Self = y2 / x2;
86+
let y3: Self = (-Self::splat(0.0039602574f32))
87+
.mul_add(x3 * x3, Self::splat(0.021659138f32))
88+
.mul_add(x3 * x3, -Self::splat(0.05587457f32))
89+
.mul_add(x3 * x3, Self::splat(0.09664151f32))
90+
.mul_add(x3 * x3, -Self::splat(0.13930209f32))
91+
.mul_add(x3 * x3, Self::splat(0.19954468f32))
92+
.mul_add(x3 * x3, -Self::splat(0.33331004f32))
93+
.mul_add(x3 * x3, Self::splat(0.9999998f32))
94+
* x3;
95+
y3 + offset2
96+
}
97+
#[inline]
98+
fn exp2(self) -> Self {
99+
let arg = self;
100+
let r: Self = arg.round();
101+
let mul: Self = Self::from_bits(unsafe {
102+
(r.mul_add(Self::splat(8388608.0f32), Self::splat(1065353216.0f32))).to_uint_unchecked()
103+
});
104+
let x: Self = arg - r;
105+
(Self::splat(0.000015310081f32))
106+
.mul_add(x, Self::splat(0.0001547802f32))
107+
.mul_add(x, Self::splat(0.0013333454f32))
108+
.mul_add(x, Self::splat(0.009617995f32))
109+
.mul_add(x, Self::splat(0.05550411f32))
110+
.mul_add(x, Self::splat(0.24022652f32))
111+
.mul_add(x, Self::splat(0.6931472f32))
112+
.mul_add(x, Self::splat(1f32))
113+
* mul
114+
}
115+
#[inline]
116+
fn exp(self) -> Self {
117+
let LOG2_E =Self ::splat (1.442695040888963407359769137464649992339735961996202908859290566914912486673985594186422766333708408);
118+
let arg = self;
119+
(arg * LOG2_E).exp2()
120+
}
121+
#[inline]
122+
fn sin(self) -> Self {
123+
let RECIP_2PI = Self::splat(0.15915494309189533577);
124+
let arg = self;
125+
let scaled: Self = arg * RECIP_2PI;
126+
let x: Self = scaled - scaled.round();
127+
(-Self::splat(12.26886f32))
128+
.mul_add(x * x, Self::splat(41.21624f32))
129+
.mul_add(x * x, -Self::splat(76.58672f32))
130+
.mul_add(x * x, Self::splat(81.59746f32))
131+
.mul_add(x * x, -Self::splat(41.34151f32))
132+
.mul_add(x * x, Self::splat(6.2831845f32))
133+
* x
134+
}
135+
#[inline]
136+
fn cos(self) -> Self {
137+
let RECIP_2PI = Self::splat(0.15915494309189533577);
138+
let arg = self;
139+
let scaled: Self = arg * RECIP_2PI;
140+
let x: Self = scaled - scaled.round();
141+
(Self::splat(6.5286584f32))
142+
.mul_add(x * x, -Self::splat(25.973276f32))
143+
.mul_add(x * x, Self::splat(60.17118f32))
144+
.mul_add(x * x, -Self::splat(85.45092f32))
145+
.mul_add(x * x, Self::splat(64.939186f32))
146+
.mul_add(x * x, -Self::splat(19.739206f32))
147+
.mul_add(x * x, Self::splat(1f32))
148+
}
149+
#[inline]
150+
fn tan(self) -> Self {
151+
let RECIP_PI = Self::splat(0.31830988618379067154);
152+
let arg = self;
153+
let scaled: Self = arg * RECIP_PI;
154+
let x: Self = scaled - scaled.round();
155+
let recip: Self = Self::splat(1.0) / (x * x - Self::splat(0.25));
156+
let y: Self = (Self::splat(0.014397301f32))
157+
.mul_add(x * x, Self::splat(0.021017345f32))
158+
.mul_add(x * x, Self::splat(0.05285888f32))
159+
.mul_add(x * x, Self::splat(0.13475448f32))
160+
.mul_add(x * x, Self::splat(0.55773664f32))
161+
.mul_add(x * x, -Self::splat(0.7853982f32))
162+
* x;
163+
y * recip
164+
}
165+
}

0 commit comments

Comments
 (0)