-
Notifications
You must be signed in to change notification settings - Fork 13.5k
Description
I'm updating a benchmark and noting performance differences. Here's one version (perlin-mut.rs):
struct Vec2 {
x: f32,
y: f32,
}
fn lerp(a: f32, b: f32, v: f32) -> f32 {
a * (1f32 - v) + b * v
}
fn smooth(v: f32) -> f32 {
v * v * (3f32 - 2f32 * v)
}
fn random_gradient(r: rand::Rng) -> Vec2 {
let v = r.gen_float() * float::consts::pi * 2.0;
Vec2{
x: float::cos(v) as f32,
y: float::sin(v) as f32,
}
}
fn gradient(orig: Vec2, grad: Vec2, p: Vec2) -> f32 {
let sp = Vec2{x: p.x - orig.x, y: p.y - orig.y};
grad.x * sp.x + grad.y + sp.y
}
struct Noise2DContext {
rgradients: ~[Vec2],
permutations: ~[int],
}
fn Noise2DContext() -> ~Noise2DContext {
let r = rand::Rng();
let rgradients = do vec::from_fn(256) |_i| { random_gradient(r) };
let mut permutations = do vec::from_fn(256) |i| { i as int };
r.shuffle_mut(permutations);
~Noise2DContext{
rgradients: move rgradients,
permutations: move permutations,
}
}
impl Noise2DContext {
fn get_gradient(x: int, y: int) -> Vec2 {
let idx = self.permutations[x & 255] + self.permutations[y & 255];
self.rgradients[idx & 255]
}
fn get_gradients(gradients: &[mut Vec2 * 4], origins: &[mut Vec2 * 4], x: f32, y: f32) {
let x0f = float::floor(x as libc::c_double) as f32;
let y0f = float::floor(y as libc::c_double) as f32;
let x0 = x0f as int;
let y0 = y0f as int;
let x1 = x0 + 1;
let y1 = y0 + 1;
gradients[0] = self.get_gradient(x0, y0);
gradients[1] = self.get_gradient(x1, y0);
gradients[2] = self.get_gradient(x0, y1);
gradients[3] = self.get_gradient(x1, y1);
origins[0] = Vec2{x: x0f + 0f32, y: y0f + 0f32};
origins[1] = Vec2{x: x0f + 1f32, y: y0f + 0f32};
origins[2] = Vec2{x: x0f + 0f32, y: y0f + 1f32};
origins[3] = Vec2{x: x0f + 1f32, y: y0f + 1f32};
}
fn get(x: f32, y: f32) -> f32 {
let p = Vec2{x: x, y: y};
let gradients: [mut Vec2 * 4] = [mut
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
];
let origins: [mut Vec2 * 4] = [mut
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
];
self.get_gradients(&gradients, &origins, x, y);
let v0 = gradient(origins[0], gradients[0], p);
let v1 = gradient(origins[1], gradients[1], p);
let v2 = gradient(origins[2], gradients[2], p);
let v3 = gradient(origins[3], gradients[3], p);
let fx = smooth(x - origins[0].x);
let vx0 = lerp(v0, v1, fx);
let vx1 = lerp(v2, v3, fx);
let fy = smooth(y - origins[0].y);
lerp(vx0, vx1, fy)
}
}
fn main() {
let symbols = [" ", "░", "▒", "▓", "█", "█"];
let mut pixels = vec::from_elem(256*256, 0f32);
let n2d = Noise2DContext();
for int::range(0, 100) |_i| {
for int::range(0, 256) |y| {
for int::range(0, 256) |x| {
let v = n2d.get(
x as f32 * 0.1f32,
y as f32 * 0.1f32
) * 0.5f32 + 0.5f32;
pixels[y*256+x] = v;
};
};
};
for int::range(0, 256) |y| {
for int::range(0, 256) |x| {
io::print(symbols[pixels[y*256+x] / 0.2f32 as int]);
}
io::println("");
}
}
Here's a version that's had the following line altered: let mut pixels = vec::from_elem(256*256, 0f32);
becomes let mut pixels = ~[0f32, ..65536];
. For reproducibility, here's the whole file (perlin-mut-literal-tilde.rs):
struct Vec2 {
x: f32,
y: f32,
}
fn lerp(a: f32, b: f32, v: f32) -> f32 {
a * (1f32 - v) + b * v
}
fn smooth(v: f32) -> f32 {
v * v * (3f32 - 2f32 * v)
}
fn random_gradient(r: rand::Rng) -> Vec2 {
let v = r.gen_float() * float::consts::pi * 2.0;
Vec2{
x: float::cos(v) as f32,
y: float::sin(v) as f32,
}
}
fn gradient(orig: Vec2, grad: Vec2, p: Vec2) -> f32 {
let sp = Vec2{x: p.x - orig.x, y: p.y - orig.y};
grad.x * sp.x + grad.y + sp.y
}
struct Noise2DContext {
rgradients: ~[Vec2],
permutations: ~[int],
}
fn Noise2DContext() -> ~Noise2DContext {
let r = rand::Rng();
let rgradients = do vec::from_fn(256) |_i| { random_gradient(r) };
let mut permutations = do vec::from_fn(256) |i| { i as int };
r.shuffle_mut(permutations);
~Noise2DContext{
rgradients: move rgradients,
permutations: move permutations,
}
}
impl Noise2DContext {
fn get_gradient(x: int, y: int) -> Vec2 {
let idx = self.permutations[x & 255] + self.permutations[y & 255];
self.rgradients[idx & 255]
}
fn get_gradients(gradients: &[mut Vec2 * 4], origins: &[mut Vec2 * 4], x: f32, y: f32) {
let x0f = float::floor(x as libc::c_double) as f32;
let y0f = float::floor(y as libc::c_double) as f32;
let x0 = x0f as int;
let y0 = y0f as int;
let x1 = x0 + 1;
let y1 = y0 + 1;
gradients[0] = self.get_gradient(x0, y0);
gradients[1] = self.get_gradient(x1, y0);
gradients[2] = self.get_gradient(x0, y1);
gradients[3] = self.get_gradient(x1, y1);
origins[0] = Vec2{x: x0f + 0f32, y: y0f + 0f32};
origins[1] = Vec2{x: x0f + 1f32, y: y0f + 0f32};
origins[2] = Vec2{x: x0f + 0f32, y: y0f + 1f32};
origins[3] = Vec2{x: x0f + 1f32, y: y0f + 1f32};
}
fn get(x: f32, y: f32) -> f32 {
let p = Vec2{x: x, y: y};
let gradients: [mut Vec2 * 4] = [mut
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
];
let origins: [mut Vec2 * 4] = [mut
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
];
self.get_gradients(&gradients, &origins, x, y);
let v0 = gradient(origins[0], gradients[0], p);
let v1 = gradient(origins[1], gradients[1], p);
let v2 = gradient(origins[2], gradients[2], p);
let v3 = gradient(origins[3], gradients[3], p);
let fx = smooth(x - origins[0].x);
let vx0 = lerp(v0, v1, fx);
let vx1 = lerp(v2, v3, fx);
let fy = smooth(y - origins[0].y);
lerp(vx0, vx1, fy)
}
}
fn main() {
let symbols = [" ", "░", "▒", "▓", "█", "█"];
//let mut pixels = vec::from_elem(256*256, 0f32);
let mut pixels = ~[0f32, ..65536];
let n2d = Noise2DContext();
for int::range(0, 100) |_i| {
for int::range(0, 256) |y| {
for int::range(0, 256) |x| {
let v = n2d.get(
x as f32 * 0.1f32,
y as f32 * 0.1f32
) * 0.5f32 + 0.5f32;
pixels[y*256+x] = v;
};
};
};
for int::range(0, 256) |y| {
for int::range(0, 256) |x| {
io::print(symbols[pixels[y*256+x] / 0.2f32 as int]);
}
io::println("");
}
}
Both programs were compiled with rustc --opt-level=3
and profiled as follows:
$ (perf stat -r 10 perlin-mut) 2> mut3.txt
$ (perf stat -r 10 perlin-mut-literal-tilde) 2> mut-literal-tilde2.txt
mut3.txt:
Performance counter stats for 'perlin-mut' (10 runs):
1346.376055 task-clock-msecs # 0.982 CPUs ( +- 0.136% )
74 context-switches # 0.000 M/sec ( +- 4.013% )
1 CPU-migrations # 0.000 M/sec ( +- 9.091% )
738 page-faults # 0.001 M/sec ( +- 0.047% )
166000485913 cycles # 123294.295 M/sec ( +- 3.857% )
166000485913 instructions # 1.000 IPC ( +- 3.857% )
166000485913 branches # 123294.295 M/sec ( +- 3.857% )
166000485913 branch-misses # 100.000 % ( +- 3.857% )
166000485913 cache-references # 123294.295 M/sec ( +- 3.857% )
166000485913 cache-misses # 123294.295 M/sec ( +- 3.857% )
1.371346350 seconds time elapsed ( +- 0.181% )
mut-literal-tilde2.txt:
Performance counter stats for 'perlin-mut-literal-tilde' (10 runs):
1376.394325 task-clock-msecs # 0.986 CPUs ( +- 0.145% )
111 context-switches # 0.000 M/sec ( +- 8.048% )
1 CPU-migrations # 0.000 M/sec ( +- 9.091% )
738 page-faults # 0.001 M/sec ( +- 0.036% )
238800181546 cycles # 173496.924 M/sec ( +- 7.465% )
238800181546 instructions # 1.000 IPC ( +- 7.465% )
238800181546 branches # 173496.924 M/sec ( +- 7.465% )
238800181546 branch-misses # 100.000 % ( +- 7.465% )
238800181546 cache-references # 173496.924 M/sec ( +- 7.465% )
238800181546 cache-misses # 173496.924 M/sec ( +- 7.465% )
1.395384346 seconds time elapsed ( +- 0.311% )
It's not a large difference, but intuitively I feel that the literal version ought to actually be much faster than the builder function; @graydon feels the same:
<@graydon> the literal should be much faster. if we're doing our work properly
it should be malloc + memcpy from const. which should then hit the
SSE/AVX paths and go very fast indeed.
I'm also worried that any slight disadvantage to using literals will result in horrifying best-practices such as "use vec::from_elem
instead of the literal vector syntax".