Skip to content

Literal vec notation is less efficient than vec::from_elem #4403

@bstrie

Description

@bstrie

I'm updating a benchmark and noting performance differences. Here's one version (perlin-mut.rs):

// A pair of `f32` components. The functions below use it both for points
// (`orig`, `p`) and for gradient directions (`grad`).
struct Vec2 {
    x: f32,
    y: f32,
}

// Weighted blend of `a` and `b`: `a` is scaled by `1 - v` and `b` by `v`,
// so `v == 0` gives `a` and `v == 1` gives `b`.
fn lerp(a: f32, b: f32, v: f32) -> f32 {
    let weight_a = 1f32 - v;
    let from_a = a * weight_a;
    let from_b = b * v;
    from_a + from_b
}

// Cubic easing curve `v * v * (3 - 2 * v)`; it maps 0 to 0 and 1 to 1.
fn smooth(v: f32) -> f32 {
    let squared = v * v;
    let falloff = 3f32 - 2f32 * v;
    squared * falloff
}

// Draws one float from the generator `r` and turns it into a direction
// vector of the form (cos v, sin v).
fn random_gradient(r: rand::Rng) -> Vec2 {
    // Scale the generated float by 2*pi so it can be used as an angle.
    let v = r.gen_float() * float::consts::pi * 2.0;
    // cos/sin are evaluated in `float` and narrowed to `f32` for storage.
    Vec2{
        x: float::cos(v) as f32,
        y: float::sin(v) as f32,
    }
}

// Dot product of the gradient `grad` with the offset from the corner
// `orig` to the sample point `p`.
fn gradient(orig: Vec2, grad: Vec2, p: Vec2) -> f32 {
    // Offset of the sample point relative to the corner.
    let sp = Vec2{x: p.x - orig.x, y: p.y - orig.y};
    // Both components are multiplied. The y term used to be added
    // (`grad.y + sp.y`), which is not a dot product.
    grad.x * sp.x + grad.y * sp.y
}

// Lookup tables used to choose a gradient for an integer grid point.
struct Noise2DContext {
    // Gradient vectors; the `Noise2DContext()` constructor creates 256 of them.
    rgradients: ~[Vec2],
    // Index table; the constructor builds it from the values 0..255 and
    // passes it to `shuffle_mut`.
    permutations: ~[int],
}

// Constructor: builds an owned (`~`) context with 256 random gradients and
// a shuffled table of the integers 0..255.
fn Noise2DContext() -> ~Noise2DContext {
    let r = rand::Rng();
    // One `random_gradient` call per table slot.
    let rgradients = do vec::from_fn(256) |_i| { random_gradient(r) };
    // Start from 0, 1, ..., 255 and hand the vector to `shuffle_mut`.
    let mut permutations = do vec::from_fn(256) |i| { i as int };
    r.shuffle_mut(permutations);

    ~Noise2DContext{
        rgradients: move rgradients,
        permutations: move permutations,
    }
}

impl Noise2DContext {
    // Gradient for grid point (x, y): the permutation entries selected by the
    // low 8 bits of `x` and of `y` are summed, and the low 8 bits of that sum
    // index `rgradients`.
    fn get_gradient(x: int, y: int) -> Vec2 {
        let idx = self.permutations[x & 255] + self.permutations[y & 255];
        self.rgradients[idx & 255]
    }

    // Fills `gradients` and `origins` for the four grid points around (x, y),
    // in the order (x0, y0), (x1, y0), (x0, y1), (x1, y1), where (x0, y0) is
    // the floor of the input and x1 = x0 + 1, y1 = y0 + 1.
    fn get_gradients(gradients: &[mut Vec2 * 4], origins: &[mut Vec2 * 4], x: f32, y: f32) {
        // Floored coordinates, kept both as f32 (for `origins`) and as int
        // (for the table lookups).
        let x0f = float::floor(x as libc::c_double) as f32;
        let y0f = float::floor(y as libc::c_double) as f32;
        let x0 = x0f as int;
        let y0 = y0f as int;
        let x1 = x0 + 1;
        let y1 = y0 + 1;

        gradients[0] = self.get_gradient(x0, y0);
        gradients[1] = self.get_gradient(x1, y0);
        gradients[2] = self.get_gradient(x0, y1);
        gradients[3] = self.get_gradient(x1, y1);

        origins[0] = Vec2{x: x0f + 0f32, y: y0f + 0f32};
        origins[1] = Vec2{x: x0f + 1f32, y: y0f + 0f32};
        origins[2] = Vec2{x: x0f + 0f32, y: y0f + 1f32};
        origins[3] = Vec2{x: x0f + 1f32, y: y0f + 1f32};
    }

    // Noise value at (x, y): evaluates `gradient` at the four surrounding
    // corners, then blends the results with `lerp`, first along x and then
    // along y, using `smooth`ed offsets from corner 0.
    fn get(x: f32, y: f32) -> f32 {
        let p = Vec2{x: x, y: y};
        // Zero-initialised scratch arrays that `get_gradients` overwrites.
        let gradients: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        let origins: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        self.get_gradients(&gradients, &origins, x, y);
        let v0 = gradient(origins[0], gradients[0], p);
        let v1 = gradient(origins[1], gradients[1], p);
        let v2 = gradient(origins[2], gradients[2], p);
        let v3 = gradient(origins[3], gradients[3], p);
        // Blend the two corner pairs along x, then the results along y.
        let fx = smooth(x - origins[0].x);
        let vx0 = lerp(v0, v1, fx);
        let vx1 = lerp(v2, v3, fx);
        let fy = smooth(y - origins[0].y);
        lerp(vx0, vx1, fy)
    }
}

// Benchmark driver: fills a 256x256 buffer with noise values 100 times over,
// then prints the buffer once, one symbol per pixel.
fn main() {
    let symbols = [" ", "░", "▒", "▓", "█", "█"];
    // Pixel buffer built with the library function: 256*256 copies of 0f32.
    let mut pixels = vec::from_elem(256*256, 0f32);
    let n2d = Noise2DContext();
    // The outer loop repeats the whole fill 100 times; `_i` is unused.
    for int::range(0, 100) |_i| {
        for int::range(0, 256) |y| {
            for int::range(0, 256) |x| {
                // Sample at a tenth of the pixel coordinates, then scale and
                // offset the result by 0.5.
                let v = n2d.get(
                    x as f32 * 0.1f32,
                    y as f32 * 0.1f32
                ) * 0.5f32 + 0.5f32;
                pixels[y*256+x] = v;
            };
        };
    };

    for int::range(0, 256) |y| {
        for int::range(0, 256) |x| {
            // The pixel value divided by 0.2, converted to int, picks the symbol.
            // NOTE(review): this relies on `as` binding looser than `/` in this
            // pre-1.0 dialect; under current Rust precedence it would need
            // parentheses around the division. Confirm before porting.
            io::print(symbols[pixels[y*256+x] / 0.2f32 as int]);
        }
        io::println("");
    }
}

Here's a second version in which a single line has been altered: `let mut pixels = vec::from_elem(256*256, 0f32);` becomes `let mut pixels = ~[0f32, ..65536];`. For reproducibility, here's the whole file (perlin-mut-literal-tilde.rs):

// A pair of `f32` components. The functions below use it both for points
// (`orig`, `p`) and for gradient directions (`grad`).
struct Vec2 {
    x: f32,
    y: f32,
}

// Weighted blend of `a` and `b`: `a` is scaled by `1 - v` and `b` by `v`,
// so `v == 0` gives `a` and `v == 1` gives `b`.
fn lerp(a: f32, b: f32, v: f32) -> f32 {
    let weight_a = 1f32 - v;
    let from_a = a * weight_a;
    let from_b = b * v;
    from_a + from_b
}

// Cubic easing curve `v * v * (3 - 2 * v)`; it maps 0 to 0 and 1 to 1.
fn smooth(v: f32) -> f32 {
    let squared = v * v;
    let falloff = 3f32 - 2f32 * v;
    squared * falloff
}

// Draws one float from the generator `r` and turns it into a direction
// vector of the form (cos v, sin v).
fn random_gradient(r: rand::Rng) -> Vec2 {
    // Scale the generated float by 2*pi so it can be used as an angle.
    let v = r.gen_float() * float::consts::pi * 2.0;
    // cos/sin are evaluated in `float` and narrowed to `f32` for storage.
    Vec2{
        x: float::cos(v) as f32,
        y: float::sin(v) as f32,
    }
}

// Dot product of the gradient `grad` with the offset from the corner
// `orig` to the sample point `p`.
fn gradient(orig: Vec2, grad: Vec2, p: Vec2) -> f32 {
    // Offset of the sample point relative to the corner.
    let sp = Vec2{x: p.x - orig.x, y: p.y - orig.y};
    // Both components are multiplied. The y term used to be added
    // (`grad.y + sp.y`), which is not a dot product.
    grad.x * sp.x + grad.y * sp.y
}

// Lookup tables used to choose a gradient for an integer grid point.
struct Noise2DContext {
    // Gradient vectors; the `Noise2DContext()` constructor creates 256 of them.
    rgradients: ~[Vec2],
    // Index table; the constructor builds it from the values 0..255 and
    // passes it to `shuffle_mut`.
    permutations: ~[int],
}

// Constructor: builds an owned (`~`) context with 256 random gradients and
// a shuffled table of the integers 0..255.
fn Noise2DContext() -> ~Noise2DContext {
    let r = rand::Rng();
    // One `random_gradient` call per table slot.
    let rgradients = do vec::from_fn(256) |_i| { random_gradient(r) };
    // Start from 0, 1, ..., 255 and hand the vector to `shuffle_mut`.
    let mut permutations = do vec::from_fn(256) |i| { i as int };
    r.shuffle_mut(permutations);

    ~Noise2DContext{
        rgradients: move rgradients,
        permutations: move permutations,
    }
}

impl Noise2DContext {
    // Gradient for grid point (x, y): the permutation entries selected by the
    // low 8 bits of `x` and of `y` are summed, and the low 8 bits of that sum
    // index `rgradients`.
    fn get_gradient(x: int, y: int) -> Vec2 {
        let idx = self.permutations[x & 255] + self.permutations[y & 255];
        self.rgradients[idx & 255]
    }

    // Fills `gradients` and `origins` for the four grid points around (x, y),
    // in the order (x0, y0), (x1, y0), (x0, y1), (x1, y1), where (x0, y0) is
    // the floor of the input and x1 = x0 + 1, y1 = y0 + 1.
    fn get_gradients(gradients: &[mut Vec2 * 4], origins: &[mut Vec2 * 4], x: f32, y: f32) {
        // Floored coordinates, kept both as f32 (for `origins`) and as int
        // (for the table lookups).
        let x0f = float::floor(x as libc::c_double) as f32;
        let y0f = float::floor(y as libc::c_double) as f32;
        let x0 = x0f as int;
        let y0 = y0f as int;
        let x1 = x0 + 1;
        let y1 = y0 + 1;

        gradients[0] = self.get_gradient(x0, y0);
        gradients[1] = self.get_gradient(x1, y0);
        gradients[2] = self.get_gradient(x0, y1);
        gradients[3] = self.get_gradient(x1, y1);

        origins[0] = Vec2{x: x0f + 0f32, y: y0f + 0f32};
        origins[1] = Vec2{x: x0f + 1f32, y: y0f + 0f32};
        origins[2] = Vec2{x: x0f + 0f32, y: y0f + 1f32};
        origins[3] = Vec2{x: x0f + 1f32, y: y0f + 1f32};
    }

    // Noise value at (x, y): evaluates `gradient` at the four surrounding
    // corners, then blends the results with `lerp`, first along x and then
    // along y, using `smooth`ed offsets from corner 0.
    fn get(x: f32, y: f32) -> f32 {
        let p = Vec2{x: x, y: y};
        // Zero-initialised scratch arrays that `get_gradients` overwrites.
        let gradients: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        let origins: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        self.get_gradients(&gradients, &origins, x, y);
        let v0 = gradient(origins[0], gradients[0], p);
        let v1 = gradient(origins[1], gradients[1], p);
        let v2 = gradient(origins[2], gradients[2], p);
        let v3 = gradient(origins[3], gradients[3], p);
        // Blend the two corner pairs along x, then the results along y.
        let fx = smooth(x - origins[0].x);
        let vx0 = lerp(v0, v1, fx);
        let vx1 = lerp(v2, v3, fx);
        let fy = smooth(y - origins[0].y);
        lerp(vx0, vx1, fy)
    }
}

// Benchmark driver: fills a 256x256 buffer with noise values 100 times over,
// then prints the buffer once, one symbol per pixel.
fn main() {
    let symbols = [" ", "░", "▒", "▓", "█", "█"];
    // The library-function form is kept below for comparison; this variant
    // uses the repeat literal instead: 65536 (= 256*256) elements of 0f32.
    //let mut pixels = vec::from_elem(256*256, 0f32);
    let mut pixels = ~[0f32, ..65536];
    let n2d = Noise2DContext();
    // The outer loop repeats the whole fill 100 times; `_i` is unused.
    for int::range(0, 100) |_i| {
        for int::range(0, 256) |y| {
            for int::range(0, 256) |x| {
                // Sample at a tenth of the pixel coordinates, then scale and
                // offset the result by 0.5.
                let v = n2d.get(
                    x as f32 * 0.1f32,
                    y as f32 * 0.1f32
                ) * 0.5f32 + 0.5f32;
                pixels[y*256+x] = v;
            };
        };
    };

    for int::range(0, 256) |y| {
        for int::range(0, 256) |x| {
            // The pixel value divided by 0.2, converted to int, picks the symbol.
            // NOTE(review): this relies on `as` binding looser than `/` in this
            // pre-1.0 dialect; under current Rust precedence it would need
            // parentheses around the division. Confirm before porting.
            io::print(symbols[pixels[y*256+x] / 0.2f32 as int]);
        }
        io::println("");
    }
}

Both programs were compiled with rustc --opt-level=3 and profiled as follows:

$ (perf stat -r 10 perlin-mut) 2> mut3.txt
$ (perf stat -r 10 perlin-mut-literal-tilde) 2> mut-literal-tilde2.txt

mut3.txt:

 Performance counter stats for 'perlin-mut' (10 runs):

    1346.376055  task-clock-msecs         #      0.982 CPUs    ( +-   0.136% )
             74  context-switches         #      0.000 M/sec   ( +-   4.013% )
              1  CPU-migrations           #      0.000 M/sec   ( +-   9.091% )
            738  page-faults              #      0.001 M/sec   ( +-   0.047% )
   166000485913  cycles                   # 123294.295 M/sec   ( +-   3.857% )
   166000485913  instructions             #      1.000 IPC     ( +-   3.857% )
   166000485913  branches                 # 123294.295 M/sec   ( +-   3.857% )
   166000485913  branch-misses            #    100.000 %       ( +-   3.857% )
   166000485913  cache-references         # 123294.295 M/sec   ( +-   3.857% )
   166000485913  cache-misses             # 123294.295 M/sec   ( +-   3.857% )

    1.371346350  seconds time elapsed   ( +-   0.181% )

mut-literal-tilde2.txt:

 Performance counter stats for 'perlin-mut-literal-tilde' (10 runs):

    1376.394325  task-clock-msecs         #      0.986 CPUs    ( +-   0.145% )
            111  context-switches         #      0.000 M/sec   ( +-   8.048% )
              1  CPU-migrations           #      0.000 M/sec   ( +-   9.091% )
            738  page-faults              #      0.001 M/sec   ( +-   0.036% )
   238800181546  cycles                   # 173496.924 M/sec   ( +-   7.465% )
   238800181546  instructions             #      1.000 IPC     ( +-   7.465% )
   238800181546  branches                 # 173496.924 M/sec   ( +-   7.465% )
   238800181546  branch-misses            #    100.000 %       ( +-   7.465% )
   238800181546  cache-references         # 173496.924 M/sec   ( +-   7.465% )
   238800181546  cache-misses             # 173496.924 M/sec   ( +-   7.465% )

    1.395384346  seconds time elapsed   ( +-   0.311% )

It's not a large difference, but intuitively I feel that the literal version ought to actually be much faster than the builder function; @graydon feels the same:

 <@graydon> the literal should be much faster. if we're doing our work properly
            it should be malloc + memcpy from const. which should then hit the
            SSE/AVX paths and go very fast indeed.

I'm also worried that any slight disadvantage to using literals will result in horrifying best-practices such as "use vec::from_elem instead of the literal vector syntax".

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-codegen (Area: Code generation), I-slow (Issue: Problems and improvements with respect to performance of generated code.)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions