From 5afc63a2aef113d49944b6ee5f7a743198a6aff1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Steinbrink?= Date: Sat, 1 Feb 2014 14:56:51 +0100 Subject: [PATCH] Optimize u64_to_{le,be}_bytes LLVM fails to properly optimize the shifts used to convert the source value to the right endianness. The resulting assembly copies the value to the stack one byte at a time even when there's no conversion required (e.g. u64_to_le_bytes on a little endian machine). Instead of doing the conversion ourselves using shifts, we can use the existing intrinsics to perform the endianness conversion and then transmute the value to get a fixed vector of its bytes. Before: test be_i8 ... bench: 21442 ns/iter (+/- 70) test be_i16 ... bench: 21447 ns/iter (+/- 45) test be_i32 ... bench: 23832 ns/iter (+/- 63) test be_i64 ... bench: 26887 ns/iter (+/- 267) test le_i8 ... bench: 21442 ns/iter (+/- 56) test le_i16 ... bench: 21448 ns/iter (+/- 36) test le_i32 ... bench: 23825 ns/iter (+/- 153) test le_i64 ... bench: 26271 ns/iter (+/- 138) After: test be_i8 ... bench: 21438 ns/iter (+/- 10) test be_i16 ... bench: 21441 ns/iter (+/- 15) test be_i32 ... bench: 19057 ns/iter (+/- 6) test be_i64 ... bench: 21439 ns/iter (+/- 34) test le_i8 ... bench: 21438 ns/iter (+/- 19) test le_i16 ... bench: 21439 ns/iter (+/- 8) test le_i32 ... bench: 21439 ns/iter (+/- 19) test le_i64 ... 
bench: 21438 ns/iter (+/- 22) --- src/libstd/io/extensions.rs | 42 +++++++++++++------------------ 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/src/libstd/io/extensions.rs b/src/libstd/io/extensions.rs index f6e87212d17be..548dc3efe92f0 100644 --- a/src/libstd/io/extensions.rs +++ b/src/libstd/io/extensions.rs @@ -51,23 +51,16 @@ impl<'r, R: Reader> Iterator for Bytes<'r, R> { } pub fn u64_to_le_bytes<T>(n: u64, size: uint, f: |v: &[u8]| -> T) -> T { + use unstable::intrinsics::{to_le16, to_le32, to_le64}; + use cast::transmute; + + // LLVM fails to properly optimize this when using shifts instead of the to_le* intrinsics assert!(size <= 8u); match size { 1u => f(&[n as u8]), - 2u => f(&[n as u8, - (n >> 8) as u8]), - 4u => f(&[n as u8, - (n >> 8) as u8, - (n >> 16) as u8, - (n >> 24) as u8]), - 8u => f(&[n as u8, - (n >> 8) as u8, - (n >> 16) as u8, - (n >> 24) as u8, - (n >> 32) as u8, - (n >> 40) as u8, - (n >> 48) as u8, - (n >> 56) as u8]), + 2u => f(unsafe { transmute::<_, [u8, ..2]>(to_le16(n as i16)) }), + 4u => f(unsafe { transmute::<_, [u8, ..4]>(to_le32(n as i32)) }), + 8u => f(unsafe { transmute::<_, [u8, ..8]>(to_le64(n as i64)) }), _ => { let mut bytes: ~[u8] = ~[]; @@ -84,23 +77,16 @@ pub fn u64_to_le_bytes<T>(n: u64, size: uint, f: |v: &[u8]| -> T) -> T { } pub fn u64_to_be_bytes<T>(n: u64, size: uint, f: |v: &[u8]| -> T) -> T { + use unstable::intrinsics::{to_be16, to_be32, to_be64}; + use cast::transmute; + + // LLVM fails to properly optimize this when using shifts instead of the to_be* intrinsics assert!(size <= 8u); match size { 1u => f(&[n as u8]), - 2u => f(&[(n >> 8) as u8, - n as u8]), - 4u => f(&[(n >> 24) as u8, - (n >> 16) as u8, - (n >> 8) as u8, - n as u8]), - 8u => f(&[(n >> 56) as u8, - (n >> 48) as u8, - (n >> 40) as u8, - (n >> 32) as u8, - (n >> 24) as u8, - (n >> 16) as u8, - (n >> 8) as u8, - n as u8]), + 2u => f(unsafe { transmute::<_, [u8, ..2]>(to_be16(n as i16)) }), + 4u => f(unsafe { transmute::<_, [u8, ..4]>(to_be32(n as i32)) }), + 8u => f(unsafe { transmute::<_, [u8, ..8]>(to_be64(n as 
i64)) }), _ => { let mut bytes: ~[u8] = ~[]; let mut i = size;