Skip to content

Use LZ4 instead of Deflate in metadata #6954

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,8 @@ do
for i in \
isaac linenoise sync test \
arch/i386 arch/x86_64 arch/arm arch/mips \
libuv libuv/src/ares libuv/src/eio libuv/src/ev
libuv libuv/src/ares libuv/src/eio libuv/src/ev \
lz4
do
make_dir rt/$t/stage$s/$i
done
Expand Down
2 changes: 1 addition & 1 deletion mk/rt.mk
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ RUNTIME_CXXS_$(1)_$(2) := \
rt/rust_android_dummy.cpp \
rt/rust_test_helpers.cpp

RUNTIME_CS_$(1)_$(2) := rt/linenoise/linenoise.c rt/linenoise/utf8.c
RUNTIME_CS_$(1)_$(2) := rt/linenoise/linenoise.c rt/linenoise/utf8.c rt/lz4/lz4.c rt/lz4/lz4hc.c rt/lz4/xxhash.c

RUNTIME_S_$(1)_$(2) := rt/arch/$$(HOST_$(1))/_context.S \
rt/arch/$$(HOST_$(1))/ccall.S \
Expand Down
2 changes: 2 additions & 0 deletions mk/tests.mk
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ ALL_CS := $(filter-out $(S)src/rt/bigint/bigint_ext.cpp \
$(S)src/rt/miniz.cpp \
$(S)src/rt/linenoise/linenoise.c \
$(S)src/rt/linenoise/utf8.c \
$(wildcard $(S)src/rt/lz4/*.c) \
,$(ALL_CS))
ALL_HS := $(wildcard $(S)src/rt/*.h \
$(S)src/rt/*/*.h \
Expand All @@ -215,6 +216,7 @@ ALL_HS := $(filter-out $(S)src/rt/vg/valgrind.h \
$(S)src/rt/bigint/bigint.h \
$(S)src/rt/linenoise/linenoise.h \
$(S)src/rt/linenoise/utf8.h \
$(wildcard $(S)src/rt/lz4/*.h) \
,$(ALL_HS))

# Run the tidy script in multiple parts to avoid huge 'echo' commands
Expand Down
162 changes: 162 additions & 0 deletions src/libextra/lz4.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

/*!

Simple compression using the very fast LZ4 algorithm

*/

#[allow(missing_doc)];

use core::prelude::*;

use core::libc::{c_void, c_int};
use core::vec;
use core::num;
use core::io;
use core::sys;
use core::vec::raw::to_mut_ptr;

// Raw FFI declarations for the LZ4 C entry points used by this module. The `link_name`
// attribute below names `rustrt` as the library these symbols are linked from.
priv mod rustrt {
use core::libc::{c_int, c_void};

#[link_name = "rustrt"]
pub extern {
// Used by `compress` when `high_compression` is false. `compress` treats a return value of 0
// as failure and otherwise uses the return value as the number of bytes written to `dest`.
unsafe fn LZ4_compress(source: *const c_void, dest: *mut c_void, inputSize: c_int) -> c_int;
// Used by `compress` when `high_compression` is true; the result is handled the same way as
// for `LZ4_compress`.
unsafe fn LZ4_compressHC(source: *const c_void, dest: *mut c_void, inputSize: c_int)
-> c_int;
// Used by `decompress`, which passes the expected output size as `maxOutputSize` and treats
// any return value other than that size as failure.
unsafe fn LZ4_decompress_safe(source: *const c_void, dest: *mut c_void, inputSize: c_int,
maxOutputSize: c_int) -> c_int;
}
}


/// Upper bound on the compressed size of an input of `size` bytes.
///
/// `compress` uses this to size its output buffer before calling into LZ4.
pub fn LZ4_compressBound(size: uint) -> uint {
    // One extra byte per 255 input bytes, plus a fixed 16-byte margin.
    let per_run_overhead = size / 255;
    let fixed_overhead = 16;
    size + per_run_overhead + fixed_overhead
}

/// Container for LZ4-compressed data, because LZ4 doesn't define its own container.
///
/// `compress` produces one of these, and `from_bytes` rebuilds one from a serialized header
/// (`uncompressed_size`, then `buf_size`) followed by the compressed payload.
pub struct LZ4Container {
// Length in bytes of the original input; handed to `decompress` as the expected output size.
uncompressed_size: uint,
// Length in bytes of `buf`. Both `compress` and `from_bytes` set this to `buf.len()`.
buf_size: uint,
// The raw LZ4-compressed bytes.
buf: ~[u8]
}

/// Compress a buffer with LZ4.
///
/// When `high_compression` is true the `LZ4_compressHC` entry point is used, otherwise
/// `LZ4_compress`. The returned container records the original length of `bytes` so that it
/// can later be passed back to `decompress`.
///
/// # Failure
///
/// Fails if `bytes` is longer than the largest `c_int`, or if the LZ4 call does not report a
/// positive number of bytes written.
pub fn compress(bytes: &const [u8], high_compression: bool) -> LZ4Container {
    let max_cint: c_int = num::Bounded::max_value();
    assert!(bytes.len() <= max_cint as uint, "buffer too long");
    // Reserve the worst-case output size up front; LZ4 writes straight into this allocation.
    let mut buf: ~[u8] = vec::with_capacity(LZ4_compressBound(bytes.len()));
    let mut res = 0;
    do vec::as_const_buf(bytes) |b, len| {
        unsafe {
            let src = b as *c_void;
            let dst = to_mut_ptr(buf) as *mut c_void;
            res = if high_compression {
                rustrt::LZ4_compressHC(src, dst, len as c_int)
            } else {
                rustrt::LZ4_compress(src, dst, len as c_int)
            };
            // Validate the return code *before* using it as a length: a zero or negative value
            // cast to `uint` must never reach `set_len`.
            assert!((res as int) > 0, "LZ4_compress(HC) failed");
            vec::raw::set_len(&mut buf, res as uint);
        }
    }
    // FIXME #4960: realloc buffer to res bytes
    return LZ4Container{ uncompressed_size: bytes.len(), buf_size: buf.len(), buf: buf }
}

/// Decompress LZ4 data. Returns None if the input buffer was malformed or didn't decompress
/// to `size` bytes.
pub fn decompress(bytes: &[u8], uncompressed_size: uint) -> Option<~[u8]> {
do vec::as_const_buf(bytes) |b, len| {
let mut out: ~[u8] = vec::with_capacity(uncompressed_size as uint);
unsafe {
let res = rustrt::LZ4_decompress_safe(b as *c_void, to_mut_ptr(out) as *mut c_void,
len as c_int, uncompressed_size as c_int);
if res != uncompressed_size as c_int {
warn!("LZ4_decompress_safe returned %?", res);
None
} else {
vec::raw::set_len(&mut out, res as uint);
Some(out)
}
}
}
}
impl LZ4Container {
/// Decompress LZ4 data. Returns None if the input buffer was malformed or didn't decompress
/// to `size` bytes.
pub fn decompress(&self) -> Option<~[u8]> {
decompress(self.buf, self.uncompressed_size)
}
/// Create an LZ4Container out of bytes
pub fn from_bytes(bytes: &[u8]) -> LZ4Container {
do io::with_bytes_reader(bytes) |rdr| {
let uncompressed_size = rdr.read_le_uint();
let buf_size = rdr.read_le_uint();
let remaining = bytes.len() - rdr.tell();
assert!(remaining >= buf_size,
fmt!("header wants more bytes than present in buffer (wanted %?, found %?)",
buf_size, remaining));
let buf = bytes.slice(rdr.tell(), rdr.tell() + buf_size).to_owned();
assert_eq!(buf_size, buf.len());
LZ4Container { uncompressed_size: uncompressed_size, buf_size: buf_size, buf: buf }
}
}

/// Decompress the contents of an encoded LZ4Container. Like `from_bytes(bytes).decompress()`
/// without an extra copy.
pub fn decompress_bytes(bytes: &[u8]) -> Option<~[u8]> {
do io::with_bytes_reader(bytes) |rdr| {
let uncompressed_size = rdr.read_le_uint();
let buf_size = rdr.read_le_uint();
let remaining = bytes.len() - rdr.tell();
assert!(remaining >= buf_size,
fmt!("header wants more bytes than present in buffer (wanted %?, found %?)",
buf_size, remaining));
let buf = bytes.slice(rdr.tell(), rdr.tell() + buf_size);
assert_eq!(buf_size, buf.len());
decompress(buf, uncompressed_size)
}
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use core::rand;
    use core::rand::RngUtil;

    // Compresses buffers built from a small random vocabulary (so they are highly compressible)
    // and checks that decompression reproduces the input exactly.
    #[test]
    #[allow(non_implicitly_copyable_typarams)]
    fn test_round_trip() {
        let mut r = rand::rng();
        let mut words = ~[];
        for 20.times {
            let range = r.gen_uint_range(1, 10);
            words.push(r.gen_bytes(range));
        }
        for 20.times {
            let mut input = ~[];
            for 2000.times {
                input.push_all(r.choose(words));
            }
            debug!("de/inflate of %u bytes of random word-sequences",
                   input.len());
            // This module exposes `compress` (there is no `compress_bytes`), and the container's
            // size field is `buf_size` (there is no `size`).
            let cmp = compress(input, true);
            debug!("compressed size reported as %?", cmp.buf_size);
            let out = cmp.decompress().unwrap();
            debug!("%u bytes compressed to %u (%.1f%% size) and was decompressed to %?",
                   input.len(), cmp.buf.len(),
                   100.0 * ((cmp.buf.len() as float) / (input.len() as float)), out.len());
            assert_eq!(input, out);
        }
    }
}
1 change: 1 addition & 0 deletions src/libextra/std.rc
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ pub mod stats;
pub mod semver;
pub mod fileinput;
pub mod flate;
pub mod lz4;

#[cfg(unicode)]
mod unicode;
Expand Down
9 changes: 6 additions & 3 deletions src/librustc/metadata/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ use core::io;
use core::str;
use core::uint;
use core::vec;
use extra::flate;
use core::to_bytes::ToBytes;
use extra::lz4;
use extra::serialize::Encodable;
use extra;
use syntax::abi::AbiSet;
Expand Down Expand Up @@ -1516,9 +1517,11 @@ pub fn encode_metadata(parms: EncodeParams, crate: &crate) -> ~[u8] {
wr.write(&[0u8, 0u8, 0u8, 0u8]);

let writer_bytes: &mut ~[u8] = wr.bytes;

let compressed = lz4::compress(*writer_bytes, true);
vec::to_owned(metadata_encoding_version) +
flate::deflate_bytes(*writer_bytes)
compressed.uncompressed_size.to_bytes(true) +
compressed.buf_size.to_bytes(true) +
compressed.buf
}

// Get the encoded string for a type
Expand Down
6 changes: 3 additions & 3 deletions src/librustc/metadata/loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use core::ptr;
use core::str;
use core::uint;
use core::vec;
use extra::flate;
use extra::lz4;

pub enum os {
os_macos,
Expand Down Expand Up @@ -226,8 +226,8 @@ fn get_metadata_section(os: os,
debug!("inflating %u bytes of compressed metadata",
csz - vlen);
do vec::raw::buf_as_slice(cvbuf1, csz-vlen) |bytes| {
let inflated = flate::inflate_bytes(bytes);
found = Some(@(inflated));
let s = lz4::LZ4Container::decompress_bytes(bytes);
found = Some(@(s.expect("metadata was corrupt!")));
}
if found != None {
return found;
Expand Down
Loading