Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 1 addition & 41 deletions src/block/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,7 @@ fn count_same_bytes(input: &[u8], cur: &mut usize, source: &[u8], candidate: usi
/// to produce a total length. When the byte value is 255, another byte must be read and added,
/// and so on. There can be any number of bytes of value "255" following the token.
#[inline]
#[cfg(feature = "safe-encode")]
fn write_integer(output: &mut impl Sink, mut n: usize) {
pub(super) fn write_integer(output: &mut impl Sink, mut n: usize) {
// Note: Since `n` is usually < 0xFF and writing multiple bytes to the output
// requires 2 branches of bound check (due to the possibility of add overflows)
// the simple byte at a time implementation below is faster in most cases.
Expand All @@ -233,36 +232,6 @@ fn write_integer(output: &mut impl Sink, mut n: usize) {
push_byte(output, n as u8);
}

/// Write an integer to the output.
///
/// Each additional byte then represents a value from 0 to 255, which is added to the previous
/// value to produce a total length. When the byte value is 255, another byte must be read and
/// added, and so on. There can be any number of bytes of value "255" following the token.
///
/// NOTE(review): this variant writes through raw pointers with no bounds check visible here,
/// and its final step always stores four bytes even when fewer are kept. It presumably relies
/// on the caller having reserved enough spare room in `output` — confirm against the callers.
#[inline]
#[cfg(not(feature = "safe-encode"))]
fn write_integer(output: &mut impl Sink, mut n: usize) {
// Write the 0xFF bytes as long as the integer is higher than said value.
if n >= 4 * 0xFF {
// In this unlikely branch we use a fill instead of a loop,
// otherwise rustc may output a large unrolled/vectorized loop.
let bulk = n / (4 * 0xFF);
n %= 4 * 0xFF;
// Every group of four 0xFF bytes accounts for 4 * 255 of the value, so `4 * bulk` filler
// bytes are emitted with a single fill and the position is advanced past them.
unsafe {
core::ptr::write_bytes(output.pos_mut_ptr(), 0xFF, 4 * bulk);
output.set_pos(output.pos() + 4 * bulk);
}
}

// Handle last 1 to 4 bytes
// At this point `n` is below 4 * 255, so `n / 255` is at most 3.
push_u32(output, 0xFFFFFFFF);
// Updating output len for the remainder
// `push_u32` advanced the position by four; step back over those four bytes, then keep
// `n / 255` of the 0xFF bytes plus one terminating byte.
unsafe {
output.set_pos(output.pos() - 4 + 1 + n / 255);
// Write the remaining byte.
*output.pos_mut_ptr().sub(1) = (n % 255) as u8;
}
}

/// Handle the last bytes from the input as literals
#[cold]
fn handle_last_literals(output: &mut impl Sink, input: &[u8], start: usize) {
Expand Down Expand Up @@ -549,15 +518,6 @@ fn push_u16(output: &mut impl Sink, el: u16) {
}
}

/// Stores `el` at the sink's current position as four little-endian bytes and advances the
/// position by four.
///
/// NOTE(review): no bounds check is visible here; the caller presumably guarantees at least
/// four writable bytes at the current position — confirm against the `Sink` contract.
#[inline]
#[cfg(not(feature = "safe-encode"))]
fn push_u32(output: &mut impl Sink, el: u32) {
    let le_bytes = el.to_le_bytes();
    unsafe {
        core::ptr::copy_nonoverlapping(le_bytes.as_ptr(), output.pos_mut_ptr(), le_bytes.len());
        output.set_pos(output.pos() + le_bytes.len());
    }
}

#[inline(always)] // (always) necessary otherwise compiler fails to inline it
#[cfg(feature = "safe-encode")]
fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) {
Expand Down
8 changes: 4 additions & 4 deletions src/block/decompress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,12 @@ unsafe fn copy_from_dict(
/// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 are ignored, because
/// 4 is the first non-0xFF byte.
#[inline]
fn read_integer_ptr(
pub(super) fn read_integer_ptr(
input_ptr: &mut *const u8,
_input_ptr_end: *const u8,
) -> Result<u32, DecompressError> {
) -> Result<usize, DecompressError> {
// We start at zero and count upwards.
let mut n: u32 = 0;
let mut n: usize = 0;
// If this byte takes value 255 (the maximum value it can take), another byte is read
// and added to the sum. This repeats until a byte lower than 255 is read.
loop {
Expand All @@ -147,7 +147,7 @@ fn read_integer_ptr(
}
let extra = unsafe { input_ptr.read() };
*input_ptr = unsafe { input_ptr.add(1) };
n += extra as u32;
n += extra as usize;

// We continue if we got 255, break otherwise.
if extra != 0xFF {
Expand Down
6 changes: 3 additions & 3 deletions src/block/decompress_safe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ use alloc::vec::Vec;
/// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 are ignored, because
/// 4 is the first non-0xFF byte.
#[inline]
fn read_integer(input: &[u8], input_pos: &mut usize) -> Result<u32, DecompressError> {
pub(super) fn read_integer(input: &[u8], input_pos: &mut usize) -> Result<usize, DecompressError> {
// We start at zero and count upwards.
let mut n: u32 = 0;
let mut n: usize = 0;
// If this byte takes value 255 (the maximum value it can take), another byte is read
// and added to the sum. This repeats until a byte lower than 255 is read.
loop {
Expand All @@ -36,7 +36,7 @@ fn read_integer(input: &[u8], input_pos: &mut usize) -> Result<u32, DecompressEr
.get(*input_pos)
.ok_or(DecompressError::ExpectedAnotherByte)?;
*input_pos += 1;
n += extra as u32;
n += extra as usize;

// We continue if we got 255, break otherwise.
if extra != 0xFF {
Expand Down
22 changes: 22 additions & 0 deletions src/block/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,25 @@ pub fn uncompressed_size(input: &[u8]) -> Result<(usize, &[u8]), DecompressError
let rest = &input[4..];
Ok((uncompressed_size, rest))
}

/// Encodes a length that does not fit in 32 bits with `write_integer`, decodes it again with
/// whichever integer reader the `safe-decode` feature selects, and checks the value survives.
#[test]
#[cfg(target_pointer_width = "64")] // only relevant for 64bit CPUs
fn large_integer_roundtrip() {
    // 1.5 * u32::MAX: deliberately beyond what a `u32` accumulator could hold.
    let base = usize::try_from(u32::MAX).unwrap();
    let expected = base + base / 2;

    // One 0xFF byte per full 255 of the value, plus the final terminating byte.
    let encoded_len = expected / 255 + 1;
    let mut encoded = vec![0u8; encoded_len];
    {
        let mut sink = crate::sink::SliceSink::new(&mut encoded, 0);
        self::compress::write_integer(&mut sink, expected);
    }

    #[cfg(feature = "safe-decode")]
    let actual = {
        let mut pos = 0;
        self::decompress_safe::read_integer(&encoded, &mut pos).unwrap()
    };

    #[cfg(not(feature = "safe-decode"))]
    let actual = {
        let mut range = encoded.as_ptr_range();
        self::decompress::read_integer_ptr(&mut range.start, range.end).unwrap()
    };

    assert_eq!(expected, actual);
}
2 changes: 1 addition & 1 deletion src/fastcpy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ pub fn slice_copy(src: &[u8], dst: &mut [u8]) {
return;
}

/// The code will use the vmovdqu instruction to copy 32 bytes at a time.
// The code will use the vmovdqu instruction to copy 32 bytes at a time.
#[cfg(target_feature = "avx")]
{
if len <= 64 {
Expand Down