rerun-io · teh-cmc · Oct 14, 2025 · Oct 14, 2025 · Oct 14, 2025 · Oct 14, 2025
diff --git a/src/block/compress.rs b/src/block/compress.rs
@@ -221,8 +221,7 @@ fn count_same_bytes(input: &[u8], cur: &mut usize, source: &[u8], candidate: usi
 /// to produce a total length. When the byte value is 255, another byte must read and added, and so
 /// on. There can be any number of bytes of value "255" following token
 #[inline]
-#[cfg(feature = "safe-encode")]
-fn write_integer(output: &mut impl Sink, mut n: usize) {
+pub(super) fn write_integer(output: &mut impl Sink, mut n: usize) {
     // Note: Since `n` is usually < 0xFF and writing multiple bytes to the output
     // requires 2 branches of bound check (due to the possibility of add overflows)
     // the simple byte at a time implementation below is faster in most cases.
@@ -233,36 +232,6 @@ fn write_integer(output: &mut impl Sink, mut n: usize) {
     push_byte(output, n as u8);
 }
 
-/// Write an integer to the output.
-///
-/// Each additional byte then represent a value from 0 to 255, which is added to the previous value
-/// to produce a total length. When the byte value is 255, another byte must read and added, and so
-/// on. There can be any number of bytes of value "255" following token
-#[inline]
-#[cfg(not(feature = "safe-encode"))]
-fn write_integer(output: &mut impl Sink, mut n: usize) {
-    // Write the 0xFF bytes as long as the integer is higher than said value.
-    if n >= 4 * 0xFF {
-        // In this unlikelly branch we use a fill instead of a loop,
-        // otherwise rustc may output a large unrolled/vectorized loop.
-        let bulk = n / (4 * 0xFF);
-        n %= 4 * 0xFF;
-        unsafe {
-            core::ptr::write_bytes(output.pos_mut_ptr(), 0xFF, 4 * bulk);
-            output.set_pos(output.pos() + 4 * bulk);
-        }
-    }
-
-    // Handle last 1 to 4 bytes
-    push_u32(output, 0xFFFFFFFF);
-    // Updating output len for the remainder
-    unsafe {
-        output.set_pos(output.pos() - 4 + 1 + n / 255);
-        // Write the remaining byte.
-        *output.pos_mut_ptr().sub(1) = (n % 255) as u8;
-    }
-}
-
 /// Handle the last bytes from the input as literals
 #[cold]
 fn handle_last_literals(output: &mut impl Sink, input: &[u8], start: usize) {
@@ -549,15 +518,6 @@ fn push_u16(output: &mut impl Sink, el: u16) {
     }
 }
 
-#[inline]
-#[cfg(not(feature = "safe-encode"))]
-fn push_u32(output: &mut impl Sink, el: u32) {
-    unsafe {
-        core::ptr::copy_nonoverlapping(el.to_le_bytes().as_ptr(), output.pos_mut_ptr(), 4);
-        output.set_pos(output.pos() + 4);
-    }
-}
-
 #[inline(always)] // (always) necessary otherwise compiler fails to inline it
 #[cfg(feature = "safe-encode")]
 fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) {

diff --git a/src/block/decompress.rs b/src/block/decompress.rs
@@ -128,12 +128,12 @@ unsafe fn copy_from_dict(
 /// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 is ignored, because
 /// 4 is the first non-0xFF byte.
 #[inline]
-fn read_integer_ptr(
+pub(super) fn read_integer_ptr(
     input_ptr: &mut *const u8,
     _input_ptr_end: *const u8,
-) -> Result<u32, DecompressError> {
+) -> Result<usize, DecompressError> {
     // We start at zero and count upwards.
-    let mut n: u32 = 0;
+    let mut n: usize = 0;
     // If this byte takes value 255 (the maximum value it can take), another byte is read
     // and added to the sum. This repeats until a byte lower than 255 is read.
     loop {
@@ -147,7 +147,7 @@ fn read_integer_ptr(
         }
         let extra = unsafe { input_ptr.read() };
         *input_ptr = unsafe { input_ptr.add(1) };
-        n += extra as u32;
+        n += extra as usize;
 
         // We continue if we got 255, break otherwise.
         if extra != 0xFF {

diff --git a/src/block/decompress_safe.rs b/src/block/decompress_safe.rs
@@ -25,9 +25,9 @@ use alloc::vec::Vec;
 /// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 is ignored, because
 /// 4 is the first non-0xFF byte.
 #[inline]
-fn read_integer(input: &[u8], input_pos: &mut usize) -> Result<u32, DecompressError> {
+pub(super) fn read_integer(input: &[u8], input_pos: &mut usize) -> Result<usize, DecompressError> {
     // We start at zero and count upwards.
-    let mut n: u32 = 0;
+    let mut n: usize = 0;
     // If this byte takes value 255 (the maximum value it can take), another byte is read
     // and added to the sum. This repeats until a byte lower than 255 is read.
     loop {
@@ -36,7 +36,7 @@ fn read_integer(input: &[u8], input_pos: &mut usize) -> Result<u32, DecompressEr
             .get(*input_pos)
             .ok_or(DecompressError::ExpectedAnotherByte)?;
         *input_pos += 1;
-        n += extra as u32;
+        n += extra as usize;
 
         // We continue if we got 255, break otherwise.
         if extra != 0xFF {

diff --git a/src/block/mod.rs b/src/block/mod.rs
@@ -152,3 +152,25 @@ pub fn uncompressed_size(input: &[u8]) -> Result<(usize, &[u8]), DecompressError
     let rest = &input[4..];
     Ok((uncompressed_size, rest))
 }
+
+/// Round-trips a value above `u32::MAX` through `write_integer` and the active integer reader.
+#[cfg(target_pointer_width = "64")] // only relevant for 64bit CPUs
+#[test]
+fn large_integer_roundtrip() {
+    // `u32::MAX` plus half of `u32::MAX`, computed in `usize`.
+    let half_again = usize::try_from(u32::MAX).unwrap() / 2;
+    let expected = usize::try_from(u32::MAX).unwrap() + half_again;
+
+    // Output buffer: one byte per full 255 in the value, plus one trailing byte.
+    let mut encoded = vec![0u8; expected / 255 + 1];
+    self::compress::write_integer(&mut crate::sink::SliceSink::new(&mut encoded, 0), expected);
+
+    #[cfg(feature = "safe-decode")]
+    let decoded = self::decompress_safe::read_integer(&encoded, &mut 0).unwrap();
+    #[cfg(not(feature = "safe-decode"))]
+    let decoded = {
+        let (mut cursor, end) = (encoded.as_ptr(), encoded.as_ptr_range().end);
+        self::decompress::read_integer_ptr(&mut cursor, end).unwrap()
+    };
+    assert_eq!(expected, decoded);
+}
diff --git a/src/fastcpy.rs b/src/fastcpy.rs
@@ -58,7 +58,7 @@ pub fn slice_copy(src: &[u8], dst: &mut [u8]) {
         return;
     }
 
-    /// The code will use the vmovdqu instruction to copy 32 bytes at a time.
+    // The code will use the vmovdqu instruction to copy 32 bytes at a time.
     #[cfg(target_feature = "avx")]
     {
         if len <= 64 {