Skip to content

Commit df88760

Browse files
committed
Update non-aligned byte reading for bulk operation
That is, instead of updating the queue on a byte-by-byte basis, process the whole buffer in sensibly-sized chunks and update the queue at the very end.
1 parent a4efaec commit df88760

File tree

6 files changed

+194
-4
lines changed

6 files changed

+194
-4
lines changed

src/lib.rs

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,6 +1077,24 @@ pub trait Endianness: Sized {
10771077
W: BitWrite,
10781078
S: SignedInteger;
10791079

1080+
/// Reads whole set of bytes to output buffer
1081+
#[inline]
1082+
fn read_bytes<R>(
1083+
reader: &mut R,
1084+
queue_value: &mut u8,
1085+
queue_bits: &mut u32,
1086+
buf: &mut [u8],
1087+
) -> io::Result<()>
1088+
where
1089+
R: io::Read,
1090+
{
1091+
// a naive implementation which works anywhere
1092+
buf.iter_mut().try_for_each(|b| {
1093+
*b = Self::read_bits_fixed::<8, _, _>(reader.by_ref(), queue_value, queue_bits)?;
1094+
Ok(())
1095+
})
1096+
}
1097+
10801098
/// Reads convertible numeric value from reader in this endianness
10811099
fn read_primitive<R, V>(r: &mut R) -> io::Result<V>
10821100
where
@@ -2100,6 +2118,59 @@ impl Endianness for BigEndian {
21002118
}
21012119
}
21022120

2121+
/// Reads whole bytes into `buf` from a big-endian stream, splicing them
/// together with the bits already sitting in the bit queue.
///
/// The input is pulled from `reader` in chunks of up to `CHUNK_SIZE`
/// bytes and the queue is updated once per chunk rather than once
/// per byte.
///
/// * `reader` - source of the raw input bytes
/// * `queue_value` - pending bits, held in the most-significant
///   `queue_bits` positions of the byte; replaced with the leftover
///   bits of the last input byte of each chunk
/// * `queue_bits` - number of pending bits; read but never modified,
///   since reading whole bytes leaves the count unchanged
/// * `buf` - output buffer, filled completely on success
///
/// # Errors
///
/// Passes along any error from `reader.read_exact`, such as hitting
/// the end of the input before `buf` is full.
fn read_bytes<R>(
    reader: &mut R,
    queue_value: &mut u8,
    queue_bits: &mut u32,
    buf: &mut [u8],
) -> io::Result<()>
where
    R: io::Read,
{
    const CHUNK_SIZE: usize = 1024;

    // we don't modify the final queue_bits count
    // but the naive implementation might
    let queue_bits = *queue_bits;

    // With an empty queue there is nothing to splice in, and the
    // carry shift below would be a shift by the full width of a byte
    // (a panic in debug builds), so fall back to a plain copy.
    if queue_bits == 0 {
        return reader.read_exact(buf);
    }

    // how far the low bits of one input byte move up
    // to become the high bits of the following output byte
    // NOTE(review): assumes queue_bits < 8 — confirm against callers
    let carry_shift = u8::BITS - queue_bits;

    let mut input_chunk: [u8; CHUNK_SIZE] = [0; CHUNK_SIZE];

    for output_chunk in buf.chunks_mut(CHUNK_SIZE) {
        let input_chunk = &mut input_chunk[..output_chunk.len()];
        reader.read_exact(input_chunk)?;

        // shift down each byte in our input to eventually
        // accommodate the contents of the bit queue
        // and make that our output
        output_chunk
            .iter_mut()
            .zip(input_chunk.iter())
            .for_each(|(o, i)| {
                *o = i >> queue_bits;
            });

        // include leftover bits from the previous input byte
        // shifted to the top
        output_chunk[1..]
            .iter_mut()
            .zip(input_chunk.iter())
            .for_each(|(o, i)| {
                *o |= *i << carry_shift;
            });

        // finally, prepend the queue's contents
        // to the first byte in the chunk
        // while replacing those contents
        // with the leftover bits of the final byte of the input
        // (chunks are never empty, so there is always a final byte)
        if let Some(last) = input_chunk.last() {
            output_chunk[0] |= core::mem::replace(queue_value, last << carry_shift);
        }
    }

    Ok(())
}
2173+
21032174
#[inline]
21042175
fn read_primitive<R, V>(r: &mut R) -> io::Result<V>
21052176
where
@@ -2553,6 +2624,50 @@ impl Endianness for LittleEndian {
25532624
}
25542625
}
25552626

2627+
/// Reads whole bytes into `buf` from a little-endian stream, splicing
/// them together with the bits already sitting in the bit queue.
///
/// The input is pulled from `reader` in chunks of up to `CHUNK_SIZE`
/// bytes and the queue is updated once per chunk rather than once
/// per byte.
///
/// * `reader` - source of the raw input bytes
/// * `queue_value` - pending bits, held in the least-significant
///   `queue_bits` positions of the byte; replaced with the leftover
///   bits of the last input byte of each chunk
/// * `queue_bits` - number of pending bits; read but never modified,
///   since reading whole bytes leaves the count unchanged
/// * `buf` - output buffer, filled completely on success
///
/// # Errors
///
/// Passes along any error from `reader.read_exact`, such as hitting
/// the end of the input before `buf` is full.
fn read_bytes<R>(
    reader: &mut R,
    queue_value: &mut u8,
    queue_bits: &mut u32,
    buf: &mut [u8],
) -> io::Result<()>
where
    R: io::Read,
{
    const CHUNK_SIZE: usize = 1024;

    // we don't modify the final queue_bits count
    // but the naive implementation might
    let queue_bits = *queue_bits;

    // With an empty queue there is nothing to splice in, and the
    // carry shift below would be a shift by the full width of a byte
    // (a panic in debug builds), so fall back to a plain copy.
    if queue_bits == 0 {
        return reader.read_exact(buf);
    }

    // how far the high bits of one input byte move down
    // to become the low bits of the following output byte
    // NOTE(review): assumes queue_bits < 8 — confirm against callers
    let carry_shift = u8::BITS - queue_bits;

    let mut input_chunk: [u8; CHUNK_SIZE] = [0; CHUNK_SIZE];

    for output_chunk in buf.chunks_mut(CHUNK_SIZE) {
        let input_chunk = &mut input_chunk[..output_chunk.len()];
        reader.read_exact(input_chunk)?;

        // shift up each byte in our input to leave room
        // at the bottom for the contents of the bit queue
        // and make that our output
        output_chunk
            .iter_mut()
            .zip(input_chunk.iter())
            .for_each(|(o, i)| {
                *o = i << queue_bits;
            });

        // include leftover bits from the previous input byte
        // shifted to the bottom
        output_chunk[1..]
            .iter_mut()
            .zip(input_chunk.iter())
            .for_each(|(o, i)| {
                *o |= i >> carry_shift;
            });

        // finally, merge the queue's contents
        // into the first byte in the chunk
        // while replacing those contents
        // with the leftover bits of the final byte of the input
        // (chunks are never empty, so there is always a final byte)
        if let Some(last) = input_chunk.last() {
            output_chunk[0] |= core::mem::replace(queue_value, last >> carry_shift);
        }
    }

    Ok(())
}
2670+
25562671
#[inline]
25572672
fn read_primitive<R, V>(r: &mut R) -> io::Result<V>
25582673
where

src/read.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1715,14 +1715,12 @@ impl<R: io::Read, E: Endianness> BitRead for BitReader<R, E> {
17151715
/// assert!(reader.read_bytes(&mut buf).is_ok());
17161716
/// assert_eq!(&buf, b"bar");
17171717
/// ```
1718+
#[inline]
17181719
fn read_bytes(&mut self, buf: &mut [u8]) -> io::Result<()> {
17191720
if BitRead::byte_aligned(self) {
17201721
self.reader.read_exact(buf)
17211722
} else {
1722-
for b in buf.iter_mut() {
1723-
*b = self.read_unsigned::<8, _>()?;
1724-
}
1725-
Ok(())
1723+
E::read_bytes(&mut self.reader, &mut self.value, &mut self.bits, buf)
17261724
}
17271725
}
17281726

tests/random-3be.bin

127 Bytes
Binary file not shown.

tests/random-3le.bin

127 Bytes
Binary file not shown.

tests/random.bin

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
x^㍀���Mt���oi���~3�J+�xm�v�M�v�^��xk.Oˈ���`��kq����r��ϙ|�GG��~l��{P4Hg'~\��#� �ʼ��>T�"r[ushHikT!T��u���x6

tests/read.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,44 @@ fn test_reader_huffman_be() {
242242
assert_eq!(r.read_huffman::<SomeTree>().unwrap(), 2);
243243
}
244244

245+
/// Checks that `read_bytes` on a non-byte-aligned big-endian reader
/// yields the same bytes as reading the stream bit by bit, for chunks
/// of several sizes.
#[test]
fn test_read_chunks_be() {
    use bitstream_io::{BigEndian, BitRead, BitReader};

    let data: &[u8] = &[0b1011_0001, 0b1110_1101, 0b0011_1011, 0b1100_0001];
    let mut chunk: [u8; 2] = [0; 2];

    // test non-aligned chunk reading
    let mut r = BitReader::endian(data, BigEndian);
    assert_eq!(r.read::<2, u8>().unwrap(), 0b10);
    r.read_bytes(&mut chunk).unwrap();
    assert_eq!(&chunk, &[0b11_0001_11, 0b10_1101_00]);
    assert_eq!(r.read::<14, u16>().unwrap(), 0b11_1011_1100_0001);

    // test the smallest chunk
    let mut chunk = 0;
    let mut r = BitReader::endian(data, BigEndian);
    assert_eq!(r.read::<2, u8>().unwrap(), 0b10);
    r.read_bytes(core::slice::from_mut(&mut chunk)).unwrap();
    assert_eq!(chunk, 0b11_0001_11);
    r.read_bytes(core::slice::from_mut(&mut chunk)).unwrap();
    assert_eq!(chunk, 0b10_1101_00);
    assert_eq!(r.read::<14, u16>().unwrap(), 0b11_1011_1100_0001);

    // test a larger chunk
    let data = include_bytes!("random.bin");
    let mut chunk: [u8; 127] = [0; 127];

    let mut r = BitReader::endian(data.as_slice(), BigEndian);
    assert_eq!(r.read::<3, u8>().unwrap(), 0b000);
    r.read_bytes(&mut chunk).unwrap();
    assert_eq!(
        chunk.as_slice(),
        include_bytes!("random-3be.bin").as_slice()
    );
    assert_eq!(r.read::<5, u8>().unwrap(), 0b10110);

    // test a chunk bigger than the 1024-byte working buffer of the
    // bulk implementation, so that the bit queue has to be carried
    // across internal chunk boundaries; the expected bytes come from
    // reading the very same stream one byte at a time
    let data: Vec<u8> = (0..3000u32)
        .map(|i| i.wrapping_mul(37).wrapping_add(11) as u8)
        .collect();

    let mut expected = vec![0u8; 2500];
    let mut r = BitReader::endian(data.as_slice(), BigEndian);
    let prefix = r.read::<3, u8>().unwrap();
    for b in expected.iter_mut() {
        *b = r.read::<8, u8>().unwrap();
    }
    let suffix = r.read::<5, u8>().unwrap();

    let mut chunk = vec![0u8; 2500];
    let mut r = BitReader::endian(data.as_slice(), BigEndian);
    assert_eq!(r.read::<3, u8>().unwrap(), prefix);
    r.read_bytes(&mut chunk).unwrap();
    assert_eq!(chunk, expected);
    assert_eq!(r.read::<5, u8>().unwrap(), suffix);
}
282+
245283
#[test]
246284
fn test_reader_le() {
247285
use bitstream_io::{BitRead, BitReader, LittleEndian};
@@ -468,6 +506,44 @@ fn test_reader_huffman_le() {
468506
assert_eq!(r.read_huffman::<SomeTree>().unwrap(), 3);
469507
}
470508

509+
/// Checks that `read_bytes` on a non-byte-aligned little-endian reader
/// yields the same bytes as reading the stream bit by bit, for chunks
/// of several sizes.
#[test]
fn test_read_chunks_le() {
    use bitstream_io::{BitRead, BitReader, LittleEndian};

    let data: &[u8] = &[0b1011_0001, 0b1110_1101, 0b0011_1011, 0b1100_0001];
    let mut chunk: [u8; 2] = [0; 2];

    // test non-aligned chunk reading
    let mut r = BitReader::endian(data, LittleEndian);
    assert_eq!(r.read::<2, u8>().unwrap(), 0b01);
    r.read_bytes(&mut chunk).unwrap();
    assert_eq!(&chunk, &[0b01_1011_00, 0b11_1110_11]);
    assert_eq!(r.read::<14, u16>().unwrap(), 0b1100_0001_0011_10);

    // test the smallest chunk
    let mut chunk = 0;
    let mut r = BitReader::endian(data, LittleEndian);
    assert_eq!(r.read::<2, u8>().unwrap(), 0b01);
    r.read_bytes(core::slice::from_mut(&mut chunk)).unwrap();
    assert_eq!(chunk, 0b01_1011_00);
    r.read_bytes(core::slice::from_mut(&mut chunk)).unwrap();
    assert_eq!(chunk, 0b11_1110_11);
    assert_eq!(r.read::<14, u16>().unwrap(), 0b1100_0001_0011_10);

    // test a larger chunk
    let data = include_bytes!("random.bin");
    let mut chunk: [u8; 127] = [0; 127];

    let mut r = BitReader::endian(data.as_slice(), LittleEndian);
    assert_eq!(r.read::<3, u8>().unwrap(), 0b010);
    r.read_bytes(&mut chunk).unwrap();
    assert_eq!(
        chunk.as_slice(),
        include_bytes!("random-3le.bin").as_slice()
    );
    assert_eq!(r.read::<5, u8>().unwrap(), 0b00110);

    // test a chunk bigger than the 1024-byte working buffer of the
    // bulk implementation, so that the bit queue has to be carried
    // across internal chunk boundaries; the expected bytes come from
    // reading the very same stream one byte at a time
    let data: Vec<u8> = (0..3000u32)
        .map(|i| i.wrapping_mul(37).wrapping_add(11) as u8)
        .collect();

    let mut expected = vec![0u8; 2500];
    let mut r = BitReader::endian(data.as_slice(), LittleEndian);
    let prefix = r.read::<3, u8>().unwrap();
    for b in expected.iter_mut() {
        *b = r.read::<8, u8>().unwrap();
    }
    let suffix = r.read::<5, u8>().unwrap();

    let mut chunk = vec![0u8; 2500];
    let mut r = BitReader::endian(data.as_slice(), LittleEndian);
    assert_eq!(r.read::<3, u8>().unwrap(), prefix);
    r.read_bytes(&mut chunk).unwrap();
    assert_eq!(chunk, expected);
    assert_eq!(r.read::<5, u8>().unwrap(), suffix);
}
546+
471547
#[test]
472548
fn test_reader_io_errors_be() {
473549
use bitstream_io::{BigEndian, BitRead, BitReader};

0 commit comments

Comments
 (0)