Commit fc34ec0

Improve deserialization performance with a custom Read implementation
With this change I go from 140fps to 200fps on my gmail yaml.
1 parent c3280ac commit fc34ec0
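
For context: the change works around the standard library's Read implementation for byte slices, which recomputes a data-dependent copy length and rewrites both the pointer and the length of the slice on every call. A rough paraphrase of that behaviour (a sketch, not the actual libstd source) looks like this:

use std::io;

// Sketch of what `impl Read for &[u8]` roughly does per call: the copy length
// is data-dependent, so LLVM cannot lower it to fixed-size loads/stores, and
// both halves of the slice fat pointer are updated after every read.
fn slice_read(slice: &mut &[u8], buf: &mut [u8]) -> io::Result<usize> {
    let amt = std::cmp::min(buf.len(), slice.len());
    let (head, tail) = slice.split_at(amt);
    buf[..amt].copy_from_slice(head);
    *slice = tail;
    Ok(amt)
}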

1 file changed: +73 -4 lines changed

webrender_api/src/display_list.rs

Lines changed: 73 additions & 4 deletions
@@ -17,9 +17,10 @@ use YuvImageDisplayItem;
 use bincode;
 use serde::{Deserialize, Serialize, Serializer};
 use serde::ser::{SerializeMap, SerializeSeq};
-use std::io::Write;
+use std::io::{Read, Write};
 use std::{io, ptr};
 use std::marker::PhantomData;
+use std::slice;
 use time::precise_time_ns;

 // We don't want to push a long text-run. If a text-run is too long, split it into several parts.
@@ -179,6 +180,7 @@ fn skip_slice<T: for<'de> Deserialize<'de>>(
     (range, count)
 }

+
 impl<'a> BuiltDisplayListIter<'a> {
     pub fn new(list: &'a BuiltDisplayList) -> Self {
         Self::new_with_list_and_data(list, list.item_slice())
@@ -229,7 +231,7 @@ impl<'a> BuiltDisplayListIter<'a> {
             return None;
         }

-        self.cur_item = bincode::deserialize_from(&mut self.data, bincode::Infinite)
+        self.cur_item = bincode::deserialize_from(&mut UnsafeReader::new(&mut self.data), bincode::Infinite)
             .expect("MEH: malicious process?");

         match self.cur_item.item {
@@ -371,7 +373,7 @@ impl<'de, 'a, T: Deserialize<'de>> AuxIter<'a, T> {
         let size: usize = if data.len() == 0 {
             0 // Accept empty ItemRanges pointing anywhere
         } else {
-            bincode::deserialize_from(&mut data, bincode::Infinite).expect("MEH: malicious input?")
+            bincode::deserialize_from(&mut UnsafeReader::new(&mut data), bincode::Infinite).expect("MEH: malicious input?")
         };

         AuxIter {
@@ -391,7 +393,7 @@ impl<'a, T: for<'de> Deserialize<'de>> Iterator for AuxIter<'a, T> {
         } else {
             self.size -= 1;
             Some(
-                bincode::deserialize_from(&mut self.data, bincode::Infinite)
+                bincode::deserialize_from(&mut UnsafeReader::new(&mut self.data), bincode::Infinite)
                     .expect("MEH: malicious input?"),
             )
         }
@@ -533,6 +535,73 @@ fn serialize_fast<T: Serialize>(vec: &mut Vec<u8>, e: &T) {
     debug_assert!(((w.0 as usize) - (vec.as_ptr() as usize)) == vec.len());
 }

+// This uses a (start, end) representation instead of (start, len) so that we
+// only need to update a single field as we read through it. This makes it
+// easier for LLVM to understand what's going on. (https://github.com/rust-lang/rust/issues/45068)
+// We update the slice only once we're done reading.
+struct UnsafeReader<'a: 'b, 'b> {
+    start: *const u8,
+    end: *const u8,
+    slice: &'b mut &'a [u8]
+}
+
+impl<'a, 'b> UnsafeReader<'a, 'b> {
+    fn new(buf: &'b mut &'a [u8]) -> UnsafeReader<'a, 'b> {
+        unsafe {
+            let end = buf.as_ptr().offset(buf.len() as isize);
+            let start = buf.as_ptr();
+            UnsafeReader { start: start, end, slice: buf }
+        }
+    }
+
+    // This read implementation is significantly faster than the standard &[u8] one.
+    //
+    // First, it only supports reading exactly buf.len() bytes. This ensures that
+    // the argument to memcpy is always buf.len() and allows a constant buf.len()
+    // to be propagated through to memcpy, which LLVM will turn into explicit loads
+    // and stores. The standard implementation does a len = min(slice.len(), buf.len()).
+    //
+    // Second, we only need to adjust 'start' after reading, and it's only adjusted
+    // by a constant. This allows LLVM to avoid adjusting the length field after every
+    // read and lets the adjustments be aggregated into a single one.
+    #[inline(always)]
+    fn read_internal(&mut self, buf: &mut [u8]) {
+        // This is safe because we panic if start + buf.len() > end.
+        unsafe {
+            if self.start.offset(buf.len() as isize) > self.end {
+                panic!();
+            }
+            ptr::copy_nonoverlapping(self.start, buf.as_mut_ptr(), buf.len());
+            self.start = self.start.offset(buf.len() as isize);
+        }
+    }
+}
+
+impl<'a, 'b> Drop for UnsafeReader<'a, 'b> {
+    // This adjusts the input slice so that it properly represents the amount that's left.
+    fn drop(&mut self) {
+        // This is safe because we know that start and end are contained inside the original slice.
+        unsafe {
+            *self.slice = slice::from_raw_parts(self.start, (self.end as usize) - (self.start as usize));
+        }
+    }
+}
+
+impl<'a, 'b> Read for UnsafeReader<'a, 'b> {
+    // These methods were not being inlined, and we need them to be so that the
+    // memcpy is for a constant size.
+    #[inline(always)]
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        self.read_internal(buf);
+        Ok(buf.len())
+    }
+    #[inline(always)]
+    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
+        self.read_internal(buf);
+        Ok(())
+    }
+}
+
 #[derive(Clone)]
 pub struct DisplayListBuilder {
     pub data: Vec<u8>,
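
The Drop impl is what keeps the call sites above working unchanged: the reader is created as a temporary around &mut self.data, and when that temporary is dropped at the end of the deserialize_from statement the outer slice is rewritten to point at the unread remainder, just as the plain &mut &[u8] reader did before. A minimal standalone sketch of that cursor contract, using only std's slice reader since UnsafeReader is private to this module, with a hypothetical read_u32 helper:

use std::io::Read;

// Hypothetical helper: read one little-endian u32 through a `&mut &[u8]` cursor.
fn read_u32(cursor: &mut &[u8]) -> u32 {
    let mut buf = [0u8; 4];
    // With the UnsafeReader pattern this would be
    //     UnsafeReader::new(cursor).read_exact(&mut buf).unwrap();
    // and the Drop impl would advance `*cursor` by 4 bytes.
    cursor.read_exact(&mut buf).unwrap();
    u32::from_le_bytes(buf)
}

fn main() {
    let bytes = [1u8, 0, 0, 0, 2, 0, 0, 0];
    let mut cursor: &[u8] = &bytes[..];
    assert_eq!(read_u32(&mut cursor), 1);
    assert_eq!(read_u32(&mut cursor), 2);
    assert_eq!(cursor.len(), 0); // the outer slice tracked the reads
}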
