Skip to content

Commit d64c845

Browse files
committed
Auto merge of #113542 - saethlin:adaptive-tables, r=b-naber
Adapt table sizes to the contents

This is an implementation of rust-lang/compiler-team#666

The objective of this PR is to permit the rmeta format to accommodate larger crates that need offsets larger than a `u32` can store, without compromising performance for crates that do not need such range. The second commit is a number of tiny optimization opportunities I noticed while looking at perf recordings of the first commit.

The rmeta tables need to have fixed-size elements to permit lazy random access. But the size only needs to be fixed _per table_, not per element type. This PR adds another `usize` to the table header which indicates the table element size. As each element of a table is set, we keep track of the widest encoded table value, then don't bother encoding all the unused trailing bytes on each value. When decoding table elements, we copy them to a full-width array if they are not already full-width.

`LazyArray` needs some special treatment. Most other values that are encoded in tables are indexes or offsets, and those tend to be small, so we get to drop a lot of zero bytes off the end. But `LazyArray` encodes _two_ small values in a fixed-width table element: the position of the array and the length of the array. The treatment described above could trim zero bytes off the length, but any nonzero length shields the position bytes from the optimization. To improve this, we interleave the bytes of position and length. This change is responsible for about half of the crate metadata win on many crates.

Fixes #112934 (probably)
Fixes #103607
2 parents 6e8f677 + 225b3c0 commit d64c845

File tree

4 files changed

+125
-47
lines changed

4 files changed

+125
-47
lines changed

compiler/rustc_metadata/src/rmeta/decoder.rs

+11-3
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ impl<'a, 'tcx> Metadata<'a, 'tcx> for (CrateMetadataRef<'a>, TyCtxt<'tcx>) {
250250
}
251251

252252
impl<T: ParameterizedOverTcx> LazyValue<T> {
253+
#[inline]
253254
fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(self, metadata: M) -> T::Value<'tcx>
254255
where
255256
T::Value<'tcx>: Decodable<DecodeContext<'a, 'tcx>>,
@@ -294,6 +295,7 @@ unsafe impl<'a, 'tcx, T: Decodable<DecodeContext<'a, 'tcx>>> TrustedLen
294295
}
295296

296297
impl<T: ParameterizedOverTcx> LazyArray<T> {
298+
#[inline]
297299
fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(
298300
self,
299301
metadata: M,
@@ -360,8 +362,8 @@ impl<'a, 'tcx> DecodeContext<'a, 'tcx> {
360362
self.read_lazy_offset_then(|pos| LazyArray::from_position_and_num_elems(pos, len))
361363
}
362364

363-
fn read_lazy_table<I, T>(&mut self, len: usize) -> LazyTable<I, T> {
364-
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, len))
365+
fn read_lazy_table<I, T>(&mut self, width: usize, len: usize) -> LazyTable<I, T> {
366+
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, width, len))
365367
}
366368

367369
#[inline]
@@ -420,25 +422,29 @@ impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> {
420422
}
421423

422424
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for CrateNum {
425+
#[inline]
423426
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> CrateNum {
424427
let cnum = CrateNum::from_u32(d.read_u32());
425428
d.map_encoded_cnum_to_current(cnum)
426429
}
427430
}
428431

429432
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefIndex {
433+
#[inline]
430434
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefIndex {
431435
DefIndex::from_u32(d.read_u32())
432436
}
433437
}
434438

435439
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnIndex {
440+
#[inline]
436441
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ExpnIndex {
437442
ExpnIndex::from_u32(d.read_u32())
438443
}
439444
}
440445

441446
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ast::AttrId {
447+
#[inline]
442448
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ast::AttrId {
443449
let sess = d.sess.expect("can't decode AttrId without Session");
444450
sess.parse_sess.attr_id_generator.mk_attr_id()
@@ -672,6 +678,7 @@ impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyValue<T> {
672678
}
673679

674680
impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {
681+
#[inline]
675682
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
676683
let len = decoder.read_usize();
677684
if len == 0 { LazyArray::default() } else { decoder.read_lazy_array(len) }
@@ -680,8 +687,9 @@ impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {
680687

681688
impl<'a, 'tcx, I: Idx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyTable<I, T> {
682689
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
690+
let width = decoder.read_usize();
683691
let len = decoder.read_usize();
684-
decoder.read_lazy_table(len)
692+
decoder.read_lazy_table(width, len)
685693
}
686694
}
687695

compiler/rustc_metadata/src/rmeta/encoder.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,8 @@ impl<'a, 'tcx, T> Encodable<EncodeContext<'a, 'tcx>> for LazyArray<T> {
131131

132132
impl<'a, 'tcx, I, T> Encodable<EncodeContext<'a, 'tcx>> for LazyTable<I, T> {
133133
fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) {
134-
e.emit_usize(self.encoded_size);
134+
e.emit_usize(self.width);
135+
e.emit_usize(self.len);
135136
e.emit_lazy_distance(self.position);
136137
}
137138
}

compiler/rustc_metadata/src/rmeta/mod.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,11 @@ impl<T> LazyArray<T> {
142142
/// eagerly and in-order.
143143
struct LazyTable<I, T> {
144144
position: NonZeroUsize,
145-
encoded_size: usize,
145+
/// The encoded size of the elements of a table is selected at runtime to drop
146+
/// trailing zeroes. This is the number of bytes used for each table element.
147+
width: usize,
148+
/// How many elements are in the table.
149+
len: usize,
146150
_marker: PhantomData<fn(I) -> T>,
147151
}
148152

@@ -153,9 +157,10 @@ impl<I: 'static, T: ParameterizedOverTcx> ParameterizedOverTcx for LazyTable<I,
153157
impl<I, T> LazyTable<I, T> {
154158
fn from_position_and_encoded_size(
155159
position: NonZeroUsize,
156-
encoded_size: usize,
160+
width: usize,
161+
len: usize,
157162
) -> LazyTable<I, T> {
158-
LazyTable { position, encoded_size, _marker: PhantomData }
163+
LazyTable { position, width, len, _marker: PhantomData }
159164
}
160165
}
161166

compiler/rustc_metadata/src/rmeta/table.rs

+104-40
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ impl IsDefault for u32 {
3838
}
3939
}
4040

41+
impl IsDefault for u64 {
42+
fn is_default(&self) -> bool {
43+
*self == 0
44+
}
45+
}
46+
4147
impl<T> IsDefault for LazyArray<T> {
4248
fn is_default(&self) -> bool {
4349
self.num_elems == 0
@@ -89,6 +95,20 @@ impl FixedSizeEncoding for u32 {
8995
}
9096
}
9197

98+
impl FixedSizeEncoding for u64 {
99+
type ByteArray = [u8; 8];
100+
101+
#[inline]
102+
fn from_bytes(b: &[u8; 8]) -> Self {
103+
Self::from_le_bytes(*b)
104+
}
105+
106+
#[inline]
107+
fn write_to_bytes(self, b: &mut [u8; 8]) {
108+
*b = self.to_le_bytes();
109+
}
110+
}
111+
92112
macro_rules! fixed_size_enum {
93113
($ty:ty { $(($($pat:tt)*))* }) => {
94114
impl FixedSizeEncoding for Option<$ty> {
@@ -300,21 +320,21 @@ impl FixedSizeEncoding for UnusedGenericParams {
300320
// generic `LazyValue<T>` impl, but in the general case we might not need / want
301321
// to fit every `usize` in `u32`.
302322
impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
303-
type ByteArray = [u8; 4];
323+
type ByteArray = [u8; 8];
304324

305325
#[inline]
306-
fn from_bytes(b: &[u8; 4]) -> Self {
307-
let position = NonZeroUsize::new(u32::from_bytes(b) as usize)?;
326+
fn from_bytes(b: &[u8; 8]) -> Self {
327+
let position = NonZeroUsize::new(u64::from_bytes(b) as usize)?;
308328
Some(LazyValue::from_position(position))
309329
}
310330

311331
#[inline]
312-
fn write_to_bytes(self, b: &mut [u8; 4]) {
332+
fn write_to_bytes(self, b: &mut [u8; 8]) {
313333
match self {
314334
None => unreachable!(),
315335
Some(lazy) => {
316336
let position = lazy.position.get();
317-
let position: u32 = position.try_into().unwrap();
337+
let position: u64 = position.try_into().unwrap();
318338
position.write_to_bytes(b)
319339
}
320340
}
@@ -323,55 +343,75 @@ impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
323343

324344
impl<T> LazyArray<T> {
325345
#[inline]
326-
fn write_to_bytes_impl(self, b: &mut [u8; 8]) {
327-
let ([position_bytes, meta_bytes], []) = b.as_chunks_mut::<4>() else { panic!() };
328-
329-
let position = self.position.get();
330-
let position: u32 = position.try_into().unwrap();
331-
position.write_to_bytes(position_bytes);
332-
333-
let len = self.num_elems;
334-
let len: u32 = len.try_into().unwrap();
335-
len.write_to_bytes(meta_bytes);
346+
fn write_to_bytes_impl(self, b: &mut [u8; 16]) {
347+
let position = (self.position.get() as u64).to_le_bytes();
348+
let len = (self.num_elems as u64).to_le_bytes();
349+
350+
// Element width is selected at runtime on a per-table basis by omitting trailing
351+
// zero bytes in table elements. This works very naturally when table elements are
352+
// simple numbers but `LazyArray` is a pair of integers. If naively encoded, the second
353+
// element would shield the trailing zeroes in the first. Interleaving the bytes
354+
// of the position and length exposes trailing zeroes in both to the optimization.
355+
// We encode length second because we generally expect it to be smaller.
356+
for i in 0..8 {
357+
b[2 * i] = position[i];
358+
b[2 * i + 1] = len[i];
359+
}
336360
}
337361

338-
fn from_bytes_impl(position_bytes: &[u8; 4], meta_bytes: &[u8; 4]) -> Option<LazyArray<T>> {
339-
let position = NonZeroUsize::new(u32::from_bytes(position_bytes) as usize)?;
340-
let len = u32::from_bytes(meta_bytes) as usize;
362+
fn from_bytes_impl(position: &[u8; 8], meta: &[u8; 8]) -> Option<LazyArray<T>> {
363+
let position = NonZeroUsize::new(u64::from_bytes(&position) as usize)?;
364+
let len = u64::from_bytes(&meta) as usize;
341365
Some(LazyArray::from_position_and_num_elems(position, len))
342366
}
343367
}
344368

369+
// Decoding helper for the encoding scheme used by `LazyArray`.
370+
// Interleaving the bytes of the two integers exposes trailing zero bytes in the first integer
371+
// to the per-table trailing-zero trimming (a varint-like scheme) that we use for tables.
372+
#[inline]
373+
fn decode_interleaved(encoded: &[u8; 16]) -> ([u8; 8], [u8; 8]) {
374+
let mut first = [0u8; 8];
375+
let mut second = [0u8; 8];
376+
for i in 0..8 {
377+
first[i] = encoded[2 * i];
378+
second[i] = encoded[2 * i + 1];
379+
}
380+
(first, second)
381+
}
382+
345383
impl<T> FixedSizeEncoding for LazyArray<T> {
346-
type ByteArray = [u8; 8];
384+
type ByteArray = [u8; 16];
347385

348386
#[inline]
349-
fn from_bytes(b: &[u8; 8]) -> Self {
350-
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
351-
if *meta_bytes == [0; 4] {
387+
fn from_bytes(b: &[u8; 16]) -> Self {
388+
let (position, meta) = decode_interleaved(b);
389+
390+
if meta == [0; 8] {
352391
return Default::default();
353392
}
354-
LazyArray::from_bytes_impl(position_bytes, meta_bytes).unwrap()
393+
LazyArray::from_bytes_impl(&position, &meta).unwrap()
355394
}
356395

357396
#[inline]
358-
fn write_to_bytes(self, b: &mut [u8; 8]) {
397+
fn write_to_bytes(self, b: &mut [u8; 16]) {
359398
assert!(!self.is_default());
360399
self.write_to_bytes_impl(b)
361400
}
362401
}
363402

364403
impl<T> FixedSizeEncoding for Option<LazyArray<T>> {
365-
type ByteArray = [u8; 8];
404+
type ByteArray = [u8; 16];
366405

367406
#[inline]
368-
fn from_bytes(b: &[u8; 8]) -> Self {
369-
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
370-
LazyArray::from_bytes_impl(position_bytes, meta_bytes)
407+
fn from_bytes(b: &[u8; 16]) -> Self {
408+
let (position, meta) = decode_interleaved(b);
409+
410+
LazyArray::from_bytes_impl(&position, &meta)
371411
}
372412

373413
#[inline]
374-
fn write_to_bytes(self, b: &mut [u8; 8]) {
414+
fn write_to_bytes(self, b: &mut [u8; 16]) {
375415
match self {
376416
None => unreachable!(),
377417
Some(lazy) => lazy.write_to_bytes_impl(b),
@@ -381,13 +421,14 @@ impl<T> FixedSizeEncoding for Option<LazyArray<T>> {
381421

382422
/// Helper for constructing a table's serialization (also see `Table`).
383423
pub(super) struct TableBuilder<I: Idx, T: FixedSizeEncoding> {
424+
width: usize,
384425
blocks: IndexVec<I, T::ByteArray>,
385426
_marker: PhantomData<T>,
386427
}
387428

388429
impl<I: Idx, T: FixedSizeEncoding> Default for TableBuilder<I, T> {
389430
fn default() -> Self {
390-
TableBuilder { blocks: Default::default(), _marker: PhantomData }
431+
TableBuilder { width: 0, blocks: Default::default(), _marker: PhantomData }
391432
}
392433
}
393434

@@ -415,40 +456,63 @@ impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]>> TableBui
415456
// > store bit-masks of which item in each bucket is actually serialized).
416457
let block = self.blocks.ensure_contains_elem(i, || [0; N]);
417458
value.write_to_bytes(block);
459+
if self.width != N {
460+
let width = N - trailing_zeros(block);
461+
self.width = self.width.max(width);
462+
}
418463
}
419464
}
420465

421466
pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable<I, T> {
422467
let pos = buf.position();
468+
469+
let width = self.width;
423470
for block in &self.blocks {
424-
buf.emit_raw_bytes(block);
471+
buf.emit_raw_bytes(&block[..width]);
425472
}
426-
let num_bytes = self.blocks.len() * N;
473+
427474
LazyTable::from_position_and_encoded_size(
428475
NonZeroUsize::new(pos as usize).unwrap(),
429-
num_bytes,
476+
width,
477+
self.blocks.len(),
430478
)
431479
}
432480
}
433481

482+
fn trailing_zeros(x: &[u8]) -> usize {
483+
x.iter().rev().take_while(|b| **b == 0).count()
484+
}
485+
434486
impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]> + ParameterizedOverTcx>
435487
LazyTable<I, T>
436488
where
437489
for<'tcx> T::Value<'tcx>: FixedSizeEncoding<ByteArray = [u8; N]>,
438490
{
439491
/// Given the metadata, extract out the value at a particular index (if any).
440-
#[inline(never)]
441492
pub(super) fn get<'a, 'tcx, M: Metadata<'a, 'tcx>>(&self, metadata: M, i: I) -> T::Value<'tcx> {
442-
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.encoded_size);
493+
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.len);
494+
495+
// Access past the end of the table returns a Default
496+
if i.index() >= self.len {
497+
return Default::default();
498+
}
443499

444-
let start = self.position.get();
445-
let bytes = &metadata.blob()[start..start + self.encoded_size];
446-
let (bytes, []) = bytes.as_chunks::<N>() else { panic!() };
447-
bytes.get(i.index()).map_or_else(Default::default, FixedSizeEncoding::from_bytes)
500+
let width = self.width;
501+
let start = self.position.get() + (width * i.index());
502+
let end = start + width;
503+
let bytes = &metadata.blob()[start..end];
504+
505+
if let Ok(fixed) = bytes.try_into() {
506+
FixedSizeEncoding::from_bytes(fixed)
507+
} else {
508+
let mut fixed = [0u8; N];
509+
fixed[..width].copy_from_slice(bytes);
510+
FixedSizeEncoding::from_bytes(&fixed)
511+
}
448512
}
449513

450514
/// Size of the table in entries, including possible gaps.
451515
pub(super) fn size(&self) -> usize {
452-
self.encoded_size / N
516+
self.len
453517
}
454518
}

0 commit comments

Comments
 (0)