Skip to content

Adapt table sizes to the contents, accommodating u64 rmeta offsets #113542

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ impl<'a, 'tcx> Metadata<'a, 'tcx> for (CrateMetadataRef<'a>, TyCtxt<'tcx>) {
}

impl<T: ParameterizedOverTcx> LazyValue<T> {
#[inline]
fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(self, metadata: M) -> T::Value<'tcx>
where
T::Value<'tcx>: Decodable<DecodeContext<'a, 'tcx>>,
Expand Down Expand Up @@ -294,6 +295,7 @@ unsafe impl<'a, 'tcx, T: Decodable<DecodeContext<'a, 'tcx>>> TrustedLen
}

impl<T: ParameterizedOverTcx> LazyArray<T> {
#[inline]
fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(
self,
metadata: M,
Expand Down Expand Up @@ -360,8 +362,8 @@ impl<'a, 'tcx> DecodeContext<'a, 'tcx> {
self.read_lazy_offset_then(|pos| LazyArray::from_position_and_num_elems(pos, len))
}

fn read_lazy_table<I, T>(&mut self, len: usize) -> LazyTable<I, T> {
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, len))
fn read_lazy_table<I, T>(&mut self, width: usize, len: usize) -> LazyTable<I, T> {
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, width, len))
}

#[inline]
Expand Down Expand Up @@ -420,25 +422,29 @@ impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> {
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for CrateNum {
    /// Reads one `u32` from the decoder, converts it to a `CrateNum`, and returns the
    /// result of passing that number through `map_encoded_cnum_to_current`.
    #[inline]
    fn decode(d: &mut DecodeContext<'a, 'tcx>) -> CrateNum {
        // The read needs `&mut` access, so it is completed before `d` is used again
        // for the mapping step.
        let raw = d.read_u32();
        let encoded = CrateNum::from_u32(raw);
        d.map_encoded_cnum_to_current(encoded)
    }
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefIndex {
    /// Builds a `DefIndex` from the next `u32` read out of the decoder.
    #[inline]
    fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefIndex {
        let raw = d.read_u32();
        DefIndex::from_u32(raw)
    }
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnIndex {
    /// Builds an `ExpnIndex` from the next `u32` read out of the decoder.
    #[inline]
    fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ExpnIndex {
        let raw = d.read_u32();
        ExpnIndex::from_u32(raw)
    }
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ast::AttrId {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ast::AttrId {
let sess = d.sess.expect("can't decode AttrId without Session");
sess.parse_sess.attr_id_generator.mk_attr_id()
Expand Down Expand Up @@ -657,6 +663,7 @@ impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyValue<T> {
}

impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {
#[inline]
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
let len = decoder.read_usize();
if len == 0 { LazyArray::default() } else { decoder.read_lazy_array(len) }
Expand All @@ -665,8 +672,9 @@ impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {

impl<'a, 'tcx, I: Idx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyTable<I, T> {
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
let width = decoder.read_usize();
let len = decoder.read_usize();
decoder.read_lazy_table(len)
decoder.read_lazy_table(width, len)
}
}

Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ impl<'a, 'tcx, T> Encodable<EncodeContext<'a, 'tcx>> for LazyArray<T> {

impl<'a, 'tcx, I, T> Encodable<EncodeContext<'a, 'tcx>> for LazyTable<I, T> {
fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) {
e.emit_usize(self.encoded_size);
e.emit_usize(self.width);
e.emit_usize(self.len);
e.emit_lazy_distance(self.position);
}
}
Expand Down
11 changes: 8 additions & 3 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,11 @@ impl<T> LazyArray<T> {
/// eagerly and in-order.
struct LazyTable<I, T> {
position: NonZeroUsize,
encoded_size: usize,
/// The encoded size of the elements of a table is selected at runtime to drop
/// trailing zeroes. This is the number of bytes used for each table element.
width: usize,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add some comments on those?

/// How many elements are in the table.
len: usize,
_marker: PhantomData<fn(I) -> T>,
}

Expand All @@ -147,9 +151,10 @@ impl<I: 'static, T: ParameterizedOverTcx> ParameterizedOverTcx for LazyTable<I,
impl<I, T> LazyTable<I, T> {
fn from_position_and_encoded_size(
position: NonZeroUsize,
encoded_size: usize,
width: usize,
len: usize,
) -> LazyTable<I, T> {
LazyTable { position, encoded_size, _marker: PhantomData }
LazyTable { position, width, len, _marker: PhantomData }
}
}

Expand Down
144 changes: 104 additions & 40 deletions compiler/rustc_metadata/src/rmeta/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ impl IsDefault for u32 {
}
}

impl IsDefault for u64 {
    /// A `u64` is considered default exactly when it is zero.
    fn is_default(&self) -> bool {
        matches!(*self, 0)
    }
}

impl<T> IsDefault for LazyArray<T> {
fn is_default(&self) -> bool {
self.num_elems == 0
Expand Down Expand Up @@ -89,6 +95,20 @@ impl FixedSizeEncoding for u32 {
}
}

impl FixedSizeEncoding for u64 {
    type ByteArray = [u8; 8];

    /// Rebuilds the integer from its eight little-endian bytes.
    #[inline]
    fn from_bytes(b: &[u8; 8]) -> Self {
        u64::from_le_bytes(*b)
    }

    /// Overwrites `b` with the eight little-endian bytes of `self`.
    #[inline]
    fn write_to_bytes(self, b: &mut [u8; 8]) {
        let le_bytes = self.to_le_bytes();
        b.copy_from_slice(&le_bytes);
    }
}

macro_rules! fixed_size_enum {
($ty:ty { $(($($pat:tt)*))* }) => {
impl FixedSizeEncoding for Option<$ty> {
Expand Down Expand Up @@ -299,21 +319,21 @@ impl FixedSizeEncoding for UnusedGenericParams {
// generic `LazyValue<T>` impl, but in the general case we might not need / want
// to fit every `usize` in `u32`.
impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
type ByteArray = [u8; 4];
type ByteArray = [u8; 8];

#[inline]
fn from_bytes(b: &[u8; 4]) -> Self {
let position = NonZeroUsize::new(u32::from_bytes(b) as usize)?;
fn from_bytes(b: &[u8; 8]) -> Self {
let position = NonZeroUsize::new(u64::from_bytes(b) as usize)?;
Some(LazyValue::from_position(position))
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 4]) {
fn write_to_bytes(self, b: &mut [u8; 8]) {
match self {
None => unreachable!(),
Some(lazy) => {
let position = lazy.position.get();
let position: u32 = position.try_into().unwrap();
let position: u64 = position.try_into().unwrap();
position.write_to_bytes(b)
}
}
Expand All @@ -322,55 +342,75 @@ impl<T> FixedSizeEncoding for Option<LazyValue<T>> {

impl<T> LazyArray<T> {
#[inline]
fn write_to_bytes_impl(self, b: &mut [u8; 8]) {
let ([position_bytes, meta_bytes], []) = b.as_chunks_mut::<4>() else { panic!() };

let position = self.position.get();
let position: u32 = position.try_into().unwrap();
position.write_to_bytes(position_bytes);

let len = self.num_elems;
let len: u32 = len.try_into().unwrap();
len.write_to_bytes(meta_bytes);
fn write_to_bytes_impl(self, b: &mut [u8; 16]) {
let position = (self.position.get() as u64).to_le_bytes();
let len = (self.num_elems as u64).to_le_bytes();

// Element width is selected at runtime on a per-table basis by omitting trailing
// zero bytes in table elements. This works very naturally when table elements are
// simple numbers but `LazyArray` is a pair of integers. If naively encoded, the second
// element would shield the trailing zeroes in the first. Interleaving the bytes
// of the position and length exposes trailing zeroes in both to the optimization.
// We encode length second because we generally expect it to be smaller.
for i in 0..8 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please write a comment here stating the motivation for this interleaving? This code would be very difficult to understand without context.

b[2 * i] = position[i];
b[2 * i + 1] = len[i];
}
}

fn from_bytes_impl(position_bytes: &[u8; 4], meta_bytes: &[u8; 4]) -> Option<LazyArray<T>> {
let position = NonZeroUsize::new(u32::from_bytes(position_bytes) as usize)?;
let len = u32::from_bytes(meta_bytes) as usize;
fn from_bytes_impl(position: &[u8; 8], meta: &[u8; 8]) -> Option<LazyArray<T>> {
let position = NonZeroUsize::new(u64::from_bytes(&position) as usize)?;
let len = u64::from_bytes(&meta) as usize;
Some(LazyArray::from_position_and_num_elems(position, len))
}
}

// Decoding helper for the encoding scheme used by `LazyArray`.
//
// The 16 input bytes hold two 8-byte integers with their bytes interleaved: even
// offsets belong to the first integer, odd offsets to the second. Interleaving exposes
// the trailing zero bytes of the first integer (not only those of the second) to the
// trailing-zero truncation that tables apply to their elements.
//
// Returns `(first, second)`, each in its original byte order.
#[inline]
fn decode_interleaved(encoded: &[u8; 16]) -> ([u8; 8], [u8; 8]) {
    let evens: [u8; 8] = std::array::from_fn(|slot| encoded[2 * slot]);
    let odds: [u8; 8] = std::array::from_fn(|slot| encoded[2 * slot + 1]);
    (evens, odds)
}

impl<T> FixedSizeEncoding for LazyArray<T> {
type ByteArray = [u8; 8];
type ByteArray = [u8; 16];

#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
if *meta_bytes == [0; 4] {
fn from_bytes(b: &[u8; 16]) -> Self {
let (position, meta) = decode_interleaved(b);

if meta == [0; 8] {
return Default::default();
}
LazyArray::from_bytes_impl(position_bytes, meta_bytes).unwrap()
LazyArray::from_bytes_impl(&position, &meta).unwrap()
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
fn write_to_bytes(self, b: &mut [u8; 16]) {
assert!(!self.is_default());
self.write_to_bytes_impl(b)
}
}

impl<T> FixedSizeEncoding for Option<LazyArray<T>> {
type ByteArray = [u8; 8];
type ByteArray = [u8; 16];

#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
LazyArray::from_bytes_impl(position_bytes, meta_bytes)
fn from_bytes(b: &[u8; 16]) -> Self {
let (position, meta) = decode_interleaved(b);

LazyArray::from_bytes_impl(&position, &meta)
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
fn write_to_bytes(self, b: &mut [u8; 16]) {
match self {
None => unreachable!(),
Some(lazy) => lazy.write_to_bytes_impl(b),
Expand All @@ -380,13 +420,14 @@ impl<T> FixedSizeEncoding for Option<LazyArray<T>> {

/// Helper for constructing a table's serialization (also see `Table`).
pub(super) struct TableBuilder<I: Idx, T: FixedSizeEncoding> {
// Number of leading bytes of every block that `encode` writes out. Starts at 0 and
// grows to the largest "block length minus trailing zero bytes" seen by `set`.
width: usize,
// Full fixed-size byte encoding of each entry, indexed by `I`; slots that were
// never written are zero-filled.
blocks: IndexVec<I, T::ByteArray>,
// Ties the builder to the element type `T` without storing a value of it.
_marker: PhantomData<T>,
}

impl<I: Idx, T: FixedSizeEncoding> Default for TableBuilder<I, T> {
fn default() -> Self {
TableBuilder { blocks: Default::default(), _marker: PhantomData }
TableBuilder { width: 0, blocks: Default::default(), _marker: PhantomData }
}
}

Expand Down Expand Up @@ -414,40 +455,63 @@ impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]>> TableBui
// > store bit-masks of which item in each bucket is actually serialized).
let block = self.blocks.ensure_contains_elem(i, || [0; N]);
value.write_to_bytes(block);
if self.width != N {
let width = N - trailing_zeros(block);
self.width = self.width.max(width);
}
}
}

pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable<I, T> {
let pos = buf.position();

let width = self.width;
for block in &self.blocks {
buf.emit_raw_bytes(block);
buf.emit_raw_bytes(&block[..width]);
}
let num_bytes = self.blocks.len() * N;

LazyTable::from_position_and_encoded_size(
NonZeroUsize::new(pos as usize).unwrap(),
num_bytes,
width,
self.blocks.len(),
)
}
}

/// Counts the zero bytes at the end of `x`.
///
/// Returns `x.len()` when every byte is zero (including for an empty slice), and `0`
/// when the last byte is non-zero.
fn trailing_zeros(x: &[u8]) -> usize {
    match x.iter().rposition(|&byte| byte != 0) {
        // Everything after the last non-zero byte is a trailing zero.
        Some(last_nonzero) => x.len() - (last_nonzero + 1),
        None => x.len(),
    }
}

impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]> + ParameterizedOverTcx>
LazyTable<I, T>
where
for<'tcx> T::Value<'tcx>: FixedSizeEncoding<ByteArray = [u8; N]>,
{
/// Given the metadata, extract out the value at a particular index (if any).
#[inline(never)]
pub(super) fn get<'a, 'tcx, M: Metadata<'a, 'tcx>>(&self, metadata: M, i: I) -> T::Value<'tcx> {
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.encoded_size);
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.len);

// Access past the end of the table returns a Default
if i.index() >= self.len {
return Default::default();
}

let start = self.position.get();
let bytes = &metadata.blob()[start..start + self.encoded_size];
let (bytes, []) = bytes.as_chunks::<N>() else { panic!() };
bytes.get(i.index()).map_or_else(Default::default, FixedSizeEncoding::from_bytes)
let width = self.width;
let start = self.position.get() + (width * i.index());
let end = start + width;
let bytes = &metadata.blob()[start..end];

if let Ok(fixed) = bytes.try_into() {
FixedSizeEncoding::from_bytes(fixed)
} else {
let mut fixed = [0u8; N];
fixed[..width].copy_from_slice(bytes);
FixedSizeEncoding::from_bytes(&fixed)
}
}

/// Size of the table in entries, including possible gaps.
pub(super) fn size(&self) -> usize {
self.encoded_size / N
self.len
}
}