Skip to content

Commit d8ed3d2

Browse files
committed
Use wider types in Layout multiplication, rather than overflow intrinsics
This lets us phrase it as just one check, rather than two, and might make it easier on LLVM to optimize. It still passes the codegen test without needing the manual optimization anymore, so let's see.
1 parent 76c427d commit d8ed3d2

File tree

2 files changed

+35
-41
lines changed

2 files changed

+35
-41
lines changed

library/core/src/alloc/layout.rs

+35-34
Original file line numberDiff line numberDiff line change
@@ -309,15 +309,9 @@ impl Layout {
309309
#[unstable(feature = "alloc_layout_extra", issue = "55724")]
310310
#[inline]
311311
pub fn repeat(&self, n: usize) -> Result<(Self, usize), LayoutError> {
312-
// This cannot overflow. Quoting from the invariant of Layout:
313-
// > `size`, when rounded up to the nearest multiple of `align`,
314-
// > must not overflow isize (i.e., the rounded value must be
315-
// > less than or equal to `isize::MAX`)
316-
let padded_size = self.size() + self.padding_needed_for(self.align());
317-
let alloc_size = padded_size.checked_mul(n).ok_or(LayoutError)?;
318-
319-
// The safe constructor is called here to enforce the isize size limit.
320-
Layout::from_size_valid_align(alloc_size, self.align).map(|layout| (layout, padded_size))
312+
let padded_self = self.pad_to_align();
313+
let repeated = padded_self.repeat_packed(n)?;
314+
Ok((repeated, padded_self.size()))
321315
}
322316

323317
/// Creates a layout describing the record for `self` followed by
@@ -394,9 +388,35 @@ impl Layout {
394388
#[unstable(feature = "alloc_layout_extra", issue = "55724")]
395389
#[inline]
396390
pub fn repeat_packed(&self, n: usize) -> Result<Self, LayoutError> {
397-
let size = self.size().checked_mul(n).ok_or(LayoutError)?;
398-
// The safe constructor is called here to enforce the isize size limit.
399-
Layout::from_size_valid_align(size, self.align)
391+
// Rather than checking both for overflow and whether the padded size
392+
// could exceed `isize::MAX`, do the checking in a larger type
393+
// so we only need to say the one check.
394+
//
395+
// This makes it a bit easier for LLVM to optimize as it's just ordinary
396+
// integer operations in the generic optimizer, not intrinsics that
397+
// might not be handled as much. For example, for constant `size` or
398+
// `n`, the optimization from llvm-project#56563 means it can often
399+
// sink the check before the multiplication, and thus do only the simple
400+
// word-width never-overflowing multiplication, rather than the checked
401+
// one. But even if not, the double-width codegen comes out essentially
402+
// the same as if the overflow intrinsics where used.
403+
404+
#[cfg(target_pointer_width = "16")]
405+
type UDoubleSize = u32;
406+
#[cfg(target_pointer_width = "32")]
407+
type UDoubleSize = u64;
408+
#[cfg(target_pointer_width = "64")]
409+
type UDoubleSize = u128;
410+
411+
// SAFETY: `UDoubleSize` is big enough that this cannot ever overflow.
412+
let size = unsafe { UDoubleSize::unchecked_mul(self.size() as _, n as _) };
413+
414+
if size > Layout::max_size_for_align(self.align) as UDoubleSize {
415+
return Err(LayoutError);
416+
}
417+
418+
// SAFETY: padded size is guaranteed to not exceed `isize::MAX`.
419+
unsafe { Ok(Layout::from_size_align_unchecked(size as usize, self.align())) }
400420
}
401421

402422
/// Creates a layout describing the record for `self` followed by
@@ -420,28 +440,9 @@ impl Layout {
420440
#[stable(feature = "alloc_layout_manipulation", since = "1.44.0")]
421441
#[inline]
422442
pub fn array<T>(n: usize) -> Result<Self, LayoutError> {
423-
// Reduce the amount of code we need to monomorphize per `T`.
424-
return inner(mem::size_of::<T>(), ValidAlign::of::<T>(), n);
425-
426-
#[inline]
427-
fn inner(element_size: usize, align: ValidAlign, n: usize) -> Result<Layout, LayoutError> {
428-
// We need to check two things about the size:
429-
// - That the total size won't overflow a `usize`, and
430-
// - That the total size still fits in an `isize`.
431-
// By using division we can check them both with a single threshold.
432-
// That'd usually be a bad idea, but thankfully here the element size
433-
// and alignment are constants, so the compiler will fold all of it.
434-
if element_size != 0 && n > Layout::max_size_for_align(align) / element_size {
435-
return Err(LayoutError);
436-
}
437-
438-
let array_size = element_size * n;
439-
440-
// SAFETY: We just checked above that the `array_size` will not
441-
// exceed `isize::MAX` even when rounded up to the alignment.
442-
// And `ValidAlign` guarantees it's a power of two.
443-
unsafe { Ok(Layout::from_size_align_unchecked(array_size, align.as_usize())) }
444-
}
443+
// The layout of a type is always padded already,
444+
// so we can use the packed computation directly.
445+
Layout::new::<T>().repeat_packed(n)
445446
}
446447
}
447448

library/core/src/mem/valid_align.rs

-7
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,6 @@ impl ValidAlign {
5353
pub(crate) fn log2(self) -> u32 {
5454
self.as_nonzero().trailing_zeros()
5555
}
56-
57-
/// Returns the alignment for a type.
58-
#[inline]
59-
pub(crate) fn of<T>() -> Self {
60-
// SAFETY: rustc ensures that type alignment is always a power of two.
61-
unsafe { ValidAlign::new_unchecked(mem::align_of::<T>()) }
62-
}
6356
}
6457

6558
impl fmt::Debug for ValidAlign {

0 commit comments

Comments
 (0)