Skip to content

Commit 60a95f1

Browse files
Avoid compressing InitMasks for copy_repeatedly
1 parent 7dee111 commit 60a95f1

File tree

2 files changed

+67
-76
lines changed

2 files changed

+67
-76
lines changed

compiler/rustc_const_eval/src/interpret/memory.rs

+12-5
Original file line numberDiff line numberDiff line change
@@ -1035,6 +1035,14 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
10351035
// Zero-sized *destination*.
10361036
return Ok(());
10371037
};
1038+
let src_all_uninit = src_alloc.no_bytes_init(src_range);
1039+
// FIXME: This is potentially bad for performance as the init mask could
1040+
// be large, but is currently necessary to work around needing to have
1041+
// both the init mask for the src_alloc (shared ref) and the dst_alloc
1042+
// (unique ref) available simultaneously. Those are accessed through
1043+
// `self.get_raw{,_mut}` and we can't currently explain to rustc that
1044+
// there's no invalidation of the two references.
1045+
let src_init_mask = src_alloc.init_mask().clone();
10381046

10391047
// This checks relocation edges on the src, which needs to happen before
10401048
// `prepare_relocation_copy`.
@@ -1047,8 +1055,6 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
10471055
// since we don't want to keep any relocations at the target.
10481056
let relocations =
10491057
src_alloc.prepare_relocation_copy(self, src_range, dest_offset, num_copies);
1050-
// Prepare a copy of the initialization mask.
1051-
let compressed = src_alloc.compress_uninit_range(src_range);
10521058

10531059
// Destination alloc preparations and access hooks.
10541060
let (dest_alloc, extra) = self.get_raw_mut(dest_alloc_id)?;
@@ -1059,7 +1065,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
10591065
.map_err(|e| e.to_interp_error(dest_alloc_id))?
10601066
.as_mut_ptr();
10611067

1062-
if compressed.no_bytes_init() {
1068+
if src_all_uninit {
10631069
// Fast path: If all bytes are `uninit` then there is nothing to copy. The target range
10641070
// is marked as uninitialized but we otherwise omit changing the byte representation which may
10651071
// be arbitrary for uninitialized bytes.
@@ -1106,8 +1112,9 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
11061112
}
11071113

11081114
// now fill in all the "init" data
1109-
dest_alloc.mark_compressed_init_range(
1110-
&compressed,
1115+
dest_alloc.mark_init_range_repeated(
1116+
src_init_mask,
1117+
src_range,
11111118
alloc_range(dest_offset, size), // just a single copy (i.e., not full `dest_range`)
11121119
num_copies,
11131120
);

compiler/rustc_middle/src/mir/interpret/allocation.rs

+55-71
Original file line numberDiff line numberDiff line change
@@ -773,89 +773,73 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
773773
}
774774
}
775775

776-
/// Run-length encoding of the uninit mask.
777-
/// Used to copy parts of a mask multiple times to another allocation.
778-
pub struct InitMaskCompressed {
779-
/// Whether the first range is initialized.
780-
initial: bool,
781-
/// The lengths of ranges that are run-length encoded.
782-
/// The initialization state of the ranges alternate starting with `initial`.
783-
ranges: smallvec::SmallVec<[u64; 1]>,
784-
}
785-
786-
impl InitMaskCompressed {
787-
pub fn no_bytes_init(&self) -> bool {
788-
// The `ranges` are run-length encoded and of alternating initialization state.
789-
// So if `ranges.len() > 1` then the second block is an initialized range.
790-
!self.initial && self.ranges.len() == 1
791-
}
792-
}
793-
794776
/// Transferring the initialization mask to other allocations.
795777
impl<Tag, Extra> Allocation<Tag, Extra> {
796-
/// Creates a run-length encoding of the initialization mask; panics if range is empty.
797-
///
798-
/// This is essentially a more space-efficient version of
799-
/// `InitMask::range_as_init_chunks(...).collect::<Vec<_>>()`.
800-
pub fn compress_uninit_range(&self, range: AllocRange) -> InitMaskCompressed {
801-
// Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`),
802-
// a naive initialization mask copying algorithm would repeatedly have to read the initialization mask from
803-
// the source and write it to the destination. Even if we optimized the memory accesses,
804-
// we'd be doing all of this `repeat` times.
805-
// Therefore we precompute a compressed version of the initialization mask of the source value and
806-
// then write it back `repeat` times without computing any more information from the source.
807-
808-
// A precomputed cache for ranges of initialized / uninitialized bits
809-
// 0000010010001110 will become
810-
// `[5, 1, 2, 1, 3, 3, 1]`,
811-
// where each element toggles the state.
812-
813-
let mut ranges = smallvec::SmallVec::<[u64; 1]>::new();
814-
815-
let mut chunks = self.init_mask.range_as_init_chunks(range.start, range.end()).peekable();
816-
817-
let initial = chunks.peek().expect("range should be nonempty").is_init();
818-
819-
// Here we rely on `range_as_init_chunks` to yield alternating init/uninit chunks.
820-
for chunk in chunks {
821-
let len = chunk.range().end.bytes() - chunk.range().start.bytes();
822-
ranges.push(len);
823-
}
824-
825-
InitMaskCompressed { ranges, initial }
778+
pub fn no_bytes_init(&self, range: AllocRange) -> bool {
779+
// If no bits are set in start..end
780+
self.init_mask.find_bit(range.start, range.end(), true).is_none()
826781
}
827782

828783
/// Copies the given source initialization mask into `repeat` consecutive ranges of this allocation's mask.
829-
pub fn mark_compressed_init_range(
784+
pub fn mark_init_range_repeated(
830785
&mut self,
831-
defined: &InitMaskCompressed,
832-
range: AllocRange,
786+
mut src_init: InitMask,
787+
src_range: AllocRange,
788+
dest_first_range: AllocRange,
833789
repeat: u64,
834790
) {
835-
// An optimization where we can just overwrite an entire range of initialization
836-
// bits if they are going to be uniformly `1` or `0`.
837-
if defined.ranges.len() <= 1 {
838-
self.init_mask.set_range_inbounds(
839-
range.start,
840-
range.start + range.size * repeat, // `Size` operations
841-
defined.initial,
842-
);
843-
return;
791+
// If the src_range and *each* destination range are of equal size,
792+
// and the source range is either entirely initialized or entirely
793+
// uninitialized, we can skip a bunch of inserts by just inserting for
794+
// the full range once.
795+
if src_range.size == dest_first_range.size {
796+
let initialized =
797+
if src_init.find_bit(src_range.start, src_range.end(), false).is_none() {
798+
Some(true)
799+
} else if src_init.find_bit(src_range.start, src_range.end(), true).is_none() {
800+
Some(false)
801+
} else {
802+
None
803+
};
804+
805+
if let Some(initialized) = initialized {
806+
// Set the destination range across all repetitions to the uniform init state.
807+
self.init_mask.set_range_inbounds(
808+
dest_first_range.start,
809+
dest_first_range.start + dest_first_range.size * repeat,
810+
initialized,
811+
);
812+
return;
813+
}
844814
}
845815

846-
for mut j in 0..repeat {
847-
j *= range.size.bytes();
848-
j += range.start.bytes();
849-
let mut cur = defined.initial;
850-
for range in &defined.ranges {
851-
let old_j = j;
852-
j += range;
816+
// Deinitialize the ranges outside the area we care about, so the loop below
817+
// can do less work.
818+
src_init.set_range_inbounds(Size::from_bytes(0), src_range.start, false);
819+
src_init.set_range_inbounds(
820+
src_range.end(),
821+
Size::from_bytes(src_init.set.domain_size()),
822+
false,
823+
);
824+
825+
// De-initialize the destination range across all repetitions.
826+
self.init_mask.set_range_inbounds(
827+
dest_first_range.start,
828+
dest_first_range.start + dest_first_range.size * repeat,
829+
false,
830+
);
831+
832+
// Then we initialize.
833+
for count in 0..repeat {
834+
let start = dest_first_range.start + count * dest_first_range.size;
835+
for range in src_init.set.iter_intervals() {
836+
// Offset the chunk start/end from src_range, and then
837+
// offset from the start of this repetition.
853838
self.init_mask.set_range_inbounds(
854-
Size::from_bytes(old_j),
855-
Size::from_bytes(j),
856-
cur,
839+
start + (Size::from_bytes(range.start) - src_range.start),
840+
start + (Size::from_bytes(range.end) - src_range.start),
841+
true,
857842
);
858-
cur = !cur;
859843
}
860844
}
861845
}

0 commit comments

Comments
 (0)