@@ -773,89 +773,73 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
     }
 }
 
-/// Run-length encoding of the uninit mask.
-/// Used to copy parts of a mask multiple times to another allocation.
-pub struct InitMaskCompressed {
-    /// Whether the first range is initialized.
-    initial: bool,
-    /// The lengths of ranges that are run-length encoded.
-    /// The initialization state of the ranges alternate starting with `initial`.
-    ranges: smallvec::SmallVec<[u64; 1]>,
-}
-
-impl InitMaskCompressed {
-    pub fn no_bytes_init(&self) -> bool {
-        // The `ranges` are run-length encoded and of alternating initialization state.
-        // So if `ranges.len() > 1` then the second block is an initialized range.
-        !self.initial && self.ranges.len() == 1
-    }
-}
-
 /// Transferring the initialization mask to other allocations.
 impl<Tag, Extra> Allocation<Tag, Extra> {
-    /// Creates a run-length encoding of the initialization mask; panics if range is empty.
-    ///
-    /// This is essentially a more space-efficient version of
-    /// `InitMask::range_as_init_chunks(...).collect::<Vec<_>>()`.
-    pub fn compress_uninit_range(&self, range: AllocRange) -> InitMaskCompressed {
-        // Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`),
-        // a naive initialization mask copying algorithm would repeatedly have to read the initialization mask from
-        // the source and write it to the destination. Even if we optimized the memory accesses,
-        // we'd be doing all of this `repeat` times.
-        // Therefore we precompute a compressed version of the initialization mask of the source value and
-        // then write it back `repeat` times without computing any more information from the source.
-
-        // A precomputed cache for ranges of initialized/uninitialized bits
-        // 0000010010001110 will become
-        // `[5, 1, 2, 1, 3, 3, 1]`,
-        // where each element toggles the state.
-
-        let mut ranges = smallvec::SmallVec::<[u64; 1]>::new();
-
-        let mut chunks = self.init_mask.range_as_init_chunks(range.start, range.end()).peekable();
-
-        let initial = chunks.peek().expect("range should be nonempty").is_init();
-
-        // Here we rely on `range_as_init_chunks` to yield alternating init/uninit chunks.
-        for chunk in chunks {
-            let len = chunk.range().end.bytes() - chunk.range().start.bytes();
-            ranges.push(len);
-        }
-
-        InitMaskCompressed { ranges, initial }
+    pub fn no_bytes_init(&self, range: AllocRange) -> bool {
+        // No bytes are initialized if no bit is set anywhere in `start..end`.
+        self.init_mask.find_bit(range.start, range.end(), true).is_none()
     }
 
-    /// Applies multiple instances of the run-length encoding to the initialization mask.
+    /// Applies `repeat` copies of `src_range`'s initialization mask to consecutive destination ranges.
-    pub fn mark_compressed_init_range(
+    pub fn mark_init_range_repeated(
         &mut self,
-        defined: &InitMaskCompressed,
-        range: AllocRange,
+        mut src_init: InitMask,
+        src_range: AllocRange,
+        dest_first_range: AllocRange,
         repeat: u64,
     ) {
-        // An optimization where we can just overwrite an entire range of initialization
-        // bits if they are going to be uniformly `1` or `0`.
-        if defined.ranges.len() <= 1 {
-            self.init_mask.set_range_inbounds(
-                range.start,
-                range.start + range.size * repeat, // `Size` operations
-                defined.initial,
-            );
-            return;
+        // If the src_range and *each* destination range are of equal size,
+        // and the source range is either entirely initialized or entirely
+        // uninitialized, we can skip the per-interval writes below and set
+        // the full destination range at once.
+        if src_range.size == dest_first_range.size {
+            let initialized =
+                if src_init.find_bit(src_range.start, src_range.end(), false).is_none() {
+                    Some(true)
+                } else if src_init.find_bit(src_range.start, src_range.end(), true).is_none() {
+                    Some(false)
+                } else {
+                    None
+                };
+
+            if let Some(initialized) = initialized {
+                // Overwrite the destination range, across all repetitions, with the uniform state.
+                self.init_mask.set_range_inbounds(
+                    dest_first_range.start,
+                    dest_first_range.start + dest_first_range.size * repeat,
+                    initialized,
+                );
+                return;
+            }
         }
 
-        for mut j in 0..repeat {
-            j *= range.size.bytes();
-            j += range.start.bytes();
-            let mut cur = defined.initial;
-            for range in &defined.ranges {
-                let old_j = j;
-                j += range;
+        // De-initialize the ranges outside the area we care about, so the loop
+        // below can do less work.
+        src_init.set_range_inbounds(Size::from_bytes(0), src_range.start, false);
+        src_init.set_range_inbounds(
+            src_range.end(),
+            Size::from_bytes(src_init.set.domain_size()),
+            false,
+        );
+
+        // De-initialize the destination range across all repetitions.
+        self.init_mask.set_range_inbounds(
+            dest_first_range.start,
+            dest_first_range.start + dest_first_range.size * repeat,
+            false,
+        );
+
+        // Then initialize each repetition from the source mask's set intervals.
+        for count in 0..repeat {
+            let start = dest_first_range.start + count * dest_first_range.size;
+            for range in src_init.set.iter_intervals() {
+                // Offset the interval start/end from `src_range`, and then
+                // offset from the start of this repetition.
                 self.init_mask.set_range_inbounds(
-                    Size::from_bytes(old_j),
-                    Size::from_bytes(j),
-                    cur,
+                    start + (Size::from_bytes(range.start) - src_range.start),
+                    start + (Size::from_bytes(range.end) - src_range.start),
+                    true,
                 );
-                cur = !cur;
             }
         }
     }
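
For readers following the hunk outside the compiler, here is a minimal, self-contained sketch of what the replacement algorithm does. `ToyMask`, its `find_bit`/`set_range` helpers, and `mark_init_range_repeated_sketch` are illustrative stand-ins, not rustc's actual `InitMask`/`AllocRange` API; the real code works on a compressed bitset with `Size` offsets rather than a `Vec<bool>`.

```rust
/// Toy init mask: one bool per byte (stand-in for rustc's bitset-backed `InitMask`).
struct ToyMask {
    bits: Vec<bool>,
}

impl ToyMask {
    /// First index in `start..end` whose bit equals `value`; mirrors how the
    /// patch uses `InitMask::find_bit` to test for uniform ranges.
    fn find_bit(&self, start: usize, end: usize, value: bool) -> Option<usize> {
        (start..end).find(|&i| self.bits[i] == value)
    }

    /// Bulk write, standing in for `set_range_inbounds`.
    fn set_range(&mut self, start: usize, end: usize, value: bool) {
        self.bits[start..end].fill(value);
    }
}

/// Copy the init state of `src[src_start..src_start + size]` into `repeat`
/// consecutive windows of `dest`, starting at `dest_start`.
fn mark_init_range_repeated_sketch(
    dest: &mut ToyMask,
    src: &ToyMask,
    src_start: usize,
    dest_start: usize,
    size: usize,
    repeat: usize,
) {
    let src_end = src_start + size;

    // Fast path: if the source window is uniformly initialized or uniformly
    // uninitialized, one bulk write covers every repetition. (The real patch
    // additionally requires the source and destination windows to be the same
    // size, which holds here by construction.)
    let uniform = if src.find_bit(src_start, src_end, false).is_none() {
        Some(true)
    } else if src.find_bit(src_start, src_end, true).is_none() {
        Some(false)
    } else {
        None
    };
    if let Some(value) = uniform {
        dest.set_range(dest_start, dest_start + size * repeat, value);
        return;
    }

    // Slow path: de-initialize the whole destination once, then set only the
    // initialized positions in each repetition (the real code iterates the
    // mask's set intervals instead of individual bits).
    dest.set_range(dest_start, dest_start + size * repeat, false);
    for count in 0..repeat {
        let window = dest_start + count * size;
        for i in src_start..src_end {
            if src.bits[i] {
                dest.bits[window + (i - src_start)] = true;
            }
        }
    }
}

fn main() {
    let src = ToyMask { bits: vec![true, false, true, true] };
    let mut dest = ToyMask { bits: vec![false; 8] };
    mark_init_range_repeated_sketch(&mut dest, &src, 0, 0, 4, 2);
    assert_eq!(dest.bits, [true, false, true, true, true, false, true, true]);
}
```

The upshot relative to the removed `InitMaskCompressed` path is that no intermediate run-length encoding is built: uniformity is checked directly with `find_bit`, and the mixed case walks the source's set intervals once per repetition.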