From 3ff2c8852ecd9bfdf6a90606d16a7fcb6a3b0c45 Mon Sep 17 00:00:00 2001 From: Tage Johansson Date: Tue, 20 May 2025 15:01:30 +0200 Subject: [PATCH 1/6] Rewrite the DenseBitSet structure to only use one word on the stack. This commit modifies DenseBitSet so that it only uses one word on the stack instead of 4 words as before, allowing for faster clones. The downside is that it may at most store 63 elements on the stack as aposed to 128 for the previous implementation. --- Cargo.lock | 1 + compiler/rustc_abi/src/layout/coroutine.rs | 2 +- compiler/rustc_index/Cargo.toml | 1 + compiler/rustc_index/src/bit_set.rs | 577 +------- .../rustc_index/src/bit_set/dense_bit_set.rs | 1275 +++++++++++++++++ compiler/rustc_index/src/bit_set/tests.rs | 658 ++++++++- compiler/rustc_middle/src/values.rs | 8 +- .../src/framework/cursor.rs | 2 +- .../rustc_mir_dataflow/src/framework/fmt.rs | 19 +- .../src/impls/initialized.rs | 2 +- .../src/impls/storage_liveness.rs | 1 - compiler/rustc_mir_transform/src/copy_prop.rs | 2 +- compiler/rustc_mir_transform/src/coroutine.rs | 16 +- .../src/coverage/counters.rs | 3 +- .../src/coverage/counters/balanced_flow.rs | 2 +- .../rustc_mir_transform/src/coverage/query.rs | 7 +- .../src/deduce_param_attrs.rs | 7 +- .../src/lint_tail_expr_drop_order.rs | 2 +- .../src/single_use_consts.rs | 4 +- compiler/rustc_mir_transform/src/sroa.rs | 2 +- 20 files changed, 1975 insertions(+), 616 deletions(-) create mode 100644 compiler/rustc_index/src/bit_set/dense_bit_set.rs diff --git a/Cargo.lock b/Cargo.lock index 99cb71cd0ac87..d398c1d5637b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3855,6 +3855,7 @@ dependencies = [ name = "rustc_index" version = "0.0.0" dependencies = [ + "itertools", "rustc_index_macros", "rustc_macros", "rustc_serialize", diff --git a/compiler/rustc_abi/src/layout/coroutine.rs b/compiler/rustc_abi/src/layout/coroutine.rs index 27e704d538c83..73564705686f0 100644 --- a/compiler/rustc_abi/src/layout/coroutine.rs +++ b/compiler/rustc_abi/src/layout/coroutine.rs @@ -120,7 +120,7 @@ fn coroutine_saved_local_eligibility { fn intersect(&mut self, other: &Rhs) -> bool; } -#[inline] -fn inclusive_start_end( - range: impl RangeBounds, - domain: usize, -) -> Option<(usize, usize)> { - // Both start and end are inclusive. - let start = match range.start_bound().cloned() { - Bound::Included(start) => start.index(), - Bound::Excluded(start) => start.index() + 1, - Bound::Unbounded => 0, - }; - let end = match range.end_bound().cloned() { - Bound::Included(end) => end.index(), - Bound::Excluded(end) => end.index().checked_sub(1)?, - Bound::Unbounded => domain - 1, - }; - assert!(end < domain); - if start > end { - return None; - } - Some((start, end)) -} - macro_rules! bit_relations_inherent_impls { () => { /// Sets `self = self | other` and returns `true` if `self` changed @@ -96,345 +75,7 @@ macro_rules! bit_relations_inherent_impls { } }; } - -/// A fixed-size bitset type with a dense representation. -/// -/// Note 1: Since this bitset is dense, if your domain is big, and/or relatively -/// homogeneous (for example, with long runs of bits set or unset), then it may -/// be preferable to instead use a [MixedBitSet], or an -/// [IntervalSet](crate::interval::IntervalSet). They should be more suited to -/// sparse, or highly-compressible, domains. -/// -/// Note 2: Use [`GrowableBitSet`] if you need support for resizing after creation. -/// -/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also -/// just be `usize`. 
-/// -/// All operations that involve an element will panic if the element is equal -/// to or greater than the domain size. All operations that involve two bitsets -/// will panic if the bitsets have differing domain sizes. -/// -#[cfg_attr(feature = "nightly", derive(Decodable_NoContext, Encodable_NoContext))] -#[derive(Eq, PartialEq, Hash)] -pub struct DenseBitSet { - domain_size: usize, - words: SmallVec<[Word; 2]>, - marker: PhantomData, -} - -impl DenseBitSet { - /// Gets the domain size. - pub fn domain_size(&self) -> usize { - self.domain_size - } -} - -impl DenseBitSet { - /// Creates a new, empty bitset with a given `domain_size`. - #[inline] - pub fn new_empty(domain_size: usize) -> DenseBitSet { - let num_words = num_words(domain_size); - DenseBitSet { domain_size, words: smallvec![0; num_words], marker: PhantomData } - } - - /// Creates a new, filled bitset with a given `domain_size`. - #[inline] - pub fn new_filled(domain_size: usize) -> DenseBitSet { - let num_words = num_words(domain_size); - let mut result = - DenseBitSet { domain_size, words: smallvec![!0; num_words], marker: PhantomData }; - result.clear_excess_bits(); - result - } - - /// Clear all elements. - #[inline] - pub fn clear(&mut self) { - self.words.fill(0); - } - - /// Clear excess bits in the final word. - fn clear_excess_bits(&mut self) { - clear_excess_bits_in_final_word(self.domain_size, &mut self.words); - } - - /// Count the number of set bits in the set. - pub fn count(&self) -> usize { - self.words.iter().map(|e| e.count_ones() as usize).sum() - } - - /// Returns `true` if `self` contains `elem`. - #[inline] - pub fn contains(&self, elem: T) -> bool { - assert!(elem.index() < self.domain_size); - let (word_index, mask) = word_index_and_mask(elem); - (self.words[word_index] & mask) != 0 - } - - /// Is `self` is a (non-strict) superset of `other`? - #[inline] - pub fn superset(&self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - self.words.iter().zip(&other.words).all(|(a, b)| (a & b) == *b) - } - - /// Is the set empty? - #[inline] - pub fn is_empty(&self) -> bool { - self.words.iter().all(|a| *a == 0) - } - - /// Insert `elem`. Returns whether the set has changed. - #[inline] - pub fn insert(&mut self, elem: T) -> bool { - assert!( - elem.index() < self.domain_size, - "inserting element at index {} but domain size is {}", - elem.index(), - self.domain_size, - ); - let (word_index, mask) = word_index_and_mask(elem); - let word_ref = &mut self.words[word_index]; - let word = *word_ref; - let new_word = word | mask; - *word_ref = new_word; - new_word != word - } - - #[inline] - pub fn insert_range(&mut self, elems: impl RangeBounds) { - let Some((start, end)) = inclusive_start_end(elems, self.domain_size) else { - return; - }; - - let (start_word_index, start_mask) = word_index_and_mask(start); - let (end_word_index, end_mask) = word_index_and_mask(end); - - // Set all words in between start and end (exclusively of both). - for word_index in (start_word_index + 1)..end_word_index { - self.words[word_index] = !0; - } - - if start_word_index != end_word_index { - // Start and end are in different words, so we handle each in turn. - // - // We set all leading bits. This includes the start_mask bit. - self.words[start_word_index] |= !(start_mask - 1); - // And all trailing bits (i.e. from 0..=end) in the end word, - // including the end. 
- self.words[end_word_index] |= end_mask | (end_mask - 1); - } else { - self.words[start_word_index] |= end_mask | (end_mask - start_mask); - } - } - - /// Sets all bits to true. - pub fn insert_all(&mut self) { - self.words.fill(!0); - self.clear_excess_bits(); - } - - /// Returns `true` if the set has changed. - #[inline] - pub fn remove(&mut self, elem: T) -> bool { - assert!(elem.index() < self.domain_size); - let (word_index, mask) = word_index_and_mask(elem); - let word_ref = &mut self.words[word_index]; - let word = *word_ref; - let new_word = word & !mask; - *word_ref = new_word; - new_word != word - } - - /// Iterates over the indices of set bits in a sorted order. - #[inline] - pub fn iter(&self) -> BitIter<'_, T> { - BitIter::new(&self.words) - } - - pub fn last_set_in(&self, range: impl RangeBounds) -> Option { - let (start, end) = inclusive_start_end(range, self.domain_size)?; - let (start_word_index, _) = word_index_and_mask(start); - let (end_word_index, end_mask) = word_index_and_mask(end); - - let end_word = self.words[end_word_index] & (end_mask | (end_mask - 1)); - if end_word != 0 { - let pos = max_bit(end_word) + WORD_BITS * end_word_index; - if start <= pos { - return Some(T::new(pos)); - } - } - - // We exclude end_word_index from the range here, because we don't want - // to limit ourselves to *just* the last word: the bits set it in may be - // after `end`, so it may not work out. - if let Some(offset) = - self.words[start_word_index..end_word_index].iter().rposition(|&w| w != 0) - { - let word_idx = start_word_index + offset; - let start_word = self.words[word_idx]; - let pos = max_bit(start_word) + WORD_BITS * word_idx; - if start <= pos { - return Some(T::new(pos)); - } - } - - None - } - - bit_relations_inherent_impls! {} - - /// Sets `self = self | !other`. - /// - /// FIXME: Incorporate this into [`BitRelations`] and fill out - /// implementations for other bitset types, if needed. - pub fn union_not(&mut self, other: &DenseBitSet) { - assert_eq!(self.domain_size, other.domain_size); - - // FIXME(Zalathar): If we were to forcibly _set_ all excess bits before - // the bitwise update, and then clear them again afterwards, we could - // quickly and accurately detect whether the update changed anything. - // But that's only worth doing if there's an actual use-case. - - bitwise(&mut self.words, &other.words, |a, b| a | !b); - // The bitwise update `a | !b` can result in the last word containing - // out-of-domain bits, so we need to clear them. 
- self.clear_excess_bits(); - } -} - -// dense REL dense -impl BitRelations> for DenseBitSet { - fn union(&mut self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - bitwise(&mut self.words, &other.words, |a, b| a | b) - } - - fn subtract(&mut self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - bitwise(&mut self.words, &other.words, |a, b| a & !b) - } - - fn intersect(&mut self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - bitwise(&mut self.words, &other.words, |a, b| a & b) - } -} - -impl From> for DenseBitSet { - fn from(bit_set: GrowableBitSet) -> Self { - bit_set.bit_set - } -} - -impl Clone for DenseBitSet { - fn clone(&self) -> Self { - DenseBitSet { - domain_size: self.domain_size, - words: self.words.clone(), - marker: PhantomData, - } - } - - fn clone_from(&mut self, from: &Self) { - self.domain_size = from.domain_size; - self.words.clone_from(&from.words); - } -} - -impl fmt::Debug for DenseBitSet { - fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { - w.debug_list().entries(self.iter()).finish() - } -} - -impl ToString for DenseBitSet { - fn to_string(&self) -> String { - let mut result = String::new(); - let mut sep = '['; - - // Note: this is a little endian printout of bytes. - - // i tracks how many bits we have printed so far. - let mut i = 0; - for word in &self.words { - let mut word = *word; - for _ in 0..WORD_BYTES { - // for each byte in `word`: - let remain = self.domain_size - i; - // If less than a byte remains, then mask just that many bits. - let mask = if remain <= 8 { (1 << remain) - 1 } else { 0xFF }; - assert!(mask <= 0xFF); - let byte = word & mask; - - result.push_str(&format!("{sep}{byte:02x}")); - - if remain <= 8 { - break; - } - word >>= 8; - i += 8; - sep = '-'; - } - sep = '|'; - } - result.push(']'); - - result - } -} - -pub struct BitIter<'a, T: Idx> { - /// A copy of the current word, but with any already-visited bits cleared. - /// (This lets us use `trailing_zeros()` to find the next set bit.) When it - /// is reduced to 0, we move onto the next word. - word: Word, - - /// The offset (measured in bits) of the current word. - offset: usize, - - /// Underlying iterator over the words. - iter: slice::Iter<'a, Word>, - - marker: PhantomData, -} - -impl<'a, T: Idx> BitIter<'a, T> { - #[inline] - fn new(words: &'a [Word]) -> BitIter<'a, T> { - // We initialize `word` and `offset` to degenerate values. On the first - // call to `next()` we will fall through to getting the first word from - // `iter`, which sets `word` to the first word (if there is one) and - // `offset` to 0. Doing it this way saves us from having to maintain - // additional state about whether we have started. - BitIter { - word: 0, - offset: usize::MAX - (WORD_BITS - 1), - iter: words.iter(), - marker: PhantomData, - } - } -} - -impl<'a, T: Idx> Iterator for BitIter<'a, T> { - type Item = T; - fn next(&mut self) -> Option { - loop { - if self.word != 0 { - // Get the position of the next set bit in the current word, - // then clear the bit. - let bit_pos = self.word.trailing_zeros() as usize; - self.word ^= 1 << bit_pos; - return Some(T::new(bit_pos + self.offset)); - } - - // Move onto the next word. `wrapping_add()` is needed to handle - // the degenerate initial value given to `offset` in `new()`. 
- self.word = *self.iter.next()?; - self.offset = self.offset.wrapping_add(WORD_BITS); - } - } -} +use bit_relations_inherent_impls; /// A fixed-size bitset type with a partially dense, partially sparse /// representation. The bitset is broken into chunks, and chunks that are all @@ -727,7 +368,7 @@ impl ChunkedBitSet { Some(Ones(chunk_domain_size)) => ChunkIter::Ones(0..*chunk_domain_size as usize), Some(Mixed(chunk_domain_size, _, words)) => { let num_words = num_words(*chunk_domain_size as usize); - ChunkIter::Mixed(BitIter::new(&words[0..num_words])) + ChunkIter::Mixed(BitIter::from_slice(&words[0..num_words])) } None => ChunkIter::Finished, } @@ -771,8 +412,8 @@ impl BitRelations> for ChunkedBitSet { ) { let self_chunk_words = Rc::make_mut(self_chunk_words); let has_changed = bitwise( - &mut self_chunk_words[0..num_words], - &other_chunk_words[0..num_words], + self_chunk_words[0..num_words].iter_mut(), + other_chunk_words[0..num_words].iter().copied(), op, ); debug_assert!(has_changed); @@ -847,8 +488,8 @@ impl BitRelations> for ChunkedBitSet { ) { let self_chunk_words = Rc::make_mut(self_chunk_words); let has_changed = bitwise( - &mut self_chunk_words[0..num_words], - &other_chunk_words[0..num_words], + self_chunk_words[0..num_words].iter_mut(), + other_chunk_words[0..num_words].iter().copied(), op, ); debug_assert!(has_changed); @@ -898,8 +539,8 @@ impl BitRelations> for ChunkedBitSet { ) { let self_chunk_words = Rc::make_mut(self_chunk_words); let has_changed = bitwise( - &mut self_chunk_words[0..num_words], - &other_chunk_words[0..num_words], + self_chunk_words[0..num_words].iter_mut(), + other_chunk_words[0..num_words].iter().copied(), op, ); debug_assert!(has_changed); @@ -920,48 +561,6 @@ impl BitRelations> for ChunkedBitSet { } } -impl BitRelations> for DenseBitSet { - fn union(&mut self, other: &ChunkedBitSet) -> bool { - sequential_update(|elem| self.insert(elem), other.iter()) - } - - fn subtract(&mut self, _other: &ChunkedBitSet) -> bool { - unimplemented!("implement if/when necessary"); - } - - fn intersect(&mut self, other: &ChunkedBitSet) -> bool { - assert_eq!(self.domain_size(), other.domain_size); - let mut changed = false; - for (i, chunk) in other.chunks.iter().enumerate() { - let mut words = &mut self.words[i * CHUNK_WORDS..]; - if words.len() > CHUNK_WORDS { - words = &mut words[..CHUNK_WORDS]; - } - match chunk { - Zeros(..) => { - for word in words { - if *word != 0 { - changed = true; - *word = 0; - } - } - } - Ones(..) => (), - Mixed(_, _, data) => { - for (i, word) in words.iter_mut().enumerate() { - let new_val = *word & data[i]; - if new_val != *word { - changed = true; - *word = new_val; - } - } - } - } - } - changed - } -} - impl Clone for ChunkedBitSet { fn clone(&self) -> Self { ChunkedBitSet { @@ -1080,15 +679,6 @@ enum ChunkIter<'a> { Finished, } -// Applies a function to mutate a bitset, and returns true if any -// of the applications return true -fn sequential_update( - mut self_update: impl FnMut(T) -> bool, - it: impl Iterator, -) -> bool { - it.fold(false, |changed, elem| self_update(elem) | changed) -} - impl fmt::Debug for ChunkedBitSet { fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { w.debug_list().entries(self.iter()).finish() @@ -1108,15 +698,16 @@ impl fmt::Debug for ChunkedBitSet { /// "changed" return value unreliable, because the change might have only /// affected excess bits. 
#[inline] -fn bitwise(out_vec: &mut [Word], in_vec: &[Word], op: Op) -> bool -where - Op: Fn(Word, Word) -> Word, -{ - assert_eq!(out_vec.len(), in_vec.len()); +fn bitwise<'a>( + out: impl ExactSizeIterator, + in_: impl ExactSizeIterator, + op: impl Fn(Word, Word) -> Word, +) -> bool { + assert_eq!(out.len(), in_.len()); let mut changed = 0; - for (out_elem, in_elem) in iter::zip(out_vec, in_vec) { + for (out_elem, in_elem) in iter::zip(out, in_) { let old_val = *out_elem; - let new_val = op(old_val, *in_elem); + let new_val = op(old_val, in_elem); *out_elem = new_val; // This is essentially equivalent to a != with changed being a bool, but // in practice this code gets auto-vectorized by the compiler for most @@ -1161,15 +752,6 @@ pub enum MixedBitSet { Large(ChunkedBitSet), } -impl MixedBitSet { - pub fn domain_size(&self) -> usize { - match self { - MixedBitSet::Small(set) => set.domain_size(), - MixedBitSet::Large(set) => set.domain_size(), - } - } -} - impl MixedBitSet { #[inline] pub fn new_empty(domain_size: usize) -> MixedBitSet { @@ -1204,10 +786,15 @@ impl MixedBitSet { } } - pub fn insert_all(&mut self) { + /// Insert `0..domain_size` in the set. + /// + /// We would like an insert all function that doesn't require the domain size, but the exact + /// domain size is not stored in the `Small` variant, so that is not possible. + #[inline] + pub fn insert_all(&mut self, domain_size: usize) { match self { - MixedBitSet::Small(set) => set.insert_all(), - MixedBitSet::Large(set) => set.insert_all(), + Self::Small(set) => set.insert_all(domain_size), + Self::Large(set) => set.insert_all(), } } @@ -1304,87 +891,6 @@ impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> { } } -/// A resizable bitset type with a dense representation. -/// -/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also -/// just be `usize`. -/// -/// All operations that involve an element will panic if the element is equal -/// to or greater than the domain size. -#[derive(Clone, Debug, PartialEq)] -pub struct GrowableBitSet { - bit_set: DenseBitSet, -} - -impl Default for GrowableBitSet { - fn default() -> Self { - GrowableBitSet::new_empty() - } -} - -impl GrowableBitSet { - /// Ensure that the set can hold at least `min_domain_size` elements. - pub fn ensure(&mut self, min_domain_size: usize) { - if self.bit_set.domain_size < min_domain_size { - self.bit_set.domain_size = min_domain_size; - } - - let min_num_words = num_words(min_domain_size); - if self.bit_set.words.len() < min_num_words { - self.bit_set.words.resize(min_num_words, 0) - } - } - - pub fn new_empty() -> GrowableBitSet { - GrowableBitSet { bit_set: DenseBitSet::new_empty(0) } - } - - pub fn with_capacity(capacity: usize) -> GrowableBitSet { - GrowableBitSet { bit_set: DenseBitSet::new_empty(capacity) } - } - - /// Returns `true` if the set has changed. - #[inline] - pub fn insert(&mut self, elem: T) -> bool { - self.ensure(elem.index() + 1); - self.bit_set.insert(elem) - } - - /// Returns `true` if the set has changed. 
- #[inline] - pub fn remove(&mut self, elem: T) -> bool { - self.ensure(elem.index() + 1); - self.bit_set.remove(elem) - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.bit_set.is_empty() - } - - #[inline] - pub fn contains(&self, elem: T) -> bool { - let (word_index, mask) = word_index_and_mask(elem); - self.bit_set.words.get(word_index).is_some_and(|word| (word & mask) != 0) - } - - #[inline] - pub fn iter(&self) -> BitIter<'_, T> { - self.bit_set.iter() - } - - #[inline] - pub fn len(&self) -> usize { - self.bit_set.count() - } -} - -impl From> for GrowableBitSet { - fn from(bit_set: DenseBitSet) -> Self { - Self { bit_set } - } -} - /// A fixed-size 2D bit matrix type with a dense representation. /// /// `R` and `C` are index types used to identify rows and columns respectively; @@ -1416,14 +922,17 @@ impl BitMatrix { } /// Creates a new matrix, with `row` used as the value for every row. - pub fn from_row_n(row: &DenseBitSet, num_rows: usize) -> BitMatrix { - let num_columns = row.domain_size(); + pub fn from_row_n( + row: &DenseBitSet, + num_rows: usize, + num_columns: usize, + ) -> BitMatrix { let words_per_row = num_words(num_columns); - assert_eq!(words_per_row, row.words.len()); + assert_eq!(words_per_row, row.words().len()); BitMatrix { num_rows, num_columns, - words: iter::repeat(&row.words).take(num_rows).flatten().cloned().collect(), + words: iter::repeat_with(|| row.words()).take(num_rows).flatten().collect(), marker: PhantomData, } } @@ -1516,9 +1025,9 @@ impl BitMatrix { /// returns `true` if anything changed. pub fn union_row_with(&mut self, with: &DenseBitSet, write: R) -> bool { assert!(write.index() < self.num_rows); - assert_eq!(with.domain_size(), self.num_columns); + assert!(with.capacity() >= self.num_columns); let (write_start, write_end) = self.range(write); - bitwise(&mut self.words[write_start..write_end], &with.words, |a, b| a | b) + bitwise(self.words[write_start..write_end].iter_mut(), with.words(), |a, b| a | b) } /// Sets every cell in `row` to true. @@ -1542,7 +1051,7 @@ impl BitMatrix { pub fn iter(&self, row: R) -> BitIter<'_, C> { assert!(row.index() < self.num_rows); let (start, end) = self.range(row); - BitIter::new(&self.words[start..end]) + BitIter::from_slice(&self.words[start..end]) } /// Returns the number of elements in `row`. @@ -1657,11 +1166,6 @@ impl SparseBitMatrix { } } - /// Insert all bits in the given row. - pub fn insert_all_into_row(&mut self, row: R) { - self.ensure_row(row).insert_all(); - } - pub fn rows(&self) -> impl Iterator { self.rows.indices() } @@ -1754,11 +1258,6 @@ fn clear_excess_bits_in_final_word(domain_size: usize, words: &mut [Word]) { } } -#[inline] -fn max_bit(word: Word) -> usize { - WORD_BITS - 1 - word.leading_zeros() as usize -} - /// Integral type used to represent the bit set. 
pub trait FiniteBitSetTy: BitAnd diff --git a/compiler/rustc_index/src/bit_set/dense_bit_set.rs b/compiler/rustc_index/src/bit_set/dense_bit_set.rs new file mode 100644 index 0000000000000..334dfadbf3952 --- /dev/null +++ b/compiler/rustc_index/src/bit_set/dense_bit_set.rs @@ -0,0 +1,1275 @@ +use std::alloc::{Layout, alloc, alloc_zeroed, dealloc, handle_alloc_error, realloc}; +use std::hash::{Hash, Hasher}; +use std::iter::FusedIterator; +use std::marker::PhantomData; +use std::mem::ManuallyDrop; +use std::ops::{Range, RangeInclusive}; +use std::ptr::NonNull; +use std::{fmt, iter, slice}; + +use itertools::Either; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; + +use super::{ + BitRelations, CHUNK_WORDS, Chunk, ChunkedBitSet, WORD_BITS, Word, word_index_and_mask, +}; +use crate::Idx; + +/// A fixed-size bitset type with a dense representation, using only one [`Word`] on the stack. +/// +/// This bit set occupies only a single [`Word`] of stack space. It can represent a domain size +/// of up to `[WORD_BITS] - 1` directly inline. If the domain size exceeds this limit, it instead +/// becomes a pointer to a sequence of [`Word`]s on the heap. This makes it very efficient for +/// domain sizes smaller than `[WORD_BITS]`. +/// +/// Additionally, if the set does not fit in one [`Word`], there is a special inline +/// variant for the empty set. In this case, the domain size is stored inline along with a few +/// bits indicating that the set is empty. Allocation is deferred until needed, such as on +/// the first insert or remove operation. This avoids the need to wrap a lazily initialised bit set +/// in a [`OnceCell`] or an [`Option`]—you can simply create an empty set and populate it if needed. +/// +/// Note 1: Since this bitset is dense, if your domain is large and/or relatively homogeneous (e.g. +/// long runs of set or unset bits), it may be more efficient to use a [MixedBitSet] or an +/// [IntervalSet](crate::interval::IntervalSet), which are better suited for sparse or highly +/// compressible domains. +/// +/// Note 2: Use [`GrowableBitSet`] if you require support for resizing after creation. +/// +/// `T` is an index type—typically a newtyped `usize` wrapper, but it may also simply be `usize`. +/// +/// Any operation involving an element may panic if the element is equal to or greater than the +/// domain size. Operations involving two bitsets may panic if their domain sizes differ. Panicking +/// is not garranteed though as we store the domain size rounded up to the next multiple of +/// [`WORD_BITS`]. +#[repr(C)] +pub union DenseBitSet { + /// The bit set fits in a single [`Word`] stored inline on the stack. + /// + /// The most significant bit is set to 1 to distinguish this from the other variants. You + /// must never change that "tag bit" after the bit set has been created. + /// + /// The remaining bits makes up the bit set. The exact domain size is not stored. + inline: Word, + + /// The bit set doesn't fit in a single word, but is empty and not yet allocated. + /// + /// The first (most significant) two bits are set to `[0, 1]` to distinguish this variant + /// from others. This tag is stored in [`Self::EMPTY_UNALLOCATED_TAG_BITS`]. The remaining bits + /// hold the domain size (capacity) **in words** of the set, which is needed if the set is + /// eventually allocated. + /// + /// Note that because the capacity is stored in words, not in bits, there is plenty of room + /// for the two tag bits. 
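+    ///
+    /// As an illustrative example (assuming 64-bit `Word`s and `usize`s): a set created with
+    /// `DenseBitSet::new_empty(200)` needs `200.div_ceil(64) == 4` words, so it starts out in
+    /// this variant with the value `0b01 << 62 | 4`, and the heap allocation is deferred until
+    /// the first mutation (such as an `insert`) actually needs it.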
+ empty_unallocated: usize, + + /// The bit set is stored on the heap. + /// + /// The two most significant bits are set to zero if this field is active. + on_heap: ManuallyDrop, + + /// This variant will never be created. + marker: PhantomData, +} + +impl DenseBitSet { + /// The maximum domain size that could be stored inlined on the stack. + pub const INLINE_CAPACITY: usize = WORD_BITS - 1; + + /// A [`Word`] with the most significant bit set. That is the tag bit telling that the set is + /// inlined. + const IS_INLINE_TAG_BIT: Word = 0x1 << (WORD_BITS - 1); + + /// The tag for the `empty_unallocated` variant. The two most significant bits are + /// `[0, 1]`. + const EMPTY_UNALLOCATED_TAG_BITS: usize = 0b01 << (WORD_BITS - 2); + + /// Create a new empty bit set with a given domain_size. + /// + /// If `domain_size` is <= [`Self::INLINE_CAPACITY`], then it is stored inline on the stack, + /// otherwise it is stored on the heap. + #[inline] + pub fn new_empty(domain_size: usize) -> Self { + if domain_size <= Self::INLINE_CAPACITY { + // The first bit is set to indicate the union variant. + Self { inline: Self::IS_INLINE_TAG_BIT } + } else { + let num_words = domain_size.div_ceil(WORD_BITS); + debug_assert!(num_words.leading_zeros() >= 2); + Self { empty_unallocated: Self::EMPTY_UNALLOCATED_TAG_BITS | num_words } + } + } + + /// Create a new filled bit set. + #[inline] + pub fn new_filled(domain_size: usize) -> Self { + if domain_size <= Self::INLINE_CAPACITY { + Self { + inline: Word::MAX.unbounded_shr((WORD_BITS - domain_size) as u32) + | Self::IS_INLINE_TAG_BIT, + } + } else { + let num_words = domain_size.div_ceil(WORD_BITS); + let mut on_heap = BitSetOnHeap::new_empty(num_words); + let words = on_heap.as_mut_slice(); + for word in words.iter_mut() { + *word = Word::MAX; + } + // Remove excessive bits on the last word. + *words.last_mut().unwrap() >>= WORD_BITS - domain_size % WORD_BITS; + Self { on_heap: ManuallyDrop::new(on_heap) } + } + } + + /// Check if `self` is inlined. + // If this function returns `true`, it is safe to assume `self.inline`. Else, it is safe to + // assume `self.empty_unallocated`, or `self.on_heap`. + #[inline(always)] + pub fn is_inline(&self) -> bool { + // We check if the first bit is set. If so, it is inlined, otherwise it is on the heap. + (unsafe { self.inline } & Self::IS_INLINE_TAG_BIT) != 0 + } + + /// Check if `self` has a too large domain to be stored inline, is empty, and is not yet + /// allocated. + // If this function returns `true`, it is safe to assume `self.empty_unallocated`. Else, it is + // safe to assume `self.inline`, or `self.on_heap`. + #[inline(always)] + pub const fn is_empty_unallocated(&self) -> bool { + (unsafe { self.empty_unallocated }) >> usize::BITS as u32 - 2 + == Self::EMPTY_UNALLOCATED_TAG_BITS >> usize::BITS as u32 - 2 + } + + /// Check if `self` is `empty_unallocated` and if so return the number of words required to + /// store the expected capacity. + // If this function returns `true`, it is safe to assume `self.empty_unallocated`. Else, it is + // safe to assume `self.inline`, or `self.on_heap`. + #[inline(always)] + pub const fn empty_unallocated_get_num_words(&self) -> Option { + if self.is_empty_unallocated() { + Some(unsafe { self.empty_unallocated } ^ Self::EMPTY_UNALLOCATED_TAG_BITS) + } else { + None + } + } + + /// Check if `self` is allocated on the heap and return a reference to it in that case. 
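+    ///
+    /// This check relies on the tag layout documented on the union: `inline` always has the
+    /// most significant bit set and `empty_unallocated` has the top two bits `01`, so only the
+    /// `on_heap` variant has both of its top two bits clear.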
+ fn on_heap(&self) -> Option<&BitSetOnHeap> { + let self_word = unsafe { self.inline }; + // Check if the two most significant bits are 0. + if self_word & Word::MAX >> 2 == self_word { Some(unsafe { &self.on_heap }) } else { None } + } + + /// Check if `self` is allocated on the heap and return a mutable reference to it in that case. + fn on_heap_mut(&mut self) -> Option<&mut ManuallyDrop> { + let self_word = unsafe { self.inline }; + // Check if the two most significant bits are 0. + if self_word & Word::MAX >> 2 == self_word { + Some(unsafe { &mut self.on_heap }) + } else { + None + } + } + + /// If `self` is `empty_unallocated`, allocate it, otherwise return `self.on_heap_mut()`. + fn on_heap_get_or_alloc(&mut self) -> &mut BitSetOnHeap { + if let Some(num_words) = self.empty_unallocated_get_num_words() { + *self = Self { on_heap: ManuallyDrop::new(BitSetOnHeap::new_empty(num_words)) }; + unsafe { &mut self.on_heap } + } else { + self.on_heap_mut().unwrap() + } + } + + /// Get the capacity of this set. This is >= the initial domain size. + #[inline(always)] + pub(super) fn capacity(&self) -> usize { + if self.is_inline() { + Self::INLINE_CAPACITY + } else if let Some(num_words) = self.empty_unallocated_get_num_words() { + num_words * WORD_BITS + } else { + self.on_heap().unwrap().capacity() + } + } + + /// Checks if the bit set is empty. + #[inline(always)] + pub fn is_empty(&self) -> bool { + if self.is_inline() { + let x = unsafe { self.inline }; + x == Self::IS_INLINE_TAG_BIT + } else if self.is_empty_unallocated() { + true + } else { + self.on_heap().unwrap().is_empty() + } + } + + /// Clear the set. + #[inline(always)] + pub fn clear(&mut self) { + if self.is_inline() { + self.inline = Self::IS_INLINE_TAG_BIT + } else if let Some(on_heap) = self.on_heap_mut() { + for word in on_heap.as_mut_slice() { + *word = 0x0; + } + } + } + + /// Get an iterator of all words making up the set. + pub(super) fn words(&self) -> impl ExactSizeIterator { + if self.is_inline() { + let word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + Either::Left(iter::once(word)) + } else if let Some(num_words) = self.empty_unallocated_get_num_words() { + Either::Right(Either::Left(iter::repeat_n(0, num_words))) + } else { + Either::Right(Either::Right(self.on_heap().unwrap().as_slice().iter().copied())) + } + } + + /// Checks if `self` is a (non-strict) superset of `other`. + /// + /// May panic if `self` and other have different sizes. + #[inline(always)] + pub fn superset(&self, other: &Self) -> bool { + // Function to check that a usize is a superset of another. + let word_is_superset = |x: Word, other: Word| (!x & other) == 0; + + if self.is_inline() { + let x = unsafe { self.inline }; + assert!(other.is_inline(), "bit sets has different domain sizes"); + let y = unsafe { other.inline }; + word_is_superset(x, y) + } else if other.is_empty_unallocated() { + true + } else { + let other_on_heap = other.on_heap().unwrap(); + if self.is_empty_unallocated() { + other_on_heap.is_empty() + } else { + let on_heap = self.on_heap().unwrap(); + let self_slice = on_heap.as_slice(); + let other_slice = other_on_heap.as_slice(); + debug_assert_eq!( + self_slice.len(), + other_slice.len(), + "bit sets have different domain sizes" + ); + self_slice.iter().zip(other_slice).all(|(&x, &y)| (!x & y) == 0) + } + } + } + + /// Count the number of set bits in the set. 
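+    ///
+    /// In the inline representation the tag bit is always set, which is why the implementation
+    /// subtracts one from `count_ones()` in that case.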
+ #[inline(always)] + pub fn count(&self) -> usize { + if self.is_inline() { + let x = unsafe { self.inline }; + x.count_ones() as usize - 1 + } else if self.is_empty_unallocated() { + 0 + } else { + self.on_heap().unwrap().as_slice().iter().map(|w| w.count_ones() as usize).sum() + } + } + + /// Returns an iterator over the indices for all elements in this set. + #[inline(always)] + pub fn iter_usizes(&self) -> BitIter<'_, usize> { + if self.is_inline() { + let x = unsafe { self.inline }; + // Remove the tag bit. + let without_tag_bit = x ^ Self::IS_INLINE_TAG_BIT; + BitIter::from_single_word(without_tag_bit) + } else if let Some(on_heap) = self.on_heap() { + BitIter::from_slice(on_heap.as_slice()) + } else { + debug_assert!(self.is_empty_unallocated()); + BitIter::from_single_word(0) + } + } + + /// Insert the elem with index `idx`. Returns `true` if the set has changed. + #[inline(always)] + fn insert_usize(&mut self, idx: usize) -> bool { + // Insert the `i`th bit in a word and return `true` if it changed. + let insert_bit = |word: &mut Word, bit_idx: u32| { + let mask = 0x01 << bit_idx; + let old = *word; + *word |= mask; + *word != old + }; + + if self.is_inline() { + let x = unsafe { &mut self.inline }; + debug_assert!(idx < Self::INLINE_CAPACITY, "index too large: {idx}"); + insert_bit(x, idx as u32) + } else { + let words = self.on_heap_get_or_alloc().as_mut_slice(); + + let word_idx = idx / WORD_BITS; + let bit_idx = (idx % WORD_BITS) as u32; + let word = &mut words[word_idx]; + insert_bit(word, bit_idx) + } + } + + /// Insert `0..domain_size` in the set. + /// + /// We would like an insert all function that doesn't require the domain size, but the exact + /// domain size is not stored so that is not possible. + #[inline(always)] + pub fn insert_all(&mut self, domain_size: usize) { + if self.is_inline() { + debug_assert!(domain_size <= Self::INLINE_CAPACITY); + unsafe { + self.inline |= Word::MAX.unbounded_shr(WORD_BITS as u32 - domain_size as u32) + }; + } else { + let on_heap = self.on_heap_get_or_alloc(); + debug_assert!(on_heap.capacity() >= domain_size, "domain size too big"); + let words = on_heap.as_mut_slice(); + + let (end_word_index, end_mask) = word_index_and_mask(domain_size - 1); + + for word_index in 0..end_word_index { + words[word_index] = Word::MAX; + } + + words[end_word_index] |= end_mask | (end_mask - 1); + } + } + + /// Sets `self = self | !other` for all elements less than `domain_size`. + #[inline(always)] + pub fn union_not(&mut self, other: &Self, domain_size: usize) { + if self.is_inline() { + assert!(other.is_inline()); + + let self_word = unsafe { &mut self.inline }; + let other_word = unsafe { other.inline }; + + debug_assert!(domain_size <= Self::INLINE_CAPACITY); + + *self_word |= !other_word & Word::MAX.unbounded_shr((WORD_BITS - domain_size) as u32); + } else if other.is_empty_unallocated() { + self.insert_all(domain_size); + } else { + let self_words = self.on_heap_get_or_alloc().as_mut_slice(); + let other_words = other.on_heap().unwrap().as_slice(); + + // Set all but the last word if domain_size is not divisible by `WORD_BITS`. + for (self_word, other_word) in + self_words.iter_mut().zip(other_words).take(domain_size / WORD_BITS) + { + *self_word |= !other_word; + } + + let remaining_bits = domain_size % WORD_BITS; + if remaining_bits > 0 { + let last_idx = domain_size / WORD_BITS; + self_words[last_idx] |= !other_words[last_idx] & !(Word::MAX << remaining_bits); + } + } + } + + /// Common function for union/intersection-like operations. 
+ /// + /// This function takes two bit sets—one mutably, one immutably. Neither must be the + /// `empty_unallocated` variant. It asserts that they have the same `domain_size`, then applies a function to + /// each pair of words, effectively performing a zip-like operation. + /// It checks whether `self` has changed; if so, it returns `true`, otherwise `false`. + /// + /// ## Safety + /// + /// - Neither set must be `self.empty_unallocated`. + /// - If the sets are inlined, this will leave the tag bit set to 1. You must not modify it—doing so + /// results in undefined behaviour. This may be inconvenient for operations such as subtraction; + /// in such cases, use `binary_operation_safe` instead. + #[inline(always)] + unsafe fn binary_operation(&mut self, other: &Self, op: impl Fn(&mut Word, Word)) -> bool { + debug_assert!(!self.is_empty_unallocated()); + debug_assert!(!other.is_empty_unallocated()); + + // Apply `op` and return if the word changed. + let apply_and_check_change = |x: &mut Word, y: Word| -> bool { + let old = *x; + op(x, y); + *x != old + }; + + if self.is_inline() { + let x = unsafe { &mut self.inline }; + assert!(other.is_inline(), "bit sets has different domain sizes"); + let y = unsafe { other.inline }; + apply_and_check_change(x, y) + } else { + let self_on_heap = unsafe { &mut self.on_heap }; + assert!(!other.is_inline(), "bit sets has different domain sizes"); + let other_on_heap = unsafe { &other.on_heap }; + let self_slice = self_on_heap.as_mut_slice(); + let other_slice = other_on_heap.as_slice(); + assert_eq!(self_slice.len(), other_slice.len(), "bit sets have different domain sizes"); + let mut has_changed = false; + for (x, y) in self_slice.iter_mut().zip(other_slice) { + has_changed |= apply_and_check_change(x, *y); + } + has_changed + } + } + + /// Similar to [`Self::binary_operation`], but restores the tag bit if it has changed. + /// + /// Note that the tag bit will still be set in the call to `op`, but there is no danger in + /// changing it as it will be restored afterwords. + /// + /// ## Safety + /// + /// Neither set must be `self.empty_unallocated`. + #[inline(always)] + unsafe fn binary_operation_safe(&mut self, other: &Self, op: impl Fn(&mut Word, Word)) -> bool { + debug_assert!(!self.is_empty_unallocated()); + debug_assert!(!other.is_empty_unallocated()); + + if self.is_inline() { + let x = unsafe { &mut self.inline }; + assert!(other.is_inline(), "bit sets has different domain sizes"); + let y = unsafe { other.inline }; + + let old = *x; + op(x, y); + *x |= Self::IS_INLINE_TAG_BIT; + old != *x + } else { + let self_on_heap = unsafe { &mut self.on_heap }; + assert!(!other.is_inline(), "bit sets has different domain sizes"); + let other_on_heap = unsafe { &other.on_heap }; + let self_slice = self_on_heap.as_mut_slice(); + let other_slice = other_on_heap.as_slice(); + assert_eq!(self_slice.len(), other_slice.len(), "bit sets have different domain sizes"); + let mut has_changed = false; + for (x, y) in self_slice.iter_mut().zip(other_slice) { + let old = *x; + op(x, *y); + has_changed |= old != *x; + } + has_changed + } + } + + super::bit_relations_inherent_impls! {} +} + +impl BitRelations> for DenseBitSet { + #[inline(always)] + fn union(&mut self, other: &Self) -> bool { + if self.is_empty_unallocated() { + debug_assert!(!other.is_inline()); + *self = other.clone(); + !self.is_empty() + } else if other.is_empty_unallocated() { + false + } else { + // SAFETY: The union operation does not remove any bit set to 1, so the tag bit is + // unaffected. 
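+            // (For example, unioning `{1, 3}` with `{3, 5}` only ever turns bits on, giving
+            // `{1, 3, 5}` and returning `true` because bit 5 was newly set.)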
+ unsafe { self.binary_operation(other, |x, y| *x |= y) } + } + } + + #[inline(always)] + fn intersect(&mut self, other: &Self) -> bool { + if self.is_empty_unallocated() { + false + } else if other.is_empty_unallocated() { + debug_assert!(!self.is_inline()); + let was_empty = self.is_empty(); + self.clear(); + !was_empty + } else { + // SAFETY: Since the tag bit is set in both `self` and `other`, the intersection won't + // remove it. + unsafe { self.binary_operation(other, |x, y| *x &= y) } + } + } + + #[inline(always)] + fn subtract(&mut self, other: &Self) -> bool { + if self.is_empty_unallocated() || other.is_empty_unallocated() { + false + } else { + unsafe { self.binary_operation_safe(other, |x, y| *x &= !y) } + } + } +} + +impl DenseBitSet { + /// Checks if the bit set contains `elem`. + #[inline(always)] + pub fn contains(&self, elem: T) -> bool { + // Check if the `i`th bit is set in a word. + let contains_bit = |word: Word, bit_idx: u32| { + let mask = 0x01 << bit_idx; + (word & mask) != 0 + }; + + let idx = elem.index(); + if self.is_inline() { + let x = unsafe { self.inline }; + debug_assert!(idx < Self::INLINE_CAPACITY, "index too large: {idx}"); + contains_bit(x, idx as u32) + } else if let Some(on_heap) = self.on_heap() { + let word_idx = idx / WORD_BITS; + let bit_idx = (idx % WORD_BITS) as u32; + let word = on_heap.as_slice()[word_idx]; + contains_bit(word, bit_idx) + } else { + debug_assert!(self.is_empty_unallocated()); + false + } + } + + /// Insert `elem`. Returns `true` if the set has changed. + #[inline(always)] + pub fn insert(&mut self, elem: T) -> bool { + self.insert_usize(elem.index()) + } + + /// Remove `elem`. Returns `true` if the set has changed. + #[inline(always)] + pub fn remove(&mut self, elem: T) -> bool { + // Remove the `i`th bit in a word and return `true` if it changed. + let remove_bit = |word: &mut Word, bit_idx: u32| { + let mask = !(0x01 << bit_idx); + let old = *word; + *word &= mask; + *word != old + }; + + let idx = elem.index(); + if self.is_inline() { + let x = unsafe { &mut self.inline }; + debug_assert!(idx < Self::INLINE_CAPACITY, "index too large: {idx}"); + remove_bit(x, idx as u32) + } else if let Some(on_heap) = self.on_heap_mut() { + let word_idx = idx / WORD_BITS; + let bit_idx = (idx % WORD_BITS) as u32; + let word = &mut on_heap.as_mut_slice()[word_idx]; + remove_bit(word, bit_idx) + } else { + debug_assert!(self.is_empty_unallocated()); + // Nothing to be removed. + false + } + } + + /// Returns an iterator over all elements in this set. + #[inline(always)] + pub fn iter(&self) -> BitIter<'_, T> { + if self.is_inline() { + let x = unsafe { self.inline }; + // Remove the tag bit. + let without_tag_bit = x ^ Self::IS_INLINE_TAG_BIT; + BitIter::from_single_word(without_tag_bit) + } else if let Some(on_heap) = self.on_heap() { + BitIter::from_slice(on_heap.as_slice()) + } else { + debug_assert!(self.is_empty_unallocated()); + BitIter::from_single_word(0) + } + } + + /// Returns `Some(elem)` if the set contains exactly one elemement otherwise returns `None`. 
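+    ///
+    /// A small usage sketch (illustrative, not compiled as a doctest):
+    ///
+    /// ```ignore (illustrative)
+    /// let mut s: DenseBitSet<usize> = DenseBitSet::new_empty(10);
+    /// assert_eq!(s.only_one_elem(), None); // empty set
+    /// s.insert(3);
+    /// assert_eq!(s.only_one_elem(), Some(3)); // exactly one element
+    /// s.insert(7);
+    /// assert_eq!(s.only_one_elem(), None); // more than one element
+    /// ```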
+ #[inline(always)] + pub fn only_one_elem(&self) -> Option { + if self.is_inline() { + let word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + if word.is_power_of_two() { Some(T::new(word.trailing_zeros() as usize)) } else { None } + } else if self.is_empty_unallocated() { + None + } else { + let words = self.on_heap().unwrap().as_slice(); + let mut found_elem = None; + for (i, &word) in words.iter().enumerate() { + if word > 0 { + if found_elem.is_some() { + return None; + } + if word.is_power_of_two() { + found_elem = + Some(T::new(i * WORD_BITS as usize + word.trailing_zeros() as usize)); + } else { + return None; + } + } + } + found_elem + } + } + + #[inline] + pub fn insert_range(&mut self, range: Range) { + if let Some(end) = range.end.index().checked_sub(1) { + self.insert_range_inclusive(RangeInclusive::new(range.start, Idx::new(end))); + } + } + + #[inline(always)] + pub fn insert_range_inclusive(&mut self, range: RangeInclusive) { + let start = range.start().index(); + let end = range.end().index(); + + if start > end { + return; + } + + if self.is_inline() { + debug_assert!(end < Self::INLINE_CAPACITY); + let mask = (1 << end) | ((1 << end) - (1 << start)); + unsafe { self.inline |= mask }; + } else { + let words = self.on_heap_get_or_alloc().as_mut_slice(); + + let (start_word_index, start_mask) = word_index_and_mask(start); + let (end_word_index, end_mask) = word_index_and_mask(end); + + // Set all words in between start and end (exclusively of both). + for word_index in (start_word_index + 1)..end_word_index { + words[word_index] = !0; + } + + if start_word_index != end_word_index { + // Start and end are in different words, so we handle each in turn. + // + // We set all leading bits. This includes the start_mask bit. + words[start_word_index] |= !(start_mask - 1); + // And all trailing bits (i.e. from 0..=end) in the end word, + // including the end. + words[end_word_index] |= end_mask | (end_mask - 1); + } else { + words[start_word_index] |= end_mask | (end_mask - start_mask); + } + } + } + + #[inline(always)] + pub fn last_set_in(&self, range: RangeInclusive) -> Option { + let start = range.start().index(); + let end = range.end().index(); + + if start > end { + return None; + } + + if self.is_inline() { + debug_assert!(end < Self::INLINE_CAPACITY); + let mut word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + let end_bit = 1 << end; + // Set all bits mor significant than `end_bit` to zero. + word &= end_bit | (end_bit - 1); + if word != 0 { + let pos = max_bit(word); + if start <= pos { Some(T::new(pos)) } else { None } + } else { + None + } + } else if let Some(on_heap) = self.on_heap() { + let words = on_heap.as_slice(); + + let (start_word_index, _) = word_index_and_mask(start); + let (end_word_index, end_mask) = word_index_and_mask(end); + + let end_word = words[end_word_index] & (end_mask | (end_mask - 1)); + if end_word != 0 { + let pos = max_bit(end_word) + WORD_BITS * end_word_index; + if start <= pos { + return Some(T::new(pos)); + } + } + + // We exclude end_word_index from the range here, because we don't want + // to limit ourselves to *just* the last word: the bits set it in may be + // after `end`, so it may not work out. 
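+            // Scan the words strictly below `end_word_index`, from the highest one down, for
+            // the last non-zero word; the `start <= pos` check below rejects a hit before
+            // `start`.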
+ if let Some(offset) = + words[start_word_index..end_word_index].iter().rposition(|&w| w != 0) + { + let word_idx = start_word_index + offset; + let start_word = words[word_idx]; + let pos = max_bit(start_word) + WORD_BITS * word_idx; + if start <= pos { Some(T::new(pos)) } else { None } + } else { + None + } + } else { + debug_assert!(self.is_empty_unallocated()); + None + } + } +} + +impl BitRelations> for DenseBitSet { + fn union(&mut self, other: &ChunkedBitSet) -> bool { + other.iter().fold(false, |changed, elem| self.insert(elem) || changed) + } + + fn subtract(&mut self, _other: &ChunkedBitSet) -> bool { + unimplemented!("implement if/when necessary"); + } + + fn intersect(&mut self, other: &ChunkedBitSet) -> bool { + if self.is_inline() { + assert!(other.domain_size <= Self::INLINE_CAPACITY); + if other.domain_size == 0 { + return false; + } + + let word = unsafe { &mut self.inline }; + let old_word = *word; + match &other.chunks[0] { + Chunk::Zeros(d) => { + debug_assert_eq!(usize::from(*d), other.domain_size); + let mask = Word::MAX << other.domain_size(); + *word &= mask; + } + Chunk::Ones(_) => (), + Chunk::Mixed(d, _, words) => { + debug_assert_eq!(usize::from(*d), other.domain_size); + *word &= words[0] | Self::IS_INLINE_TAG_BIT; + } + } + *word != old_word + } else if let Some(on_heap) = self.on_heap_mut() { + let all_words = on_heap.as_mut_slice(); + + let mut changed = false; + for (i, chunk) in other.chunks.iter().enumerate() { + let mut words = &mut all_words[i * CHUNK_WORDS..]; + if words.len() > CHUNK_WORDS { + words = &mut words[..CHUNK_WORDS]; + } + match chunk { + Chunk::Zeros(..) => { + for word in words { + if *word != 0 { + changed = true; + *word = 0; + } + } + } + Chunk::Ones(..) => (), + Chunk::Mixed(_, _, data) => { + for (i, word) in words.iter_mut().enumerate() { + let new_val = *word & data[i]; + if new_val != *word { + changed = true; + *word = new_val; + } + } + } + } + } + changed + } else { + debug_assert!(self.is_empty_unallocated()); + false + } + } +} + +impl Encodable for DenseBitSet { + #[inline(never)] // FIXME: For profiling purposes + fn encode(&self, s: &mut S) { + // The encoding is as follows: + // + // The `inline` and `empty_unallocated` variants are encoded as a single `Word`. Here, we + // consider the `empty_unallocated` variant as the `inline` variant because + // `empty_unallocated: usize`, `inline: Word`, and `usize` is smaller than `Word`. + // + // The `on_heap` variant is encoded as follows: First, the number of `Word`s are encoded + // with a single `Word`. We assert that the two most significant bits of this number are 0 + // to distinguish it from the `inline` and `empty_unallocated` variants. Then all the words are + // encoded in sequence. + + if let Some(on_heap) = self.on_heap() { + let n_words: Word = on_heap.n_words(); + debug_assert_eq!( + n_words >> WORD_BITS - 2, + 0x0, + "the two most significant bits must be 0" + ); + n_words.encode(s); + debug_assert_eq!(n_words as usize, on_heap.as_slice().len()); + for word in on_heap.as_slice().iter() { + word.encode(s); + } + } else { + let word = unsafe { self.inline }; + debug_assert!(word >> WORD_BITS - 2 != 0, "the 2 most significant bits must not be 0"); + word.encode(s); + } + } +} + +impl Decodable for DenseBitSet { + #[inline(never)] // FIXME: For profiling purposes + fn decode(d: &mut D) -> Self { + // First we read one `Word` and check the variant. 
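+        // Concretely (with a 64-bit `Word`): an `inline` or `empty_unallocated` set round-trips
+        // as that single word, while an `on_heap` set was written as its word count (top two
+        // bits zero) followed by that many data words.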
+ let word = Word::decode(d); + if word >> WORD_BITS - 2 == 0x0 { + // If the two most significant bits are 0, then this is the `on_heap` variant and the + // number of words is encoded by `word`. + let n_words = word as usize; + assert!( + n_words > 0, + "DenseBitSet decoder error: At least one word must be stored with the `on_heap` variant." + ); + let mut on_heap = BitSetOnHeap::new_empty(n_words); + + let words = on_heap.as_mut_slice(); + // All `words` are now initialised to 0x0. + debug_assert_eq!(words.len(), n_words); + + // Decode the words one-by-one. + for word in words.iter_mut() { + *word = Word::decode(d); + } + + DenseBitSet { on_heap: ManuallyDrop::new(on_heap) } + } else { + // Both the `inline` and `empty_unallocated` variants are encoded by one `Word`. We can + // just assume the `inline` variant because the `empty_unallocated` variant is smaller + // and the union is `repr(C)`. + Self { inline: word } + } + } +} + +impl Clone for DenseBitSet { + #[inline(always)] + fn clone(&self) -> Self { + if self.is_inline() { + let inline = unsafe { self.inline }; + Self { inline } + } else if self.is_empty_unallocated() { + let empty_unallocated = unsafe { self.empty_unallocated }; + Self { empty_unallocated } + } else { + let old_on_heap = unsafe { &self.on_heap }; + let on_heap = old_on_heap.clone(); + Self { on_heap } + } + } +} + +impl Drop for DenseBitSet { + #[inline(always)] + fn drop(&mut self) { + // Deallocate if `self` is not inlined. + if let Some(on_heap) = self.on_heap_mut() { + unsafe { + ManuallyDrop::drop(on_heap); + } + } + } +} + +/// A pointer to a dense bit set stored on the heap. +/// +/// This struct is a `usize`, with its two most significant bits always set to 0. If the value is +/// shifted left by 2 bits, it yields a pointer to a sequence of words on the heap. The first word +/// in this sequence represents the length—it indicates how many words follow. These subsequent +/// words make up the actual bit set. +/// +/// For example, suppose the bit set should support a domain size of 240 bits. We first determine +/// how many words are needed to store 240 bits—that’s 4 words, assuming `[WORD_BITS] == 64`. +/// The pointer in this struct then points to a sequence of five words allocated on the heap. The +/// first word has the value 4 (the length), and the remaining four words comprise the bit set. +#[repr(transparent)] +struct BitSetOnHeap(usize); + +impl BitSetOnHeap { + fn new_empty(len: usize) -> Self { + debug_assert!(len >= 1); + + // The first word is used to store the total number of words. The rest of the words + // store the bits. + let num_words = len + 1; + + let layout = Layout::array::(num_words).expect("Bit set too large"); + // SAFETY: `num_words` is always at least `1` so we never allocate zero size. + let ptr = unsafe { alloc_zeroed(layout).cast::() }; + let Some(ptr) = NonNull::::new(ptr) else { + handle_alloc_error(layout); + }; + + // Store the length in the first word. + unsafe { ptr.write(len as Word) }; + + // Convert `ptr` to a `usize` and shift it two bits to the right. + BitSetOnHeap((ptr.as_ptr() as usize) >> 2) + } + + /// Get a slice with all bits in this bit set. + /// + /// Note that the number of bits in the set is rounded up to the next power of `Usize::BITS`. So + /// if the user requested a domain_size of 216 bits, a slice with 4 words will be returned on a + /// 64-bit machine. 
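+    ///
+    /// The returned slice starts at the second heap word: the first word only stores the
+    /// length and is not part of the bit set itself.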
+ #[inline] + fn as_slice(&self) -> &[Word] { + let ptr = (self.0 << 2) as *const Word; + let len = unsafe { ptr.read() } as usize; + // The slice starts at the second word. + unsafe { slice::from_raw_parts(ptr.add(1), len) } + } + + /// Get a mutable slice with all bits in this bit set. + /// + /// Note that the number of bits in the set is rounded up to the next power of `Usize::BITS`. So + /// if the user requested a domain_size of 216 bits, a slice with 4 words will be returned on a + /// 64-bit machine. + #[inline] + fn as_mut_slice(&mut self) -> &mut [Word] { + let ptr = (self.0 << 2) as *mut Word; + let len = unsafe { ptr.read() } as usize; + // The slice starts at the second word. + unsafe { slice::from_raw_parts_mut(ptr.add(1), len) } + } + + /// Check if the set is empty. + fn is_empty(&self) -> bool { + self.as_slice().iter().all(|&x| x == 0) + } + + /// Get the number of words. + #[allow(dead_code)] // FIXME + #[inline] + fn n_words(&self) -> Word { + let ptr = (self.0 << 2) as *const Word; + unsafe { ptr.read() } + } + + /// Get the capacity, that is the number of elements that can be stored in this set. + fn capacity(&self) -> usize { + let ptr = (self.0 << 2) as *const Word; + let len = unsafe { ptr.read() } as usize; + len * WORD_BITS + } + + /// Make sure the set can hold at least `min_domain_size` elements. Reallocate if necessary. + fn ensure_capacity(&mut self, min_domain_size: usize) { + let len = min_domain_size.div_ceil(WORD_BITS); + + let old_ptr = (self.0 << 2) as *const Word; + let old_len = unsafe { old_ptr.read() } as usize; + + if len <= old_len { + return; + } + + // The first word is used to store the total number of words. The rest of the words + // store the bits. + let num_words = len + 1; + let old_num_words = old_len + 1; + + let new_layout = Layout::array::(num_words).expect("Bit set too large"); + let old_layout = Layout::array::(old_num_words).expect("Bit set too large"); + + // SAFETY: `num_words` is always at least `1` so we never allocate zero size. + let ptr = + unsafe { realloc(old_ptr as *mut u8, old_layout, new_layout.size()).cast::() }; + let Some(ptr) = NonNull::::new(ptr) else { + handle_alloc_error(new_layout); + }; + + // Store the length in the first word. + unsafe { ptr.write(len as Word) }; + + // Set all the new words to 0. + for word_idx in old_num_words..num_words { + unsafe { ptr.add(word_idx).write(0x0) } + } + + // Convert `ptr` to a `usize` and shift it two bits to the right. + self.0 = (ptr.as_ptr() as usize) >> 2 + } +} + +impl Clone for BitSetOnHeap { + fn clone(&self) -> Self { + let ptr = (self.0 << 2) as *const Word; + let len = unsafe { ptr.read() } as usize; + let num_words = len + 1; + + let layout = Layout::array::(num_words).expect("Bit set too large"); + // SAFETY: `num_words` is always at least `1` so we never allocate zero size. + let new_ptr = unsafe { alloc(layout).cast::() }; + let Some(new_ptr) = NonNull::::new(new_ptr) else { + handle_alloc_error(layout); + }; + + unsafe { ptr.copy_to_nonoverlapping(new_ptr.as_ptr(), num_words) }; + + BitSetOnHeap((new_ptr.as_ptr() as usize) >> 2) + } +} + +impl Drop for BitSetOnHeap { + fn drop(&mut self) { + let ptr = (self.0 << 2) as *mut Word; + + // SAFETY: The first word stores the number of words for the bit set. We have to add 1 + // because the first word storing the length is allocated as well. 
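+        // E.g. a set backed by 4 data words was allocated as `Layout::array::<Word>(5)`, so
+        // the same 5-word layout must be recomputed here before deallocating.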
+ let num_words = unsafe { ptr.read() } as usize + 1; + let layout = Layout::array::(num_words).expect("Bit set too large"); + // SAFETY: We know that `on_heap` has been allocated with the same layout. See the + // `new` method for reference. + unsafe { dealloc(ptr.cast::(), layout) }; + } +} + +pub struct BitIter<'a, T: Idx> { + /// A copy of the current word, but with any already-visited bits cleared. + /// (This lets us use `trailing_zeros()` to find the next set bit.) When it + /// is reduced to 0, we move onto the next word. + word: Word, + + /// The offset (measured in bits) of the current word. + offset: usize, + + /// Underlying iterator over the words. + iter: slice::Iter<'a, Word>, + + marker: PhantomData, +} + +impl<'a, T: Idx> BitIter<'a, T> { + pub(super) fn from_slice(words: &'a [Word]) -> Self { + // We initialize `word` and `offset` to degenerate values. On the first + // call to `next()` we will fall through to getting the first word from + // `iter`, which sets `word` to the first word (if there is one) and + // `offset` to 0. Doing it this way saves us from having to maintain + // additional state about whether we have started. + Self { + word: 0, + offset: usize::MAX - (WORD_BITS - 1), + iter: words.iter(), + marker: PhantomData, + } + } + + #[inline(always)] + fn from_single_word(word: Word) -> Self { + Self { word, offset: 0, iter: [].iter(), marker: PhantomData } + } +} + +impl<'a, T: Idx> Iterator for BitIter<'a, T> { + type Item = T; + + #[inline(always)] + fn next(&mut self) -> Option { + loop { + if self.word != 0 { + // Get the position of the next set bit in the current word, + // then clear the bit. + let bit_pos = self.word.trailing_zeros() as usize; + self.word ^= 0x01 << bit_pos; + return Some(T::new(bit_pos + self.offset)); + } + + // Move onto the next word. `wrapping_add()` is needed to handle + // the degenerate initial value given to `offset` in `new()`. 
+ self.word = *self.iter.next()?; + self.offset = self.offset.wrapping_add(WORD_BITS); + } + } +} + +impl<'a, T: Idx> FusedIterator for BitIter<'a, T> {} + +impl fmt::Debug for DenseBitSet { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + w.debug_list().entries(self.iter()).finish() + } +} + +impl PartialEq for DenseBitSet { + #[inline] + fn eq(&self, other: &Self) -> bool { + if self.is_inline() { + if other.is_inline() { + unsafe { self.inline == other.inline } + } else if other.is_empty_unallocated() { + self.is_empty() + } else { + let other_words = other.on_heap().unwrap().as_slice(); + let self_word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + other_words[0] == self_word && other_words[1..].iter().all(|&w| w == 0) + } + } else if self.is_empty_unallocated() { + other.is_empty() + } else { + let self_words = self.on_heap().unwrap().as_slice(); + if other.is_empty_unallocated() { + self_words.iter().all(|&w| w == 0) + } else if other.is_inline() { + let other_word = unsafe { other.inline } ^ Self::IS_INLINE_TAG_BIT; + self_words[0] == other_word && self_words[1..].iter().all(|&w| w == 0) + } else { + let mut self_words = self_words.iter(); + let mut other_words = other.on_heap().unwrap().as_slice().iter(); + loop { + match (self_words.next(), other_words.next()) { + (Some(w1), Some(w2)) if w1 == w2 => (), + (Some(_), Some(_)) => break false, + (Some(0), None) | (None, Some(0)) => (), + (Some(_), None) | (None, Some(_)) => break false, + (None, None) => break true, + } + } + } + } + } +} + +impl Eq for DenseBitSet {} + +impl Hash for DenseBitSet { + #[inline] + fn hash(&self, hasher: &mut H) { + if self.is_inline() { + let inline = unsafe { self.inline }; + inline.hash(hasher); + } else if let Some(num_words) = self.empty_unallocated_get_num_words() { + // Now we hash 0 for `num_words` times so that this hash should be equal to a cleared + // set with the `on_heap` variant. + for _ in 0..num_words { + let zero_word: Word = 0x0; + zero_word.hash(hasher); + } + } else { + let words = self.on_heap().unwrap().as_slice(); + for word in words { + word.hash(hasher); + } + } + } +} + +/// A resizable bitset type with a dense representation. +/// +/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also +/// just be `usize`. +/// +/// All operations that involve an element will panic if the element is equal +/// to or greater than the domain size. +#[derive(Clone, PartialEq)] +pub struct GrowableBitSet { + bit_set: DenseBitSet, +} + +impl Default for GrowableBitSet { + fn default() -> Self { + GrowableBitSet::new_empty() + } +} + +impl GrowableBitSet { + /// Ensure that the set can hold at least `min_domain_size` elements. + pub fn ensure(&mut self, min_domain_size: usize) { + if min_domain_size <= self.bit_set.capacity() { + return; + } + + if self.bit_set.is_inline() { + // The set must change from being inlined to allocate on the heap. + debug_assert!(min_domain_size > DenseBitSet::::INLINE_CAPACITY); + + let mut new_bit_set = DenseBitSet::new_empty(min_domain_size); + if !self.bit_set.is_empty() { + // SAFETY: We know that `self.is_inline()` is true. 
+ let word = unsafe { self.bit_set.inline } ^ DenseBitSet::::IS_INLINE_TAG_BIT; + new_bit_set.on_heap_get_or_alloc().as_mut_slice()[0] = word; + } + self.bit_set = new_bit_set; + } else if self.bit_set.is_empty_unallocated() { + self.bit_set = DenseBitSet::new_empty(min_domain_size); + } else { + self.bit_set.on_heap_mut().unwrap().ensure_capacity(min_domain_size); + } + } + + pub fn new_empty() -> GrowableBitSet { + GrowableBitSet { bit_set: DenseBitSet::new_empty(0) } + } + + pub fn with_capacity(capacity: usize) -> GrowableBitSet { + GrowableBitSet { bit_set: DenseBitSet::new_empty(capacity) } + } + + /// Insert the element with index `idx`. Returns `true` if the set has changed. + #[inline] + pub fn insert_usize(&mut self, idx: usize) -> bool { + self.ensure(idx + 1); + self.bit_set.insert_usize(idx) + } +} + +impl GrowableBitSet { + /// Insert `elem` into the set, resizing if necessary. Returns `true` if the set has changed. + #[inline] + pub fn insert(&mut self, elem: T) -> bool { + self.insert_usize(elem.index()) + } + + /// Returns `true` if the set has changed. + #[inline] + pub fn remove(&mut self, elem: T) -> bool { + self.ensure(elem.index() + 1); + self.bit_set.remove(elem) + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.bit_set.is_empty() + } + + #[inline] + pub fn contains(&self, elem: T) -> bool { + elem.index() < self.bit_set.capacity() && self.bit_set.contains(elem) + } + + #[inline] + pub fn iter(&self) -> BitIter<'_, T> { + self.bit_set.iter() + } + + #[inline] + pub fn len(&self) -> usize { + self.bit_set.count() + } +} + +impl From> for GrowableBitSet { + fn from(bit_set: DenseBitSet) -> Self { + Self { bit_set } + } +} + +impl From> for DenseBitSet { + fn from(bit_set: GrowableBitSet) -> Self { + bit_set.bit_set + } +} + +impl fmt::Debug for GrowableBitSet { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + self.bit_set.fmt(w) + } +} + +#[inline] +fn max_bit(word: Word) -> usize { + WORD_BITS - 1 - word.leading_zeros() as usize +} diff --git a/compiler/rustc_index/src/bit_set/tests.rs b/compiler/rustc_index/src/bit_set/tests.rs index 323a66ddc6f20..eea19cb11a101 100644 --- a/compiler/rustc_index/src/bit_set/tests.rs +++ b/compiler/rustc_index/src/bit_set/tests.rs @@ -1,9 +1,583 @@ +use std::collections::BTreeSet; +use std::hash::{BuildHasher, BuildHasherDefault, DefaultHasher}; +use std::hint::black_box; +use std::ops::{Range, RangeBounds, RangeInclusive}; + +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use test::Bencher; + use super::*; +use crate::IndexVec; extern crate test; -use std::hint::black_box; -use test::Bencher; +/// A very simple pseudo random generator using linear xorshift. +/// +/// [See Wikipedia](https://en.wikipedia.org/wiki/Xorshift). This has 64-bit state and a period +/// of `2^64 - 1`. +struct Rng(u64); + +impl Rng { + fn new(seed: u64) -> Self { + Rng(seed) + } + + fn next(&mut self) -> usize { + self.0 ^= self.0 << 7; + self.0 ^= self.0 >> 9; + self.0 as usize + } + + fn next_bool(&mut self) -> bool { + self.next() % 2 == 0 + } + + /// Sample a range, a subset of `0..=max`. + /// + /// The purpose of this method is to make edge cases such as `0..=max` more common. 
+ fn sample_range(&mut self, max: usize) -> RangeInclusive { + let start = match self.next() % 3 { + 0 => 0, + 1 => max, + 2 => self.next() % (max + 1), + _ => unreachable!(), + }; + let end = match self.next() % 3 { + 0 => 0, + 1 => max, + 2 => self.next() % (max + 1), + _ => unreachable!(), + }; + RangeInclusive::new(start, end) + } +} + +#[derive(Default)] +struct EncoderLittleEndian { + bytes: Vec, +} + +impl Encoder for EncoderLittleEndian { + fn emit_usize(&mut self, v: usize) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u8(&mut self, v: u8) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u16(&mut self, v: u16) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u32(&mut self, v: u32) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u64(&mut self, v: u64) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u128(&mut self, v: u128) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_isize(&mut self, v: isize) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i8(&mut self, v: i8) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i16(&mut self, v: i16) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i32(&mut self, v: i32) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i64(&mut self, v: i64) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i128(&mut self, v: i128) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_raw_bytes(&mut self, v: &[u8]) { + self.bytes.extend(v); + } +} + +struct DecoderLittleEndian<'a> { + bytes: &'a [u8], + /// Remember the original `bytes.len()` so we can calculate how many bytes we've read. + original_len: usize, +} + +impl<'a> DecoderLittleEndian<'a> { + fn new(bytes: &'a [u8]) -> Self { + Self { bytes, original_len: bytes.len() } + } +} + +impl<'a> Decoder for DecoderLittleEndian<'a> { + fn read_usize(&mut self) -> usize { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + usize::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u128(&mut self) -> u128 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u128::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u64(&mut self) -> u64 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u64::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u32(&mut self) -> u32 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u32::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u16(&mut self) -> u16 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u16::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u8(&mut self) -> u8 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u8::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_isize(&mut self) -> isize { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + isize::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i128(&mut self) -> i128 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i128::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i64(&mut self) -> i64 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i64::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i32(&mut self) -> i32 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i32::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i16(&mut self) -> 
i16 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i16::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_raw_bytes(&mut self, len: usize) -> &[u8] { + let (bytes, rest) = self.bytes.split_at(len); + self.bytes = rest; + bytes + } + fn peek_byte(&self) -> u8 { + self.bytes[0] + } + fn position(&self) -> usize { + self.original_len - self.bytes.len() + } +} + +fn test_with_domain_size(domain_size: usize) { + const TEST_ITERATIONS: u32 = 512; + + let mut set_1 = DenseBitSet::::new_empty(domain_size); + let mut set_1_reference = IndexVec::::from_elem_n(false, domain_size); + let mut set_2 = DenseBitSet::::new_empty(domain_size); + let mut set_2_reference = IndexVec::::from_elem_n(false, domain_size); + + let hasher = BuildHasherDefault::::new(); + + let mut encoder = EncoderLittleEndian::default(); + + let mut rng = Rng::new(42); + + for _ in 0..TEST_ITERATIONS { + // Make a random operation. + match rng.next() % 100 { + 0..20 => { + // Insert in one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to insert into. + if rng.next_bool() { + assert_eq!(!set_1.contains(elem), set_1.insert(elem)); + set_1_reference[elem] = true; + } else { + assert_eq!(!set_2.contains(elem), set_2.insert(elem)); + set_2_reference[elem] = true; + } + } + 20..40 => { + // Insert a range in one of the sets. + if domain_size == 0 { + continue; + } + + let range = rng.sample_range(domain_size - 1); + // Choose set to insert into. + if rng.next_bool() { + set_1.insert_range_inclusive(range.clone()); + for i in range { + set_1_reference[i] = true; + } + } else { + set_2.insert_range_inclusive(range.clone()); + for i in range { + set_2_reference[i] = true; + } + } + } + 40..50 => { + // Test insert_all(). + if rng.next_bool() { + set_1.insert_all(domain_size); + for x in set_1_reference.iter_mut() { + *x = true; + } + } else { + set_2.insert_all(domain_size); + for x in set_2_reference.iter_mut() { + *x = true; + } + } + } + 50..70 => { + // Remove from one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to remove into. + if rng.next_bool() { + assert_eq!(set_1.contains(elem), set_1.remove(elem),); + set_1_reference[elem] = false; + } else { + assert_eq!(set_2.contains(elem), set_2.remove(elem),); + set_2_reference[elem] = false; + } + } + 70..76 => { + // Union + let old_set_1 = set_1.clone(); + let changed = set_1.union(&set_2); + assert_eq!(changed, old_set_1 != set_1); + + // Adjust the reference sets. + for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] |= val; + } + } + 76..82 => { + // Intersection + let old_set_1 = set_1.clone(); + let changed = set_1.intersect(&set_2); + assert_eq!(changed, old_set_1 != set_1); + + // Adjust the reference sets. + for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] &= val; + } + } + 82..88 => { + // Subtraction + let old_set_1 = set_1.clone(); + let changed = set_1.subtract(&set_2); + assert_eq!(changed, old_set_1 != set_1); + + // Adjust the reference sets. + for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] &= !val; + } + } + 88..94 => { + // Union_not + set_1.union_not(&set_2, domain_size); + + // Adjust the reference sets. 
+ for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] |= !val; + } + } + 94..97 => { + // Clear + if rng.next_bool() { + set_1.clear(); + for x in set_1_reference.iter_mut() { + *x = false; + } + } else { + set_2.clear(); + for x in set_2_reference.iter_mut() { + *x = false; + } + } + } + 97..100 => { + // Test new_filled(). + if rng.next_bool() { + set_1 = DenseBitSet::new_filled(domain_size); + for x in set_1_reference.iter_mut() { + *x = true; + } + } else { + set_2 = DenseBitSet::new_filled(domain_size); + for x in set_2_reference.iter_mut() { + *x = true; + } + } + } + _ => unreachable!(), + } + + // Check the contains function. + for i in 0..domain_size { + assert_eq!(set_1.contains(i), set_1_reference[i]); + assert_eq!(set_2.contains(i), set_2_reference[i]); + } + + // Check iter function. + assert!( + set_1.iter().eq(set_1_reference.iter_enumerated().filter(|&(_, &v)| v).map(|(x, _)| x)) + ); + assert!( + set_2.iter().eq(set_2_reference.iter_enumerated().filter(|&(_, &v)| v).map(|(x, _)| x)) + ); + + // Check the superset relation. + assert_eq!(set_1.superset(&set_2), set_2.iter().all(|x| set_1.contains(x))); + + // Check the `==` operator. + assert_eq!(set_1 == set_2, set_1_reference == set_2_reference); + + // Check the `hash()` function. + // If the `set_1` and `set_2` are equal, then their hashes must also be equal. + if set_1 == set_2 { + assert_eq!(hasher.hash_one(&set_1), hasher.hash_one(&set_2)); + } + + // Check the count function. + assert_eq!(set_1.count(), set_1_reference.iter().filter(|&&x| x).count()); + assert_eq!(set_2.count(), set_2_reference.iter().filter(|&&x| x).count()); + + // Check `only_one_elem()`. + if let Some(elem) = set_1.only_one_elem() { + assert_eq!(set_1.count(), 1); + assert_eq!(elem, set_1.iter().next().unwrap()); + } else { + assert_ne!(set_1.count(), 1); + } + + // Check `last_set_in()`. + if domain_size > 0 { + let range = rng.sample_range(domain_size - 1); + assert_eq!( + set_1.last_set_in(range.clone()), + range.clone().filter(|&i| set_1.contains(i)).last() + ); + assert_eq!( + set_2.last_set_in(range.clone()), + range.filter(|&i| set_2.contains(i)).last() + ); + } + + // Check `Encodable` and `Decodable` implementations. + if rng.next() as u32 % TEST_ITERATIONS < 128 { + set_1.encode(&mut encoder); + + let mut decoder = DecoderLittleEndian::new(&encoder.bytes); + let decoded = DenseBitSet::::decode(&mut decoder); + assert_eq!( + decoder.position(), + encoder.bytes.len(), + "All bytes must be read when decoding." + ); + + assert_eq!(set_1, decoded); + + encoder.bytes.clear(); + } + } +} + +fn test_relations_with_chunked_set(domain_size: usize) { + const TEST_ITERATIONS: u32 = 64; + + let mut dense_set = DenseBitSet::::new_empty(domain_size); + let mut chunked_set = ChunkedBitSet::new_empty(domain_size); + + let mut rng = Rng::new(42); + + for _ in 0..TEST_ITERATIONS { + // Make a random operation. + match rng.next() % 10 { + 0..3 => { + // Insert in one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to insert into. + if rng.next_bool() { + dense_set.insert(elem); + } else { + chunked_set.insert(elem); + } + } + 3..6 => { + // Remove from one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to remove into. 
+ if rng.next_bool() { + dense_set.remove(elem); + } else { + chunked_set.remove(elem); + } + } + 6 => { + // Clear + if rng.next_bool() { + dense_set.clear(); + } else { + chunked_set.clear(); + } + } + 7 => { + // Fill. + if rng.next_bool() { + dense_set.insert_all(domain_size); + } else { + chunked_set.insert_all(); + } + } + 8 => { + // Union + let old_dense_set = dense_set.clone(); + let changed = dense_set.union(&chunked_set); + assert_eq!(old_dense_set != dense_set, changed); + assert!(dense_set.superset(&old_dense_set)); + assert!(chunked_set.iter().all(|x| dense_set.contains(x))); + + // Check that all the added elements come from `chunked_set`. + let mut difference = dense_set.clone(); + difference.subtract(&old_dense_set); + assert!(difference.iter().all(|x| chunked_set.contains(x))); + } + 9 => { + // Intersection + let old_dense_set = dense_set.clone(); + let changed = dense_set.intersect(&chunked_set); + assert_eq!(old_dense_set != dense_set, changed); + assert!(old_dense_set.superset(&dense_set)); + assert!(dense_set.iter().all(|x| chunked_set.contains(x))); + + // Check that no of the removed elements comes from `chunked_set`. + let mut difference = old_dense_set; // Just renaming. + difference.subtract(&dense_set); + assert!(difference.iter().all(|x| !chunked_set.contains(x))); + } + _ => unreachable!(), + } + } +} + +#[test] +fn test_dense_bit_set() { + assert_eq!( + size_of::>(), + size_of::(), + "DenseBitSet should have the same size as a Word" + ); + + test_with_domain_size(0); + test_with_domain_size(1); + test_with_domain_size(63); + test_with_domain_size(64); + test_with_domain_size(65); + test_with_domain_size(127); + test_with_domain_size(128); + test_with_domain_size(129); + + test_relations_with_chunked_set(0); + test_relations_with_chunked_set(1); + test_relations_with_chunked_set(CHUNK_BITS - 1); + test_relations_with_chunked_set(CHUNK_BITS); + test_relations_with_chunked_set(CHUNK_BITS + 2); + test_relations_with_chunked_set(3 * CHUNK_BITS - 2); + test_relations_with_chunked_set(3 * CHUNK_BITS); + test_relations_with_chunked_set(3 * CHUNK_BITS + 1); +} + +#[test] +fn test_growable_bit_set() { + const TEST_ITERATIONS: u32 = 512; + const MAX_ELEMS: usize = 314; + + let mut set = GrowableBitSet::::new_empty(); + let mut reference_set = BTreeSet::::new(); + + let mut rng = Rng::new(42); + + for _ in 0..TEST_ITERATIONS { + match rng.next() % 100 { + 0..30 => { + // Insert an element in the `0..=(DenseBitSet::INLINE_CAPACITY + 2)` range. + let elem = rng.next() % (DenseBitSet::::INLINE_CAPACITY + 3); + set.insert(elem); + reference_set.insert(elem); + } + 30..50 => { + // Insert an element in the `0..MAX_ELEMS` range. + let elem = rng.next() % MAX_ELEMS; + set.insert(elem); + reference_set.insert(elem); + } + 50..70 => { + // Remove an existing element. + let len = set.len(); + if len == 0 { + continue; + } + let elem = set.iter().nth(rng.next() % len).unwrap(); + set.remove(elem); + reference_set.remove(&elem); + } + 70..90 => { + // Remove an arbitrary element in the `0..MAX_ELEMS` range. + let elem = rng.next() % MAX_ELEMS; + set.remove(elem); + reference_set.remove(&elem); + } + 90..100 => { + // Make sure the `with_capacity()` function works. + let capacity = rng.next() % MAX_ELEMS; + set = GrowableBitSet::with_capacity(capacity); + reference_set.clear(); + } + _ => unreachable!(), + } + + // Check the `is_empty()` function. + assert_eq!(set.is_empty(), reference_set.is_empty()); + + // Check the `iter` function. 
+ assert!(set.iter().eq(reference_set.iter().copied())); + + // Check the contains function with a 20 % probability. + if rng.next() % 5 == 0 { + for x in 0..MAX_ELEMS { + assert_eq!(set.contains(x), reference_set.contains(&x)); + } + } + } +} #[test] fn test_new_filled() { @@ -50,11 +624,11 @@ fn bitset_clone_from() { let mut b = DenseBitSet::new_empty(2); b.clone_from(&a); - assert_eq!(b.domain_size(), 10); + assert!(b.capacity() >= 10); assert_eq!(b.iter().collect::>(), [4, 7, 9]); b.clone_from(&DenseBitSet::new_empty(40)); - assert_eq!(b.domain_size(), 40); + assert!(b.capacity() >= 40); assert_eq!(b.iter().collect::>(), []); } @@ -91,7 +665,7 @@ fn union_not() { b.insert(81); // Already in `a`. b.insert(90); - a.union_not(&b); + a.union_not(&b, 100); // After union-not, `a` should contain all values in the domain, except for // the ones that are in `b` and were _not_ already in `a`. @@ -600,10 +1174,7 @@ fn sparse_matrix_operations() { #[test] fn dense_insert_range() { #[track_caller] - fn check(domain: usize, range: R) - where - R: RangeBounds + Clone + IntoIterator + std::fmt::Debug, - { + fn check_range(domain: usize, range: Range) { let mut set = DenseBitSet::new_empty(domain); set.insert_range(range.clone()); for i in set.iter() { @@ -613,32 +1184,45 @@ fn dense_insert_range() { assert!(set.contains(i), "{} in {:?}, inserted {:?}", i, set, range); } } - check(300, 10..10); - check(300, WORD_BITS..WORD_BITS * 2); - check(300, WORD_BITS - 1..WORD_BITS * 2); - check(300, WORD_BITS - 1..WORD_BITS); - check(300, 10..100); - check(300, 10..30); - check(300, 0..5); - check(300, 0..250); - check(300, 200..250); - - check(300, 10..=10); - check(300, WORD_BITS..=WORD_BITS * 2); - check(300, WORD_BITS - 1..=WORD_BITS * 2); - check(300, WORD_BITS - 1..=WORD_BITS); - check(300, 10..=100); - check(300, 10..=30); - check(300, 0..=5); - check(300, 0..=250); - check(300, 200..=250); + + #[track_caller] + fn check_range_inclusive(domain: usize, range: RangeInclusive) { + let mut set = DenseBitSet::new_empty(domain); + set.insert_range_inclusive(range.clone()); + for i in set.iter() { + assert!(range.contains(&i)); + } + for i in range.clone() { + assert!(set.contains(i), "{} in {:?}, inserted {:?}", i, set, range); + } + } + + check_range(300, 10..10); + check_range(300, WORD_BITS..WORD_BITS * 2); + check_range(300, WORD_BITS - 1..WORD_BITS * 2); + check_range(300, WORD_BITS - 1..WORD_BITS); + check_range(300, 10..100); + check_range(300, 10..30); + check_range(300, 0..5); + check_range(300, 0..250); + check_range(300, 200..250); + + check_range_inclusive(300, 10..=10); + check_range_inclusive(300, WORD_BITS..=WORD_BITS * 2); + check_range_inclusive(300, WORD_BITS - 1..=WORD_BITS * 2); + check_range_inclusive(300, WORD_BITS - 1..=WORD_BITS); + check_range_inclusive(300, 10..=100); + check_range_inclusive(300, 10..=30); + check_range_inclusive(300, 0..=5); + check_range_inclusive(300, 0..=250); + check_range_inclusive(300, 200..=250); for i in 0..WORD_BITS * 2 { for j in i..WORD_BITS * 2 { - check(WORD_BITS * 2, i..j); - check(WORD_BITS * 2, i..=j); - check(300, i..j); - check(300, i..=j); + check_range(WORD_BITS * 2, i..j); + check_range_inclusive(WORD_BITS * 2, i..=j); + check_range(300, i..j); + check_range_inclusive(300, i..=j); } } } @@ -656,7 +1240,7 @@ fn dense_last_set_before() { } #[track_caller] - fn cmp(set: &DenseBitSet, needle: impl RangeBounds + Clone + std::fmt::Debug) { + fn cmp(set: &DenseBitSet, needle: RangeInclusive) { assert_eq!( set.last_set_in(needle.clone()), easy(set, 
needle.clone()), @@ -672,20 +1256,18 @@ fn dense_last_set_before() { set.insert(WORD_BITS - 1); cmp(&set, 0..=WORD_BITS - 1); cmp(&set, 0..=5); - cmp(&set, 10..100); + cmp(&set, 10..=99); set.insert(100); - cmp(&set, 100..110); - cmp(&set, 99..100); + cmp(&set, 100..=119); + cmp(&set, 99..=99); cmp(&set, 99..=100); for i in 0..=WORD_BITS * 2 { for j in i..=WORD_BITS * 2 { for k in 0..WORD_BITS * 2 { let mut set = DenseBitSet::new_empty(300); - cmp(&set, i..j); cmp(&set, i..=j); set.insert(k); - cmp(&set, i..j); cmp(&set, i..=j); } } diff --git a/compiler/rustc_middle/src/values.rs b/compiler/rustc_middle/src/values.rs index 4d70a70873267..46bcc25dc0710 100644 --- a/compiler/rustc_middle/src/values.rs +++ b/compiler/rustc_middle/src/values.rs @@ -376,12 +376,8 @@ fn find_item_ty_spans( }); if check_params && let Some(args) = path.segments.last().unwrap().args { let params_in_repr = tcx.params_in_repr(def_id); - // the domain size check is needed because the HIR may not be well-formed at this point - for (i, arg) in args.args.iter().enumerate().take(params_in_repr.domain_size()) - { - if let hir::GenericArg::Type(ty) = arg - && params_in_repr.contains(i as u32) - { + for arg in params_in_repr.iter().map_while(|i| args.args.get(i as usize)) { + if let hir::GenericArg::Type(ty) = arg { find_item_ty_spans( tcx, ty.as_unambig_ty(), diff --git a/compiler/rustc_mir_dataflow/src/framework/cursor.rs b/compiler/rustc_mir_dataflow/src/framework/cursor.rs index 3f6e7a0661921..d45509d9de758 100644 --- a/compiler/rustc_mir_dataflow/src/framework/cursor.rs +++ b/compiler/rustc_mir_dataflow/src/framework/cursor.rs @@ -127,7 +127,7 @@ where #[cfg(test)] pub(crate) fn allow_unreachable(&mut self) { #[cfg(debug_assertions)] - self.reachable_blocks.insert_all() + self.reachable_blocks.insert_all(self.body().basic_blocks.len()) } /// Returns the `Analysis` used to generate the underlying `Results`. 
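The call-site changes above illustrate a pattern that recurs throughout this patch: the rewritten DenseBitSet only remembers how many words it has allocated, not the exact domain size, so operations that used to ask the set for `domain_size()` now have to be handed the element count by the caller. A minimal sketch of the new calling convention, using only the `new_empty` and `insert_all(domain_size)` API shown in this series (the helper function and its name are illustrative, not compiler code):

    use rustc_index::bit_set::DenseBitSet;

    // Hypothetical helper: build a set with every block marked reachable.
    fn all_reachable(num_blocks: usize) -> DenseBitSet<usize> {
        let mut reachable: DenseBitSet<usize> = DenseBitSet::new_empty(num_blocks);
        // `insert_all` now takes the domain size explicitly, because the set
        // itself can no longer report it.
        reachable.insert_all(num_blocks);
        reachable
    }
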
diff --git a/compiler/rustc_mir_dataflow/src/framework/fmt.rs b/compiler/rustc_mir_dataflow/src/framework/fmt.rs index 38599cd094933..8a5d3c35f2f52 100644 --- a/compiler/rustc_mir_dataflow/src/framework/fmt.rs +++ b/compiler/rustc_mir_dataflow/src/framework/fmt.rs @@ -82,21 +82,12 @@ where } fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let size = self.domain_size(); - assert_eq!(size, old.domain_size()); - - let mut set_in_self = MixedBitSet::new_empty(size); - let mut cleared_in_self = MixedBitSet::new_empty(size); - - for i in (0..size).map(T::new) { - match (self.contains(i), old.contains(i)) { - (true, false) => set_in_self.insert(i), - (false, true) => cleared_in_self.insert(i), - _ => continue, - }; - } + let mut set_in_self = self.clone(); + set_in_self.subtract(old); + let mut cleared_in_self = old.clone(); + cleared_in_self.subtract(self); - fmt_diff(&set_in_self, &cleared_in_self, ctxt, f) + fmt_diff(&MixedBitSet::Small(set_in_self), &MixedBitSet::Small(cleared_in_self), ctxt, f) } } diff --git a/compiler/rustc_mir_dataflow/src/impls/initialized.rs b/compiler/rustc_mir_dataflow/src/impls/initialized.rs index 18165b0b9bd08..4f77deb8526d2 100644 --- a/compiler/rustc_mir_dataflow/src/impls/initialized.rs +++ b/compiler/rustc_mir_dataflow/src/impls/initialized.rs @@ -464,7 +464,7 @@ impl<'tcx> Analysis<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> { // sets state bits for Arg places fn initialize_start_block(&self, _: &mir::Body<'tcx>, state: &mut Self::Domain) { // set all bits to 1 (uninit) before gathering counter-evidence - state.insert_all(); + state.insert_all(self.move_data().move_paths.len()); drop_flag_effects_for_function_entry(self.body, self.move_data, |path, s| { assert!(s == DropFlagState::Present); diff --git a/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs b/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs index e3aa8f5a62014..896b86156c8a2 100644 --- a/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs +++ b/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs @@ -87,7 +87,6 @@ impl<'a, 'tcx> Analysis<'tcx> for MaybeStorageDead<'a> { } fn initialize_start_block(&self, body: &Body<'tcx>, state: &mut Self::Domain) { - assert_eq!(body.local_decls.len(), self.always_live_locals.domain_size()); // Do not iterate on return place and args, as they are trivially always live. 
for local in body.vars_and_temps_iter() { if !self.always_live_locals.contains(local) { diff --git a/compiler/rustc_mir_transform/src/copy_prop.rs b/compiler/rustc_mir_transform/src/copy_prop.rs index 27af5818982d0..42cac41d8cb6d 100644 --- a/compiler/rustc_mir_transform/src/copy_prop.rs +++ b/compiler/rustc_mir_transform/src/copy_prop.rs @@ -34,7 +34,7 @@ impl<'tcx> crate::MirPass<'tcx> for CopyProp { let fully_moved = fully_moved_locals(&ssa, body); debug!(?fully_moved); - let mut storage_to_remove = DenseBitSet::new_empty(fully_moved.domain_size()); + let mut storage_to_remove = DenseBitSet::new_empty(body.local_decls.len()); for (local, &head) in ssa.copy_classes().iter_enumerated() { if local != head { storage_to_remove.insert(head); diff --git a/compiler/rustc_mir_transform/src/coroutine.rs b/compiler/rustc_mir_transform/src/coroutine.rs index cddb2f8477858..1b58b7dedf739 100644 --- a/compiler/rustc_mir_transform/src/coroutine.rs +++ b/compiler/rustc_mir_transform/src/coroutine.rs @@ -211,6 +211,9 @@ struct TransformVisitor<'tcx> { old_yield_ty: Ty<'tcx>, old_ret_ty: Ty<'tcx>, + + /// The number of locals in the [`Body`]. + n_locals: usize, } impl<'tcx> TransformVisitor<'tcx> { @@ -440,7 +443,7 @@ impl<'tcx> MutVisitor<'tcx> for TransformVisitor<'tcx> { let storage_liveness: GrowableBitSet = self.storage_liveness[block].clone().unwrap().into(); - for i in 0..self.always_live_locals.domain_size() { + for i in 0..self.n_locals { let l = Local::new(i); let needs_storage_dead = storage_liveness.contains(l) && !self.remap.contains(l) @@ -845,8 +848,6 @@ fn compute_storage_conflicts<'mir, 'tcx>( analysis: &mut MaybeRequiresStorage<'mir, 'tcx>, results: &Results>, ) -> BitMatrix { - assert_eq!(body.local_decls.len(), saved_locals.domain_size()); - debug!("compute_storage_conflicts({:?})", body.span); debug!("always_live = {:?}", always_live_locals); @@ -859,7 +860,11 @@ fn compute_storage_conflicts<'mir, 'tcx>( let mut visitor = StorageConflictVisitor { body, saved_locals, - local_conflicts: BitMatrix::from_row_n(&ineligible_locals, body.local_decls.len()), + local_conflicts: BitMatrix::from_row_n( + &ineligible_locals, + body.local_decls.len(), + body.local_decls.len(), + ), eligible_storage_live: DenseBitSet::new_empty(body.local_decls.len()), }; @@ -1010,7 +1015,7 @@ fn compute_layout<'tcx>( // Create a map from local indices to coroutine struct indices. 
let mut variant_fields: IndexVec> = iter::repeat(IndexVec::new()).take(CoroutineArgs::RESERVED_VARIANTS).collect(); - let mut remap = IndexVec::from_elem_n(None, saved_locals.domain_size()); + let mut remap = IndexVec::from_elem_n(None, body.local_decls.len()); for (suspension_point_idx, live_locals) in live_locals_at_suspension_points.iter().enumerate() { let variant_index = VariantIdx::from(CoroutineArgs::RESERVED_VARIANTS + suspension_point_idx); @@ -1585,6 +1590,7 @@ impl<'tcx> crate::MirPass<'tcx> for StateTransform { discr_ty, old_ret_ty, old_yield_ty, + n_locals: body.local_decls.len(), }; transform.visit_body(body); diff --git a/compiler/rustc_mir_transform/src/coverage/counters.rs b/compiler/rustc_mir_transform/src/coverage/counters.rs index 5568d42ab8f3c..c226f9b89ceb8 100644 --- a/compiler/rustc_mir_transform/src/coverage/counters.rs +++ b/compiler/rustc_mir_transform/src/coverage/counters.rs @@ -81,8 +81,9 @@ pub(crate) fn transcribe_counters( old: &NodeCounters, bcb_needs_counter: &DenseBitSet, bcbs_seen: &DenseBitSet, + num_bcbs: usize, ) -> CoverageCounters { - let mut new = CoverageCounters::with_num_bcbs(bcb_needs_counter.domain_size()); + let mut new = CoverageCounters::with_num_bcbs(num_bcbs); for bcb in bcb_needs_counter.iter() { if !bcbs_seen.contains(bcb) { diff --git a/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs b/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs index 4c20722a04347..e31cec383cfe4 100644 --- a/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs +++ b/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs @@ -72,7 +72,7 @@ impl BalancedFlowGraph { // Next, find all nodes that are currently not reverse-reachable from // `sink_edge_nodes`, and add them to the set as well. dfs.complete_search(); - sink_edge_nodes.union_not(dfs.visited_set()); + sink_edge_nodes.union_not(dfs.visited_set(), graph.num_nodes()); // The sink node is 1 higher than the highest real node. let sink = G::Node::new(graph.num_nodes()); diff --git a/compiler/rustc_mir_transform/src/coverage/query.rs b/compiler/rustc_mir_transform/src/coverage/query.rs index ccf76dc710874..003082d396d1b 100644 --- a/compiler/rustc_mir_transform/src/coverage/query.rs +++ b/compiler/rustc_mir_transform/src/coverage/query.rs @@ -136,7 +136,12 @@ fn coverage_ids_info<'tcx>( priority_list[1..].sort_by_key(|&bcb| !bcbs_seen.contains(bcb)); let node_counters = make_node_counters(&fn_cov_info.node_flow_data, &priority_list); - let coverage_counters = transcribe_counters(&node_counters, &bcb_needs_counter, &bcbs_seen); + let coverage_counters = transcribe_counters( + &node_counters, + &bcb_needs_counter, + &bcbs_seen, + fn_cov_info.priority_list.len(), + ); let CoverageCounters { phys_counter_for_node, next_counter_id, node_counters, expressions, .. diff --git a/compiler/rustc_mir_transform/src/deduce_param_attrs.rs b/compiler/rustc_mir_transform/src/deduce_param_attrs.rs index a0db8bdb7ed88..b2f077d5206fc 100644 --- a/compiler/rustc_mir_transform/src/deduce_param_attrs.rs +++ b/compiler/rustc_mir_transform/src/deduce_param_attrs.rs @@ -19,19 +19,20 @@ struct DeduceReadOnly { /// 1). The bit is true if the argument may have been mutated or false if we know it hasn't /// been up to the point we're at. mutable_args: DenseBitSet, + arg_count: usize, } impl DeduceReadOnly { /// Returns a new DeduceReadOnly instance. 
fn new(arg_count: usize) -> Self { - Self { mutable_args: DenseBitSet::new_empty(arg_count) } + Self { mutable_args: DenseBitSet::new_empty(arg_count), arg_count } } } impl<'tcx> Visitor<'tcx> for DeduceReadOnly { fn visit_place(&mut self, place: &Place<'tcx>, context: PlaceContext, _location: Location) { // We're only interested in arguments. - if place.local == RETURN_PLACE || place.local.index() > self.mutable_args.domain_size() { + if place.local == RETURN_PLACE || place.local.index() > self.arg_count { return; } @@ -86,7 +87,7 @@ impl<'tcx> Visitor<'tcx> for DeduceReadOnly { let local = place.local; if place.is_indirect() || local == RETURN_PLACE - || local.index() > self.mutable_args.domain_size() + || local.index() > self.arg_count { continue; } diff --git a/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs b/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs index 75f351f05c30e..0d4bfd2f78b8a 100644 --- a/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs +++ b/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs @@ -274,7 +274,7 @@ pub(crate) fn run_lint<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body< // We shall now exclude some local bindings for the following cases. { - let mut to_exclude = MixedBitSet::new_empty(all_locals_dropped.domain_size()); + let mut to_exclude = MixedBitSet::new_empty(move_data.move_paths.len()); // We will now do subtraction from the candidate dropped locals, because of the // following reasons. for path_idx in all_locals_dropped.iter() { diff --git a/compiler/rustc_mir_transform/src/single_use_consts.rs b/compiler/rustc_mir_transform/src/single_use_consts.rs index 02caa92ad3fc8..d88f8607f8484 100644 --- a/compiler/rustc_mir_transform/src/single_use_consts.rs +++ b/compiler/rustc_mir_transform/src/single_use_consts.rs @@ -33,7 +33,9 @@ impl<'tcx> crate::MirPass<'tcx> for SingleUseConsts { locals_in_debug_info: DenseBitSet::new_empty(body.local_decls.len()), }; - finder.ineligible_locals.insert_range(..=Local::from_usize(body.arg_count)); + finder + .ineligible_locals + .insert_range_inclusive(Local::from_usize(0)..=Local::from_usize(body.arg_count)); finder.visit_body(body); diff --git a/compiler/rustc_mir_transform/src/sroa.rs b/compiler/rustc_mir_transform/src/sroa.rs index 7c6ccc89c4f30..c390c993c5aed 100644 --- a/compiler/rustc_mir_transform/src/sroa.rs +++ b/compiler/rustc_mir_transform/src/sroa.rs @@ -103,7 +103,7 @@ fn escaping_locals<'tcx>( }; let mut set = DenseBitSet::new_empty(body.local_decls.len()); - set.insert_range(RETURN_PLACE..=Local::from_usize(body.arg_count)); + set.insert_range_inclusive(RETURN_PLACE..=Local::from_usize(body.arg_count)); for (local, decl) in body.local_decls().iter_enumerated() { if excluded.contains(local) || is_excluded_ty(decl.ty) { set.insert(local); From eddd1cc8610e0d15808fea98c021080242ae87d5 Mon Sep 17 00:00:00 2001 From: Tage Johansson Date: Wed, 21 May 2025 14:15:35 +0200 Subject: [PATCH 2/6] DenseBitSet: Solve a few bugs related to overflow in SHR. --- compiler/rustc_index/src/bit_set/dense_bit_set.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_index/src/bit_set/dense_bit_set.rs b/compiler/rustc_index/src/bit_set/dense_bit_set.rs index 334dfadbf3952..9e128fb204235 100644 --- a/compiler/rustc_index/src/bit_set/dense_bit_set.rs +++ b/compiler/rustc_index/src/bit_set/dense_bit_set.rs @@ -81,7 +81,7 @@ impl DenseBitSet { /// The tag for the `empty_unallocated` variant. 
The two most significant bits are /// `[0, 1]`. - const EMPTY_UNALLOCATED_TAG_BITS: usize = 0b01 << (WORD_BITS - 2); + const EMPTY_UNALLOCATED_TAG_BITS: usize = 0b01 << (usize::BITS - 2); /// Create a new empty bit set with a given domain_size. /// @@ -115,7 +115,9 @@ impl DenseBitSet { *word = Word::MAX; } // Remove excessive bits on the last word. - *words.last_mut().unwrap() >>= WORD_BITS - domain_size % WORD_BITS; + // Trust me: this mask is correct. + let last_word_mask = Word::MAX.wrapping_shr(domain_size.wrapping_neg() as u32); + *words.last_mut().unwrap() &= last_word_mask; Self { on_heap: ManuallyDrop::new(on_heap) } } } @@ -135,8 +137,8 @@ impl DenseBitSet { // safe to assume `self.inline`, or `self.on_heap`. #[inline(always)] pub const fn is_empty_unallocated(&self) -> bool { - (unsafe { self.empty_unallocated }) >> usize::BITS as u32 - 2 - == Self::EMPTY_UNALLOCATED_TAG_BITS >> usize::BITS as u32 - 2 + const MASK: usize = usize::MAX << usize::BITS - 2; + (unsafe { self.empty_unallocated } & MASK) == Self::EMPTY_UNALLOCATED_TAG_BITS } /// Check if `self` is `empty_unallocated` and if so return the number of words required to From c36b99dcff6ad76f393b6a367977c84dc9649bc8 Mon Sep 17 00:00:00 2001 From: Tage Johansson Date: Wed, 21 May 2025 15:06:59 +0200 Subject: [PATCH 3/6] fix documentation errors --- compiler/rustc_index/src/bit_set/dense_bit_set.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_index/src/bit_set/dense_bit_set.rs b/compiler/rustc_index/src/bit_set/dense_bit_set.rs index 9e128fb204235..0a9890e65bac5 100644 --- a/compiler/rustc_index/src/bit_set/dense_bit_set.rs +++ b/compiler/rustc_index/src/bit_set/dense_bit_set.rs @@ -26,11 +26,13 @@ use crate::Idx; /// variant for the empty set. In this case, the domain size is stored inline along with a few /// bits indicating that the set is empty. Allocation is deferred until needed, such as on /// the first insert or remove operation. This avoids the need to wrap a lazily initialised bit set -/// in a [`OnceCell`] or an [`Option`]—you can simply create an empty set and populate it if needed. +/// in a [`OnceCell`](std::cell::OnceCell) or an [`Option`]—you can simply create an empty set and +/// populate it if needed. /// /// Note 1: Since this bitset is dense, if your domain is large and/or relatively homogeneous (e.g. -/// long runs of set or unset bits), it may be more efficient to use a [MixedBitSet] or an -/// [IntervalSet](crate::interval::IntervalSet), which are better suited for sparse or highly +/// long runs of set or unset bits), it may be more efficient to use a +/// [`MixedBitSet`](crate::bit_set::MixedBitSet) or an +/// [`IntervalSet`](crate::interval::IntervalSet), which are better suited for sparse or highly /// compressible domains. /// /// Note 2: Use [`GrowableBitSet`] if you require support for resizing after creation. From f17b8f7d566695149b42878c9f8bab4a3c20bfca Mon Sep 17 00:00:00 2001 From: Tage Johansson Date: Wed, 21 May 2025 17:00:43 +0200 Subject: [PATCH 4/6] remove a test checking that the hash for bit sets of different domain sizes are different The new implementation of DenseBitSet doesn't store the exact domain size, so of course the hash values for identical sets with different domain sizes may be equal. 
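Concretely, equality and hashing now look only at the stored bits (trailing zero words are ignored), so two empty sets with different domain sizes compare equal and therefore must hash equal. A hypothetical check of the new behaviour, written against the API in this series rather than taken from the patch:

    use rustc_index::bit_set::DenseBitSet;

    let a: DenseBitSet<usize> = DenseBitSet::new_empty(1);
    let b: DenseBitSet<usize> = DenseBitSet::new_empty(2);
    // The domain size is no longer part of the value, so these are equal
    // and hash to the same value.
    assert_eq!(a, b);
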
--- .../rustc_data_structures/src/stable_hasher/tests.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/compiler/rustc_data_structures/src/stable_hasher/tests.rs b/compiler/rustc_data_structures/src/stable_hasher/tests.rs index 635f241847c43..9f854ca9735eb 100644 --- a/compiler/rustc_data_structures/src/stable_hasher/tests.rs +++ b/compiler/rustc_data_structures/src/stable_hasher/tests.rs @@ -14,16 +14,6 @@ fn hash>(t: &T) -> Hash128 { h.finish() } -// Check that bit set hash includes the domain size. -#[test] -fn test_hash_bit_set() { - use rustc_index::bit_set::DenseBitSet; - let a: DenseBitSet = DenseBitSet::new_empty(1); - let b: DenseBitSet = DenseBitSet::new_empty(2); - assert_ne!(a, b); - assert_ne!(hash(&a), hash(&b)); -} - // Check that bit matrix hash includes the matrix dimensions. #[test] fn test_hash_bit_matrix() { From 44f13f44abd0df88cf82aa166f59c9a3c46cd384 Mon Sep 17 00:00:00 2001 From: Tage Johansson Date: Thu, 22 May 2025 10:57:46 +0200 Subject: [PATCH 5/6] in rustc_index: fix compilation error by changing the rustc_serialize dependency to be required instead of optional --- compiler/rustc_index/Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compiler/rustc_index/Cargo.toml b/compiler/rustc_index/Cargo.toml index 9aa24e668b6b7..7ea9cfdb63549 100644 --- a/compiler/rustc_index/Cargo.toml +++ b/compiler/rustc_index/Cargo.toml @@ -8,7 +8,7 @@ edition = "2024" itertools = "0.12" rustc_index_macros = { path = "../rustc_index_macros" } rustc_macros = { path = "../rustc_macros", optional = true } -rustc_serialize = { path = "../rustc_serialize", optional = true } +rustc_serialize = { path = "../rustc_serialize" } smallvec = "1.8.1" # tidy-alphabetical-end @@ -16,7 +16,6 @@ smallvec = "1.8.1" # tidy-alphabetical-start default = ["nightly"] nightly = [ - "dep:rustc_serialize", "dep:rustc_macros", "rustc_index_macros/nightly", ] From 20f59260539f4af9ab83a56f539f5bbc3fd5d16f Mon Sep 17 00:00:00 2001 From: Tage Johansson Date: Mon, 26 May 2025 12:40:53 +0200 Subject: [PATCH 6/6] in rustc_index: revert "fix compilation error by changing the rustc_serialize dependency to be required instead of optional", and introduce conditional compilation instead. 
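In other words, the `Encodable`/`Decodable` impls are only compiled when the optional `rustc_serialize` dependency is pulled in through the `nightly` feature. A rough sketch of the pattern, with a made-up stand-in type (the real impls for `DenseBitSet<T>` are in the diff below):

    #[cfg(feature = "nightly")]
    use rustc_serialize::{Encodable, Encoder};

    // Stand-in type for illustration only.
    struct ExampleSet {
        words: Vec<u64>,
    }

    // Without the `nightly` feature both the impl and the dependency drop out,
    // so the crate still builds when the feature is disabled.
    #[cfg(feature = "nightly")]
    impl<S: Encoder> Encodable<S> for ExampleSet {
        fn encode(&self, s: &mut S) {
            s.emit_usize(self.words.len());
            for &word in &self.words {
                s.emit_u64(word);
            }
        }
    }
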
--- compiler/rustc_index/Cargo.toml | 3 ++- compiler/rustc_index/src/bit_set/dense_bit_set.rs | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_index/Cargo.toml b/compiler/rustc_index/Cargo.toml index 7ea9cfdb63549..9aa24e668b6b7 100644 --- a/compiler/rustc_index/Cargo.toml +++ b/compiler/rustc_index/Cargo.toml @@ -8,7 +8,7 @@ edition = "2024" itertools = "0.12" rustc_index_macros = { path = "../rustc_index_macros" } rustc_macros = { path = "../rustc_macros", optional = true } -rustc_serialize = { path = "../rustc_serialize" } +rustc_serialize = { path = "../rustc_serialize", optional = true } smallvec = "1.8.1" # tidy-alphabetical-end @@ -16,6 +16,7 @@ smallvec = "1.8.1" # tidy-alphabetical-start default = ["nightly"] nightly = [ + "dep:rustc_serialize", "dep:rustc_macros", "rustc_index_macros/nightly", ] diff --git a/compiler/rustc_index/src/bit_set/dense_bit_set.rs b/compiler/rustc_index/src/bit_set/dense_bit_set.rs index 0a9890e65bac5..7ff6ef285b3ea 100644 --- a/compiler/rustc_index/src/bit_set/dense_bit_set.rs +++ b/compiler/rustc_index/src/bit_set/dense_bit_set.rs @@ -8,6 +8,7 @@ use std::ptr::NonNull; use std::{fmt, iter, slice}; use itertools::Either; +#[cfg(feature = "nightly")] use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use super::{ @@ -781,6 +782,7 @@ impl BitRelations> for DenseBitSet { } } +#[cfg(feature = "nightly")] impl Encodable for DenseBitSet { #[inline(never)] // FIXME: For profiling purposes fn encode(&self, s: &mut S) { @@ -815,6 +817,7 @@ impl Encodable for DenseBitSet { } } +#[cfg(feature = "nightly")] impl Decodable for DenseBitSet { #[inline(never)] // FIXME: For profiling purposes fn decode(d: &mut D) -> Self {