From 6277dbb4ce250473b01b4e4b0d986e5256d2318b Mon Sep 17 00:00:00 2001
From: "Felix S. Klock II" <pnkfelix@pnkfx.org>
Date: Wed, 8 Feb 2023 09:36:23 -0500
Subject: [PATCH 1/6] refactoring: move `replace_base` utility function to root
 of rustc_mir_transform crate.

incorporated review feedback: simplify replace_base via Place::project_deeper
---
 compiler/rustc_mir_transform/src/generator.rs | 9 +--------
 compiler/rustc_mir_transform/src/lib.rs       | 4 ++++
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/compiler/rustc_mir_transform/src/generator.rs b/compiler/rustc_mir_transform/src/generator.rs
index 797a1a86846f6..308a17cdf9e6f 100644
--- a/compiler/rustc_mir_transform/src/generator.rs
+++ b/compiler/rustc_mir_transform/src/generator.rs
@@ -182,14 +182,7 @@ impl<'tcx> MutVisitor<'tcx> for PinArgVisitor<'tcx> {
     }
 }
 
-fn replace_base<'tcx>(place: &mut Place<'tcx>, new_base: Place<'tcx>, tcx: TyCtxt<'tcx>) {
-    place.local = new_base.local;
-
-    let mut new_projection = new_base.projection.to_vec();
-    new_projection.append(&mut place.projection.to_vec());
-
-    place.projection = tcx.mk_place_elems(&new_projection);
-}
+use crate::replace_base;
 
 const SELF_ARG: Local = Local::from_u32(1);
 
diff --git a/compiler/rustc_mir_transform/src/lib.rs b/compiler/rustc_mir_transform/src/lib.rs
index bf798adee199e..5bad7c178c5d1 100644
--- a/compiler/rustc_mir_transform/src/lib.rs
+++ b/compiler/rustc_mir_transform/src/lib.rs
@@ -626,3 +626,7 @@ fn promoted_mir(tcx: TyCtxt<'_>, def: LocalDefId) -> &IndexVec<Promoted, Body<'_
 
     tcx.arena.alloc(promoted)
 }
+
+fn replace_base<'tcx>(place: &mut Place<'tcx>, new_base: Place<'tcx>, tcx: TyCtxt<'tcx>) {
+    *place = new_base.project_deeper(&place.projection[..], tcx)
+}

From f0cbd03b0fdbd29cfcbfeca4273527d6059b143a Mon Sep 17 00:00:00 2001
From: "Felix S. Klock II" <pnkfelix@pnkfx.org>
Date: Wed, 8 Feb 2023 09:37:21 -0500
Subject: [PATCH 2/6] UpvarToLocalProp is a MIR optimization for Future size
 blowup.

Problem Overview
----------------

Consider: `async fn(x: param) { a(&x); b().await; c(&c); }`. It desugars to:

`fn(x: param) -> impl Future { async { let x = x; a(&x); b().await; c(&c); }`

and within that desugared form, the `let x = x;` ends up occupying *two*
distinct slots in the generator: one for the upvar (the right-hand side) and one
for the local (the left-hand side).

The UpvarToLocalProp MIR transformation tries to detect the scenario where we
have a generator with upvars (which look like `self.field` in the MIR) that are
*solely* moved/copied to non-self locals (and those non-self locals may likewise
be moved/copied to other locals). After identifying the full set of locals that
are solely moved/copied from `self.field`, the transformation replaces them all
with `self.field` itself.

Note 1: As soon as you have a use local L that *isn't* a local-to-local
assignment, then that is something you need to respect, and the propagation will
stop trying to propagate L at that point. (And likewise, writes to the
self-local `_1`, or projections thereof, need to be handled with care as well.)

Note 2: `_0` is the special "return place" and should never be replaced with
`self.field`.

In addition, the UpvarToLocalProp transformation removes all the silly
`self.field = self.field;` assignments that result from the above replacement
(apparently some later MIR passes try to double-check that you don't have any
assignments with overlapping memory, so it ends up being necessary to do this
no-op transformation to avoid assertions later).

Note 3: This transformation is significantly generalized past what I
demonstrated on youtube; the latter was focused on matching solely `_3 = _1.0`,
because it was a proof of concept to demostrate that a MIR transformation
prepass even *could* address the generator layout problem.

Furthermore, the UpvarToLocalProp transformation respects optimization fuel: you
can use `-Z fuel=$CRATE=$FUEL` and when the fuel runs out, the transformation
will stop being applied, or be applied only partially.

Note 4: I did not put the optimization fuel check in the patching code for
UpvarToLocalProp: once you decide to replace `_3` with `_1.0` in `_3 = _1.0;`,
you are committed to replacing all future reads of `_3` with `_1.0`, and it
would have complicated the patch transformation to try to use fuel with that
level of control there. Instead, the way I used the fuel was to have it control
how many local variables are added to the `local_to_root_upvar_and_ty` table,
which is the core database that informs the patching process, and thus gets us
the same end effect (of limiting the number of locals that take part in the
transformation) in a hopefully sound manner.

Note 5: Added check that we do not ever call `visit_local` on a local that is
being replaced. This way we hopefully ensure that we will ICE if we ever forget
to replace one.

But also: I didnt think I needed to recur on place, but failing to do so meant I
overlooked locals in the projection. So now I recur on place.

Satisfying above two changes did mean we need to be more aggressive about
getting rid of now useless StorageLive and StorageDead on these locals.

Note 6: Derefs invalidate replacement attempts in any context, not just
mutations.

Updates
-------

rewrote saw_upvar_to_local.

Namely, unified invalidation control paths (because I realized that when looking
at `_l = _1.field`, if you need to invalidate either left- or right-hand side,
you end up needing to invalidate both).

Also made logic for initializing the upvar_to_ty map more robust: Instead of
asserting that we encounter each upvar at most once (because, when chains stop
growing, we cannot assume that), now just ensure that the types we end up
inserting are consistent. (Another alternative would be to bail out of the
routine if the chain is not marked as growing; I'm still debating about which of
those two approaches yields better code here.)

Fixed a bug in how I described an invariant on `LocalState::Ineligible`.

Updated to respect -Zmir_opt_level=0
---
 compiler/rustc_mir_transform/src/lib.rs       |   5 +
 .../src/upvar_to_local_prop.rs                | 691 ++++++++++++++++++
 .../future-sizes/async-awaiting-fut.stdout    |  40 +-
 tests/ui/print_type_sizes/async.stdout        |  11 +-
 4 files changed, 714 insertions(+), 33 deletions(-)
 create mode 100644 compiler/rustc_mir_transform/src/upvar_to_local_prop.rs

diff --git a/compiler/rustc_mir_transform/src/lib.rs b/compiler/rustc_mir_transform/src/lib.rs
index 5bad7c178c5d1..452f4df9e4448 100644
--- a/compiler/rustc_mir_transform/src/lib.rs
+++ b/compiler/rustc_mir_transform/src/lib.rs
@@ -104,6 +104,7 @@ mod simplify_comparison_integral;
 mod sroa;
 mod uninhabited_enum_branching;
 mod unreachable_prop;
+mod upvar_to_local_prop;
 
 use rustc_const_eval::transform::check_consts::{self, ConstCx};
 use rustc_const_eval::transform::promote_consts;
@@ -491,6 +492,10 @@ fn run_runtime_lowering_passes<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
         // `AddRetag` needs to run after `ElaborateDrops`. Otherwise it should run fairly late,
         // but before optimizations begin.
         &elaborate_box_derefs::ElaborateBoxDerefs,
+        // `UpvarToLocalProp` needs to run before `generator::StateTransform`, because its
+        // purpose is to coalesce locals into their original upvars before fresh space is
+        // allocated for them in the generator.
+        &upvar_to_local_prop::UpvarToLocalProp,
         &generator::StateTransform,
         &add_retag::AddRetag,
         &Lint(const_prop_lint::ConstProp),
diff --git a/compiler/rustc_mir_transform/src/upvar_to_local_prop.rs b/compiler/rustc_mir_transform/src/upvar_to_local_prop.rs
new file mode 100644
index 0000000000000..3f435b2b25518
--- /dev/null
+++ b/compiler/rustc_mir_transform/src/upvar_to_local_prop.rs
@@ -0,0 +1,691 @@
+//! This pass propagates upvars to locals within a generator, if it can
+//! determine such coalescing is sound.
+//!
+//! Original demo: search for: `_3 = _1.0`; if found, then replace all occurrences of `_3` with `_1.0`.
+//!
+//! The generalization:
+//!
+//! * apply to more upvars than just `_1.0`; e.g. `_1.1`, `_1.2`, ... (but don't need to worry
+//!   about array indices nor enum variants).
+//!
+//! * transitively apply the replacement to subsequent copies of the local(s).
+//!   e.g. `_3 = _1.0; ... _4 = _3; ...; _5 = _4;`, then might replace `_5` with `_1.0`.
+//!
+//! * How do we know how long to make these "chains" of locals that are participating in these
+//!   assignments? For now we use a simple answer: We extend them until we see a use of a local
+//!   from the chain that *isn't* a trivial assignment to another local, and then cut off
+//!   everything *after* that used local.
+//!
+//! * BUT, if a local is assigned with more than one root value (which we might approximate via
+//!   more than one assignment), then we cannot include that local in the transformation.
+//!
+//! * Also, if `_3` occurs within a place that has a *Deref projection*, then we cannot do the
+//!   replacement of `_3` with `_1.0` in that context (Post cleanup, all Deref projections must
+//!   occur first within a place.). This means we cannot get rid of the initialization of `_3`, and
+//!   thus probably should not do the replacement of `_3` with `_1.0` anywhere at all.
+//!
+//! * Furthermore, locals can occur in the projection itself, e.g. as `Index(_3)` in a PlaceElem.
+//!   We cannot replace such locals with the `_1.0`, so this should likewise make the local
+//!   ineligible.
+//!
+//! Notes around soundness:
+//!
+//! * any writes to a local with a rhs that isn't either the upvar or a local in that upvar's set
+//!   should cause that local and all locals derived *from it* to become ineligible.
+//!
+//! * any writes to `_1` should invalidate the whole transformation. (Should be at most a corner
+//!   case.)
+//!
+//! * any write to `_1.L` where `.L` is one of the upvars that we are using as root values should
+//!   invalidate the use of that upvar as a root value. e.g. a write to `_1.2` should make `_1.2`
+//!   ineligible for the transformation, but *other* upvars like `_1.0` should still be able to
+//!   participate.
+//!
+//! But, what about transmutes of `&_1` and writes to fields of the transmuted thing? We simplify
+//! the reasoning here by just treating *any* use of `_1` that isn't a trivial assignment of a
+//! field to a local (e.g. `&mut _1`, `&_1`) as invalidating the whole transformation
+
+use crate::MirPass;
+use rustc_index::{Idx, IndexVec};
+use rustc_middle::mir::visit::{MutVisitor, PlaceContext, Visitor};
+use rustc_middle::mir::*;
+use rustc_middle::ty::{self, Ty, TyCtxt};
+use rustc_span::def_id::DefId;
+use rustc_target::abi::FieldIdx;
+
+struct IndexMap<K: Idx, V> {
+    vec: IndexVec<K, Option<V>>,
+}
+
+impl<K: Idx, V: Copy> IndexMap<K, V> {
+    fn new(n: usize) -> Self {
+        IndexMap { vec: IndexVec::from_elem_n(None, n) }
+    }
+    fn get(&self, k: K) -> Option<V> {
+        self.vec.get(k).map(|p| *p).flatten()
+    }
+}
+
+const SELF_IDX: u32 = 1;
+const SELF_ARG: Local = Local::from_u32(SELF_IDX);
+
+pub struct UpvarToLocalProp;
+
+// This is a way to make `trace!` format strings shorter to placate tidy.
+const W: &'static str = "UpvarToLocalProp";
+
+struct Patch<'tcx> {
+    tcx: TyCtxt<'tcx>,
+
+    /// Maps each local to upvar that transformation will use as local's replacement
+    local_to_root_upvar_and_ty: IndexMap<Local, (FieldIdx, Ty<'tcx>)>,
+}
+
+impl<'tcx> MirPass<'tcx> for UpvarToLocalProp {
+    fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
+        sess.mir_opt_level() > 0 // on by default w/o -Zmir-opt-level=0
+    }
+    fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
+        trace!("Running UpvarToLocalProp on {:?}", body.source);
+
+        // First: how many upvars might we need to consider?
+        let upvar_limit: usize = {
+            // If we are looking at a generator, we have a fast path
+            if body.yield_ty().is_some() {
+                // The first argument is the generator type passed by value
+                let gen_ty = body.local_decls.raw[SELF_IDX as usize].ty;
+                if let ty::Generator(_, substs, _movability) = gen_ty.kind() {
+                    let substs = substs.as_generator();
+                    substs.upvar_tys().len()
+                } else {
+                    tcx.sess.delay_span_bug(
+                        body.span,
+                        &format!("unexpected generator type {}", gen_ty),
+                    );
+                    return;
+                }
+            } else {
+                // otherwise, fall back to scanning code to find "upvar" count.
+                struct MaxUpvarField(usize);
+                impl<'tcx> Visitor<'tcx> for MaxUpvarField {
+                    fn visit_place(
+                        &mut self,
+                        place: &Place<'tcx>,
+                        _context: PlaceContext,
+                        _location: Location,
+                    ) {
+                        if place.local == SELF_ARG {
+                            if let Some(ProjectionElem::Field(field, _ty)) =
+                                place.projection.as_slice().get(0)
+                            {
+                                self.0 = std::cmp::max(self.0, field.index());
+                            }
+                        }
+                    }
+                }
+                let mut v = MaxUpvarField(0);
+                v.visit_body(body);
+                v.0 + 1
+            }
+        };
+
+        let num_locals = body.local_decls.len();
+        let mir_source = body.source.def_id();
+
+        let upvar_to_locals = IndexVec::from_elem_n(Chain::new(), upvar_limit);
+        let upvar_to_ty = IndexMap { vec: IndexVec::from_elem_n(None, upvar_limit) };
+        let mut local_to_upvar = IndexVec::from_elem_n(LocalState::Unseen, num_locals);
+
+        // The special return place `_0` must not participate in transformation.
+        local_to_upvar[RETURN_PLACE] = LocalState::Ineligible;
+
+        let mut walk = Walk { tcx, mir_source, upvar_to_locals, local_to_upvar, upvar_to_ty };
+
+        walk.visit_body(body);
+        let mut p = walk.to_patch();
+        p.visit_body(body);
+    }
+}
+
+/// Each upvar `_1.K` maps to a *chain* of locals [_l0, ... _li, ...] such that we see see `_l0 =
+/// _1.K; ...; _l{i+1} = _li; ...` in the analyzed MIR, with no intervening invalidations, such
+/// that it would be sound to make them an *alias-set* and replace all uses of the locals `_li`
+/// with `_1.K` itself
+///
+/// During the analysis, this chain first grows (as we discover new copies), then shrinks
+/// (potentially to nothing) as we discover uses of the locals that invalidate their inclusion in
+/// that upvar's alias-set.
+#[derive(Clone, Debug)]
+struct Chain {
+    /// The [_li ...] in the chain. Note that teh last element of the chain may have a special role
+    /// depending on the ChainState `self.state`.
+    local_copies: Vec<Local>,
+
+    /// The `state` captures where we are in the analysis for the upvar: We may be still collecting
+    /// all the copies, or we may have found the end of the copy chain and are now searching for
+    /// reasons to trim it shorter.
+    state: ChainState,
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+enum ChainState {
+    /// A chain that is growing; i.e. we may discover another copy `_z = _y;` where _y is the last
+    /// element of the vector, and can push _z onto the vector to mark its addition to the chain.
+    Growing,
+
+    /// We found a chain of copies, but we cannot grow it further because either:
+    /// * the last element of the chain is used somewhere that isn't a local-to-local copy (but
+    ///   is a place where it would be valid to substitute an upvar), such as a borrow, or
+    /// * the last element of the chain is only used in a local-to-local copy, but the target of
+    ///   that copy is used in a place where it would not be valid to substitute an upvar.
+    Shrinking,
+
+    /// Chain for associated upvar was entirely invalidated (e.g. by borrow of, write to, or
+    /// another read from the upvar).
+    Invalidated,
+    // (One could try representing invalidated chains as Shrinking chains of zero length, since a
+    //  chain will only transition from growing to shrinking, never the other way. It is not clear
+    //  whether such a representation would clarify or obfuscate the code here.)
+}
+
+impl Chain {
+    fn new() -> Self {
+        Chain { local_copies: Vec::new(), state: ChainState::Growing }
+    }
+    fn len(&self) -> usize {
+        let len = self.local_copies.len();
+        if len > 0 {
+            assert_ne!(self.state, ChainState::Invalidated);
+        }
+        len
+    }
+    fn tail(&self) -> Option<Local> {
+        let len = self.local_copies.len();
+        if len > 0 { self.local_copies.get(len - 1).map(|l| *l) } else { None }
+    }
+    fn push_if_growing(&mut self, l: Local) -> bool {
+        if self.state == ChainState::Growing {
+            self.local_copies.push(l);
+            true
+        } else {
+            false
+        }
+    }
+    fn stop_growing(&mut self) {
+        if self.state == ChainState::Growing {
+            self.state = ChainState::Shrinking;
+        }
+    }
+}
+
+struct Walk<'tcx> {
+    tcx: TyCtxt<'tcx>,
+
+    mir_source: DefId,
+
+    /// maps each upvar `_1.K` to a chain of locals [_l, ... _i, _j, ...] such
+    /// that we see see `_l = _1.K; ...; _i = _; _j = _i; ...` in the analyzed
+    /// MIR, with no intervening invalidations. The docs for `Chain` spell out
+    /// the associated state transitions for the analysis.
+    upvar_to_locals: IndexVec<FieldIdx, Chain>,
+
+    /// maps each local to its associated upvar, if its still eligible for its
+    /// single chain of copies.
+    local_to_upvar: IndexVec<Local, LocalState>,
+
+    /// The MIR is going to require we supply the Ty of the upvar when we
+    /// substitute it for part of a Place.
+    upvar_to_ty: IndexMap<FieldIdx, Ty<'tcx>>,
+}
+
+/// Reason that a invalidation step is taken.
+#[derive(Copy, Clone)]
+enum Reason<'tcx> {
+    NonFieldSelfProjection,
+    UseOfUpvar(FieldIdx),
+    Write2ToLocal(Local),
+    Read2FromUpvar(FieldIdx),
+    DereferenceInPlace(Place<'tcx>),
+    WriteToTail(Place<'tcx>, Location),
+    WriteToPlace(Place<'tcx>),
+    LocalUsedAsIndex(Local),
+    ChainAlreadyExtendedPast(Local),
+}
+
+impl<'tcx> std::fmt::Display for Reason<'tcx> {
+    fn fmt(&self, w: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Reason::NonFieldSelfProjection => {
+                write!(w, "observed non-field projection on self; giving up on transform.")
+            }
+            Reason::Write2ToLocal(l) => {
+                write!(w, "observed 2nd write to local {l:?}; it and its suffix now invalidated.")
+            }
+            Reason::Read2FromUpvar(f) => {
+                write!(w, "observed 2nd read from upvar {f:?}; it is now invalidated.")
+            }
+            Reason::UseOfUpvar(f) => {
+                write!(w, "observed a non-trivial use of upvar _1.{f:?}; it is now ineligible.")
+            }
+            Reason::DereferenceInPlace(place) => {
+                let local = &place.local;
+                write!(w, "ineligible local:{local:?} because of dereference in {place:?}")
+            }
+            Reason::WriteToTail(place, loc) => {
+                let local = &place.local;
+                let Location { block: bb, statement_index: idx } = loc;
+                let bb = bb.index();
+                write!(
+                    w,
+                    "observed write to place:{place:?} at bb{bb}:{idx}; {local:?} now tail at most."
+                )
+            }
+            Reason::WriteToPlace(place) => {
+                let local = &place.local;
+                write!(w, "observed write to place:{place:?}; {local:?} is now ineligible.")
+            }
+            Reason::LocalUsedAsIndex(local) => {
+                write!(w, "observed local:{local:?} used as index; it is now ineligible.")
+            }
+            Reason::ChainAlreadyExtendedPast(local) => {
+                write!(w, "local:{local:?} is not the tail of its chain")
+            }
+        }
+    }
+}
+
+impl<'tcx> Walk<'tcx> {
+    fn num_locals(&self) -> usize {
+        self.local_to_upvar.len()
+    }
+
+    // case: `_l = _r;`
+    fn saw_local_to_local(&mut self, lhs: Local, rhs: Local) {
+        match self.local_to_upvar[lhs] {
+            LocalState::Unseen => {
+                // fall through to happy path where we might grow the chain.
+            }
+            LocalState::Valid(_) | LocalState::Ineligible => {
+                // not the first write to lhs; we must ensure lhs is invalidated, and *also* must
+                // immediately end growth of chain for rhs.
+                self.invalidate_local(lhs, Reason::Write2ToLocal(lhs));
+                self.stop_growing_chain_for_local(rhs);
+
+                // (still fall-through in case rhs needs further invalidation below.)
+            }
+        }
+
+        // if rhs is valid chain, then must inspect further; otherwise, no more invalidation to do.
+        let LocalState::Valid(rhs_upvar) = self.local_to_upvar[rhs] else { return; };
+
+        let rhs_chain = &mut self.upvar_to_locals[rhs_upvar];
+        assert!(rhs_chain.local_copies.contains(&rhs));
+        if rhs_chain.tail() != Some(rhs) {
+            // Uh oh, this chain has *already* been extended with a different assignment from rhs.
+            // That means we must cut the chain and force rhs to be the end of it.
+            self.cut_off_only_beyond_local(rhs, Reason::ChainAlreadyExtendedPast(rhs));
+            return;
+        }
+
+        assert_eq!(self.local_to_upvar[rhs], LocalState::Valid(rhs_upvar));
+        assert_eq!(rhs_chain.tail(), Some(rhs));
+
+        if self.local_to_upvar[lhs] == LocalState::Unseen {
+            self.try_to_alias(lhs, rhs_upvar);
+        }
+    }
+
+    // case: `_l = _1.field;`
+    fn saw_upvar_to_local(&mut self, lhs: Local, (rhs, rhs_ty): (FieldIdx, Ty<'tcx>)) {
+        let chains = &self.upvar_to_locals;
+        let locals = &self.local_to_upvar;
+
+        let needs_invalidation = {
+            if let LocalState::Valid(_) = self.local_to_upvar[lhs] {
+                // if lhs L is already in some alias-set, then this is a new write to L and we must
+                // trim that alias set back now.
+                Some(Reason::Write2ToLocal(lhs))
+            } else if chains[rhs].len() > 0 {
+                // If we already had entries, then we have two uses of the same upvar for `rhs`,
+                // and must give up on it entirely.
+                Some(Reason::Read2FromUpvar(rhs))
+            } else {
+                None
+            }
+        };
+
+        if let Some(reason) = needs_invalidation {
+            self.invalidate_local(lhs, reason);
+            self.invalidate_upvar(rhs, reason);
+            return;
+        }
+
+        if locals[lhs] != LocalState::Unseen {
+            return;
+        }
+
+        // At this point, (1.) we have confirmed that (a.) this is the first time we have seen lhs,
+        // and (b.) the chain for rhs is length zero (but not necessarily growing) and (2.) we have
+        // committed to *try* to make lhs and rhs aliases.
+
+        if self.upvar_to_ty.vec[rhs] == None {
+            self.upvar_to_ty.vec[rhs] = Some(rhs_ty);
+        }
+        assert_eq!(self.upvar_to_ty.vec[rhs], Some(rhs_ty));
+
+        self.try_to_alias(lhs, rhs);
+    }
+
+    fn try_to_alias(&mut self, lhs: Local, rhs: FieldIdx) {
+        let locals = &mut self.local_to_upvar;
+        assert_eq!(locals[lhs], LocalState::Unseen);
+
+        // If optimization fuel exhausted, then do not add more entries to any alias set.
+        if !self.tcx.consider_optimizing(|| format!("UpvarToLocalProp {:?}", self.mir_source)) {
+            return;
+        }
+        if self.upvar_to_locals[rhs].push_if_growing(lhs) {
+            locals[lhs] = LocalState::Valid(rhs);
+        } else {
+            locals[lhs] = LocalState::Ineligible;
+        }
+    }
+
+    fn invalidate_all(&mut self, reason: Reason<'tcx>) {
+        trace!("{W} invalidate_all, {reason}");
+        for chain in self.upvar_to_locals.iter_mut() {
+            chain.local_copies.clear();
+            chain.state = ChainState::Invalidated;
+        }
+        for local_state in self.local_to_upvar.iter_mut() {
+            *local_state = LocalState::Ineligible;
+        }
+    }
+
+    fn invalidate_upvar(&mut self, f: FieldIdx, reason: Reason<'tcx>) {
+        trace!("{W} invalidate_upvar, {reason}");
+        let chain = &mut self.upvar_to_locals[f];
+        while let Some(l) = chain.local_copies.pop() {
+            self.local_to_upvar[l] = LocalState::Ineligible;
+        }
+        chain.state = ChainState::Invalidated;
+    }
+
+    fn invalidate_local(&mut self, l: Local, reason: Reason<'tcx>) {
+        trace!("{W} invalidate_local, {reason}");
+        match self.local_to_upvar[l] {
+            LocalState::Valid(_) => self.cut_off_local_and_beyond(l, reason),
+            LocalState::Unseen => self.local_to_upvar[l] = LocalState::Ineligible,
+            LocalState::Ineligible => {}
+        }
+    }
+
+    fn cut_off_only_beyond_local(&mut self, l: Local, reason: Reason<'tcx>) {
+        trace!("{W} cut_off_only_beyond_local, {reason}");
+        self.cut_off_suffix(l, true);
+    }
+
+    fn cut_off_local_and_beyond(&mut self, l: Local, reason: Reason<'tcx>) {
+        trace!("{W} cut_off_local_and_beyond, {reason}");
+        self.cut_off_suffix(l, false);
+    }
+
+    fn cut_off_suffix(&mut self, l: Local, keep_local: bool) {
+        let locals = &mut self.local_to_upvar;
+        let LocalState::Valid(upvar) = locals[l] else {
+            panic!("should only be called for locals currently attached to an upvar chain");
+        };
+
+        let chain = &mut self.upvar_to_locals[upvar];
+        assert!(chain.local_copies.contains(&l));
+        chain.stop_growing();
+
+        while let Some(tail) = chain.local_copies.pop() {
+            assert_eq!(locals[tail], LocalState::Valid(upvar));
+            if keep_local && tail == l {
+                chain.local_copies.push(tail);
+                break;
+            }
+
+            locals[tail] = LocalState::Ineligible;
+
+            if tail == l {
+                assert!(!keep_local);
+                break;
+            }
+        }
+    }
+
+    fn stop_growing_chain_for_local(&mut self, l: Local) {
+        if let LocalState::Valid(upvar) = self.local_to_upvar[l] {
+            self.upvar_to_locals[upvar].stop_growing();
+        }
+    }
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+enum LocalState {
+    /// Unseen: We have not yet seen this local in the walk over the MIR
+    Unseen,
+    /// Valid(F): We have seen this local exactly once, and there is a direct chain of copies
+    /// `_l0 = _self.F; _l1 = _l0; ...; L = _li;` that connects this local L to F.
+    Valid(FieldIdx),
+    /// This local cannot be part of any alias set; furthermore, if it is the left-hand side of any
+    /// local-to-local assignment, then the right-hand side of that assignment is either (a.) not
+    /// in a chain of copies, or (b.) must be *the* tail of its chain of copies.
+    Ineligible,
+}
+
+impl<'tcx> Visitor<'tcx> for Walk<'tcx> {
+    fn visit_assign(&mut self, place: &Place<'tcx>, rvalue: &Rvalue<'tcx>, location: Location) {
+        match categorize_assign(place, rvalue) {
+            AssignCategory::LocalToLocal { lhs, rhs } => self.saw_local_to_local(lhs, rhs),
+            AssignCategory::UpvarToLocal { lhs, rhs: (rhs, rhs_ty) } => {
+                self.saw_upvar_to_local(lhs, (rhs, rhs_ty))
+            }
+            AssignCategory::Other => {
+                // recurs if *and only if* we are in the other "complex" cases.
+                self.super_assign(place, rvalue, location)
+            }
+        }
+    }
+
+    fn visit_place(&mut self, place: &Place<'tcx>, context: PlaceContext, location: Location) {
+        let local = place.local;
+        // If there is a Deref within place, then we cannot do the replacement in that place, and
+        // thus should not bother trying to replace that local at all.
+        if place.is_indirect() {
+            self.invalidate_local(local, Reason::DereferenceInPlace(*place));
+        } else if context.is_use() {
+            if place.local == SELF_ARG {
+                // case: use of `_1`
+                if let Some(ProjectionElem::Field(f, _ty)) = place.projection.get(0) {
+                    self.invalidate_upvar(*f, Reason::UseOfUpvar(*f));
+                } else {
+                    // for *every* other kind of projection of _1, we will just invalidate the
+                    // *whole* transformation.
+                    self.invalidate_all(Reason::NonFieldSelfProjection);
+                }
+            } else {
+                // case: use of some place derived from a local other than `_1`.
+                if context.is_drop() {
+                    // FIXME: make sure I can justify this, especially in face of conditional
+                    // control flow. Plus, the drop-independence may be a property that holds for
+                    // the upvars, but not for the locals being put in alias set for that upvar.
+                    trace!("{W} drop of place:{place:?} does not affect analysis state");
+                } else if let LocalState::Valid(_) = self.local_to_upvar[local] {
+                    // A local `L` is allowed to be used (e.g. `&L`, `&mut L`, or `L.foo = 3`) as
+                    // the very last link in a chain of aliases, but cannot be the source of an
+                    // assignment at that point.
+                    let reason = Reason::WriteToTail(*place, location);
+                    self.cut_off_only_beyond_local(place.local, reason);
+                } else {
+                    // If we see a write to `L` *before* it is on any chain, then do not allow
+                    // it to be part of any chain in the future.
+                    self.invalidate_local(local, Reason::WriteToPlace(*place));
+                }
+            }
+        }
+        self.super_place(place, context, location)
+    }
+
+    // A fun corner case: you can have MIR like: `_18 = &(*_27)[_3];`, and you cannot plug `_1.0`
+    // in for `_3` in that context.
+    fn visit_projection_elem(
+        &mut self,
+        _place_ref: PlaceRef<'tcx>,
+        elem: PlaceElem<'tcx>,
+        _context: PlaceContext,
+        _location: Location,
+    ) {
+        match elem {
+            ProjectionElem::OpaqueCast(_ty) | ProjectionElem::Field(_, _ty) => {}
+
+            ProjectionElem::Deref
+            | ProjectionElem::Subslice { from: _, to: _, from_end: _ }
+            | ProjectionElem::ConstantIndex { offset: _, min_length: _, from_end: _ }
+            | ProjectionElem::Downcast(_, _) => {}
+
+            ProjectionElem::Index(local) => {
+                self.invalidate_local(local, Reason::LocalUsedAsIndex(local));
+            }
+        }
+    }
+}
+
+impl<'tcx> Walk<'tcx> {
+    fn to_patch(self) -> Patch<'tcx> {
+        let tcx = self.tcx;
+        Patch { tcx, local_to_root_upvar_and_ty: self.to_local_map() }
+    }
+
+    fn to_local_map(self) -> IndexMap<Local, (FieldIdx, Ty<'tcx>)> {
+        let mut map = IndexMap::new(self.num_locals());
+
+        // This assertion is not strictly necessary, in the sense that the analysis and
+        // transformation are still sound if we only zip over a prefix of these sequences.
+        //
+        // (Also note that these vectors include invalidated chains and uninitialized types, so
+        // this isn't really asserting all that much at all...)
+        assert_eq!(self.upvar_to_locals.len(), self.upvar_to_ty.vec.len());
+
+        let chains = self.upvar_to_locals.iter_enumerated();
+        let chains_and_tys = chains.zip(self.upvar_to_ty.vec.into_iter());
+        for ((field, chain), ty) in chains_and_tys {
+            let Some(ty) = ty else { assert_eq!(chain.len(), 0); continue; };
+            if chain.state == ChainState::Invalidated {
+                assert_eq!(chain.local_copies.len(), 0);
+            }
+            for local in &chain.local_copies {
+                assert!(map.vec[*local].is_none());
+                map.vec[*local] = Some((field, ty));
+            }
+        }
+
+        map
+    }
+}
+
+enum AssignCategory<'tcx> {
+    /// case of `_lhs = _rhs;`
+    LocalToLocal { lhs: Local, rhs: Local },
+    /// case of `_lhs = self.field;` aka `_lhs = _1.field;`
+    UpvarToLocal { lhs: Local, rhs: (FieldIdx, Ty<'tcx>) },
+    /// every other kind of assignment
+    Other,
+}
+
+fn categorize_assign<'tcx>(place: &Place<'tcx>, rvalue: &Rvalue<'tcx>) -> AssignCategory<'tcx> {
+    let l = place.local;
+    if place.projection.len() == 0 &&
+        let Rvalue::Use(Operand::Copy(rhs)) | Rvalue::Use(Operand::Move(rhs)) =
+        rvalue
+    {
+        match (rhs.local, rhs.projection.as_slice()) {
+            // case `_l = _r;`
+            (r, &[]) => AssignCategory::LocalToLocal { lhs: l, rhs: r },
+            // case `_l = _1.field;`
+            (r, &[ProjectionElem::Field(f, ty)]) if r == SELF_ARG => {
+                AssignCategory::UpvarToLocal { lhs: l, rhs: (f, ty) }
+            }
+            _ => AssignCategory::Other,
+        }
+    } else {
+        AssignCategory::Other
+    }
+}
+
+impl<'tcx> MutVisitor<'tcx> for Patch<'tcx> {
+    fn tcx<'a>(&'a self) -> TyCtxt<'tcx> {
+        self.tcx
+    }
+
+    fn visit_local(&mut self, local: &mut Local, _context: PlaceContext, _location: Location) {
+        // Any point in the MIR that has a local that is eligible for replacement should be *doing*
+        // that replacement, which means *this* `visit_local` method should never ever see any such
+        // local.
+        //
+        // As a safeguard (because pnkfelix overlooked one such case, namely
+        // `ProjectionElem::Index(Local)`), assert that here.
+        //
+        // (Note however, this safeguard is imperfect and won't catch all coding errors, because
+        // for this check to even occur, we need the right recursive calls to `super_place` and
+        // `super_statement` to remain in the right places.)
+        if let Some((upvar, ty)) = self.local_to_root_upvar_and_ty.get(*local) {
+            panic!(
+                "{W} visited local:{local:?} at {_context:?} and found \
+                 unexpected associated upvar:{upvar:?} of type:{ty:?}"
+            );
+        }
+    }
+
+    fn visit_place(
+        &mut self,
+        place: &mut Place<'tcx>,
+        _context: PlaceContext,
+        _location: Location,
+    ) {
+        if let Some(Some((field, ty))) = self.local_to_root_upvar_and_ty.vec.get(place.local) {
+            let new_base = Place {
+                local: SELF_ARG,
+                projection: self.tcx.mk_place_elems(&[ProjectionElem::Field(*field, *ty)]),
+            };
+            trace!("{W} replacing base of {place:?} with new base {new_base:?}");
+            crate::replace_base(place, new_base, self.tcx);
+        }
+        self.super_place(place, _context, _location);
+    }
+
+    fn visit_statement(&mut self, statement: &mut Statement<'tcx>, location: Location) {
+        if let StatementKind::StorageLive(l) | StatementKind::StorageDead(l) = statement.kind {
+            if let Some(Some(_)) = self.local_to_root_upvar_and_ty.vec.get(l) {
+                trace!("{W} replacing newly useless storage marker {statement:?} with no-op.");
+                statement.kind = StatementKind::Nop;
+                return;
+            }
+        }
+
+        let mut inspect_for_no_op_swap = false;
+        if let StatementKind::Assign(tup) = &statement.kind {
+            let lhs = &tup.0;
+            if let Some(Some(_)) = self.local_to_root_upvar_and_ty.vec.get(lhs.local) {
+                inspect_for_no_op_swap = true;
+            }
+        }
+        self.super_statement(statement, location);
+        if inspect_for_no_op_swap {
+            let StatementKind::Assign(tup) = &statement.kind else {
+                panic!("cannot lose assign during super_statement call");
+            };
+            let lhs = &tup.0;
+            let rhs = &tup.1;
+            match rhs {
+                Rvalue::Use(Operand::Copy(p)) | Rvalue::Use(Operand::Move(p)) if p == lhs => {
+                    trace!("{W} replacing rewritten {statement:?} with no-op.");
+                    statement.kind = StatementKind::Nop;
+                }
+                _ => {}
+            }
+        }
+    }
+}
diff --git a/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout b/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout
index c0fbb0204b307..654a086fd1b34 100644
--- a/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout
+++ b/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout
@@ -1,41 +1,39 @@
-print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:21:21: 24:2]`: 3078 bytes, alignment: 1 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:21:21: 24:2]`: 2053 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 0 bytes
-print-type-size     variant `Suspend0`: 3077 bytes
-print-type-size         local `.__awaitee`: 3077 bytes
+print-type-size     variant `Suspend0`: 2052 bytes
+print-type-size         local `.__awaitee`: 2052 bytes
 print-type-size     variant `Returned`: 0 bytes
 print-type-size     variant `Panicked`: 0 bytes
-print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]`: 3077 bytes, alignment: 1 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]`: 2052 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 1025 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
-print-type-size     variant `Suspend0`: 2052 bytes
+print-type-size     variant `Suspend0`: 1027 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
 print-type-size         padding: 1 bytes
-print-type-size         local `.fut`: 1025 bytes, alignment: 1 bytes
-print-type-size         local `..generator_field4`: 1 bytes
+print-type-size         local `..generator_field3`: 1 bytes, alignment: 1 bytes
 print-type-size         local `.__awaitee`: 1 bytes
-print-type-size     variant `Suspend1`: 3076 bytes
+print-type-size     variant `Suspend1`: 2051 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
-print-type-size         padding: 1026 bytes
-print-type-size         local `..generator_field4`: 1 bytes, alignment: 1 bytes
+print-type-size         padding: 1 bytes
+print-type-size         local `..generator_field3`: 1 bytes, alignment: 1 bytes
 print-type-size         local `.__awaitee`: 1025 bytes
-print-type-size     variant `Suspend2`: 2052 bytes
+print-type-size     variant `Suspend2`: 1027 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
 print-type-size         padding: 1 bytes
-print-type-size         local `.fut`: 1025 bytes, alignment: 1 bytes
-print-type-size         local `..generator_field4`: 1 bytes
+print-type-size         local `..generator_field3`: 1 bytes, alignment: 1 bytes
 print-type-size         local `.__awaitee`: 1 bytes
 print-type-size     variant `Returned`: 1025 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
 print-type-size     variant `Panicked`: 1025 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
-print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]>`: 3077 bytes, alignment: 1 bytes
-print-type-size     field `.value`: 3077 bytes
-print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]>`: 3077 bytes, alignment: 1 bytes
-print-type-size     variant `MaybeUninit`: 3077 bytes
+print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]>`: 2052 bytes, alignment: 1 bytes
+print-type-size     field `.value`: 2052 bytes
+print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]>`: 2052 bytes, alignment: 1 bytes
+print-type-size     variant `MaybeUninit`: 2052 bytes
 print-type-size         field `.uninit`: 0 bytes
-print-type-size         field `.value`: 3077 bytes
+print-type-size         field `.value`: 2052 bytes
 print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:8:35: 8:37]`: 1025 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 1024 bytes
@@ -44,12 +42,6 @@ print-type-size     variant `Returned`: 1024 bytes
 print-type-size         upvar `.arg`: 1024 bytes
 print-type-size     variant `Panicked`: 1024 bytes
 print-type-size         upvar `.arg`: 1024 bytes
-print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async-awaiting-fut.rs:8:35: 8:37]>`: 1025 bytes, alignment: 1 bytes
-print-type-size     field `.value`: 1025 bytes
-print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async-awaiting-fut.rs:8:35: 8:37]>`: 1025 bytes, alignment: 1 bytes
-print-type-size     variant `MaybeUninit`: 1025 bytes
-print-type-size         field `.uninit`: 0 bytes
-print-type-size         field `.value`: 1025 bytes
 print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:6:17: 6:19]`: 1 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 0 bytes
diff --git a/tests/ui/print_type_sizes/async.stdout b/tests/ui/print_type_sizes/async.stdout
index 873def9031aaa..be63c513c8a49 100644
--- a/tests/ui/print_type_sizes/async.stdout
+++ b/tests/ui/print_type_sizes/async.stdout
@@ -1,21 +1,14 @@
-print-type-size type: `[async fn body@$DIR/async.rs:10:36: 13:2]`: 16386 bytes, alignment: 1 bytes
+print-type-size type: `[async fn body@$DIR/async.rs:10:36: 13:2]`: 8194 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 8192 bytes
 print-type-size         upvar `.arg`: 8192 bytes
-print-type-size     variant `Suspend0`: 16385 bytes
+print-type-size     variant `Suspend0`: 8193 bytes
 print-type-size         upvar `.arg`: 8192 bytes
-print-type-size         local `.arg`: 8192 bytes
 print-type-size         local `.__awaitee`: 1 bytes
 print-type-size     variant `Returned`: 8192 bytes
 print-type-size         upvar `.arg`: 8192 bytes
 print-type-size     variant `Panicked`: 8192 bytes
 print-type-size         upvar `.arg`: 8192 bytes
-print-type-size type: `std::mem::ManuallyDrop<[u8; 8192]>`: 8192 bytes, alignment: 1 bytes
-print-type-size     field `.value`: 8192 bytes
-print-type-size type: `std::mem::MaybeUninit<[u8; 8192]>`: 8192 bytes, alignment: 1 bytes
-print-type-size     variant `MaybeUninit`: 8192 bytes
-print-type-size         field `.uninit`: 0 bytes
-print-type-size         field `.value`: 8192 bytes
 print-type-size type: `[async fn body@$DIR/async.rs:8:17: 8:19]`: 1 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 0 bytes

From e88e117c25b9b86027d2b45f72192f098ca280df Mon Sep 17 00:00:00 2001
From: "Felix S. Klock II" <pnkfelix@pnkfx.org>
Date: Wed, 15 Feb 2023 22:21:25 -0500
Subject: [PATCH 3/6] InlineFutureIntoFuture is a peephole optimization
 improving UpvarToLocalProp.

Problem Overview
----------------

When `async fn` is desugared, there's a whole bunch of local-to-local moves that
are easily identified and eliminated. However, there is one exception: the
sugaring of `.await` does `a = IntoFuture::into_future(b);`, and that is no longer obviously a move from the viewpoint of the analysis.

However, for all F: Future, `<F as IntoFuture>::into_future(self)` is
"guaranteed" to be the identity function that returns `self`.

So: this matches `a = <impl Future as IntoFuture>::into_future(b);` and replaces
it with `a = b;`, based on reasoning that libcore's blanket implementation of
IntoFuture for impl Future is an identity function that takes `self` by value.

This transformation, in tandem with UpvarToLocalProp, is enough to address both
case 1 and case 2 of Rust issue 62958.

InlineFutureIntoFuture respects optimization fuel, same as UpvarToLocalProp
(much simpler to implement in this case).

inline-future-into-future: improved comments during code walk for a rubber duck.

MERGEME inline_future_into_future revised internal instrumentation to print out arg0 type

(because that is what is really going to matter and I should be doing more to
let it drive the analysis.)

Updates
-------

respect -Zmir_opt_level=0
---
 .../src/inline_future_into_future.rs          | 162 ++++++++++++++++++
 compiler/rustc_mir_transform/src/lib.rs       |   4 +
 .../future-sizes/async-awaiting-fut.stdout    |  27 ++-
 .../async-await/future-sizes/future-as-arg.rs |   2 +-
 4 files changed, 180 insertions(+), 15 deletions(-)
 create mode 100644 compiler/rustc_mir_transform/src/inline_future_into_future.rs

diff --git a/compiler/rustc_mir_transform/src/inline_future_into_future.rs b/compiler/rustc_mir_transform/src/inline_future_into_future.rs
new file mode 100644
index 0000000000000..656cdf42314fe
--- /dev/null
+++ b/compiler/rustc_mir_transform/src/inline_future_into_future.rs
@@ -0,0 +1,162 @@
+//! Converts `y = <Future as IntoFuture>::into_future(x);` into just `y = x;`,
+//! since we "know" that matches the behavior of the blanket implementation of
+//! IntoFuture for F where F: Future.
+//!
+//! FIXME: determine such coalescing is sound. In particular, check whether
+//! specialization could foil our plans here!
+//!
+//! This is meant to enhance the effectiveness of the upvar-to-local-prop
+//! transformation in reducing the size of the generators constructed by the
+//! compiler.
+
+use crate::MirPass;
+use rustc_index::IndexVec;
+use rustc_middle::mir::interpret::ConstValue;
+use rustc_middle::mir::visit::MutVisitor;
+use rustc_middle::mir::*;
+use rustc_middle::ty::{self, Ty, TyCtxt};
+use rustc_span::def_id::DefId;
+
+pub struct InlineFutureIntoFuture;
+impl<'tcx> MirPass<'tcx> for InlineFutureIntoFuture {
+    fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
+        sess.mir_opt_level() > 0 // on by default w/o -Zmir-opt-level=0
+    }
+
+    fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
+        let Some(into_future_fn_def_id) = tcx.lang_items().into_future_fn() else { return; };
+        let Some(future_trait_def_id) = tcx.lang_items().future_trait() else { return; };
+        let mir_source_def_id = body.source.def_id();
+        trace!("Running InlineFutureIntoFuture on {:?}", body.source);
+        let local_decls = body.local_decls().to_owned();
+        let mut v = Inliner {
+            tcx,
+            into_future_fn_def_id,
+            future_trait_def_id,
+            mir_source_def_id,
+            local_decls,
+        };
+        v.visit_body(body);
+    }
+}
+
+struct Inliner<'tcx> {
+    tcx: TyCtxt<'tcx>,
+    mir_source_def_id: DefId,
+    into_future_fn_def_id: DefId,
+    future_trait_def_id: DefId,
+    local_decls: IndexVec<Local, LocalDecl<'tcx>>,
+}
+
+#[derive(Copy, Clone, PartialEq, Eq)]
+enum FoundImplFuture {
+    Yes,
+    No,
+}
+
+#[derive(Copy, Clone, PartialEq, Eq)]
+enum FoundIntoFutureCall {
+    Yes,
+    No,
+}
+
+struct ImplFutureCallingIntoFuture<'tcx> {
+    args: Vec<Operand<'tcx>>,
+    destination: Place<'tcx>,
+    target: Option<BasicBlock>,
+}
+
+impl<'tcx> Inliner<'tcx> {
+    // This verifies that `ty` implements `Future`, according to the where
+    // clauses (i.e. predicates) attached to the source code identified by
+    // `mir_source_def_id`).
+    fn does_ty_impl_future(&self, ty: Ty<'tcx>) -> FoundImplFuture {
+        let mir_source_predicates = self.tcx.predicates_of(self.mir_source_def_id);
+        let predicates = mir_source_predicates.instantiate_identity(self.tcx);
+        for pred in &predicates.predicates {
+            let Some(kind) = pred.kind().no_bound_vars() else { continue; };
+            let ty::ClauseKind::Trait(trait_pred) = kind else { continue; };
+            let ty::TraitPredicate { trait_ref, polarity: ty::ImplPolarity::Positive } = trait_pred else { continue };
+
+            // FIXME: justify ignoring `substs` below. My current argument is
+            // that `trait Future` has no generic parameters, and the blanket
+            // impl of `IntoFuture` for all futures does not put any constrants
+            // on the associated items of those futures. But it is worth running
+            // this by a trait system expert to validate.
+            let ty::TraitRef { def_id: trait_def_id, .. } = trait_ref;
+            let self_ty = trait_ref.self_ty();
+            if trait_def_id == self.future_trait_def_id {
+                if self_ty == ty {
+                    return FoundImplFuture::Yes;
+                }
+            }
+        }
+        FoundImplFuture::No
+    }
+}
+
+impl<'tcx> MutVisitor<'tcx> for Inliner<'tcx> {
+    fn tcx<'a>(&'a self) -> TyCtxt<'tcx> {
+        self.tcx
+    }
+    fn visit_basic_block_data(&mut self, _bb: BasicBlock, bb_data: &mut BasicBlockData<'tcx>) {
+        let Some(term) = &mut bb_data.terminator else { return; };
+        let Some(result) = self.analyze_terminator(term) else { return; };
+        let ImplFutureCallingIntoFuture {
+            args, destination: dest, target: Some(target)
+        } = result else { return; };
+
+        // At this point, we have identified this terminator as a call to the
+        // associated function `<impl Future as IntoFuture>::into_future`
+        // Due to our knowledge of how libcore implements Future and IntoFuture,
+        // we know we can replace such a call with a trivial move.
+
+        let Some(arg0) = args.get(0) else { return; };
+
+        trace!("InlineFutureIntoFuture bb_data args:{args:?} dest:{dest:?} target:{target:?}");
+
+        bb_data.statements.push(Statement {
+            source_info: term.source_info,
+            kind: StatementKind::Assign(Box::new((dest, Rvalue::Use(arg0.clone())))),
+        });
+        term.kind = TerminatorKind::Goto { target }
+    }
+}
+
+impl<'tcx> Inliner<'tcx> {
+    fn analyze_terminator(
+        &mut self,
+        term: &mut Terminator<'tcx>,
+    ) -> Option<ImplFutureCallingIntoFuture<'tcx>> {
+        let mut found = (FoundImplFuture::No, FoundIntoFutureCall::No);
+        let &TerminatorKind::Call {
+            ref func, ref args, destination, target, fn_span: _, unwind: _, call_source: _
+        } = &term.kind else { return None; };
+        let Operand::Constant(c) = func else { return None; };
+        let ConstantKind::Val(val_const, const_ty) = c.literal else { return None; };
+        let ConstValue::ZeroSized = val_const else { return None; };
+        let ty::FnDef(fn_def_id, substs) =  const_ty.kind() else { return None; };
+        if *fn_def_id == self.into_future_fn_def_id {
+            found.1 = FoundIntoFutureCall::Yes;
+        } else {
+            trace!("InlineFutureIntoFuture bail as this is not `into_future` invocation.");
+            return None;
+        }
+        let arg0_ty = args.get(0).map(|arg0| arg0.ty(&self.local_decls, self.tcx()));
+        trace!("InlineFutureIntoFuture substs:{substs:?} args:{args:?} arg0 ty:{arg0_ty:?}");
+        let Some(arg0_ty) = arg0_ty else { return None; };
+        found.0 = self.does_ty_impl_future(arg0_ty);
+        if let (FoundImplFuture::Yes, FoundIntoFutureCall::Yes) = found {
+            trace!("InlineFutureIntoFuture can replace {term:?}, a {func:?} call, with move");
+            if !self.tcx.consider_optimizing(|| {
+                format!("InlineFutureIntoFuture {:?}", self.mir_source_def_id)
+            }) {
+                return None;
+            }
+            let args = args.clone();
+            Some(ImplFutureCallingIntoFuture { args, destination, target })
+        } else {
+            None
+        }
+    }
+}
diff --git a/compiler/rustc_mir_transform/src/lib.rs b/compiler/rustc_mir_transform/src/lib.rs
index 452f4df9e4448..c2875e19ececb 100644
--- a/compiler/rustc_mir_transform/src/lib.rs
+++ b/compiler/rustc_mir_transform/src/lib.rs
@@ -76,6 +76,7 @@ mod ffi_unwind_calls;
 mod function_item_references;
 mod generator;
 pub mod inline;
+mod inline_future_into_future;
 mod instsimplify;
 mod large_enums;
 mod lower_intrinsics;
@@ -492,6 +493,9 @@ fn run_runtime_lowering_passes<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
         // `AddRetag` needs to run after `ElaborateDrops`. Otherwise it should run fairly late,
         // but before optimizations begin.
         &elaborate_box_derefs::ElaborateBoxDerefs,
+        // `InlineFutureIntoFuture` needs to run before `UpvarToLocalProp`, because its
+        // purpose is to enhance the effectiveness of the latter transformation.
+        &inline_future_into_future::InlineFutureIntoFuture,
         // `UpvarToLocalProp` needs to run before `generator::StateTransform`, because its
         // purpose is to coalesce locals into their original upvars before fresh space is
         // allocated for them in the generator.
diff --git a/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout b/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout
index 654a086fd1b34..fa63cbd52c257 100644
--- a/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout
+++ b/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout
@@ -1,39 +1,38 @@
-print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:21:21: 24:2]`: 2053 bytes, alignment: 1 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:21:21: 24:2]`: 1029 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 0 bytes
-print-type-size     variant `Suspend0`: 2052 bytes
-print-type-size         local `.__awaitee`: 2052 bytes
+print-type-size     variant `Suspend0`: 1028 bytes
+print-type-size         local `.__awaitee`: 1028 bytes
 print-type-size     variant `Returned`: 0 bytes
 print-type-size     variant `Panicked`: 0 bytes
-print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]`: 2052 bytes, alignment: 1 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]`: 1028 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 1025 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
 print-type-size     variant `Suspend0`: 1027 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
 print-type-size         padding: 1 bytes
-print-type-size         local `..generator_field3`: 1 bytes, alignment: 1 bytes
+print-type-size         local `..generator_field2`: 1 bytes, alignment: 1 bytes
 print-type-size         local `.__awaitee`: 1 bytes
-print-type-size     variant `Suspend1`: 2051 bytes
+print-type-size     variant `Suspend1`: 1026 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
 print-type-size         padding: 1 bytes
-print-type-size         local `..generator_field3`: 1 bytes, alignment: 1 bytes
-print-type-size         local `.__awaitee`: 1025 bytes
+print-type-size         local `..generator_field2`: 1 bytes, alignment: 1 bytes
 print-type-size     variant `Suspend2`: 1027 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
 print-type-size         padding: 1 bytes
-print-type-size         local `..generator_field3`: 1 bytes, alignment: 1 bytes
+print-type-size         local `..generator_field2`: 1 bytes, alignment: 1 bytes
 print-type-size         local `.__awaitee`: 1 bytes
 print-type-size     variant `Returned`: 1025 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
 print-type-size     variant `Panicked`: 1025 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
-print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]>`: 2052 bytes, alignment: 1 bytes
-print-type-size     field `.value`: 2052 bytes
-print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]>`: 2052 bytes, alignment: 1 bytes
-print-type-size     variant `MaybeUninit`: 2052 bytes
+print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]>`: 1028 bytes, alignment: 1 bytes
+print-type-size     field `.value`: 1028 bytes
+print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]>`: 1028 bytes, alignment: 1 bytes
+print-type-size     variant `MaybeUninit`: 1028 bytes
 print-type-size         field `.uninit`: 0 bytes
-print-type-size         field `.value`: 2052 bytes
+print-type-size         field `.value`: 1028 bytes
 print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:8:35: 8:37]`: 1025 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 1024 bytes
diff --git a/tests/ui/async-await/future-sizes/future-as-arg.rs b/tests/ui/async-await/future-sizes/future-as-arg.rs
index 93c69b05254dc..0c95c4a9b54ee 100644
--- a/tests/ui/async-await/future-sizes/future-as-arg.rs
+++ b/tests/ui/async-await/future-sizes/future-as-arg.rs
@@ -11,6 +11,6 @@ fn main() {
     let actual = std::mem::size_of_val(
         &use_future(use_future(use_future(use_future(use_future(test([0; 16])))))));
     // Not using an exact number in case it slightly changes over different commits
-    let expected = 550;
+    let expected = 20;
     assert!(actual > expected, "expected: >{expected}, actual: {actual}");
 }

From 83e15f8cfff80b3bcf5e10159d41ada167d2fa24 Mon Sep 17 00:00:00 2001
From: "Felix S. Klock II" <pnkfelix@pnkfx.org>
Date: Thu, 23 Feb 2023 21:53:13 -0500
Subject: [PATCH 4/6] Updated the future-sizes tests to opt-into the MIR
 transformations (regardless of their default settings) and then updated the
 numbers to reflect the improvements those transformations now yield.

---
 .../async-awaiting-fut.both_off.stdout        | 68 +++++++++++++++++++
 ...dout => async-awaiting-fut.both_on.stdout} | 12 ++--
 .../async-awaiting-fut.just_prop.stdout       | 60 ++++++++++++++++
 .../future-sizes/async-awaiting-fut.rs        |  6 +-
 .../async-await/future-sizes/future-as-arg.rs |  9 ++-
 5 files changed, 145 insertions(+), 10 deletions(-)
 create mode 100644 tests/ui/async-await/future-sizes/async-awaiting-fut.both_off.stdout
 rename tests/ui/async-await/future-sizes/{async-awaiting-fut.stdout => async-awaiting-fut.both_on.stdout} (91%)
 create mode 100644 tests/ui/async-await/future-sizes/async-awaiting-fut.just_prop.stdout

diff --git a/tests/ui/async-await/future-sizes/async-awaiting-fut.both_off.stdout b/tests/ui/async-await/future-sizes/async-awaiting-fut.both_off.stdout
new file mode 100644
index 0000000000000..66a9f09612eeb
--- /dev/null
+++ b/tests/ui/async-await/future-sizes/async-awaiting-fut.both_off.stdout
@@ -0,0 +1,68 @@
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:25:21: 28:2]`: 3078 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 0 bytes
+print-type-size     variant `Suspend0`: 3077 bytes
+print-type-size         local `.__awaitee`: 3077 bytes
+print-type-size     variant `Returned`: 0 bytes
+print-type-size     variant `Panicked`: 0 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:14:64: 23:2]`: 3077 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 1025 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size     variant `Suspend0`: 2052 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size         padding: 1 bytes
+print-type-size         local `.fut`: 1025 bytes, alignment: 1 bytes
+print-type-size         local `..generator_field4`: 1 bytes
+print-type-size         local `.__awaitee`: 1 bytes
+print-type-size     variant `Suspend1`: 3076 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size         padding: 1026 bytes
+print-type-size         local `..generator_field4`: 1 bytes, alignment: 1 bytes
+print-type-size         local `.__awaitee`: 1025 bytes
+print-type-size     variant `Suspend2`: 2052 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size         padding: 1 bytes
+print-type-size         local `.fut`: 1025 bytes, alignment: 1 bytes
+print-type-size         local `..generator_field4`: 1 bytes
+print-type-size         local `.__awaitee`: 1 bytes
+print-type-size     variant `Returned`: 1025 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size     variant `Panicked`: 1025 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async-awaiting-fut.rs:14:64: 23:2]>`: 3077 bytes, alignment: 1 bytes
+print-type-size     field `.value`: 3077 bytes
+print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async-awaiting-fut.rs:14:64: 23:2]>`: 3077 bytes, alignment: 1 bytes
+print-type-size     variant `MaybeUninit`: 3077 bytes
+print-type-size         field `.uninit`: 0 bytes
+print-type-size         field `.value`: 3077 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:12:35: 12:37]`: 1025 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 1024 bytes
+print-type-size         upvar `.arg`: 1024 bytes
+print-type-size     variant `Returned`: 1024 bytes
+print-type-size         upvar `.arg`: 1024 bytes
+print-type-size     variant `Panicked`: 1024 bytes
+print-type-size         upvar `.arg`: 1024 bytes
+print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async-awaiting-fut.rs:12:35: 12:37]>`: 1025 bytes, alignment: 1 bytes
+print-type-size     field `.value`: 1025 bytes
+print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async-awaiting-fut.rs:12:35: 12:37]>`: 1025 bytes, alignment: 1 bytes
+print-type-size     variant `MaybeUninit`: 1025 bytes
+print-type-size         field `.uninit`: 0 bytes
+print-type-size         field `.value`: 1025 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:10:17: 10:19]`: 1 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 0 bytes
+print-type-size     variant `Returned`: 0 bytes
+print-type-size     variant `Panicked`: 0 bytes
+print-type-size type: `std::mem::ManuallyDrop<bool>`: 1 bytes, alignment: 1 bytes
+print-type-size     field `.value`: 1 bytes
+print-type-size type: `std::mem::MaybeUninit<bool>`: 1 bytes, alignment: 1 bytes
+print-type-size     variant `MaybeUninit`: 1 bytes
+print-type-size         field `.uninit`: 0 bytes
+print-type-size         field `.value`: 1 bytes
+print-type-size type: `std::task::Poll<()>`: 1 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Ready`: 0 bytes
+print-type-size         field `.0`: 0 bytes
+print-type-size     variant `Pending`: 0 bytes
diff --git a/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout b/tests/ui/async-await/future-sizes/async-awaiting-fut.both_on.stdout
similarity index 91%
rename from tests/ui/async-await/future-sizes/async-awaiting-fut.stdout
rename to tests/ui/async-await/future-sizes/async-awaiting-fut.both_on.stdout
index fa63cbd52c257..645634d54e4c2 100644
--- a/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout
+++ b/tests/ui/async-await/future-sizes/async-awaiting-fut.both_on.stdout
@@ -1,11 +1,11 @@
-print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:21:21: 24:2]`: 1029 bytes, alignment: 1 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:25:21: 28:2]`: 1029 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 0 bytes
 print-type-size     variant `Suspend0`: 1028 bytes
 print-type-size         local `.__awaitee`: 1028 bytes
 print-type-size     variant `Returned`: 0 bytes
 print-type-size     variant `Panicked`: 0 bytes
-print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]`: 1028 bytes, alignment: 1 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:14:64: 23:2]`: 1028 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 1025 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
@@ -27,13 +27,13 @@ print-type-size     variant `Returned`: 1025 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
 print-type-size     variant `Panicked`: 1025 bytes
 print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
-print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]>`: 1028 bytes, alignment: 1 bytes
+print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async-awaiting-fut.rs:14:64: 23:2]>`: 1028 bytes, alignment: 1 bytes
 print-type-size     field `.value`: 1028 bytes
-print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async-awaiting-fut.rs:10:64: 19:2]>`: 1028 bytes, alignment: 1 bytes
+print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async-awaiting-fut.rs:14:64: 23:2]>`: 1028 bytes, alignment: 1 bytes
 print-type-size     variant `MaybeUninit`: 1028 bytes
 print-type-size         field `.uninit`: 0 bytes
 print-type-size         field `.value`: 1028 bytes
-print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:8:35: 8:37]`: 1025 bytes, alignment: 1 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:12:35: 12:37]`: 1025 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 1024 bytes
 print-type-size         upvar `.arg`: 1024 bytes
@@ -41,7 +41,7 @@ print-type-size     variant `Returned`: 1024 bytes
 print-type-size         upvar `.arg`: 1024 bytes
 print-type-size     variant `Panicked`: 1024 bytes
 print-type-size         upvar `.arg`: 1024 bytes
-print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:6:17: 6:19]`: 1 bytes, alignment: 1 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:10:17: 10:19]`: 1 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 0 bytes
 print-type-size     variant `Returned`: 0 bytes
diff --git a/tests/ui/async-await/future-sizes/async-awaiting-fut.just_prop.stdout b/tests/ui/async-await/future-sizes/async-awaiting-fut.just_prop.stdout
new file mode 100644
index 0000000000000..186404541b56f
--- /dev/null
+++ b/tests/ui/async-await/future-sizes/async-awaiting-fut.just_prop.stdout
@@ -0,0 +1,60 @@
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:25:21: 28:2]`: 2053 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 0 bytes
+print-type-size     variant `Suspend0`: 2052 bytes
+print-type-size         local `.__awaitee`: 2052 bytes
+print-type-size     variant `Returned`: 0 bytes
+print-type-size     variant `Panicked`: 0 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:14:64: 23:2]`: 2052 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 1025 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size     variant `Suspend0`: 1027 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size         padding: 1 bytes
+print-type-size         local `..generator_field3`: 1 bytes, alignment: 1 bytes
+print-type-size         local `.__awaitee`: 1 bytes
+print-type-size     variant `Suspend1`: 2051 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size         padding: 1 bytes
+print-type-size         local `..generator_field3`: 1 bytes, alignment: 1 bytes
+print-type-size         local `.__awaitee`: 1025 bytes
+print-type-size     variant `Suspend2`: 1027 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size         padding: 1 bytes
+print-type-size         local `..generator_field3`: 1 bytes, alignment: 1 bytes
+print-type-size         local `.__awaitee`: 1 bytes
+print-type-size     variant `Returned`: 1025 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size     variant `Panicked`: 1025 bytes
+print-type-size         upvar `.fut`: 1025 bytes, offset: 0 bytes, alignment: 1 bytes
+print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async-awaiting-fut.rs:14:64: 23:2]>`: 2052 bytes, alignment: 1 bytes
+print-type-size     field `.value`: 2052 bytes
+print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async-awaiting-fut.rs:14:64: 23:2]>`: 2052 bytes, alignment: 1 bytes
+print-type-size     variant `MaybeUninit`: 2052 bytes
+print-type-size         field `.uninit`: 0 bytes
+print-type-size         field `.value`: 2052 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:12:35: 12:37]`: 1025 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 1024 bytes
+print-type-size         upvar `.arg`: 1024 bytes
+print-type-size     variant `Returned`: 1024 bytes
+print-type-size         upvar `.arg`: 1024 bytes
+print-type-size     variant `Panicked`: 1024 bytes
+print-type-size         upvar `.arg`: 1024 bytes
+print-type-size type: `[async fn body@$DIR/async-awaiting-fut.rs:10:17: 10:19]`: 1 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 0 bytes
+print-type-size     variant `Returned`: 0 bytes
+print-type-size     variant `Panicked`: 0 bytes
+print-type-size type: `std::mem::ManuallyDrop<bool>`: 1 bytes, alignment: 1 bytes
+print-type-size     field `.value`: 1 bytes
+print-type-size type: `std::mem::MaybeUninit<bool>`: 1 bytes, alignment: 1 bytes
+print-type-size     variant `MaybeUninit`: 1 bytes
+print-type-size         field `.uninit`: 0 bytes
+print-type-size         field `.value`: 1 bytes
+print-type-size type: `std::task::Poll<()>`: 1 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Ready`: 0 bytes
+print-type-size         field `.0`: 0 bytes
+print-type-size     variant `Pending`: 0 bytes
diff --git a/tests/ui/async-await/future-sizes/async-awaiting-fut.rs b/tests/ui/async-await/future-sizes/async-awaiting-fut.rs
index 1816d842d6c41..13aae4ebd6e93 100644
--- a/tests/ui/async-await/future-sizes/async-awaiting-fut.rs
+++ b/tests/ui/async-await/future-sizes/async-awaiting-fut.rs
@@ -1,4 +1,8 @@
-// compile-flags: -Z print-type-sizes --crate-type lib
+// revisions: both_off just_prop both_on
+// ignore-tidy-linelength
+// [both_off]  compile-flags: -Z print-type-sizes --crate-type lib -Z mir-enable-passes=-UpvarToLocalProp,-InlineFutureIntoFuture
+// [just_prop] compile-flags: -Z print-type-sizes --crate-type lib -Z mir-enable-passes=+UpvarToLocalProp,-InlineFutureIntoFuture
+// [both_on]   compile-flags: -Z print-type-sizes --crate-type lib -Z mir-enable-passes=+UpvarToLocalProp,+InlineFutureIntoFuture
 // edition:2021
 // build-pass
 // ignore-pass
diff --git a/tests/ui/async-await/future-sizes/future-as-arg.rs b/tests/ui/async-await/future-sizes/future-as-arg.rs
index 0c95c4a9b54ee..bd62f5afb1df8 100644
--- a/tests/ui/async-await/future-sizes/future-as-arg.rs
+++ b/tests/ui/async-await/future-sizes/future-as-arg.rs
@@ -1,3 +1,4 @@
+// compile-flags: -Z mir-enable-passes=+UpvarToLocalProp,+InlineFutureIntoFuture
 // edition: 2021
 // run-pass
 
@@ -10,7 +11,9 @@ async fn use_future(fut: impl std::future::Future<Output = ()>) {
 fn main() {
     let actual = std::mem::size_of_val(
         &use_future(use_future(use_future(use_future(use_future(test([0; 16])))))));
-    // Not using an exact number in case it slightly changes over different commits
-    let expected = 20;
-    assert!(actual > expected, "expected: >{expected}, actual: {actual}");
+
+    // Using an acceptable range rather than an exact number, in order to allow
+    // the actual value to vary slightly over different commits
+    let expected = 16..=32;
+    assert!(expected.contains(&actual), "expected: {expected:?}, actual: {actual}");
 }

From 6dbe78d01f5c86f48db3f245808b4539e426ad37 Mon Sep 17 00:00:00 2001
From: "Felix S. Klock II" <pnkfelix@pnkfx.org>
Date: Thu, 23 Feb 2023 21:54:08 -0500
Subject: [PATCH 5/6] Updated the print-type-sizes async.rs test to opt into
 the new MIR transformations added here (regardless of their default settings)
 and updated the numbers in the output to reflect the improvements those
 transformations yield.

---
 .../ui/print_type_sizes/async.both_off.stdout | 34 +++++++++++++++++++
 .../{async.stdout => async.both_on.stdout}    |  8 ++---
 .../print_type_sizes/async.just_prop.stdout   | 27 +++++++++++++++
 tests/ui/print_type_sizes/async.rs            |  6 +++-
 4 files changed, 70 insertions(+), 5 deletions(-)
 create mode 100644 tests/ui/print_type_sizes/async.both_off.stdout
 rename tests/ui/print_type_sizes/{async.stdout => async.both_on.stdout} (82%)
 create mode 100644 tests/ui/print_type_sizes/async.just_prop.stdout

diff --git a/tests/ui/print_type_sizes/async.both_off.stdout b/tests/ui/print_type_sizes/async.both_off.stdout
new file mode 100644
index 0000000000000..faecf5bb83e7c
--- /dev/null
+++ b/tests/ui/print_type_sizes/async.both_off.stdout
@@ -0,0 +1,34 @@
+print-type-size type: `[async fn body@$DIR/async.rs:14:36: 17:2]`: 16386 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 8192 bytes
+print-type-size         upvar `.arg`: 8192 bytes
+print-type-size     variant `Suspend0`: 16385 bytes
+print-type-size         upvar `.arg`: 8192 bytes
+print-type-size         local `.arg`: 8192 bytes
+print-type-size         local `.__awaitee`: 1 bytes
+print-type-size     variant `Returned`: 8192 bytes
+print-type-size         upvar `.arg`: 8192 bytes
+print-type-size     variant `Panicked`: 8192 bytes
+print-type-size         upvar `.arg`: 8192 bytes
+print-type-size type: `std::mem::ManuallyDrop<[u8; 8192]>`: 8192 bytes, alignment: 1 bytes
+print-type-size     field `.value`: 8192 bytes
+print-type-size type: `std::mem::MaybeUninit<[u8; 8192]>`: 8192 bytes, alignment: 1 bytes
+print-type-size     variant `MaybeUninit`: 8192 bytes
+print-type-size         field `.uninit`: 0 bytes
+print-type-size         field `.value`: 8192 bytes
+print-type-size type: `[async fn body@$DIR/async.rs:12:17: 12:19]`: 1 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 0 bytes
+print-type-size     variant `Returned`: 0 bytes
+print-type-size     variant `Panicked`: 0 bytes
+print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async.rs:12:17: 12:19]>`: 1 bytes, alignment: 1 bytes
+print-type-size     field `.value`: 1 bytes
+print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async.rs:12:17: 12:19]>`: 1 bytes, alignment: 1 bytes
+print-type-size     variant `MaybeUninit`: 1 bytes
+print-type-size         field `.uninit`: 0 bytes
+print-type-size         field `.value`: 1 bytes
+print-type-size type: `std::task::Poll<()>`: 1 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Ready`: 0 bytes
+print-type-size         field `.0`: 0 bytes
+print-type-size     variant `Pending`: 0 bytes
diff --git a/tests/ui/print_type_sizes/async.stdout b/tests/ui/print_type_sizes/async.both_on.stdout
similarity index 82%
rename from tests/ui/print_type_sizes/async.stdout
rename to tests/ui/print_type_sizes/async.both_on.stdout
index be63c513c8a49..159734d74a3c6 100644
--- a/tests/ui/print_type_sizes/async.stdout
+++ b/tests/ui/print_type_sizes/async.both_on.stdout
@@ -1,4 +1,4 @@
-print-type-size type: `[async fn body@$DIR/async.rs:10:36: 13:2]`: 8194 bytes, alignment: 1 bytes
+print-type-size type: `[async fn body@$DIR/async.rs:14:36: 17:2]`: 8194 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 8192 bytes
 print-type-size         upvar `.arg`: 8192 bytes
@@ -9,14 +9,14 @@ print-type-size     variant `Returned`: 8192 bytes
 print-type-size         upvar `.arg`: 8192 bytes
 print-type-size     variant `Panicked`: 8192 bytes
 print-type-size         upvar `.arg`: 8192 bytes
-print-type-size type: `[async fn body@$DIR/async.rs:8:17: 8:19]`: 1 bytes, alignment: 1 bytes
+print-type-size type: `[async fn body@$DIR/async.rs:12:17: 12:19]`: 1 bytes, alignment: 1 bytes
 print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 0 bytes
 print-type-size     variant `Returned`: 0 bytes
 print-type-size     variant `Panicked`: 0 bytes
-print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async.rs:8:17: 8:19]>`: 1 bytes, alignment: 1 bytes
+print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async.rs:12:17: 12:19]>`: 1 bytes, alignment: 1 bytes
 print-type-size     field `.value`: 1 bytes
-print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async.rs:8:17: 8:19]>`: 1 bytes, alignment: 1 bytes
+print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async.rs:12:17: 12:19]>`: 1 bytes, alignment: 1 bytes
 print-type-size     variant `MaybeUninit`: 1 bytes
 print-type-size         field `.uninit`: 0 bytes
 print-type-size         field `.value`: 1 bytes
diff --git a/tests/ui/print_type_sizes/async.just_prop.stdout b/tests/ui/print_type_sizes/async.just_prop.stdout
new file mode 100644
index 0000000000000..159734d74a3c6
--- /dev/null
+++ b/tests/ui/print_type_sizes/async.just_prop.stdout
@@ -0,0 +1,27 @@
+print-type-size type: `[async fn body@$DIR/async.rs:14:36: 17:2]`: 8194 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 8192 bytes
+print-type-size         upvar `.arg`: 8192 bytes
+print-type-size     variant `Suspend0`: 8193 bytes
+print-type-size         upvar `.arg`: 8192 bytes
+print-type-size         local `.__awaitee`: 1 bytes
+print-type-size     variant `Returned`: 8192 bytes
+print-type-size         upvar `.arg`: 8192 bytes
+print-type-size     variant `Panicked`: 8192 bytes
+print-type-size         upvar `.arg`: 8192 bytes
+print-type-size type: `[async fn body@$DIR/async.rs:12:17: 12:19]`: 1 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Unresumed`: 0 bytes
+print-type-size     variant `Returned`: 0 bytes
+print-type-size     variant `Panicked`: 0 bytes
+print-type-size type: `std::mem::ManuallyDrop<[async fn body@$DIR/async.rs:12:17: 12:19]>`: 1 bytes, alignment: 1 bytes
+print-type-size     field `.value`: 1 bytes
+print-type-size type: `std::mem::MaybeUninit<[async fn body@$DIR/async.rs:12:17: 12:19]>`: 1 bytes, alignment: 1 bytes
+print-type-size     variant `MaybeUninit`: 1 bytes
+print-type-size         field `.uninit`: 0 bytes
+print-type-size         field `.value`: 1 bytes
+print-type-size type: `std::task::Poll<()>`: 1 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Ready`: 0 bytes
+print-type-size         field `.0`: 0 bytes
+print-type-size     variant `Pending`: 0 bytes
diff --git a/tests/ui/print_type_sizes/async.rs b/tests/ui/print_type_sizes/async.rs
index f38a6e674da99..602092393be0c 100644
--- a/tests/ui/print_type_sizes/async.rs
+++ b/tests/ui/print_type_sizes/async.rs
@@ -1,4 +1,8 @@
-// compile-flags: -Z print-type-sizes --crate-type lib
+// revisions: both_off just_prop both_on
+// ignore-tidy-linelength
+// [both_off]  compile-flags: -Z print-type-sizes --crate-type lib -Z mir-enable-passes=-UpvarToLocalProp,-InlineFutureIntoFuture
+// [just_prop] compile-flags: -Z print-type-sizes --crate-type lib -Z mir-enable-passes=+UpvarToLocalProp,-InlineFutureIntoFuture
+// [both_on]   compile-flags: -Z print-type-sizes --crate-type lib -Z mir-enable-passes=+UpvarToLocalProp,+InlineFutureIntoFuture
 // edition:2021
 // build-pass
 // ignore-pass

From f4a1d4bb0a1edb8b4806d860beaa5b0510d32b51 Mon Sep 17 00:00:00 2001
From: "Felix S. Klock II" <pnkfelix@pnkfx.org>
Date: Thu, 23 Feb 2023 23:01:41 -0500
Subject: [PATCH 6/6] Some local unit tests to track progress and capture
 interesting cases as I identify them.

issue-62958-c.rs was reduced from the tracing-attributes proc-macro crate.

issue-62958-d.rs was reduced from the doc test attached to `AtomicPtr::from_mut_slice`.

issue-62958-e.rs covers some important operational characteristics.
---
 tests/ui/mir/issue-62958-a.rs |  28 +++++++++
 tests/ui/mir/issue-62958-b.rs |  27 +++++++++
 tests/ui/mir/issue-62958-c.rs |  23 ++++++++
 tests/ui/mir/issue-62958-d.rs |  34 +++++++++++
 tests/ui/mir/issue-62958-e.rs | 108 ++++++++++++++++++++++++++++++++++
 5 files changed, 220 insertions(+)
 create mode 100644 tests/ui/mir/issue-62958-a.rs
 create mode 100644 tests/ui/mir/issue-62958-b.rs
 create mode 100644 tests/ui/mir/issue-62958-c.rs
 create mode 100644 tests/ui/mir/issue-62958-d.rs
 create mode 100644 tests/ui/mir/issue-62958-e.rs

diff --git a/tests/ui/mir/issue-62958-a.rs b/tests/ui/mir/issue-62958-a.rs
new file mode 100644
index 0000000000000..334504d7af052
--- /dev/null
+++ b/tests/ui/mir/issue-62958-a.rs
@@ -0,0 +1,28 @@
+// revisions: both_off just_prop both_on
+// ignore-tidy-linelength
+// run-pass
+// [both_off]  compile-flags: -Z mir-enable-passes=-UpvarToLocalProp,-InlineFutureIntoFuture
+// [just_prop] compile-flags: -Z mir-enable-passes=+UpvarToLocalProp,-InlineFutureIntoFuture
+// [both_on]   compile-flags: -Z mir-enable-passes=+UpvarToLocalProp,+InlineFutureIntoFuture
+// edition:2018
+
+async fn wait() {}
+#[allow(dropping_copy_types)]
+async fn test(arg: [u8; 8192]) {
+    wait().await;
+    drop(arg);
+}
+
+#[cfg(both_off)]
+fn main() {
+    let expected = 16000..=17000;
+    let actual = std::mem::size_of_val(&test([0; 8192]));
+    assert!(expected.contains(&actual));
+}
+
+#[cfg(any(just_prop, both_on))]
+fn main() {
+    let expected = 8192..=9999;
+    let actual = std::mem::size_of_val(&test([0; 8192]));
+    assert!(expected.contains(&actual));
+}
diff --git a/tests/ui/mir/issue-62958-b.rs b/tests/ui/mir/issue-62958-b.rs
new file mode 100644
index 0000000000000..35fabd69af150
--- /dev/null
+++ b/tests/ui/mir/issue-62958-b.rs
@@ -0,0 +1,27 @@
+// revisions: both_off just_prop both_on
+// ignore-tidy-linelength
+// run-pass
+// [both_off]  compile-flags: -Z mir-enable-passes=-UpvarToLocalProp,-InlineFutureIntoFuture
+// [just_prop] compile-flags: -Z mir-enable-passes=+UpvarToLocalProp,-InlineFutureIntoFuture
+// [both_on]   compile-flags: -Z mir-enable-passes=+UpvarToLocalProp,+InlineFutureIntoFuture
+// edition:2018
+
+async fn test(_arg: [u8; 8192]) {}
+
+async fn use_future(fut: impl std::future::Future<Output = ()>) {
+    fut.await
+}
+
+#[cfg(both_on)]
+fn main() {
+    let expected = 8192..=9999;
+    let actual = std::mem::size_of_val(&use_future(use_future(use_future(test([0; 8192])))));
+    assert!(expected.contains(&actual), "expected: {:?} actual: {}", expected, actual);
+}
+
+#[cfg(any(both_off, just_prop))]
+fn main() {
+    let expected = 16000..=8192*(2*2*2*2); // O(2^n) LOL
+    let actual = std::mem::size_of_val(&use_future(use_future(use_future(test([0; 8192])))));
+    assert!(expected.contains(&actual), "expected: {:?} actual: {}", expected, actual);
+}
diff --git a/tests/ui/mir/issue-62958-c.rs b/tests/ui/mir/issue-62958-c.rs
new file mode 100644
index 0000000000000..51ae6d8f724de
--- /dev/null
+++ b/tests/ui/mir/issue-62958-c.rs
@@ -0,0 +1,23 @@
+// revisions: both_off just_prop both_on
+// ignore-tidy-linelength
+// build-pass
+// [both_off]  compile-flags: -Z mir-enable-passes=-UpvarToLocalProp,-InlineFutureIntoFuture
+// [just_prop] compile-flags: -Z mir-enable-passes=+UpvarToLocalProp,-InlineFutureIntoFuture
+// [both_on]   compile-flags: -Z mir-enable-passes=+UpvarToLocalProp,+InlineFutureIntoFuture
+
+#![crate_type="rlib"]
+
+// FIXME: I should be able to expand the below into something that actually does
+// some interesting work. The crucial thing is to enforce a rule that we never
+// replace `_3` with `_1.0` on a place that holds a Deref.
+
+pub struct G { p: () }
+pub struct S { g: G }
+pub struct R<'a> { s: &'a S, b: () }
+
+pub fn gen_function(input: R<'_>) {
+    let R { s, b: _b } = input;
+    let S { g, .. } = s;
+    let G { p: _p, .. } = g;
+    loop { }
+}
diff --git a/tests/ui/mir/issue-62958-d.rs b/tests/ui/mir/issue-62958-d.rs
new file mode 100644
index 0000000000000..0b34826c9584a
--- /dev/null
+++ b/tests/ui/mir/issue-62958-d.rs
@@ -0,0 +1,34 @@
+// revisions: both_off just_prop both_on
+// ignore-tidy-linelength
+// run-pass
+// [both_off]  compile-flags: -Z mir-enable-passes=-UpvarToLocalProp,-InlineFutureIntoFuture
+// [just_prop] compile-flags: -Z mir-enable-passes=+UpvarToLocalProp,-InlineFutureIntoFuture
+// [both_on]   compile-flags: -Z mir-enable-passes=+UpvarToLocalProp,+InlineFutureIntoFuture
+// edition:2018
+
+#![feature(atomic_from_mut)]
+
+// FIXME: I should be able to reduce the below further now that I understand the
+// nature of the problem. (Namely, the fact that the old upvar_to_local_prop
+// code was ignoring locals in projections.)
+
+use std::ptr::null_mut;
+use std::sync::atomic::{AtomicPtr, Ordering};
+
+fn main() {
+    let mut some_ptrs = [null_mut::<String>(); 10];
+    let a = &*AtomicPtr::from_mut_slice(&mut some_ptrs);
+    std::thread::scope(|s| {
+        for i in 0..a.len() {
+            s.spawn(move || {
+                let name = Box::new(format!("thread{i}"));
+                a[i].store(Box::into_raw(name), Ordering::Relaxed);
+            });
+        }
+    });
+    for p in some_ptrs {
+        assert!(!p.is_null());
+        let name = unsafe { Box::from_raw(p) };
+        println!("Hello, {name}!");
+    }
+}
diff --git a/tests/ui/mir/issue-62958-e.rs b/tests/ui/mir/issue-62958-e.rs
new file mode 100644
index 0000000000000..870018a321eca
--- /dev/null
+++ b/tests/ui/mir/issue-62958-e.rs
@@ -0,0 +1,108 @@
+// revisions: both_off both_on
+// ignore-tidy-linelength
+// run-pass
+// [both_off]  compile-flags: -Z mir-enable-passes=-UpvarToLocalProp,-InlineFutureIntoFuture
+// [both_on]   compile-flags: -Z mir-enable-passes=+UpvarToLocalProp,+InlineFutureIntoFuture
+// edition:2018
+
+use std::future::Future;
+
+async fn wait() {}
+
+fn test_shrinks_1(arg: [u8; 1000]) -> impl std::future::Future<Output=()> {
+    async move {
+        let mut local = arg;
+        local[2] = 3;
+        wait().await;
+        assert_eq!(local[2], 3);
+    }
+}
+
+fn test_noshrinks_1(mut arg: [u8; 1000]) -> impl std::future::Future<Output=()> {
+    async move {
+        let mut local = arg;
+        local[2] = 3;
+        let l2 = &mut arg;
+        l2[2] = 4;
+        wait().await;
+        assert_eq!(local[2], 3);
+        assert_eq!(l2[2], 4);
+    }
+}
+
+fn test_noshrinks_2(arg: [u8; 1000]) -> impl std::future::Future<Output=()> {
+    async move {
+        let mut local = arg;
+        local[2] = 1;
+        let l2 = arg;
+        wait().await;
+        assert_eq!(local[2], 1);
+        assert_eq!(l2[2], 0);
+    }
+}
+
+fn test_noshrinks_3(arg: [u8; 1000]) -> impl std::future::Future<Output=()> {
+    async move {
+        let bor = &arg[2];
+        let mut local = arg;
+        local[2] = 1;
+        wait().await;
+        assert_eq!(local[2], 1);
+        assert_eq!(*bor, 0);
+    }
+}
+
+#[cfg(both_on)]
+fn check_shrinks(which: &str, fut: impl std::future::Future<Output=()>) {
+    let sz = std::mem::size_of_val(&fut);
+    println!("{which}: {sz}");
+    assert!((1000..=1500).contains(&sz));
+    run_fut(fut)
+}
+
+fn check_no_shrinks(which: &str, fut: impl std::future::Future<Output=()>) {
+    let sz = std::mem::size_of_val(&fut);
+    println!("{which}: {sz}");
+    assert!((2000..).contains(&sz));
+    run_fut(fut);
+}
+
+#[cfg(both_on)]
+fn main() {
+    check_shrinks("s1", test_shrinks_1([0; 1000]));
+
+    check_no_shrinks("n1", test_noshrinks_1([0; 1000]));
+    check_no_shrinks("n2", test_noshrinks_2([0; 1000]));
+    check_no_shrinks("n3", test_noshrinks_3([0; 1000]));
+}
+
+#[cfg(both_off)]
+fn main() {
+    check_no_shrinks("s1", test_shrinks_1([0; 1000]));
+    check_no_shrinks("n1", test_noshrinks_1([0; 1000]));
+    check_no_shrinks("n2", test_noshrinks_2([0; 1000]));
+    check_no_shrinks("n3", test_noshrinks_3([0; 1000]));
+}
+
+fn run_fut<T>(fut: impl Future<Output = T>) -> T {
+    use std::sync::Arc;
+    use std::task::{Context, Poll, Wake, Waker};
+
+    struct MyWaker;
+    impl Wake for MyWaker {
+        fn wake(self: Arc<Self>) {
+            unimplemented!()
+        }
+    }
+
+    let waker = Waker::from(Arc::new(MyWaker));
+    let mut context = Context::from_waker(&waker);
+
+    let mut pinned = Box::pin(fut);
+    loop {
+        match pinned.as_mut().poll(&mut context) {
+            Poll::Pending => continue,
+            Poll::Ready(v) => return v,
+        }
+    }
+}