From 48dc00e3a13844468322ec0b6d7c73c21c2c5fb7 Mon Sep 17 00:00:00 2001
From: Scott McMurray <scottmcm@users.noreply.github.com>
Date: Thu, 23 Mar 2023 18:50:57 -0700
Subject: [PATCH] Avoid some `alloca`s in codegen of `CastKind::Transmute`

---
 compiler/rustc_codegen_ssa/src/mir/rvalue.rs | 25 ++++++++++++----
 tests/codegen/intrinsics/transmute.rs        | 24 ++++++++++-----
 tests/codegen/transmute-scalar.rs            | 31 ++++++++++----------
 3 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index 72d41d8c32c21..5d54f4248d2a9 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -396,7 +396,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                         OperandValue::Immediate(newval)
                     }
                     mir::CastKind::Transmute => {
-                        bug!("Transmute operand {:?} in `codegen_rvalue_operand`", operand);
+                        // `rvalue_creates_operand` checked that src/dst immediate types match
+                        OperandValue::Immediate(operand.immediate())
                     }
                 };
                 OperandRef { val, layout: cast }
@@ -726,11 +727,23 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
 
 impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
     pub fn rvalue_creates_operand(&self, rvalue: &mir::Rvalue<'tcx>, span: Span) -> bool {
-        match *rvalue {
-            mir::Rvalue::Cast(mir::CastKind::Transmute, ..) =>
-                // FIXME: Now that transmute is an Rvalue, it would be nice if
-                // it could create `Immediate`s for scalars, where possible.
-                false,
+        match rvalue {
+            mir::Rvalue::Cast(mir::CastKind::Transmute, operand, dst_ty) => {
+                let dst_ty = self.cx.layout_of(self.monomorphize(*dst_ty));
+                if !self.cx.is_backend_immediate(dst_ty) {
+                    return false;
+                }
+
+                let src_ty = operand.ty(self.mir, self.cx.tcx());
+                let src_ty = self.cx.layout_of(self.monomorphize(src_ty));
+                if !self.cx.is_backend_immediate(src_ty) {
+                    return false;
+                }
+
+                // FIXME: This only handles the easy exact matches for now;
+                // future work could also support `bitcast`s and such.
+                self.cx.immediate_backend_type(src_ty) == self.cx.immediate_backend_type(dst_ty)
+            }
             mir::Rvalue::Ref(..) |
             mir::Rvalue::CopyForDeref(..) |
             mir::Rvalue::AddressOf(..) |
diff --git a/tests/codegen/intrinsics/transmute.rs b/tests/codegen/intrinsics/transmute.rs
index cefcf9ed9caae..1db6bec863930 100644
--- a/tests/codegen/intrinsics/transmute.rs
+++ b/tests/codegen/intrinsics/transmute.rs
@@ -79,20 +79,16 @@ pub unsafe fn check_from_uninhabited(x: BigNever) -> u16 {
 // CHECK-LABEL: @check_to_newtype(
 #[no_mangle]
 pub unsafe fn check_to_newtype(x: u64) -> Scalar64 {
-    // CHECK: %0 = alloca i64
-    // CHECK: store i64 %x, ptr %0
-    // CHECK: %1 = load i64, ptr %0
-    // CHECK: ret i64 %1
+    // CHECK-NOT: alloca
+    // CHECK: ret i64 %x
     transmute(x)
 }
 
 // CHECK-LABEL: @check_from_newtype(
 #[no_mangle]
 pub unsafe fn check_from_newtype(x: Scalar64) -> u64 {
-    // CHECK: %0 = alloca i64
-    // CHECK: store i64 %x, ptr %0
-    // CHECK: %1 = load i64, ptr %0
-    // CHECK: ret i64 %1
+    // CHECK-NOT: alloca
+    // CHECK: ret i64 %x
     transmute(x)
 }
 
@@ -194,3 +190,15 @@ pub unsafe fn check_long_array_more_aligned(x: [u8; 100]) -> [u32; 25] {
     // CHECK-NEXT: ret void
     transmute(x)
 }
+
+// CHECK-LABEL: @check_intermediate_passthrough(
+#[no_mangle]
+pub unsafe fn check_intermediate_passthrough(x: u32) -> i32 {
+    // CHECK: start
+    // CHECK: %[[TMP:.+]] = add i32 1, %x
+    // CHECK: %[[RET:.+]] = add i32 %[[TMP]], 1
+    // CHECK: ret i32 %[[RET]]
+    unsafe {
+        transmute::<u32, i32>(1 + x) + 1
+    }
+}
diff --git a/tests/codegen/transmute-scalar.rs b/tests/codegen/transmute-scalar.rs
index 4d7a80bfbe5cc..517873d00eb1c 100644
--- a/tests/codegen/transmute-scalar.rs
+++ b/tests/codegen/transmute-scalar.rs
@@ -5,11 +5,11 @@
 
 // With opaque ptrs in LLVM, `transmute` can load/store any `alloca` as any type,
 // without needing to pointercast, and SRoA will turn that into a `bitcast`.
-// As such, there's no longer special-casing in `transmute` to attempt to
-// generate `bitcast` ourselves, as that just made the IR longer.
+// Thus for place-to-place transmutes, there's no point in attempting to
+// `bitcast` the values ourselves; we can just store them directly.
 
 // FIXME: That said, `bitcast`s could still be a valuable addition if they could
-// be done in `rvalue_creates_operand`, and thus avoid the `alloca`s entirely.
+// be done in `rvalue_creates_operand`, and thereby avoid the `alloca`s entirely.
 
 // CHECK-LABEL: define{{.*}}i32 @f32_to_bits(float noundef %x)
 // CHECK: store float %{{.*}}, ptr %0
@@ -22,8 +22,8 @@ pub fn f32_to_bits(x: f32) -> u32 {
 
 // CHECK-LABEL: define{{.*}}i8 @bool_to_byte(i1 noundef zeroext %b)
 // CHECK: %1 = zext i1 %b to i8
-// CHECK-NEXT: store i8 %1, {{.*}} %0
-// CHECK-NEXT: %2 = load i8, {{.*}} %0
+// CHECK-NEXT: store i8 %1, ptr %0
+// CHECK-NEXT: %2 = load i8, ptr %0
 // CHECK: ret i8 %2
 #[no_mangle]
 pub fn bool_to_byte(b: bool) -> u8 {
@@ -32,7 +32,7 @@ pub fn bool_to_byte(b: bool) -> u8 {
 
 // CHECK-LABEL: define{{.*}}noundef zeroext i1 @byte_to_bool(i8 noundef %byte)
 // CHECK: store i8 %byte, ptr %0
-// CHECK-NEXT: %1 = load i8, {{.*}} %0
+// CHECK-NEXT: %1 = load i8, ptr %0
 // CHECK-NEXT: %2 = trunc i8 %1 to i1
 // CHECK: ret i1 %2
 #[no_mangle]
@@ -40,28 +40,27 @@ pub unsafe fn byte_to_bool(byte: u8) -> bool {
     std::mem::transmute(byte)
 }
 
-// CHECK-LABEL: define{{.*}}{{i8\*|ptr}} @ptr_to_ptr({{i16\*|ptr}} noundef %p)
-// CHECK: store {{i8\*|ptr}} %{{.*}}, {{.*}} %0
-// CHECK-NEXT: %[[RES:.*]] = load {{i8\*|ptr}}, {{.*}} %0
-// CHECK: ret {{i8\*|ptr}} %[[RES]]
+// CHECK-LABEL: define{{.*}}ptr @ptr_to_ptr(ptr noundef %p)
+// CHECK: start
+// CHECK-NEXT: ret ptr %p
 #[no_mangle]
 pub fn ptr_to_ptr(p: *mut u16) -> *mut u8 {
     unsafe { std::mem::transmute(p) }
 }
 
-// CHECK: define{{.*}}[[USIZE:i[0-9]+]] @ptr_to_int({{i16\*|ptr}} noundef %p)
-// CHECK: store {{i16\*|ptr}} %p, {{.*}}
-// CHECK-NEXT: %[[RES:.*]] = load [[USIZE]], {{.*}} %0
+// CHECK: define{{.*}}[[USIZE:i[0-9]+]] @ptr_to_int(ptr noundef %p)
+// CHECK: store ptr %p, {{.*}}
+// CHECK-NEXT: %[[RES:.*]] = load [[USIZE]], ptr %0
 // CHECK: ret [[USIZE]] %[[RES]]
 #[no_mangle]
 pub fn ptr_to_int(p: *mut u16) -> usize {
     unsafe { std::mem::transmute(p) }
 }
 
-// CHECK: define{{.*}}{{i16\*|ptr}} @int_to_ptr([[USIZE]] noundef %i)
+// CHECK: define{{.*}}ptr @int_to_ptr([[USIZE]] noundef %i)
 // CHECK: store [[USIZE]] %i, {{.*}}
-// CHECK-NEXT: %[[RES:.*]] = load {{i16\*|ptr}}, {{.*}} %0
-// CHECK: ret {{i16\*|ptr}} %[[RES]]
+// CHECK-NEXT: %[[RES:.*]] = load ptr, {{.*}} %0
+// CHECK: ret ptr %[[RES]]
 #[no_mangle]
 pub fn int_to_ptr(i: usize) -> *mut u16 {
     unsafe { std::mem::transmute(i) }