From dce272b2e106b597023dfd87b3db11e578528aa9 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Fri, 14 Mar 2025 15:47:48 +0000 Subject: [PATCH] Nul terminate rust string literals This allows taking advantage of the C string merging functionality of linkers, reducing code size. --- .../src/interpret/intrinsics.rs | 17 +++++++++++----- .../rustc_const_eval/src/interpret/place.rs | 11 +++++++--- .../src/builder/expr/as_constant.rs | 10 ++++++++-- tests/codegen/remap_path_prefix/main.rs | 2 +- .../const_allocation.main.GVN.after.32bit.mir | 20 +++++++++---------- .../const_allocation.main.GVN.after.64bit.mir | 20 +++++++++---------- .../stable-mir/check_allocation.rs | 20 ++++++++++--------- .../ui-fulldeps/stable-mir/check_transform.rs | 2 +- 8 files changed, 61 insertions(+), 41 deletions(-) diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics.rs b/compiler/rustc_const_eval/src/interpret/intrinsics.rs index 4ca317e3a1e53..e8fc5c9659a19 100644 --- a/compiler/rustc_const_eval/src/interpret/intrinsics.rs +++ b/compiler/rustc_const_eval/src/interpret/intrinsics.rs @@ -24,10 +24,17 @@ use super::{ use crate::fluent_generated as fluent; /// Directly returns an `Allocation` containing an absolute path representation of the given type. -pub(crate) fn alloc_type_name<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> ConstAllocation<'tcx> { - let path = crate::util::type_name(tcx, ty); +pub(crate) fn alloc_type_name<'tcx>( + tcx: TyCtxt<'tcx>, + ty: Ty<'tcx>, +) -> (ConstAllocation<'tcx>, u64) { + let mut path = crate::util::type_name(tcx, ty); + let path_len = path.len().try_into().unwrap(); + if !path.contains('\0') { + path.push('\0'); + }; let alloc = Allocation::from_bytes_byte_aligned_immutable(path.into_bytes()); - tcx.mk_const_alloc(alloc) + (tcx.mk_const_alloc(alloc), path_len) } /// The logic for all nullary intrinsics is implemented here. These intrinsics don't get evaluated @@ -43,8 +50,8 @@ pub(crate) fn eval_nullary_intrinsic<'tcx>( interp_ok(match name { sym::type_name => { ensure_monomorphic_enough(tcx, tp_ty)?; - let alloc = alloc_type_name(tcx, tp_ty); - ConstValue::Slice { data: alloc, meta: alloc.inner().size().bytes() } + let (alloc, path_len) = alloc_type_name(tcx, tp_ty); + ConstValue::Slice { data: alloc, meta: path_len } } sym::needs_drop => { ensure_monomorphic_enough(tcx, tp_ty)?; diff --git a/compiler/rustc_const_eval/src/interpret/place.rs b/compiler/rustc_const_eval/src/interpret/place.rs index f5d3de7b1b270..39947f26d4c75 100644 --- a/compiler/rustc_const_eval/src/interpret/place.rs +++ b/compiler/rustc_const_eval/src/interpret/place.rs @@ -1019,11 +1019,16 @@ where &mut self, s: &str, ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { - let bytes = s.as_bytes(); - let ptr = self.allocate_bytes_dedup(bytes)?; + let ptr = if !s.contains('\0') { + let mut bytes = s.as_bytes().to_owned(); + bytes.push(0); + self.allocate_bytes_dedup(&bytes)? + } else { + self.allocate_bytes_dedup(s.as_bytes())? + }; // Create length metadata for the string. - let meta = Scalar::from_target_usize(u64::try_from(bytes.len()).unwrap(), self); + let meta = Scalar::from_target_usize(u64::try_from(s.len()).unwrap(), self); // Get layout for Rust's str type. let layout = self.layout_of(self.tcx.types.str_).unwrap(); diff --git a/compiler/rustc_mir_build/src/builder/expr/as_constant.rs b/compiler/rustc_mir_build/src/builder/expr/as_constant.rs index 64d092e035451..64b38842d2ed5 100644 --- a/compiler/rustc_mir_build/src/builder/expr/as_constant.rs +++ b/compiler/rustc_mir_build/src/builder/expr/as_constant.rs @@ -121,9 +121,15 @@ fn lit_to_mir_constant<'tcx>(tcx: TyCtxt<'tcx>, lit_input: LitToConstInput<'tcx> let value = match (lit, lit_ty.kind()) { (ast::LitKind::Str(s, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_str() => { let s = s.as_str(); - let allocation = Allocation::from_bytes_byte_aligned_immutable(s.as_bytes()); + let allocation = if !s.contains('\0') { + let mut s = s.to_owned(); + s.push('\0'); + Allocation::from_bytes_byte_aligned_immutable(s.as_bytes()) + } else { + Allocation::from_bytes_byte_aligned_immutable(s.as_bytes()) + }; let allocation = tcx.mk_const_alloc(allocation); - ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() } + ConstValue::Slice { data: allocation, meta: s.len().try_into().unwrap() } } (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Slice(_)) => diff --git a/tests/codegen/remap_path_prefix/main.rs b/tests/codegen/remap_path_prefix/main.rs index 7d17b3b67cfa2..f0ee402e0498c 100644 --- a/tests/codegen/remap_path_prefix/main.rs +++ b/tests/codegen/remap_path_prefix/main.rs @@ -12,7 +12,7 @@ mod aux_mod; include!("aux_mod.rs"); // Here we check that the expansion of the file!() macro is mapped. -// CHECK: @alloc_5761061597a97f66e13ef2ff92712c4b = private unnamed_addr constant [34 x i8] c"/the/src/remap_path_prefix/main.rs" +// CHECK: @alloc_4079a2e7607f89f86df6b8a72ba0dd06 = private unnamed_addr constant [35 x i8] c"/the/src/remap_path_prefix/main.rs\00" pub static FILE_PATH: &'static str = file!(); fn main() { diff --git a/tests/mir-opt/const_allocation.main.GVN.after.32bit.mir b/tests/mir-opt/const_allocation.main.GVN.after.32bit.mir index a2bd2bc0d92ab..a3ff5e29b51e5 100644 --- a/tests/mir-opt/const_allocation.main.GVN.after.32bit.mir +++ b/tests/mir-opt/const_allocation.main.GVN.after.32bit.mir @@ -33,12 +33,12 @@ ALLOC2 (size: 16, align: 4) { ╾ALLOC4╼ 03 00 00 00 ╾ALLOC5╼ 03 00 00 00 │ ╾──╼....╾──╼.... } -ALLOC4 (size: 3, align: 1) { - 66 6f 6f │ foo +ALLOC4 (size: 4, align: 1) { + 66 6f 6f 00 │ foo. } -ALLOC5 (size: 3, align: 1) { - 62 61 72 │ bar +ALLOC5 (size: 4, align: 1) { + 62 61 72 00 │ bar. } ALLOC3 (size: 24, align: 4) { @@ -46,14 +46,14 @@ ALLOC3 (size: 24, align: 4) { 0x10 │ ╾ALLOC8╼ 04 00 00 00 │ ╾──╼.... } -ALLOC6 (size: 3, align: 1) { - 6d 65 68 │ meh +ALLOC6 (size: 4, align: 1) { + 6d 65 68 00 │ meh. } -ALLOC7 (size: 3, align: 1) { - 6d 6f 70 │ mop +ALLOC7 (size: 4, align: 1) { + 6d 6f 70 00 │ mop. } -ALLOC8 (size: 4, align: 1) { - 6d c3 b6 70 │ m..p +ALLOC8 (size: 5, align: 1) { + 6d c3 b6 70 00 │ m..p. } diff --git a/tests/mir-opt/const_allocation.main.GVN.after.64bit.mir b/tests/mir-opt/const_allocation.main.GVN.after.64bit.mir index a431104d08bec..0d381070b9099 100644 --- a/tests/mir-opt/const_allocation.main.GVN.after.64bit.mir +++ b/tests/mir-opt/const_allocation.main.GVN.after.64bit.mir @@ -36,12 +36,12 @@ ALLOC2 (size: 32, align: 8) { 0x10 │ ╾ALLOC5╼ 03 00 00 00 00 00 00 00 │ ╾──────╼........ } -ALLOC4 (size: 3, align: 1) { - 66 6f 6f │ foo +ALLOC4 (size: 4, align: 1) { + 66 6f 6f 00 │ foo. } -ALLOC5 (size: 3, align: 1) { - 62 61 72 │ bar +ALLOC5 (size: 4, align: 1) { + 62 61 72 00 │ bar. } ALLOC3 (size: 48, align: 8) { @@ -50,14 +50,14 @@ ALLOC3 (size: 48, align: 8) { 0x20 │ ╾ALLOC8╼ 04 00 00 00 00 00 00 00 │ ╾──────╼........ } -ALLOC6 (size: 3, align: 1) { - 6d 65 68 │ meh +ALLOC6 (size: 4, align: 1) { + 6d 65 68 00 │ meh. } -ALLOC7 (size: 3, align: 1) { - 6d 6f 70 │ mop +ALLOC7 (size: 4, align: 1) { + 6d 6f 70 00 │ mop. } -ALLOC8 (size: 4, align: 1) { - 6d c3 b6 70 │ m..p +ALLOC8 (size: 5, align: 1) { + 6d c3 b6 70 00 │ m..p. } diff --git a/tests/ui-fulldeps/stable-mir/check_allocation.rs b/tests/ui-fulldeps/stable-mir/check_allocation.rs index c102f86a2286b..231da2f4a174e 100644 --- a/tests/ui-fulldeps/stable-mir/check_allocation.rs +++ b/tests/ui-fulldeps/stable-mir/check_allocation.rs @@ -19,13 +19,6 @@ extern crate rustc_driver; extern crate rustc_interface; extern crate stable_mir; -use rustc_smir::rustc_internal; -use stable_mir::crate_def::CrateDef; -use stable_mir::mir::alloc::GlobalAlloc; -use stable_mir::mir::mono::{Instance, InstanceKind, StaticDef}; -use stable_mir::mir::{Body, TerminatorKind}; -use stable_mir::ty::{Allocation, ConstantKind, RigidTy, TyKind}; -use stable_mir::{CrateItem, CrateItems, ItemKind}; use std::ascii::Char; use std::assert_matches::assert_matches; use std::cmp::{max, min}; @@ -34,6 +27,14 @@ use std::ffi::CStr; use std::io::Write; use std::ops::ControlFlow; +use rustc_smir::rustc_internal; +use stable_mir::crate_def::CrateDef; +use stable_mir::mir::alloc::GlobalAlloc; +use stable_mir::mir::mono::{Instance, InstanceKind, StaticDef}; +use stable_mir::mir::{Body, TerminatorKind}; +use stable_mir::ty::{Allocation, ConstantKind, RigidTy, TyKind}; +use stable_mir::{CrateItem, CrateItems, ItemKind}; + const CRATE_NAME: &str = "input"; /// This function uses the Stable MIR APIs to get information about the test crate. @@ -78,11 +79,12 @@ fn check_bar(item: CrateItem) { let alloc_id_0 = alloc.provenance.ptrs[0].1.0; let GlobalAlloc::Memory(allocation) = GlobalAlloc::from(alloc_id_0) else { unreachable!() }; - assert_eq!(allocation.bytes.len(), 3); + assert_eq!(allocation.bytes.len(), 4); assert_eq!(allocation.bytes[0].unwrap(), Char::CapitalB.to_u8()); assert_eq!(allocation.bytes[1].unwrap(), Char::SmallA.to_u8()); assert_eq!(allocation.bytes[2].unwrap(), Char::SmallR.to_u8()); - assert_eq!(std::str::from_utf8(&allocation.raw_bytes().unwrap()), Ok("Bar")); + assert_eq!(allocation.bytes[3].unwrap(), Char::Null.to_u8()); + assert_eq!(std::str::from_utf8(&allocation.raw_bytes().unwrap()), Ok("Bar\0")); } /// Check the allocation data for static `C_STR`. diff --git a/tests/ui-fulldeps/stable-mir/check_transform.rs b/tests/ui-fulldeps/stable-mir/check_transform.rs index d9fc924933fa8..6a3938726a514 100644 --- a/tests/ui-fulldeps/stable-mir/check_transform.rs +++ b/tests/ui-fulldeps/stable-mir/check_transform.rs @@ -38,7 +38,7 @@ fn test_transform() -> ControlFlow<()> { let target_fn = *get_item(&items, (ItemKind::Fn, "dummy")).unwrap(); let instance = Instance::try_from(target_fn).unwrap(); let body = instance.body().unwrap(); - check_msg(&body, "oops"); + check_msg(&body, "oops\0"); let new_msg = "new panic message"; let new_body = change_panic_msg(body, new_msg);