From 5327e78e86a2988f2862a05542189231a9faa014 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Fri, 14 Oct 2022 14:46:22 -0700 Subject: [PATCH] rustc_codegen_ssa: Use `llvm.invariant` intrinsics on arguments that are immutable references; off by default. Optimization failures around reloads and memcpy optimizations are frequently traceable to LLVM's failure to prove that memory can't be mutated by a call or store. This problem is especially acute in Rust, where large values tend to be memcpy'd more often than in C++. Thankfully, Rust has stronger guarantees on mutability available than C++ does, via the strong immutability of `&` references. This should allow LLVM to prove that memory can't be modified by stores and calls in more cases. We're already using LLVM's `readonly` parameter attribute on such calls. However, the semantics of `readonly` are akin to `const` in C++, in that they only promise to LLVM that the function won't mutate the parameter *through that pointer*, not that the pointed-to memory is immutable for the entire duration of the function. These weak semantics limit the applicability of `readonly` to LLVM's alias analysis. Instead of `readonly`, the correct way to express strong immutability guarantees on memory is through the `llvm.invariant.start` and `llvm.invariant.end` intrinsics. These enable a frontend like `rustc` to describe immutability of memory regions in an expressive, flow-sensitive manner. Unfortunately, LLVM doesn't use the `llvm.invariant.start` and `llvm.invariant.end` intrinsics for much at the moment. It's only used in one optimization in loop-invariant code motion at this time. Follow-up work will need to be done in LLVM to integrate these intrinsics into alias analysis. Possibly there will need to be some sort of "MemoryInvarianceAnalysis" that uses graph reachability algorithms to analyze the extent of the guarantees provided by these intrinsics to the control flow graph. Regardless, this front-end work needs to happen as a prerequisite for any LLVM work, so that the improvements to LLVM can be measured and tested. So this commit makes `rustc` use `llvm.invariant` in a minimal way: on immutable references to "freeze" types (i.e. not transitively containing UnsafeCell) passed directly as parameters to functions. This is off by default, gated behind the non-default `-Z emit-invariant-markers=yes` flag. Obviously, a lot more can be done to use `llvm.invariant` more liberally in the future, but this can be added over time, especially once more LLVM optimization passes use that infrastructure. This is simply the bare minimum for now. Once LLVM uses those intrinsics for more optimizations, the effects of more `llvm.invariant` use can be measured more precisely. --- compiler/rustc_codegen_gcc/src/builder.rs | 10 +++++ compiler/rustc_codegen_llvm/src/builder.rs | 15 +++++++ compiler/rustc_codegen_llvm/src/context.rs | 4 ++ compiler/rustc_codegen_ssa/src/mir/block.rs | 16 ++++++++ compiler/rustc_codegen_ssa/src/mir/mod.rs | 41 +++++++++++++++++-- .../rustc_codegen_ssa/src/traits/builder.rs | 3 ++ compiler/rustc_session/src/options.rs | 2 + src/test/codegen/invariant-basic.rs | 12 ++++++ src/test/rustdoc-ui/z-help.stdout | 1 + 9 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 src/test/codegen/invariant-basic.rs diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs index a314b7cc2152d..be5eec74044a3 100644 --- a/compiler/rustc_codegen_gcc/src/builder.rs +++ b/compiler/rustc_codegen_gcc/src/builder.rs @@ -1235,6 +1235,16 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { // TODO(antoyo) } + fn invariant_start(&mut self, _value_ptr: RValue<'gcc>, _size: Size) -> RValue<'gcc> { + // TODO + unimplemented!() + } + + fn invariant_end(&mut self, _marker_ptr: RValue<'gcc>, _value_ptr: RValue<'gcc>, _size: Size) { + // TODO + unimplemented!() + } + fn call( &mut self, _typ: Type<'gcc>, diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index fca43a0d86ddd..f445daeb2eb7d 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -1113,6 +1113,21 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { self.call_lifetime_intrinsic("llvm.lifetime.end.p0i8", ptr, size); } + fn invariant_start(&mut self, value_ptr: &'ll Value, size: Size) -> &'ll Value { + let size = size.bytes(); + let value_ptr = self.pointercast(value_ptr, self.cx.type_i8p()); + self.call_intrinsic("llvm.invariant.start.p0i8", &[self.cx.const_u64(size), value_ptr]) + } + + fn invariant_end(&mut self, marker_ptr: &'ll Value, value_ptr: &'ll Value, size: Size) { + let size = size.bytes(); + let value_ptr = self.pointercast(value_ptr, self.cx.type_i8p()); + self.call_intrinsic( + "llvm.invariant.end.p0i8", + &[marker_ptr, self.cx.const_u64(size), value_ptr], + ); + } + fn instrprof_increment( &mut self, fn_name: &'ll Value, diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index 79ddfd884dfac..2c702581afca9 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -629,6 +629,7 @@ impl<'ll> CodegenCx<'ll, '_> { } let i8p = self.type_i8p(); + let unitp = self.type_ptr_to(self.type_struct(&[], false)); let void = self.type_void(); let i1 = self.type_i1(); let t_i8 = self.type_i8(); @@ -840,6 +841,9 @@ impl<'ll> CodegenCx<'ll, '_> { ifn!("llvm.lifetime.start.p0i8", fn(t_i64, i8p) -> void); ifn!("llvm.lifetime.end.p0i8", fn(t_i64, i8p) -> void); + ifn!("llvm.invariant.start.p0i8", fn(t_i64, i8p) -> unitp); + ifn!("llvm.invariant.end.p0i8", fn(unitp, t_i64, i8p) -> void); + ifn!("llvm.expect.i1", fn(i1, i1) -> i1); ifn!("llvm.eh.typeid.for", fn(i8p) -> t_i32); ifn!("llvm.localescape", fn(...) -> void); diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs index bd4f0cac7eb46..8463b93b4ba45 100644 --- a/compiler/rustc_codegen_ssa/src/mir/block.rs +++ b/compiler/rustc_codegen_ssa/src/mir/block.rs @@ -249,6 +249,8 @@ impl<'a, 'tcx> TerminatorCodegenHelper<'tcx> { impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { /// Generates code for a `Resume` terminator. fn codegen_resume_terminator(&mut self, helper: TerminatorCodegenHelper<'tcx>, mut bx: Bx) { + self.emit_invariant_end_markers(&mut bx); + if let Some(funclet) = helper.funclet(self) { bx.cleanup_ret(funclet, None); } else { @@ -306,6 +308,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { } fn codegen_return_terminator(&mut self, mut bx: Bx) { + self.emit_invariant_end_markers(&mut bx); + // Call `va_end` if this is the definition of a C-variadic function. if self.fn_abi.c_variadic { // The `VaList` "spoofed" argument is just after all the real arguments. @@ -1723,6 +1727,18 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { } } +impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { + fn emit_invariant_end_markers(&mut self, bx: &mut Bx) { + for invariant_value in self.invariant_values.iter().rev() { + bx.invariant_end( + invariant_value.marker_ptr, + invariant_value.value_ptr, + invariant_value.size, + ) + } + } +} + enum ReturnDest<'tcx, V> { // Do nothing; the return value is indirect or ignored. Nothing, diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs index 2b931bfc91d63..49ae161c49d4e 100644 --- a/compiler/rustc_codegen_ssa/src/mir/mod.rs +++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs @@ -1,9 +1,11 @@ use crate::traits::*; -use rustc_middle::mir; use rustc_middle::mir::interpret::ErrorHandled; -use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, TyAndLayout}; +use rustc_middle::mir::{self, Mutability}; +use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, LayoutOf, TyAndLayout}; use rustc_middle::ty::{self, Instance, Ty, TypeFoldable, TypeVisitable}; -use rustc_target::abi::call::{FnAbi, PassMode}; +use rustc_span::DUMMY_SP; +use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode}; +use rustc_target::abi::Size; use std::iter; @@ -87,6 +89,11 @@ pub struct FunctionCx<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> { /// Caller location propagated if this function has `#[track_caller]`. caller_location: Option>, + + /// Information about pointers that are known immutable for the entire execution of the + /// function. We need to keep this information around so that we can call `invariant_end()` + /// before exiting this function. + invariant_values: Vec>, } impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { @@ -103,6 +110,15 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { } } +/// Information about pointers that are known immutable for the entire execution of the +/// function. We need to keep this information around so that we can call `invariant_end()` +/// before exiting this function. +struct InvariantValue { + marker_ptr: V, + value_ptr: V, + size: Size, +} + enum LocalRef<'tcx, V> { Place(PlaceRef<'tcx, V>), /// `UnsizedPlace(p)`: `p` itself is a thin pointer (indirect place). @@ -178,6 +194,7 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( debug_context, per_local_var_debug_info: None, caller_location: None, + invariant_values: vec![], }; fx.per_local_var_debug_info = fx.compute_per_local_var_debug_info(&mut bx); @@ -325,6 +342,7 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( PassMode::Direct(_) => { let llarg = bx.get_param(llarg_idx); llarg_idx += 1; + emit_invariant_markers_for(bx, fx, arg, llarg); return local(OperandRef::from_immediate_or_packed_pair( bx, llarg, arg.layout, )); @@ -398,6 +416,23 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( args } +fn emit_invariant_markers_for<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( + bx: &mut Bx, + fx: &mut FunctionCx<'a, 'tcx, Bx>, + arg: &ArgAbi<'tcx, Ty<'tcx>>, + value_ptr: Bx::Value, +) { + if bx.sess().opts.unstable_opts.emit_invariant_markers { + if let ty::Ref(_, subty, Mutability::Not) = arg.layout.ty.kind() { + if subty.is_freeze(bx.tcx().at(DUMMY_SP), ty::ParamEnv::reveal_all()) { + let size = bx.cx().layout_of(*subty).size; + let marker_ptr = bx.invariant_start(value_ptr, size); + fx.invariant_values.push(InvariantValue { marker_ptr, value_ptr, size }); + } + } + } +} + mod analyze; mod block; pub mod constant; diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs index 01408f39fb306..26905fb7a8868 100644 --- a/compiler/rustc_codegen_ssa/src/traits/builder.rs +++ b/compiler/rustc_codegen_ssa/src/traits/builder.rs @@ -310,6 +310,9 @@ pub trait BuilderMethods<'a, 'tcx>: /// Called for `StorageDead` fn lifetime_end(&mut self, ptr: Self::Value, size: Size); + fn invariant_start(&mut self, ptr: Self::Value, size: Size) -> Self::Value; + fn invariant_end(&mut self, marker_ptr: Self::Value, value_ptr: Self::Value, size: Size); + fn instrprof_increment( &mut self, fn_name: Self::Value, diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index a8be318dea8ae..ab8d15fdbbc63 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1295,6 +1295,8 @@ options! { an additional `.html` file showing the computed coverage spans."), dwarf_version: Option = (None, parse_opt_number, [TRACKED], "version of DWARF debug information to emit (default: 2 or 4, depending on platform)"), + emit_invariant_markers: bool = (false, parse_bool, [UNTRACKED], + "emit `llvm.invariant` markers, which may enable better optimization (default: no)"), emit_stack_sizes: bool = (false, parse_bool, [UNTRACKED], "emit a section containing stack size metadata (default: no)"), emit_thin_lto: bool = (true, parse_bool, [TRACKED], diff --git a/src/test/codegen/invariant-basic.rs b/src/test/codegen/invariant-basic.rs new file mode 100644 index 0000000000000..aaa53df8e98b2 --- /dev/null +++ b/src/test/codegen/invariant-basic.rs @@ -0,0 +1,12 @@ +// compile-flags: -C opt-level=0 -Z emit-invariant-markers=yes +#![crate_type="lib"] + +#[no_mangle] +pub fn foo(x: &i32) { + // CHECK-LABEL: @foo + // CHECK: call {{.*}} @llvm.invariant.start{{[^(]*}}(i64 4 + // CHECK: call void @{{.*}}drop{{.*}} + // CHECK: call void @llvm.invariant.end + // CHECK: ret void + drop(x); +} diff --git a/src/test/rustdoc-ui/z-help.stdout b/src/test/rustdoc-ui/z-help.stdout index dbf3a8f00ee6d..e19aa81a56588 100644 --- a/src/test/rustdoc-ui/z-help.stdout +++ b/src/test/rustdoc-ui/z-help.stdout @@ -36,6 +36,7 @@ -Z dump-mir-graphviz=val -- in addition to `.mir` files, create graphviz `.dot` files (and with `-Z instrument-coverage`, also create a `.dot` file for the MIR-derived coverage graph) (default: no) -Z dump-mir-spanview=val -- in addition to `.mir` files, create `.html` files to view spans for all `statement`s (including terminators), only `terminator` spans, or computed `block` spans (one span encompassing a block's terminator and all statements). If `-Z instrument-coverage` is also enabled, create an additional `.html` file showing the computed coverage spans. -Z dwarf-version=val -- version of DWARF debug information to emit (default: 2 or 4, depending on platform) + -Z emit-invariant-markers=val -- emit `llvm.invariant` markers, which may enable better optimization (default: no) -Z emit-stack-sizes=val -- emit a section containing stack size metadata (default: no) -Z emit-thin-lto=val -- emit the bc module with thin LTO info (default: yes) -Z export-executable-symbols=val -- export symbols from executables, as if they were dynamic libraries