From cca23880de2277bedd68b3b31855270b272b55ff Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Tue, 22 Aug 2023 17:49:39 -0400 Subject: [PATCH 1/8] lto: use the `object` crate to load bitcode sections Upstream change llvm/llvm-project@6b539f5eb8ef1d3a3c87873caa2dbd5147e1adbd changed how `isSectionBitcode` works, and it now only respects `.llvm.lto` sections instead of also `.llvmbc`, which it says was never intended to be used for LTO. We already have the object crate handy, so it's easy to use that to load the section. We have to sniff for raw bitcode files by hand, as `object` doesn't support those, but that's easy enough. I also look for both .llvmbc and .llvm.lto sections so that the behavior of the new code matches the old LLVM behavior exactly, though .llvm.lto sections seem to not (currently) be covered by tests. r? @nikic @rustbot label: +llvm-main --- compiler/rustc_codegen_llvm/src/back/lto.rs | 35 ++++++++++--------- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 5 --- .../rustc_llvm/llvm-wrapper/PassWrapper.cpp | 26 -------------- 3 files changed, 18 insertions(+), 48 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index b2d28cef89976..f60bc854b7c88 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -28,6 +28,9 @@ use std::path::Path; use std::slice; use std::sync::Arc; +use object::Object; +use object::ObjectSection; + /// We keep track of the computed LTO cache keys from the previous /// session to determine which CGUs we can reuse. 
pub const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin"; @@ -142,23 +145,21 @@ fn prepare_lto( } fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], LtoBitcodeFromRlib> { - let mut len = 0; - let data = - unsafe { llvm::LLVMRustGetBitcodeSliceFromObjectData(obj.as_ptr(), obj.len(), &mut len) }; - if !data.is_null() { - assert!(len != 0); - let bc = unsafe { slice::from_raw_parts(data, len) }; - - // `bc` must be a sub-slice of `obj`. - assert!(obj.as_ptr() <= bc.as_ptr()); - assert!(bc[bc.len()..bc.len()].as_ptr() <= obj[obj.len()..obj.len()].as_ptr()); - - Ok(bc) - } else { - assert!(len == 0); - Err(LtoBitcodeFromRlib { - llvm_err: llvm::last_error().unwrap_or_else(|| "unknown LLVM error".to_string()), - }) + // The object crate doesn't understand bitcode files, but we can just sniff for the possible + // magic strings here and return the whole slice directly. + if obj.starts_with(b"\xDE\xC0\x17\x0B") || obj.starts_with(b"BC\xC0\xDE") { + return Ok(obj); + } + match object::read::File::parse(obj) { + Ok(f) => match f.section_by_name(".llvmbc").or_else(|| f.section_by_name(".llvm.lto")) { + Some(d) => Ok(d.data().unwrap()), + None => Err(LtoBitcodeFromRlib { + llvm_err: "Bitcode section not found in object file".to_string(), + }), + }, + Err(e) => { + Err(LtoBitcodeFromRlib { llvm_err: format!("error loading bitcode section: {}", e) }) + } } } diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 84157d1e25ca3..af4e3187261d6 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -2309,11 +2309,6 @@ extern "C" { len: usize, Identifier: *const c_char, ) -> Option<&Module>; - pub fn LLVMRustGetBitcodeSliceFromObjectData( - Data: *const u8, - len: usize, - out_len: &mut usize, - ) -> *const u8; pub fn LLVMRustLinkerNew(M: &Module) -> &mut Linker<'_>; pub fn LLVMRustLinkerAdd( diff --git 
a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp index b566ea496dedf..3f5223547733a 100644 --- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp @@ -1481,32 +1481,6 @@ LLVMRustParseBitcodeForLTO(LLVMContextRef Context, return wrap(std::move(*SrcOrError).release()); } -// Find the bitcode section in the object file data and return it as a slice. -// Fail if the bitcode section is present but empty. -// -// On success, the return value is the pointer to the start of the slice and -// `out_len` is filled with the (non-zero) length. On failure, the return value -// is `nullptr` and `out_len` is set to zero. -extern "C" const char* -LLVMRustGetBitcodeSliceFromObjectData(const char *data, - size_t len, - size_t *out_len) { - *out_len = 0; - - StringRef Data(data, len); - MemoryBufferRef Buffer(Data, ""); // The id is unused. - - Expected BitcodeOrError = - object::IRObjectFile::findBitcodeInMemBuffer(Buffer); - if (!BitcodeOrError) { - LLVMRustSetLastError(toString(BitcodeOrError.takeError()).c_str()); - return nullptr; - } - - *out_len = BitcodeOrError->getBufferSize(); - return BitcodeOrError->getBufferStart(); -} - // Computes the LTO cache key for the provided 'ModId' in the given 'Data', // storing the result in 'KeyOut'. 
// Currently, this cache key is a SHA-1 hash of anything that could affect From bdf78359980ea06f0254eb8a07e813ca4eb1b112 Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Wed, 23 Aug 2023 12:36:16 -0400 Subject: [PATCH 2/8] lto: also handle macho and aix section names --- compiler/rustc_codegen_llvm/src/back/lto.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index f60bc854b7c88..5fb95b3dcdf64 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -151,7 +151,12 @@ fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], LtoBitcodeFro return Ok(obj); } match object::read::File::parse(obj) { - Ok(f) => match f.section_by_name(".llvmbc").or_else(|| f.section_by_name(".llvm.lto")) { + Ok(f) => match f + .section_by_name(".llvmbc") + .or_else(|| f.section_by_name(".llvm.lto")) + .or_else(|| f.section_by_name("__LLVM,__bitcode\0")) + .or_else(|| f.section_by_name(".ipa\0")) + { Some(d) => Ok(d.data().unwrap()), None => Err(LtoBitcodeFromRlib { llvm_err: "Bitcode section not found in object file".to_string(), From 0ae0f3a981f73f56ccd0e757661dac79e185e4be Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Wed, 23 Aug 2023 14:11:17 -0400 Subject: [PATCH 3/8] lto: remove bogusly copied null byte --- compiler/rustc_codegen_llvm/src/back/lto.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index 5fb95b3dcdf64..e263491de846b 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -154,8 +154,8 @@ fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], LtoBitcodeFro Ok(f) => match f .section_by_name(".llvmbc") .or_else(|| f.section_by_name(".llvm.lto")) - .or_else(|| f.section_by_name("__LLVM,__bitcode\0")) - .or_else(|| 
f.section_by_name(".ipa\0")) + .or_else(|| f.section_by_name("__LLVM,__bitcode")) + .or_else(|| f.section_by_name(".ipa")) { Some(d) => Ok(d.data().unwrap()), None => Err(LtoBitcodeFromRlib { From fb4ee7d19b67960046c1446a4f442b662dac380d Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Mon, 28 Aug 2023 13:00:18 -0400 Subject: [PATCH 4/8] rustc_codegen_llvm: tease out functions to avoid duplication of bitcode section names --- compiler/rustc_codegen_llvm/src/back/lto.rs | 23 +++++++----- compiler/rustc_codegen_llvm/src/back/write.rs | 36 ++++++++++++------- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index e263491de846b..e11195166225b 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -1,4 +1,7 @@ -use crate::back::write::{self, save_temp_bitcode, CodegenDiagnosticsStage, DiagnosticHandlers}; +use crate::back::write::{ + self, bitcode_section_name, save_temp_bitcode, target_is_aix, target_is_apple, + CodegenDiagnosticsStage, DiagnosticHandlers, +}; use crate::errors::{ DynamicLinkingWithLTO, LlvmError, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib, }; @@ -121,8 +124,12 @@ fn prepare_lto( .filter(|&(name, _)| looks_like_rust_object_file(name)); for (name, child) in obj_files { info!("adding bitcode from {}", name); + let is_apple = target_is_apple(cgcx); + let is_aix = target_is_aix(cgcx); match get_bitcode_slice_from_object_data( child.data(&*archive_data).expect("corrupt rlib"), + is_apple, + is_aix, ) { Ok(data) => { let module = SerializedModule::FromRlib(data.to_vec()); @@ -144,19 +151,19 @@ fn prepare_lto( Ok((symbols_below_threshold, upstream_modules)) } -fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], LtoBitcodeFromRlib> { +fn get_bitcode_slice_from_object_data( + obj: &[u8], + is_apple: bool, + is_aix: bool, +) -> Result<&[u8], LtoBitcodeFromRlib> { // The object crate 
doesn't understand bitcode files, but we can just sniff for the possible // magic strings here and return the whole slice directly. if obj.starts_with(b"\xDE\xC0\x17\x0B") || obj.starts_with(b"BC\xC0\xDE") { return Ok(obj); } + let section = bitcode_section_name(is_apple, is_aix).trim_end_matches('\0'); match object::read::File::parse(obj) { - Ok(f) => match f - .section_by_name(".llvmbc") - .or_else(|| f.section_by_name(".llvm.lto")) - .or_else(|| f.section_by_name("__LLVM,__bitcode")) - .or_else(|| f.section_by_name(".ipa")) - { + Ok(f) => match f.section_by_name(section) { Some(d) => Ok(d.data().unwrap()), None => Err(LtoBitcodeFromRlib { llvm_err: "Bitcode section not found in object file".to_string(), diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index 47cc5bd52e2ac..5787c391397c5 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -853,6 +853,27 @@ fn create_section_with_flags_asm(section_name: &str, section_flags: &str, data: asm } +pub(crate) fn bitcode_section_name(is_apple: bool, is_aix: bool) -> &'static str { + if is_apple { + "__LLVM,__bitcode\0" + } else if is_aix { + ".ipa\0" + } else { + ".llvmbc\0" + } +} + +pub(crate) fn target_is_apple(cgcx: &CodegenContext) -> bool { + cgcx.opts.target_triple.triple().contains("-ios") + || cgcx.opts.target_triple.triple().contains("-darwin") + || cgcx.opts.target_triple.triple().contains("-tvos") + || cgcx.opts.target_triple.triple().contains("-watchos") +} + +pub(crate) fn target_is_aix(cgcx: &CodegenContext) -> bool { + cgcx.opts.target_triple.triple().contains("-aix") +} + /// Embed the bitcode of an LLVM module in the LLVM module itself. /// /// This is done primarily for iOS where it appears to be standard to compile C @@ -913,11 +934,8 @@ unsafe fn embed_bitcode( // Unfortunately, LLVM provides no way to set custom section flags. 
For ELF // and COFF we emit the sections using module level inline assembly for that // reason (see issue #90326 for historical background). - let is_aix = cgcx.opts.target_triple.triple().contains("-aix"); - let is_apple = cgcx.opts.target_triple.triple().contains("-ios") - || cgcx.opts.target_triple.triple().contains("-darwin") - || cgcx.opts.target_triple.triple().contains("-tvos") - || cgcx.opts.target_triple.triple().contains("-watchos"); + let is_aix = target_is_aix(cgcx); + let is_apple = target_is_apple(cgcx); if is_apple || is_aix || cgcx.opts.target_triple.triple().starts_with("wasm") @@ -932,13 +950,7 @@ unsafe fn embed_bitcode( ); llvm::LLVMSetInitializer(llglobal, llconst); - let section = if is_apple { - "__LLVM,__bitcode\0" - } else if is_aix { - ".ipa\0" - } else { - ".llvmbc\0" - }; + let section = bitcode_section_name(is_apple, is_aix); llvm::LLVMSetSection(llglobal, section.as_ptr().cast()); llvm::LLVMRustSetLinkage(llglobal, llvm::Linkage::PrivateLinkage); llvm::LLVMSetGlobalConstant(llglobal, llvm::True); From ee81e0d1e4cbbd88f716e1f9ddfc7a065c9963a7 Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Mon, 28 Aug 2023 14:23:57 -0400 Subject: [PATCH 5/8] rustc_codegen_llvm: rework internal API per review --- compiler/rustc_codegen_llvm/src/back/lto.rs | 19 +++++--------- compiler/rustc_codegen_llvm/src/back/write.rs | 26 +++++++++---------- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index e11195166225b..bf116d5d91f23 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -1,6 +1,5 @@ use crate::back::write::{ - self, bitcode_section_name, save_temp_bitcode, target_is_aix, target_is_apple, - CodegenDiagnosticsStage, DiagnosticHandlers, + self, bitcode_section_name, save_temp_bitcode, CodegenDiagnosticsStage, DiagnosticHandlers, }; use crate::errors::{ DynamicLinkingWithLTO, LlvmError, 
LtoBitcodeFromRlib, LtoDisallowed, LtoDylib, @@ -124,12 +123,9 @@ fn prepare_lto( .filter(|&(name, _)| looks_like_rust_object_file(name)); for (name, child) in obj_files { info!("adding bitcode from {}", name); - let is_apple = target_is_apple(cgcx); - let is_aix = target_is_aix(cgcx); match get_bitcode_slice_from_object_data( child.data(&*archive_data).expect("corrupt rlib"), - is_apple, - is_aix, + cgcx, ) { Ok(data) => { let module = SerializedModule::FromRlib(data.to_vec()); @@ -151,17 +147,16 @@ fn prepare_lto( Ok((symbols_below_threshold, upstream_modules)) } -fn get_bitcode_slice_from_object_data( - obj: &[u8], - is_apple: bool, - is_aix: bool, -) -> Result<&[u8], LtoBitcodeFromRlib> { +fn get_bitcode_slice_from_object_data<'a>( + obj: &'a [u8], + cgcx: &CodegenContext, +) -> Result<&'a [u8], LtoBitcodeFromRlib> { // The object crate doesn't understand bitcode files, but we can just sniff for the possible // magic strings here and return the whole slice directly. if obj.starts_with(b"\xDE\xC0\x17\x0B") || obj.starts_with(b"BC\xC0\xDE") { return Ok(obj); } - let section = bitcode_section_name(is_apple, is_aix).trim_end_matches('\0'); + let section = bitcode_section_name(cgcx).trim_end_matches('\0'); match object::read::File::parse(obj) { Ok(f) => match f.section_by_name(section) { Some(d) => Ok(d.data().unwrap()), diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index 5787c391397c5..d73714ef357c4 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -853,27 +853,27 @@ fn create_section_with_flags_asm(section_name: &str, section_flags: &str, data: asm } -pub(crate) fn bitcode_section_name(is_apple: bool, is_aix: bool) -> &'static str { - if is_apple { - "__LLVM,__bitcode\0" - } else if is_aix { - ".ipa\0" - } else { - ".llvmbc\0" - } -} - -pub(crate) fn target_is_apple(cgcx: &CodegenContext) -> bool { +fn target_is_apple(cgcx: &CodegenContext) 
-> bool { cgcx.opts.target_triple.triple().contains("-ios") || cgcx.opts.target_triple.triple().contains("-darwin") || cgcx.opts.target_triple.triple().contains("-tvos") || cgcx.opts.target_triple.triple().contains("-watchos") } -pub(crate) fn target_is_aix(cgcx: &CodegenContext) -> bool { +fn target_is_aix(cgcx: &CodegenContext) -> bool { cgcx.opts.target_triple.triple().contains("-aix") } +pub(crate) fn bitcode_section_name(cgcx: &CodegenContext) -> &'static str { + if target_is_apple(cgcx) { + "__LLVM,__bitcode\0" + } else if target_is_aix(cgcx) { + ".ipa\0" + } else { + ".llvmbc\0" + } +} + /// Embed the bitcode of an LLVM module in the LLVM module itself. /// /// This is done primarily for iOS where it appears to be standard to compile C @@ -950,7 +950,7 @@ unsafe fn embed_bitcode( ); llvm::LLVMSetInitializer(llglobal, llconst); - let section = bitcode_section_name(is_apple, is_aix); + let section = bitcode_section_name(cgcx); llvm::LLVMSetSection(llglobal, section.as_ptr().cast()); llvm::LLVMRustSetLinkage(llglobal, llvm::Linkage::PrivateLinkage); llvm::LLVMSetGlobalConstant(llglobal, llvm::True); From e97776b9116b54bd5a5bb652c8cba32eec8bef7f Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Tue, 29 Aug 2023 13:30:51 -0400 Subject: [PATCH 6/8] rustc_codegen_llvm: enable wasm in object crate Otherwise we lose the ability to do crate-level LTO for wasm targets now that we're loading our own bitcode sections instead of using LLVM to do it. 
--- Cargo.lock | 11 +++++++++++ compiler/rustc_codegen_llvm/Cargo.toml | 1 + 2 files changed, 12 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 4ebbb16442eaa..a0214bfa11410 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2454,6 +2454,7 @@ dependencies = [ "rustc-std-workspace-alloc", "rustc-std-workspace-core", "ruzstd", + "wasmparser", ] [[package]] @@ -5823,6 +5824,16 @@ version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +[[package]] +name = "wasmparser" +version = "0.110.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dfcdb72d96f01e6c85b6bf20102e7423bdbaad5c337301bab2bbf253d26413c" +dependencies = [ + "indexmap 2.0.0", + "semver", +] + [[package]] name = "web-sys" version = "0.3.61" diff --git a/compiler/rustc_codegen_llvm/Cargo.toml b/compiler/rustc_codegen_llvm/Cargo.toml index be09820d08da2..f518988c5cc83 100644 --- a/compiler/rustc_codegen_llvm/Cargo.toml +++ b/compiler/rustc_codegen_llvm/Cargo.toml @@ -14,6 +14,7 @@ measureme = "10.0.0" object = { version = "0.32.0", default-features = false, features = [ "std", "read", + "wasm", ] } tracing = "0.1" rustc_middle = { path = "../rustc_middle" } From 1745bc592a92ae53b60216e3a9754edaa2bff9ad Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Tue, 29 Aug 2023 13:57:10 -0400 Subject: [PATCH 7/8] licenses: also allow `Apache-2.0 WITH LLVM-exception` for wasmparser We already allow enough cousins of this license I can't imagine it being a problem. 
--- src/tools/tidy/src/deps.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 410852b6a31f3..3f92025e37c07 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -15,6 +15,7 @@ const LICENSES: &[&str] = &[ "Apache-2.0 / MIT", "Apache-2.0 OR MIT", "Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT", // wasi license + "Apache-2.0 WITH LLVM-exception", // wasmparser license "Apache-2.0/MIT", "ISC", "MIT / Apache-2.0", From 32594581001ebe6a4bf4802d55391644ac1a6af2 Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Tue, 29 Aug 2023 14:04:21 -0400 Subject: [PATCH 8/8] wasmparser: allow as dep of rustc --- src/tools/tidy/src/deps.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 3f92025e37c07..654b8ef15dbf9 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -294,6 +294,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "valuable", "version_check", "wasi", + "wasmparser", "winapi", "winapi-i686-pc-windows-gnu", "winapi-util",