-
Notifications
You must be signed in to change notification settings - Fork 13.3k
support #[target_feature(enable = ...)]
on #[naked]
functions
#137720
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
792bd9c
9f3d4c6
14acaf1
4626bcc
6eaad40
d02b18a
9e7dae7
9fc1242
5e370a2
7e763e9
1ce4d18
aea36f4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
use object::{Architecture, SubArchitecture}; | ||
use rustc_abi::{BackendRepr, Float, Integer, Primitive, RegKind}; | ||
use rustc_attr_parsing::InstructionSetAttr; | ||
use rustc_hir::def_id::DefId; | ||
use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrs, TargetFeature}; | ||
use rustc_middle::mir::mono::{Linkage, MonoItem, MonoItemData, Visibility}; | ||
use rustc_middle::mir::{Body, InlineAsmOperand}; | ||
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf}; | ||
|
@@ -104,6 +106,262 @@ fn inline_to_global_operand<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( | |
} | ||
} | ||
|
||
// FIXME share code with `create_object_file` | ||
fn parse_architecture( | ||
sess: &rustc_session::Session, | ||
) -> Option<(Architecture, Option<SubArchitecture>)> { | ||
let (architecture, subarchitecture) = match &sess.target.arch[..] { | ||
"arm" => (Architecture::Arm, None), | ||
"aarch64" => ( | ||
if sess.target.pointer_width == 32 { | ||
Architecture::Aarch64_Ilp32 | ||
} else { | ||
Architecture::Aarch64 | ||
}, | ||
None, | ||
), | ||
"x86" => (Architecture::I386, None), | ||
"s390x" => (Architecture::S390x, None), | ||
"mips" | "mips32r6" => (Architecture::Mips, None), | ||
"mips64" | "mips64r6" => (Architecture::Mips64, None), | ||
"x86_64" => ( | ||
if sess.target.pointer_width == 32 { | ||
Architecture::X86_64_X32 | ||
} else { | ||
Architecture::X86_64 | ||
}, | ||
None, | ||
), | ||
"powerpc" => (Architecture::PowerPc, None), | ||
"powerpc64" => (Architecture::PowerPc64, None), | ||
"riscv32" => (Architecture::Riscv32, None), | ||
"riscv64" => (Architecture::Riscv64, None), | ||
"sparc" => { | ||
if sess.unstable_target_features.contains(&sym::v8plus) { | ||
// Target uses V8+, aka EM_SPARC32PLUS, aka 64-bit V9 but in 32-bit mode | ||
(Architecture::Sparc32Plus, None) | ||
} else { | ||
// Target uses V7 or V8, aka EM_SPARC | ||
(Architecture::Sparc, None) | ||
} | ||
} | ||
"sparc64" => (Architecture::Sparc64, None), | ||
"avr" => (Architecture::Avr, None), | ||
"msp430" => (Architecture::Msp430, None), | ||
"hexagon" => (Architecture::Hexagon, None), | ||
"bpf" => (Architecture::Bpf, None), | ||
"loongarch64" => (Architecture::LoongArch64, None), | ||
"csky" => (Architecture::Csky, None), | ||
"arm64ec" => (Architecture::Aarch64, Some(SubArchitecture::Arm64EC)), | ||
|
||
// These architecutres are added here, and not present in `create_object_file` | ||
"wasm32" => (Architecture::Wasm32, None), | ||
"wasm64" => (Architecture::Wasm64, None), | ||
"m68k" => (Architecture::M68k, None), | ||
|
||
// Unsupported architecture. | ||
_ => return None, | ||
}; | ||
|
||
Some((architecture, subarchitecture)) | ||
} | ||
|
||
/// Enable the function's target features in the body of the function, then disable them again | ||
fn enable_disable_target_features<'tcx>( | ||
tcx: TyCtxt<'tcx>, | ||
attrs: &CodegenFnAttrs, | ||
) -> Option<(String, String)> { | ||
use std::fmt::Write; | ||
|
||
let mut begin = String::new(); | ||
let mut end = String::new(); | ||
|
||
let (architecture, _subarchitecture) = parse_architecture(tcx.sess)?; | ||
let features = attrs.target_features.iter().filter(|attr| !attr.implied); | ||
|
||
match architecture { | ||
Architecture::X86_64 | Architecture::X86_64_X32 | Architecture::I386 => { | ||
// no action is needed, all instructions are accepted regardless of target feature | ||
} | ||
|
||
Architecture::Aarch64 | Architecture::Aarch64_Ilp32 => { | ||
// https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en | ||
|
||
for feature in features { | ||
// only enable/disable a feature if it is not already globally enabled. | ||
// so that we don't infuence subsequent asm blocks | ||
if !tcx.sess.unstable_target_features.contains(&feature.name) { | ||
writeln!(begin, ".arch_extension {}", feature.name).unwrap(); | ||
|
||
writeln!(end, ".arch_extension no{}", feature.name).unwrap(); | ||
} | ||
Comment on lines
+191
to
+197
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't actually appear to work in some cases because due to a bug in LLVM (#113221), even if the feature is enabled on the Rust side, it is not passed to the assembler... However, given that the bug appears conditionally, I think it is also wrong to always set both A bit odd, but setting In that approach, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Nah, this doesn't met stateful directive rule... And I think the s390x implementation of this PR that attempts to restore state based on features enabled on the Rust side has the same problem... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree. We now have a separate feature flag for target features on naked functions (so that naked functions themselves can be stabilized), #138568. At this point I'm sceptical that we can make the approach of this PR work in general. So, that means that either
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For now, I think it is safer to only support the x86/x86_64 which obviously does not require anything and architectures which support push/pop logic such as riscv, and make errors otherwise. Once the LLVM bug is fixed, we can also support aarch64 and s390x (and probably arm).
Considering the LLVM bug, I guess it is also difficult to use it properly on the user side. (as for aarch64 and s390x) |
||
} | ||
} | ||
Architecture::Arm => { | ||
// https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en | ||
|
||
// FIXME: implement the target feature handling. Contrary to most other targets, there | ||
// is no convenient push/pop mechanism. That means we have to manually restore the state | ||
// of the target features to the state on entry. This is complicated, given how arm | ||
// features interact. There is some incomplete code here https://gist.github.com/folkertdev/fe99874c466e598d0fb2dadf13b91b6f | ||
|
||
/* fallthrough */ | ||
} | ||
|
||
Architecture::Riscv32 | Architecture::Riscv64 => { | ||
// https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc#arch | ||
|
||
writeln!(begin, ".option push").unwrap(); | ||
for feature in features { | ||
writeln!(begin, ".option arch, +{}", feature.name).unwrap(); | ||
} | ||
|
||
writeln!(end, ".option pop").unwrap(); | ||
} | ||
Architecture::Mips | Architecture::Mips64 | Architecture::Mips64_N32 => { | ||
// https://sourceware.org/binutils/docs/as/MIPS-ISA.html | ||
// https://sourceware.org/binutils/docs/as/MIPS-ASE-Instruction-Generation-Overrides.html | ||
|
||
writeln!(begin, ".set push").unwrap(); | ||
for feature in features { | ||
writeln!(begin, ".set {}", feature.name).unwrap(); | ||
} | ||
|
||
writeln!(end, ".set pop").unwrap(); | ||
} | ||
|
||
Architecture::S390x => { | ||
// https://sourceware.org/binutils/docs/as/s390-Directives.html | ||
|
||
// based on src/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td | ||
let isa_revision_for_feature_name = |feature_name| match feature_name { | ||
"backchain" => None, // does not define any instructions | ||
"deflate-conversion" => Some(13), | ||
"enhanced-sort" => Some(13), | ||
"guarded-storage" => Some(12), | ||
"high-word" => None, // technically 9, but LLVM supports only >= 10 | ||
"nnp-assist" => Some(14), | ||
"transactional-execution" => Some(10), | ||
"vector" => Some(11), | ||
"vector-enhancements-1" => Some(12), | ||
"vector-enhancements-2" => Some(13), | ||
"vector-packed-decimal" => Some(12), | ||
"vector-packed-decimal-enhancement" => Some(13), | ||
"vector-packed-decimal-enhancement-2" => Some(14), | ||
_ => None, | ||
}; | ||
|
||
let target_feature_isa = features | ||
.filter_map(|feature| isa_revision_for_feature_name(feature.name.as_str())) | ||
.max(); | ||
|
||
if let Some(minimum_isa) = target_feature_isa { | ||
writeln!(begin, ".machine arch{minimum_isa}").unwrap(); | ||
|
||
// NOTE: LLVM does not currently support `.machine push` and `.machine pop` | ||
// this is tracked in https://github.com/llvm/llvm-project/issues/129053. | ||
// | ||
// So instead we have to try revert to the previous state manually. | ||
// | ||
// However, this may still be observable if the user explicitly set the machine to | ||
// a higher value using global assembly. | ||
let global_isa = tcx | ||
.sess | ||
.unstable_target_features | ||
.iter() | ||
.filter_map(|feature| isa_revision_for_feature_name(feature.as_str())) | ||
.max() | ||
.unwrap_or(10); | ||
|
||
writeln!(end, ".machine arch{global_isa}").unwrap(); | ||
} | ||
} | ||
Architecture::PowerPc | Architecture::PowerPc64 => { | ||
// https://www.ibm.com/docs/en/ssw_aix_71/assembler/assembler_pdf.pdf | ||
|
||
// based on src/llvm-project/llvm/lib/Target/PowerPC/PPC.td | ||
let isa_revision_for_feature = |feature: &TargetFeature| match feature.name.as_str() { | ||
"altivec" => Some(7), | ||
"partword-atomics" => Some(8), | ||
"power10-vector" => Some(10), | ||
"power8-altivec" => Some(8), | ||
"power8-crypto" => Some(8), | ||
"power8-vector" => Some(9), | ||
"power9-altivec" => Some(9), | ||
"power9-vector" => Some(9), | ||
"quadword-atomics" => Some(8), | ||
"vsx" => Some(7), | ||
_ => None, | ||
}; | ||
|
||
if let Some(minimum_isa) = features.filter_map(isa_revision_for_feature).max() { | ||
writeln!(begin, ".machine push").unwrap(); | ||
|
||
// LLVM currently ignores the .machine directive, and allows all instructions regardless | ||
// of the machine. This may be fixed in the future. | ||
// | ||
// https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp#L1799 | ||
writeln!(begin, ".machine pwr{minimum_isa}").unwrap(); | ||
|
||
writeln!(end, ".machine pop").unwrap(); | ||
} | ||
} | ||
|
||
Architecture::M68k => { | ||
// https://sourceware.org/binutils/docs/as/M68K_002dDirectives.html#index-directives_002c-M680x0 | ||
|
||
// M68k suports the .cpu and .arch directives, but they both can only be applied once | ||
// | ||
// > If it is given multiple times, or in conjunction with the -march option, | ||
// > all uses must be for the same architecture and extension set. | ||
// | ||
// That is not flexible enough for us, because different functions might want different | ||
// features. | ||
// | ||
// So far, we've not found any cases where ignoring the target features causes issues, | ||
// so that's what we do for now. | ||
|
||
/* fallthrough */ | ||
} | ||
|
||
Architecture::Wasm32 | Architecture::Wasm64 => { | ||
// LLVM does not appear to accept any directive to enable target features | ||
// | ||
// https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp#L909 | ||
|
||
/* fallthrough */ | ||
} | ||
|
||
Architecture::LoongArch64 => { | ||
// LLVM does not appear to accept any directive to enable target features | ||
// | ||
// https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp#L1918 | ||
|
||
/* fallthrough */ | ||
} | ||
|
||
// FIXME: support naked_asm! on more architectures | ||
Architecture::Avr => return None, | ||
Architecture::Bpf => return None, | ||
Architecture::Csky => return None, | ||
Architecture::E2K32 => return None, | ||
Architecture::E2K64 => return None, | ||
Architecture::Hexagon => return None, | ||
Architecture::Msp430 => return None, | ||
Architecture::Sbf => return None, | ||
Architecture::Sharc => return None, | ||
Architecture::Sparc => return None, | ||
Architecture::Sparc32Plus => return None, | ||
Architecture::Sparc64 => return None, | ||
Architecture::Xtensa => return None, | ||
|
||
// the Architecture enum is non-exhaustive | ||
Architecture::Unknown | _ => return None, | ||
} | ||
|
||
Some((begin, end)) | ||
} | ||
|
||
fn prefix_and_suffix<'tcx>( | ||
tcx: TyCtxt<'tcx>, | ||
instance: Instance<'tcx>, | ||
|
@@ -186,6 +444,12 @@ fn prefix_and_suffix<'tcx>( | |
Ok(()) | ||
}; | ||
|
||
let Some((target_feature_begin, target_feature_end)) = | ||
enable_disable_target_features(tcx, attrs) | ||
else { | ||
panic!("target features on naked functions are not supported for this architecture"); | ||
}; | ||
|
||
let mut begin = String::new(); | ||
let mut end = String::new(); | ||
match asm_binary_format { | ||
|
@@ -205,6 +469,8 @@ fn prefix_and_suffix<'tcx>( | |
writeln!(begin, ".pushsection {section},\"ax\", {progbits}").unwrap(); | ||
writeln!(begin, ".balign {align}").unwrap(); | ||
write_linkage(&mut begin).unwrap(); | ||
begin.push_str(&target_feature_begin); | ||
|
||
if let Visibility::Hidden = item_data.visibility { | ||
writeln!(begin, ".hidden {asm_name}").unwrap(); | ||
} | ||
|
@@ -215,6 +481,7 @@ fn prefix_and_suffix<'tcx>( | |
writeln!(begin, "{asm_name}:").unwrap(); | ||
|
||
writeln!(end).unwrap(); | ||
end.push_str(&target_feature_end); | ||
writeln!(end, ".size {asm_name}, . - {asm_name}").unwrap(); | ||
writeln!(end, ".popsection").unwrap(); | ||
if !arch_suffix.is_empty() { | ||
|
@@ -226,12 +493,14 @@ fn prefix_and_suffix<'tcx>( | |
writeln!(begin, ".pushsection {},regular,pure_instructions", section).unwrap(); | ||
writeln!(begin, ".balign {align}").unwrap(); | ||
write_linkage(&mut begin).unwrap(); | ||
begin.push_str(&target_feature_begin); | ||
if let Visibility::Hidden = item_data.visibility { | ||
writeln!(begin, ".private_extern {asm_name}").unwrap(); | ||
} | ||
writeln!(begin, "{asm_name}:").unwrap(); | ||
|
||
writeln!(end).unwrap(); | ||
end.push_str(&target_feature_end); | ||
writeln!(end, ".popsection").unwrap(); | ||
if !arch_suffix.is_empty() { | ||
writeln!(end, "{}", arch_suffix).unwrap(); | ||
|
@@ -242,13 +511,15 @@ fn prefix_and_suffix<'tcx>( | |
writeln!(begin, ".pushsection {},\"xr\"", section).unwrap(); | ||
writeln!(begin, ".balign {align}").unwrap(); | ||
write_linkage(&mut begin).unwrap(); | ||
begin.push_str(&target_feature_begin); | ||
writeln!(begin, ".def {asm_name}").unwrap(); | ||
writeln!(begin, ".scl 2").unwrap(); | ||
writeln!(begin, ".type 32").unwrap(); | ||
writeln!(begin, ".endef {asm_name}").unwrap(); | ||
writeln!(begin, "{asm_name}:").unwrap(); | ||
|
||
writeln!(end).unwrap(); | ||
end.push_str(&target_feature_end); | ||
writeln!(end, ".popsection").unwrap(); | ||
if !arch_suffix.is_empty() { | ||
writeln!(end, "{}", arch_suffix).unwrap(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
//@ add-core-stubs | ||
//@ assembly-output: emit-asm | ||
//@ only-aarch64 | ||
//@ compile-flags: -Ctarget-feature=+lse | ||
|
||
#![crate_type = "lib"] | ||
#![feature(no_core, naked_functions)] | ||
#![no_core] | ||
|
||
extern crate minicore; | ||
use minicore::*; | ||
|
||
// check that a naked function using target features does not disable these features for subsequent | ||
// asm blocks. | ||
|
||
// CHECK-LABEL: a: | ||
#[no_mangle] | ||
#[naked] | ||
#[target_feature(enable = "lse")] | ||
unsafe extern "C" fn a() { | ||
naked_asm!("casp x2, x3, x2, x3, [x1]") | ||
} | ||
|
||
// CHECK-LABEL: b: | ||
#[no_mangle] | ||
#[naked] | ||
unsafe extern "C" fn b() { | ||
naked_asm!("casp x2, x3, x2, x3, [x1]") | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this there some standard way of getting the architecture that I'm missing? I'd like the rest of the code to operate on an enum rather than doing a bunch of manual string matching.