Skip to content

support #[target_feature(enable = ...)] on #[naked] functions #137720

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
271 changes: 271 additions & 0 deletions compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use object::{Architecture, SubArchitecture};
use rustc_abi::{BackendRepr, Float, Integer, Primitive, RegKind};
use rustc_attr_parsing::InstructionSetAttr;
use rustc_hir::def_id::DefId;
use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrs, TargetFeature};
use rustc_middle::mir::mono::{Linkage, MonoItem, MonoItemData, Visibility};
use rustc_middle::mir::{Body, InlineAsmOperand};
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf};
Expand Down Expand Up @@ -104,6 +106,262 @@ fn inline_to_global_operand<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
}
}

// FIXME share code with `create_object_file`
fn parse_architecture(
Comment on lines +109 to +110
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this there some standard way of getting the architecture that I'm missing? I'd like the rest of the code to operate on an enum rather than doing a bunch of manual string matching.

sess: &rustc_session::Session,
) -> Option<(Architecture, Option<SubArchitecture>)> {
let (architecture, subarchitecture) = match &sess.target.arch[..] {
"arm" => (Architecture::Arm, None),
"aarch64" => (
if sess.target.pointer_width == 32 {
Architecture::Aarch64_Ilp32
} else {
Architecture::Aarch64
},
None,
),
"x86" => (Architecture::I386, None),
"s390x" => (Architecture::S390x, None),
"mips" | "mips32r6" => (Architecture::Mips, None),
"mips64" | "mips64r6" => (Architecture::Mips64, None),
"x86_64" => (
if sess.target.pointer_width == 32 {
Architecture::X86_64_X32
} else {
Architecture::X86_64
},
None,
),
"powerpc" => (Architecture::PowerPc, None),
"powerpc64" => (Architecture::PowerPc64, None),
"riscv32" => (Architecture::Riscv32, None),
"riscv64" => (Architecture::Riscv64, None),
"sparc" => {
if sess.unstable_target_features.contains(&sym::v8plus) {
// Target uses V8+, aka EM_SPARC32PLUS, aka 64-bit V9 but in 32-bit mode
(Architecture::Sparc32Plus, None)
} else {
// Target uses V7 or V8, aka EM_SPARC
(Architecture::Sparc, None)
}
}
"sparc64" => (Architecture::Sparc64, None),
"avr" => (Architecture::Avr, None),
"msp430" => (Architecture::Msp430, None),
"hexagon" => (Architecture::Hexagon, None),
"bpf" => (Architecture::Bpf, None),
"loongarch64" => (Architecture::LoongArch64, None),
"csky" => (Architecture::Csky, None),
"arm64ec" => (Architecture::Aarch64, Some(SubArchitecture::Arm64EC)),

// These architecutres are added here, and not present in `create_object_file`
"wasm32" => (Architecture::Wasm32, None),
"wasm64" => (Architecture::Wasm64, None),
"m68k" => (Architecture::M68k, None),

// Unsupported architecture.
_ => return None,
};

Some((architecture, subarchitecture))
}

/// Enable the function's target features in the body of the function, then disable them again
fn enable_disable_target_features<'tcx>(
tcx: TyCtxt<'tcx>,
attrs: &CodegenFnAttrs,
) -> Option<(String, String)> {
use std::fmt::Write;

let mut begin = String::new();
let mut end = String::new();

let (architecture, _subarchitecture) = parse_architecture(tcx.sess)?;
let features = attrs.target_features.iter().filter(|attr| !attr.implied);

match architecture {
Architecture::X86_64 | Architecture::X86_64_X32 | Architecture::I386 => {
// no action is needed, all instructions are accepted regardless of target feature
}

Architecture::Aarch64 | Architecture::Aarch64_Ilp32 => {
// https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en

for feature in features {
// only enable/disable a feature if it is not already globally enabled.
// so that we don't infuence subsequent asm blocks
if !tcx.sess.unstable_target_features.contains(&feature.name) {
writeln!(begin, ".arch_extension {}", feature.name).unwrap();

writeln!(end, ".arch_extension no{}", feature.name).unwrap();
}
Comment on lines +191 to +197
Copy link
Member

@taiki-e taiki-e Mar 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't actually appear to work in some cases because due to a bug in LLVM (#113221), even if the feature is enabled on the Rust side, it is not passed to the assembler...

However, given that the bug appears conditionally, I think it is also wrong to always set both .arch_extension {feature} and .arch_extension no{feature}.

A bit odd, but setting .arch_extension {feature} always and .arch_extension no{feature} conditionally would work in both cases.

In that approach, .arch_extension {feature} will affect subsequent assemblies if the feature is enabled, but what it enables is a globally enabled feature on the Rust side, so I guess there is probably no adverse effect. EDIT: see #137720 (comment)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess there is probably no adverse effec

Nah, this doesn't met stateful directive rule...

And I think the s390x implementation of this PR that attempts to restore state based on features enabled on the Rust side has the same problem...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. We now have a separate feature flag for target features on naked functions (so that naked functions themselves can be stabilized), #138568. At this point I'm sceptical that we can make the approach of this PR work in general.

So, that means that either

  • we use this approach on platforms where it is guaranteed to work
  • we just have the user handle it (and can help out with some good documentation for that platforms that we've looked at)

Copy link
Member

@taiki-e taiki-e Mar 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, I think it is safer to only support the x86/x86_64 which obviously does not require anything and architectures which support push/pop logic such as riscv, and make errors otherwise. Once the LLVM bug is fixed, we can also support aarch64 and s390x (and probably arm).

  • we just have the user handle it (and can help out with some good documentation for that platforms that we've looked at)

Considering the LLVM bug, I guess it is also difficult to use it properly on the user side. (as for aarch64 and s390x)

}
}
Architecture::Arm => {
// https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en

// FIXME: implement the target feature handling. Contrary to most other targets, there
// is no convenient push/pop mechanism. That means we have to manually restore the state
// of the target features to the state on entry. This is complicated, given how arm
// features interact. There is some incomplete code here https://gist.github.com/folkertdev/fe99874c466e598d0fb2dadf13b91b6f

/* fallthrough */
}

Architecture::Riscv32 | Architecture::Riscv64 => {
// https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc#arch

writeln!(begin, ".option push").unwrap();
for feature in features {
writeln!(begin, ".option arch, +{}", feature.name).unwrap();
}

writeln!(end, ".option pop").unwrap();
}
Architecture::Mips | Architecture::Mips64 | Architecture::Mips64_N32 => {
// https://sourceware.org/binutils/docs/as/MIPS-ISA.html
// https://sourceware.org/binutils/docs/as/MIPS-ASE-Instruction-Generation-Overrides.html

writeln!(begin, ".set push").unwrap();
for feature in features {
writeln!(begin, ".set {}", feature.name).unwrap();
}

writeln!(end, ".set pop").unwrap();
}

Architecture::S390x => {
// https://sourceware.org/binutils/docs/as/s390-Directives.html

// based on src/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
let isa_revision_for_feature_name = |feature_name| match feature_name {
"backchain" => None, // does not define any instructions
"deflate-conversion" => Some(13),
"enhanced-sort" => Some(13),
"guarded-storage" => Some(12),
"high-word" => None, // technically 9, but LLVM supports only >= 10
"nnp-assist" => Some(14),
"transactional-execution" => Some(10),
"vector" => Some(11),
"vector-enhancements-1" => Some(12),
"vector-enhancements-2" => Some(13),
"vector-packed-decimal" => Some(12),
"vector-packed-decimal-enhancement" => Some(13),
"vector-packed-decimal-enhancement-2" => Some(14),
_ => None,
};

let target_feature_isa = features
.filter_map(|feature| isa_revision_for_feature_name(feature.name.as_str()))
.max();

if let Some(minimum_isa) = target_feature_isa {
writeln!(begin, ".machine arch{minimum_isa}").unwrap();

// NOTE: LLVM does not currently support `.machine push` and `.machine pop`
// this is tracked in https://github.com/llvm/llvm-project/issues/129053.
//
// So instead we have to try revert to the previous state manually.
//
// However, this may still be observable if the user explicitly set the machine to
// a higher value using global assembly.
let global_isa = tcx
.sess
.unstable_target_features
.iter()
.filter_map(|feature| isa_revision_for_feature_name(feature.as_str()))
.max()
.unwrap_or(10);

writeln!(end, ".machine arch{global_isa}").unwrap();
}
}
Architecture::PowerPc | Architecture::PowerPc64 => {
// https://www.ibm.com/docs/en/ssw_aix_71/assembler/assembler_pdf.pdf

// based on src/llvm-project/llvm/lib/Target/PowerPC/PPC.td
let isa_revision_for_feature = |feature: &TargetFeature| match feature.name.as_str() {
"altivec" => Some(7),
"partword-atomics" => Some(8),
"power10-vector" => Some(10),
"power8-altivec" => Some(8),
"power8-crypto" => Some(8),
"power8-vector" => Some(9),
"power9-altivec" => Some(9),
"power9-vector" => Some(9),
"quadword-atomics" => Some(8),
"vsx" => Some(7),
_ => None,
};

if let Some(minimum_isa) = features.filter_map(isa_revision_for_feature).max() {
writeln!(begin, ".machine push").unwrap();

// LLVM currently ignores the .machine directive, and allows all instructions regardless
// of the machine. This may be fixed in the future.
//
// https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp#L1799
writeln!(begin, ".machine pwr{minimum_isa}").unwrap();

writeln!(end, ".machine pop").unwrap();
}
}

Architecture::M68k => {
// https://sourceware.org/binutils/docs/as/M68K_002dDirectives.html#index-directives_002c-M680x0

// M68k suports the .cpu and .arch directives, but they both can only be applied once
//
// > If it is given multiple times, or in conjunction with the -march option,
// > all uses must be for the same architecture and extension set.
//
// That is not flexible enough for us, because different functions might want different
// features.
//
// So far, we've not found any cases where ignoring the target features causes issues,
// so that's what we do for now.

/* fallthrough */
}

Architecture::Wasm32 | Architecture::Wasm64 => {
// LLVM does not appear to accept any directive to enable target features
//
// https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp#L909

/* fallthrough */
}

Architecture::LoongArch64 => {
// LLVM does not appear to accept any directive to enable target features
//
// https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp#L1918

/* fallthrough */
}

// FIXME: support naked_asm! on more architectures
Architecture::Avr => return None,
Architecture::Bpf => return None,
Architecture::Csky => return None,
Architecture::E2K32 => return None,
Architecture::E2K64 => return None,
Architecture::Hexagon => return None,
Architecture::Msp430 => return None,
Architecture::Sbf => return None,
Architecture::Sharc => return None,
Architecture::Sparc => return None,
Architecture::Sparc32Plus => return None,
Architecture::Sparc64 => return None,
Architecture::Xtensa => return None,

// the Architecture enum is non-exhaustive
Architecture::Unknown | _ => return None,
}

Some((begin, end))
}

fn prefix_and_suffix<'tcx>(
tcx: TyCtxt<'tcx>,
instance: Instance<'tcx>,
Expand Down Expand Up @@ -186,6 +444,12 @@ fn prefix_and_suffix<'tcx>(
Ok(())
};

let Some((target_feature_begin, target_feature_end)) =
enable_disable_target_features(tcx, attrs)
else {
panic!("target features on naked functions are not supported for this architecture");
};

let mut begin = String::new();
let mut end = String::new();
match asm_binary_format {
Expand All @@ -205,6 +469,8 @@ fn prefix_and_suffix<'tcx>(
writeln!(begin, ".pushsection {section},\"ax\", {progbits}").unwrap();
writeln!(begin, ".balign {align}").unwrap();
write_linkage(&mut begin).unwrap();
begin.push_str(&target_feature_begin);

if let Visibility::Hidden = item_data.visibility {
writeln!(begin, ".hidden {asm_name}").unwrap();
}
Expand All @@ -215,6 +481,7 @@ fn prefix_and_suffix<'tcx>(
writeln!(begin, "{asm_name}:").unwrap();

writeln!(end).unwrap();
end.push_str(&target_feature_end);
writeln!(end, ".size {asm_name}, . - {asm_name}").unwrap();
writeln!(end, ".popsection").unwrap();
if !arch_suffix.is_empty() {
Expand All @@ -226,12 +493,14 @@ fn prefix_and_suffix<'tcx>(
writeln!(begin, ".pushsection {},regular,pure_instructions", section).unwrap();
writeln!(begin, ".balign {align}").unwrap();
write_linkage(&mut begin).unwrap();
begin.push_str(&target_feature_begin);
if let Visibility::Hidden = item_data.visibility {
writeln!(begin, ".private_extern {asm_name}").unwrap();
}
writeln!(begin, "{asm_name}:").unwrap();

writeln!(end).unwrap();
end.push_str(&target_feature_end);
writeln!(end, ".popsection").unwrap();
if !arch_suffix.is_empty() {
writeln!(end, "{}", arch_suffix).unwrap();
Expand All @@ -242,13 +511,15 @@ fn prefix_and_suffix<'tcx>(
writeln!(begin, ".pushsection {},\"xr\"", section).unwrap();
writeln!(begin, ".balign {align}").unwrap();
write_linkage(&mut begin).unwrap();
begin.push_str(&target_feature_begin);
writeln!(begin, ".def {asm_name}").unwrap();
writeln!(begin, ".scl 2").unwrap();
writeln!(begin, ".type 32").unwrap();
writeln!(begin, ".endef {asm_name}").unwrap();
writeln!(begin, "{asm_name}:").unwrap();

writeln!(end).unwrap();
end.push_str(&target_feature_end);
writeln!(end, ".popsection").unwrap();
if !arch_suffix.is_empty() {
writeln!(end, "{}", arch_suffix).unwrap();
Expand Down
29 changes: 29 additions & 0 deletions tests/assembly/naked-fn-aarch64-global-target-feature.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//@ add-core-stubs
//@ assembly-output: emit-asm
//@ only-aarch64
//@ compile-flags: -Ctarget-feature=+lse

#![crate_type = "lib"]
#![feature(no_core, naked_functions)]
#![no_core]

extern crate minicore;
use minicore::*;

// check that a naked function using target features does not disable these features for subsequent
// asm blocks.

// CHECK-LABEL: a:
#[no_mangle]
#[naked]
#[target_feature(enable = "lse")]
unsafe extern "C" fn a() {
naked_asm!("casp x2, x3, x2, x3, [x1]")
}

// CHECK-LABEL: b:
#[no_mangle]
#[naked]
unsafe extern "C" fn b() {
naked_asm!("casp x2, x3, x2, x3, [x1]")
}
Loading
Loading