Skip to content

Commit 150e5a7

Browse files
committed
add nvptx_target_feature
Add target features for sm_* and ptx*. Each family forms a partial order on its own, but the two cannot be combined into a single partial order. These mirror the LLVM target features, but we do not provide LLVM target processors (each of which implies both an sm_* and a ptx* feature). Add some documentation for the nvptx target.
1 parent f820b75 commit 150e5a7

File tree

7 files changed

+114
-0
lines changed

7 files changed

+114
-0
lines changed

compiler/rustc_codegen_llvm/src/llvm_util.rs

+9
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
271271
// Filter out features that are not supported by the current LLVM version
272272
("aarch64", "fpmr") => None, // only existed in 18
273273
("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
274+
// NVPTX targets added in LLVM 20
275+
("nvptx64", "sm_100") if get_version().0 < 20 => None,
276+
("nvptx64", "sm_100a") if get_version().0 < 20 => None,
277+
("nvptx64", "sm_101") if get_version().0 < 20 => None,
278+
("nvptx64", "sm_101a") if get_version().0 < 20 => None,
279+
("nvptx64", "sm_120") if get_version().0 < 20 => None,
280+
("nvptx64", "sm_120a") if get_version().0 < 20 => None,
281+
("nvptx64", "ptx86") if get_version().0 < 20 => None,
282+
("nvptx64", "ptx87") if get_version().0 < 20 => None,
274283
// Filter out features that are not supported by the current LLVM version
275284
("riscv32" | "riscv64", "zacas") if get_version().0 < 20 => None,
276285
// Enable the evex512 target feature if an avx512 target feature is enabled.

compiler/rustc_feature/src/unstable.rs

+1
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ declare_features! (
325325
(unstable, m68k_target_feature, "1.85.0", Some(134328)),
326326
(unstable, mips_target_feature, "1.27.0", Some(44839)),
327327
(unstable, movrs_target_feature, "CURRENT_RUSTC_VERSION", Some(137976)),
328+
(unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)),
328329
(unstable, powerpc_target_feature, "1.27.0", Some(44839)),
329330
(unstable, prfchw_target_feature, "1.78.0", Some(44839)),
330331
(unstable, riscv_target_feature, "1.45.0", Some(44839)),

compiler/rustc_span/src/symbol.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1472,6 +1472,7 @@ symbols! {
14721472
not,
14731473
notable_trait,
14741474
note,
1475+
nvptx_target_feature,
14751476
object_safe_for_dispatch,
14761477
of,
14771478
off,

compiler/rustc_target/src/target_features.rs

+67
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,70 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
488488
// tidy-alphabetical-end
489489
];
490490

491+
const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
492+
// tidy-alphabetical-start
493+
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
494+
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
495+
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
496+
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
497+
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
498+
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
499+
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
500+
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
501+
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
502+
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
503+
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
504+
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
505+
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
506+
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
507+
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
508+
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
509+
("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]),
510+
("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]),
511+
("sm_90", Unstable(sym::nvptx_target_feature), &["sm_87"]),
512+
("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]),
513+
// tidy-alphabetical-end
514+
// tidy-alphabetical-start
515+
("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]),
516+
("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]),
517+
("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]),
518+
("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]),
519+
("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]),
520+
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
521+
// tidy-alphabetical-end
522+
// tidy-alphabetical-start
523+
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
524+
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
525+
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
526+
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
527+
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
528+
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
529+
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
530+
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
531+
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
532+
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
533+
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
534+
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
535+
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
536+
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
537+
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
538+
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),
539+
("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]),
540+
("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]),
541+
("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]),
542+
("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]),
543+
("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]),
544+
("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]),
545+
("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]),
546+
("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]),
547+
("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]),
548+
("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]),
549+
("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]),
550+
("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]),
551+
("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]),
552+
// tidy-alphabetical-end
553+
];
554+
491555
static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
492556
// tidy-alphabetical-start
493557
("a", Stable, &["zaamo", "zalrsc"]),
@@ -726,6 +790,7 @@ pub fn all_rust_features() -> impl Iterator<Item = (&'static str, Stability)> {
726790
.chain(HEXAGON_FEATURES.iter())
727791
.chain(POWERPC_FEATURES.iter())
728792
.chain(MIPS_FEATURES.iter())
793+
.chain(NVPTX_FEATURES.iter())
729794
.chain(RISCV_FEATURES.iter())
730795
.chain(WASM_FEATURES.iter())
731796
.chain(BPF_FEATURES.iter())
@@ -791,6 +856,7 @@ impl Target {
791856
"x86" | "x86_64" => X86_FEATURES,
792857
"hexagon" => HEXAGON_FEATURES,
793858
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES,
859+
"nvptx64" => NVPTX_FEATURES,
794860
"powerpc" | "powerpc64" => POWERPC_FEATURES,
795861
"riscv32" | "riscv64" => RISCV_FEATURES,
796862
"wasm32" | "wasm64" => WASM_FEATURES,
@@ -817,6 +883,7 @@ impl Target {
817883
"sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI,
818884
"hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI,
819885
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI,
886+
"nvptx64" => &[], // no vector ABI
820887
"bpf" | "m68k" => &[], // no vector ABI
821888
"csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI,
822889
// FIXME: for some tier3 targets, we are overly cautious and always give warnings

library/core/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@
190190
//
191191
// Target features:
192192
// tidy-alphabetical-start
193+
#![cfg_attr(not(bootstrap), feature(nvptx_target_feature))]
193194
#![feature(aarch64_unstable_target_feature)]
194195
#![feature(arm_target_feature)]
195196
#![feature(avx512_target_feature)]

src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md

+34
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,40 @@ platform.
1010
- Riccardo D'Ambrosio, https://github.com/RDambrosio016
1111
- Kjetil Kjeka, https://github.com/kjetilkjeka
1212

13+
## Requirements
14+
15+
This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX.
16+
The necessary components for this workflow are:
17+
18+
- `rustup toolchain add nightly`
19+
- `rustup component add llvm-tools --toolchain nightly`
20+
- `rustup component add llvm-bitcode-linker --toolchain nightly`
21+
22+
There are two options for using the core library:
23+
24+
- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`.
25+
- `rustup target add nvptx64-nvidia-cuda --toolchain nightly`
26+
27+
### Target and features
28+
29+
It is necessary to specify the target processor, for example `-C target-cpu=sm_89`. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
30+
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target processor (for `sm_89`, the default `ptx78` requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
31+
32+
Although `ptx*` is represented as a target feature, it is a compile-time property and it is not possible to build a crate that uses instructions not present in the PTX version specified at compile-time (either via `target-cpu` or `target-feature`).
33+
For example, consider an unaligned barrier `barrier.sync`, which requires both `sm_70` and `ptx60`.
34+
To still allow building for older devices (e.g., `-C target-cpu=sm_62`), while ensuring that this unaligned barrier is unreachable at run-time on such devices, the function that uses it could carry these attributes:
35+
```
36+
#[cfg(target_feature = "ptx60")]
37+
#[target_feature(enable = "sm_70")]
38+
```
39+
40+
## Building Rust kernels
41+
42+
A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.
43+
44+
```console
45+
$ cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Ctarget-cpu=sm_89 -Zunstable-options
46+
```
1347
<!-- FIXME: fill this out
1448
1549
## Requirements

tests/ui/target-feature/gate.rs

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// gate-test-arm_target_feature
88
// gate-test-hexagon_target_feature
99
// gate-test-mips_target_feature
10+
// gate-test-nvptx_target_feature
1011
// gate-test-wasm_target_feature
1112
// gate-test-adx_target_feature
1213
// gate-test-cmpxchg16b_target_feature

0 commit comments

Comments
 (0)