Skip to content

Commit 802f4b6

Browse files
committed
nvptx: fix _syncthreads to use unaligned barrier
* Deprecate _syncthreads (the CUDA name) in favor of the new _barrier_sync (NVPTX name barrier.sync).
* The barrier.sync instruction is equivalent to barrier.sync.aligned prior to sm_70, and will lead to errors/deadlock if passes (such as MIR JumpThreading) lose the aligned property. See rust-lang/rust#137086 and https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar
* Since MIR does not currently have a way to apply something like LLVM's convergent attribute (https://llvm.org/docs/ConvergentOperations.html), we cannot prevent loss of alignment, and thus we require target feature sm_70.
1 parent b5cf231 commit 802f4b6

File tree

1 file changed

+38
-3
lines changed
  • crates/core_arch/src/nvptx

1 file changed

+38
-3
lines changed

crates/core_arch/src/nvptx/mod.rs

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ pub use packed::*;
2020

2121
#[allow(improper_ctypes)]
2222
unsafe extern "C" {
23-
#[link_name = "llvm.nvvm.barrier0"]
24-
fn syncthreads() -> ();
23+
#[link_name = "llvm.nvvm.barrier.sync"]
24+
fn barrier_sync(_: u32) -> ();
2525
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
2626
fn block_dim_x() -> i32;
2727
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
@@ -48,11 +48,46 @@ unsafe extern "C" {
4848
fn thread_idx_z() -> i32;
4949
}
5050

51+
/// Synchronizes all threads in the block.
52+
///
53+
/// The argument `a` is a logical barrier resource with value `0` through `15`.
54+
///
55+
/// This does not require textual alignment, so the following code is valid.
56+
///
57+
/// ```
58+
/// if tid % 2 == 0 {
59+
/// shared[tid] *= 2;
60+
/// _barrier_sync(0);
61+
/// myval += shared[tid + 1];
62+
/// } else {
63+
/// shared[tid] *= 4;
64+
/// _barrier_sync(0);
65+
/// }
66+
/// ```
67+
///
68+
/// This intrinsic has different execution semantics prior to `sm_70`, and thus
69+
/// it requires the `sm_70` target feature.
70+
///
71+
/// TODO: The more restrictive "aligned" semantics of
72+
/// `llvm.nvvm.barrier.sync.aligned` are [currently
73+
/// miscompiled](https://github.com/rust-lang/rust/issues/137086) due to MIR
74+
/// JumpThreading and lack of `convergent` attribute propagated to LLVM. Once
75+
/// resolved, this intrinsic should be exposed for all target features.
76+
///
77+
#[inline]
78+
#[target_feature(enable = "sm_70")]
79+
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
80+
pub unsafe fn _barrier_sync(a: u32) -> () {
81+
barrier_sync(a)
82+
}
83+
5184
/// Synchronizes all threads in the block.
5285
#[inline]
86+
#[target_feature(enable = "sm_70")]
5387
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
88+
#[deprecated(since = "1.87.0", note = "use _barrier_sync(0)")]
5489
pub unsafe fn _syncthreads() -> () {
55-
syncthreads()
90+
_barrier_sync(0)
5691
}
5792

5893
/// x-th thread-block dimension.

0 commit comments

Comments
 (0)