Skip to content

Commit 6e8ffab

Browse files
authored
[mlir][nvvm] Introduce elect.sync Op (#68323)
The Op selects a leader thread from a set of threads. See for more information: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-elect-sync
1 parent 470b652 commit 6e8ffab

File tree

3 files changed

+36
-0
lines changed

3 files changed

+36
-0
lines changed

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,26 @@ def NVVM_SyncWarpOp :
574574
}
575575

576576

577+
def NVVM_ElectSyncOp : NVVM_Op<"elect.sync",
578+
[DeclareOpInterfaceMethods<BasicPtxBuilderOpInterface>]>
579+
{
580+
let results = (outs I1:$pred);
581+
let assemblyFormat = "attr-dict `->` type(results)";
582+
let extraClassDefinition = [{
583+
std::string $cppClass::getPtx() {
584+
return std::string(
585+
"{ \n"
586+
".reg .u32 rx; \n"
587+
".reg .pred px; \n"
588+
" mov.u32 %0, 0; \n"
589+
" elect.sync rx | px, 0xFFFFFFFF;\n"
590+
"@px mov.u32 %0, 1; \n"
591+
"}\n"
592+
);
593+
}
594+
}];
595+
}
596+
577597
def LoadCacheModifierCA : I32EnumAttrCase<"CA", 0, "ca">;
578598
def LoadCacheModifierCG : I32EnumAttrCase<"CG", 1, "cg">;
579599
def LoadCacheModifierCS : I32EnumAttrCase<"CS", 2, "cs">;

mlir/lib/Conversion/NVVMToLLVM/NVVMToLLVM.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ class PtxBuilder {
6363

6464
// https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html#constraints
6565
char getRegisterType(Type type) {
66+
if (type.isInteger(1))
67+
return 'b';
6668
if (type.isInteger(16))
6769
return 'h';
6870
if (type.isInteger(32))

mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,3 +466,17 @@ func.func @wgmma_f32_e5m2_e4m3(%descA : i64, %descB : i64) -> !mat32f32 {
466466
: !mat32f32 -> !mat32f32
467467
return %result2 : !mat32f32
468468
}
469+
470+
// -----
471+
472+
func.func @elect_one_leader_sync() {
473+
// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "{
474+
// CHECK-SAME: .reg .u32 rx;
475+
// CHECK-SAME: .reg .pred px;
476+
// CHECK-SAME: mov.u32 $0, 0;
477+
// CHECK-SAME: elect.sync rx | px, 0xFFFFFFFF;
478+
// CHECK-SAME: @px mov.u32 $0, 1;
479+
// CHECK-SAME: "=b" : () -> i1
480+
%cnd = nvvm.elect.sync -> i1
481+
return
482+
}

0 commit comments

Comments
 (0)