Skip to content

Commit 13d6233

Browse files
authored
[MLIR][NVGPU] Fix nvgpu_arrive syntax in matmulBuilder.py (#113713)
This patch updates the call syntax of the nvgpu.mbarrier_arrive op in matmulBuilder.py: the explicit `!nvgpu.mbarrier.token` result type is no longer passed as the first argument. This fixes the compilation error in this test. For the warp-specialized matmul_kernel implementation (generate_matmul_ws), removing the WgmmaWaitGroupSyncOp after the MMA main loop fixes the observed hang. With these two fixes, the test compiles and executes successfully on an sm90a machine.

Signed-off-by: Durgadoss R <[email protected]>
1 parent bb00f5b commit 13d6233

File tree

1 file changed

+2
-9
lines changed

1 file changed

+2
-9
lines changed

mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -568,9 +568,7 @@ def generate_matmul_ws(
568568
barId,
569569
predicate=consumerPrimaryThread,
570570
)
571-
nvgpu.mbarrier_arrive(
572-
ir.Type.parse("!nvgpu.mbarrier.token"), mbarDONE, barId
573-
)
571+
nvgpu.mbarrier_arrive(mbarDONE, barId)
574572
debug_print(
575573
"[cons] iv={} | mbarDONE[{}] arrive [done]",
576574
iv,
@@ -589,14 +587,9 @@ def generate_matmul_ws(
589587
# Step 6.3.5. Yield
590588
scf.yield_([new_acc, phaseParity])
591589

592-
# Step 6.3. Wait All WGMMA
593-
nvvm.WgmmaWaitGroupSyncOp(0)
594-
595590
with ir.InsertionPoint(scf.IfOp(consumerPrimaryThread).then_block):
596591
barId = c((K // BLOCK_K) % num_stages)
597-
nvgpu.mbarrier_arrive(
598-
ir.Type.parse("!nvgpu.mbarrier.token"), mbarDONE, barId
599-
)
592+
nvgpu.mbarrier_arrive(mbarDONE, barId)
600593
scf.yield_([])
601594

602595
# Step 6.4. Epilogue (registers --> shared memory)

0 commit comments

Comments
 (0)