From 3d0b78eec8eb268bb95087e73af1173d484d092f Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: Fri, 25 Oct 2024 11:11:07 +0000 Subject: [PATCH] [Tests-only][NVGPU] Fix nvgpu_arrive syntax in matmulBuilder.py This patch updates the calls to the nvgpu_arrive op (nvgpu.mbarrier_arrive) in matmulBuilder.py so that they no longer pass the !nvgpu.mbarrier.token result type explicitly. This fixes the compilation error for this test. For the warp-specialized matmul_kernel implementation, removing the nvvm.WgmmaWaitGroupSyncOp that followed the MMA main loop fixes the observed hang. With these two fixes, the test compiles and executes successfully on an sm90a machine. Signed-off-by: Durgadoss R --- .../GPU/CUDA/sm90/python/tools/matmulBuilder.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py b/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py index 75f0dc947e068..5394d4a327255 100644 --- a/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py +++ b/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py @@ -568,9 +568,7 @@ def generate_matmul_ws( barId, predicate=consumerPrimaryThread, ) - nvgpu.mbarrier_arrive( - ir.Type.parse("!nvgpu.mbarrier.token"), mbarDONE, barId - ) + nvgpu.mbarrier_arrive(mbarDONE, barId) debug_print( "[cons] iv={} | mbarDONE[{}] arrive [done]", iv, @@ -589,14 +587,9 @@ def generate_matmul_ws( # Step 6.3.5. Yield scf.yield_([new_acc, phaseParity]) - # Step 6.3. Wait All WGMMA - nvvm.WgmmaWaitGroupSyncOp(0) - with ir.InsertionPoint(scf.IfOp(consumerPrimaryThread).then_block): barId = c((K // BLOCK_K) % num_stages) - nvgpu.mbarrier_arrive( - ir.Type.parse("!nvgpu.mbarrier.token"), mbarDONE, barId - ) + nvgpu.mbarrier_arrive(mbarDONE, barId) scf.yield_([]) # Step 6.4. Epilogue (registers --> shared memory)