diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index ef2a98f09967c..f7a847ec7f38f 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -18207,6 +18207,8 @@ terminated by an ``.end_amdhsa_kernel`` directive. (wavefrontsize64) ``.amdhsa_uses_dynamic_stack`` 0 GFX6-GFX12 Controls USES_DYNAMIC_STACK in :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ``.amdhsa_named_barrier_count`` 0 GFX1250+ Controls NAMED_BAR_CNT in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx12-table`. ``.amdhsa_system_sgpr_private_segment_wavefront_offset`` 0 GFX6-GFX10 Controls ENABLE_PRIVATE_SEGMENT in (except :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx12-table`. GFX942) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 845449931b5a9..36c0d1cbcea22 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -809,15 +809,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { " AccumOffset: " + getMCExprStr(AdjustedAccum), false); } - if (AMDGPU::isGFX1250(STM)) { - const MCExpr *BarBlkConst = MCConstantExpr::create(4, Ctx); - const MCExpr *AlignToBlk = AMDGPUMCExpr::createAlignTo( - CurrentProgramInfo.NamedBarCnt, BarBlkConst, Ctx); - const MCExpr *BarBlks = - MCBinaryExpr::createDiv(AlignToBlk, BarBlkConst, Ctx); - OutStreamer->emitRawComment(" NamedBarCnt: " + getMCExprStr(BarBlks), - false); - } + if (AMDGPU::isGFX1250(STM)) + OutStreamer->emitRawComment( + " NamedBarCnt: " + getMCExprStr(CurrentProgramInfo.NamedBarCnt), + false); OutStreamer->emitRawComment( " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy), false); @@ -1023,7 +1018,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.DynamicCallStack = MCBinaryExpr::createOr(GetSymRefExpr(RIK::RIK_HasDynSizedStack), GetSymRefExpr(RIK::RIK_HasRecursion), Ctx); - ProgInfo.NamedBarCnt = GetSymRefExpr(RIK::RIK_NumNamedBarrier); + + const MCExpr *BarBlkConst = MCConstantExpr::create(4, Ctx); + const MCExpr *AlignToBlk = AMDGPUMCExpr::createAlignTo( + GetSymRefExpr(RIK::RIK_NumNamedBarrier), BarBlkConst, Ctx); + ProgInfo.NamedBarCnt = MCBinaryExpr::createDiv(AlignToBlk, BarBlkConst, Ctx); const SIMachineFunctionInfo *MFI = MF.getInfo(); diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 951473264d089..78a2678808eee 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5986,6 +5986,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { SMRange VGPRRange; const MCExpr *NextFreeVGPR = ZeroExpr; const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext()); + const MCExpr *NamedBarCnt = ZeroExpr; uint64_t SharedVGPRCount = 0; uint64_t PreloadLength = 0; uint64_t PreloadOffset = 0; @@ -6208,6 +6209,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { if (!isGFX90A()) return Error(IDRange.Start, "directive requires gfx90a+", IDRange); AccumOffset = ExprVal; + } else if (ID == ".amdhsa_named_barrier_count") { + if (!isGFX1250()) + return Error(IDRange.Start, "directive requires gfx1250+", IDRange); + NamedBarCnt = ExprVal; } else if (ID == ".amdhsa_reserve_vcc") { if (EvaluatableExpr && !isUInt<1>(Val)) return OutOfRangeError(ValRange); @@ -6448,6 +6453,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { getContext()); } + if (isGFX1250()) + MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt, + COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT, + COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT, + getContext()); + if (IVersion.Major >= 10 && IVersion.Major < 12) { // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 197de1228a29e..b58ba947c72e2 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -507,6 +507,12 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( OS << '\n'; } + if (AMDGPU::isGFX1250(STI)) + PrintField(KD.compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT, + ".amdhsa_named_barrier_count"); + OS << "\t\t.amdhsa_reserve_vcc "; EmitMCExpr(ReserveVCC); OS << '\n'; diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll index 5295a13461f69..0804a52ba536d 100644 --- a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll +++ b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll @@ -26,6 +26,7 @@ define void @func2() { ret void } +; SOUT: .amdhsa_named_barrier_count 1 ; SOUT: .set kernel1.num_named_barrier, max(2, func1.num_named_barrier, func2.num_named_barrier) define amdgpu_kernel void @kernel1() #0 { ; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1.kernel1, i32 11) @@ -39,6 +40,7 @@ define amdgpu_kernel void @kernel1() #0 { ret void } +; SOUT: .amdhsa_named_barrier_count 1 ; SOUT: .set kernel2.num_named_barrier, max(2, func2.num_named_barrier) define amdgpu_kernel void @kernel2() #0 { ; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)