AMDGPU: add the llvm.amdgcn.s.sleep.var intrinsic and the S_SLEEP_VAR instruction.

Summary of the hunks below:
  * IntrinsicsAMDGPU.td: declares int_amdgcn_s_sleep_var (Clang builtin
    __builtin_amdgcn_s_sleep_var) taking a single i32 operand.
  * SOPInstructions.td: defines S_SLEEP_VAR as a SOP1_0_32 pseudo with
    hasSideEffects = 1 and a GFX12 real encoding with opcode 0x058.
  * AMDGPURegisterBankInfo.cpp: getInstrMapping maps operand 1 with
    getSGPROpMapping; applyMappingImpl calls
    constrainOpWithReadfirstlane(B, MI, 1) for that operand.
  * SIInstrInfo.cpp: legalizeOperands copies src0 of S_SLEEP_VAR through
    V_READFIRSTLANE_B32 into a new SReg_32_XM0 virtual register and rewrites
    src0 to use it.
  * Tests: llc codegen on gfx1200 with -global-isel=0 and -global-isel=1, plus
    MC assembler and disassembler encodings.

NOTE(review): this patch was rebuilt from a copy whose newlines and runs of
whitespace had been collapsed. Line breaks agree with every hunk's line
counts, but leading indentation and in-line column alignment are a
best-effort reconstruction; if context fails to match, try
`git apply --ignore-whitespace` and confirm against the upstream files.

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 09e88152e65d2..b1aefc1777f85 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1715,6 +1715,12 @@ def int_amdgcn_s_sleep :
                                 IntrHasSideEffects]> {
 }
 
+def int_amdgcn_s_sleep_var
+    : ClangBuiltin<"__builtin_amdgcn_s_sleep_var">,
+      Intrinsic<[], [llvm_i32_ty],
+                [IntrNoMem, IntrHasSideEffects, IntrWillReturn]> {
+}
+
 def int_amdgcn_s_nop :
   DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                 IntrHasSideEffects]> {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 03b6d19b2b3c0..d0c1302c3f003 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3101,6 +3101,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
       applyDefaultMapping(OpdMapper);
       constrainOpWithReadfirstlane(B, MI, 8); // M0
       return;
+    case Intrinsic::amdgcn_s_sleep_var:
+      assert(OpdMapper.getVRegs(1).empty());
+      constrainOpWithReadfirstlane(B, MI, 1);
+      return;
     case Intrinsic::amdgcn_s_barrier_signal_var:
     case Intrinsic::amdgcn_s_barrier_join:
     case Intrinsic::amdgcn_s_wakeup_barrier:
@@ -4849,6 +4853,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
           getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); // %data1
       break;
     }
+    case Intrinsic::amdgcn_s_sleep_var:
+      OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+      break;
     case Intrinsic::amdgcn_s_barrier_signal_var:
     case Intrinsic::amdgcn_s_barrier_join:
     case Intrinsic::amdgcn_s_wakeup_barrier:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d4746b559d925..03ffe8e10f4bb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6564,6 +6564,19 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
     }
   }
 
+  // Legalize s_sleep_var.
+  if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
+    const DebugLoc &DL = MI.getDebugLoc();
+    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+    int Src0Idx =
+        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+    MachineOperand &Src0 = MI.getOperand(Src0Idx);
+    BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+        .add(Src0);
+    Src0.ChangeToRegister(Reg, false);
+    return nullptr;
+  }
+
   // Legalize MUBUF instructions.
   bool isSoffsetLegal = true;
   int SoffsetIdx =
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 50c4d279cfe23..c51534cdbd305 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1602,6 +1602,10 @@ def S_SLEEP : SOPP_Pseudo <"s_sleep", (ins i32imm:$simm16),
     "$simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
 }
 
+def S_SLEEP_VAR : SOP1_0_32 <"s_sleep_var", [(int_amdgcn_s_sleep_var SSrc_b32:$src0)]> {
+  let hasSideEffects = 1;
+}
+
 def S_SETPRIO : SOPP_Pseudo <"s_setprio", (ins i16imm:$simm16),
     "$simm16", [(int_amdgcn_s_setprio timm:$simm16)]> {
 }
@@ -1997,6 +2001,7 @@ defm S_GET_BARRIER_STATE_IMM : SOP1_Real_gfx12<0x050>;
 defm S_BARRIER_INIT_IMM : SOP1_Real_gfx12<0x051>;
 defm S_BARRIER_JOIN_IMM : SOP1_Real_gfx12<0x052>;
 defm S_WAKEUP_BARRIER_IMM : SOP1_Real_gfx12<0x057>;
+defm S_SLEEP_VAR : SOP1_Real_gfx12<0x058>;
 
 //===----------------------------------------------------------------------===//
 // SOP1 - GFX1150, GFX12
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
new file mode 100644
index 0000000000000..5ad7ddfbe5fe9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=0 < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefixes=GCN %s
+
+declare void @llvm.amdgcn.s.sleep.var(i32)
+
+define void @test_s_sleep_var1(i32 %arg) {
+; GCN-LABEL: test_s_sleep_var1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_readfirstlane_b32 s0, v0
+; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT: s_sleep_var s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+  call void @llvm.amdgcn.s.sleep.var(i32 %arg)
+  ret void
+}
+
+define void @test_s_sleep_var2() {
+; GCN-LABEL: test_s_sleep_var2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_sleep_var 10
+; GCN-NEXT: s_setpc_b64 s[30:31]
+  call void @llvm.amdgcn.s.sleep.var(i32 10)
+  ret void
+}
+
+define amdgpu_kernel void @test_s_sleep_var3(i32 %arg) {
+; GCN-LABEL: test_s_sleep_var3:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_b32 s0, s[0:1], 0x24
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_sleep_var s0
+; GCN-NEXT: s_endpgm
+  call void @llvm.amdgcn.s.sleep.var(i32 %arg)
+  ret void
+}
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
index 8f2944586ed29..495a2ea78ffef 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
@@ -1,5 +1,11 @@
 // RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s
 
+s_sleep_var 0x1234
+// GFX12: encoding: [0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00]
+
+s_sleep_var s1
+// GFX12: encoding: [0x01,0x58,0x80,0xbe]
+
 s_cvt_f32_i32 s5, s1
 // GFX12: encoding: [0x01,0x64,0x85,0xbe]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
index 1c31ee1e5dd7f..d15a329c8eade 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
@@ -1,5 +1,11 @@
 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
 
+# GFX12: s_sleep_var 0x1234 ; encoding: [0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00]
+0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00
+
+# GFX12: s_sleep_var s1 ; encoding: [0x01,0x58,0x80,0xbe]
+0x01,0x58,0x80,0xbe
+
 # GFX12: s_cvt_f32_i32 s5, s1 ; encoding: [0x01,0x64,0x85,0xbe]
 0x01,0x64,0x85,0xbe
 