Skip to content

Commit 75de53e

Browse files
committed
Fix align for hidden args.
1 parent 3fb33e5 commit 75de53e

File tree

4 files changed

+14
-12
lines changed

4 files changed

+14
-12
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

+4-4
Original file line number | Diff line number | Diff line change
@@ -2510,8 +2510,7 @@ void SITargetLowering::allocatePreloadKernArgSGPRs(
25102510
const SmallVectorImpl<ISD::InputArg> &Ins, MachineFunction &MF,
25112511
const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const {
25122512
Function &F = MF.getFunction();
2513-
unsigned LastExplicitArgOffset =
2514-
MF.getSubtarget<GCNSubtarget>().getExplicitKernelArgOffset();
2513+
unsigned LastExplicitArgOffset = Subtarget->getExplicitKernelArgOffset();
25152514
GCNUserSGPRUsageInfo &SGPRInfo = Info.getUserSGPRInfo();
25162515
bool InPreloadSequence = true;
25172516
unsigned InIdx = 0;
@@ -2539,14 +2538,15 @@ void SITargetLowering::allocatePreloadKernArgSGPRs(
25392538
alignTo(ArgLoc.getLocVT().getFixedSizeInBits(), 32) / 32;
25402539

25412540
// Add padding SGPRs to fix alignment for hidden arguments.
2542-
if (!AlignedForImplictArgs && Arg.hasAttribute("amdgpu-work-group-id")) {
2541+
if (!AlignedForImplictArgs &&
2542+
Arg.hasAttribute("amdgpu-hidden-argument")) {
25432543
unsigned OffsetBefore = LastExplicitArgOffset;
25442544
LastExplicitArgOffset = alignTo(
25452545
LastExplicitArgOffset, Subtarget->getAlignmentForImplicitArgPtr());
25462546
if (OffsetBefore != LastExplicitArgOffset) {
25472547
unsigned PaddingSGPRs =
25482548
alignTo(LastExplicitArgOffset - OffsetBefore, 4) / 4;
2549-
Info.allocateUserSGPRs(PaddingSGPRs);
2549+
Info.allocateUserSGPRs(*Subtarget, PaddingSGPRs);
25502550
ArgOffset += PaddingSGPRs * 4;
25512551
}
25522552
AlignedForImplictArgs = true;

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -277,11 +277,13 @@ SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
277277
return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
278278
}
279279

280-
bool SIMachineFunctionInfo::allocateUserSGPRs(unsigned Number) {
281-
if (Number <= getNumUserSGPRs())
280+
bool SIMachineFunctionInfo::allocateUserSGPRs(const GCNSubtarget &ST,
281+
unsigned Number) {
282+
unsigned NewUserSGPRs = NumUserSGPRs + Number;
283+
if (NewUserSGPRs > ST.getMaxNumUserSGPRs())
282284
return false;
283285

284-
NumUserSGPRs = Number;
286+
NumUserSGPRs = NewUserSGPRs;
285287
return true;
286288
}
287289

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
781781
int PaddingSGPRs);
782782

783783
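/// Reserve \p Number additional user SGPRs. Returns false (reserving none) if the total would exceed the subtarget's maximum number of user SGPRs.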
784-
bool allocateUserSGPRs(unsigned Number);
784+
bool allocateUserSGPRs(const GCNSubtarget &ST, unsigned Number);
785785

786786
/// Increment user SGPRs used for padding the argument list only.
787787
Register addReservedUserSGPR() {

llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ define amdgpu_kernel void @preload_unused_arg_block_count_x(ptr addrspace(1) inr
3232
; GFX940-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
3333
; GFX940-NEXT: ; %bb.0:
3434
; GFX940-NEXT: v_mov_b32_e32 v0, 0
35-
; GFX940-NEXT: v_mov_b32_e32 v1, s5
35+
; GFX940-NEXT: v_mov_b32_e32 v1, s6
3636
; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
3737
; GFX940-NEXT: s_endpgm
3838
;
@@ -41,7 +41,7 @@ define amdgpu_kernel void @preload_unused_arg_block_count_x(ptr addrspace(1) inr
4141
; GFX90a-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
4242
; GFX90a-NEXT: ; %bb.0:
4343
; GFX90a-NEXT: v_mov_b32_e32 v0, 0
44-
; GFX90a-NEXT: v_mov_b32_e32 v1, s9
44+
; GFX90a-NEXT: v_mov_b32_e32 v1, s10
4545
; GFX90a-NEXT: global_store_dword v0, v1, s[6:7]
4646
; GFX90a-NEXT: s_endpgm
4747
%imp_arg_ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -276,7 +276,7 @@ define amdgpu_kernel void @preload_block_count_x_imparg_align_ptr_i8(ptr addrspa
276276
; GFX940-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
277277
; GFX940-NEXT: ; %bb.0:
278278
; GFX940-NEXT: s_and_b32 s0, s4, 0xff
279-
; GFX940-NEXT: s_add_i32 s0, s5, s0
279+
; GFX940-NEXT: s_add_i32 s0, s6, s0
280280
; GFX940-NEXT: v_mov_b32_e32 v0, 0
281281
; GFX940-NEXT: v_mov_b32_e32 v1, s0
282282
; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
@@ -287,7 +287,7 @@ define amdgpu_kernel void @preload_block_count_x_imparg_align_ptr_i8(ptr addrspa
287287
; GFX90a-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
288288
; GFX90a-NEXT: ; %bb.0:
289289
; GFX90a-NEXT: s_and_b32 s0, s8, 0xff
290-
; GFX90a-NEXT: s_add_i32 s0, s9, s0
290+
; GFX90a-NEXT: s_add_i32 s0, s10, s0
291291
; GFX90a-NEXT: v_mov_b32_e32 v0, 0
292292
; GFX90a-NEXT: v_mov_b32_e32 v1, s0
293293
; GFX90a-NEXT: global_store_dword v0, v1, s[6:7]

0 commit comments

Comments
 (0)