Skip to content

Commit 92ae467

Browse files
committed
Address comments.
1 parent 479d15a commit 92ae467

9 files changed

+51
-17
lines changed

llvm/include/llvm/IR/Argument.h

+2
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,8 @@ class Argument final : public Value {
178178
/// Check if an argument has a given attribute.
179179
bool hasAttribute(Attribute::AttrKind Kind) const;
180180

181+
bool hasAttribute(StringRef Kind) const;
182+
181183
Attribute getAttribute(Attribute::AttrKind Kind) const;
182184

183185
/// Method for support type inquiry through isa, cast, and dyn_cast.

llvm/include/llvm/IR/Function.h

+3
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,9 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
439439
/// check if an attributes is in the list of attributes.
440440
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
441441

442+
/// check if an attributes is in the list of attributes.
443+
bool hasParamAttribute(unsigned ArgNo, StringRef Kind) const;
444+
442445
/// gets the attribute from the list of attributes.
443446
Attribute getAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) const;
444447

llvm/lib/IR/Function.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,10 @@ bool Argument::hasAttribute(Attribute::AttrKind Kind) const {
369369
return getParent()->hasParamAttribute(getArgNo(), Kind);
370370
}
371371

372+
bool Argument::hasAttribute(StringRef Kind) const {
373+
return getParent()->hasParamAttribute(getArgNo(), Kind);
374+
}
375+
372376
Attribute Argument::getAttribute(Attribute::AttrKind Kind) const {
373377
return getParent()->getParamAttribute(getArgNo(), Kind);
374378
}
@@ -756,6 +760,10 @@ bool Function::hasParamAttribute(unsigned ArgNo,
756760
return AttributeSets.hasParamAttr(ArgNo, Kind);
757761
}
758762

763+
bool Function::hasParamAttribute(unsigned ArgNo, StringRef Kind) const {
764+
return AttributeSets.hasParamAttr(ArgNo, Kind);
765+
}
766+
759767
Attribute Function::getAttributeAtIndex(unsigned i,
760768
Attribute::AttrKind Kind) const {
761769
return AttributeSets.getAttributeAtIndex(i, Kind);

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,7 @@ void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
261261
unsigned Offset = 0;
262262
auto Args = HSAMetadataDoc->getArrayNode();
263263
for (auto &Arg : Func.args()) {
264-
if (Func.getAttributes().hasAttributeAtIndex(AttributeList::FirstArgIndex +
265-
Arg.getArgNo(),
266-
"amdgpu-hidden-argument"))
264+
if (Arg.hasAttribute("amdgpu-hidden-argument"))
267265
continue;
268266

269267
emitKernelArg(Arg, Offset, Args);

llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp

+1-5
Original file line numberDiff line numberDiff line change
@@ -212,11 +212,7 @@ class PreloadKernelArgInfo {
212212

213213
// Allocate loads in order of offset. We need to be sure that the implicit
214214
// argument can actually be preloaded.
215-
std::sort(ImplicitArgLoads.begin(), ImplicitArgLoads.end(),
216-
[](const std::pair<LoadInst *, unsigned> &A,
217-
const std::pair<LoadInst *, unsigned> &B) {
218-
return A.second < B.second;
219-
});
215+
std::sort(ImplicitArgLoads.begin(), ImplicitArgLoads.end(), less_second());
220216

221217
uint64_t LastExplicitArgOffset = ImplicitArgsBaseOffset;
222218
// If we fail to preload any implicit argument we know we don't have SGPRs

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -314,9 +314,7 @@ uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
314314
MaxAlign = Align(1);
315315

316316
for (const Argument &Arg : F.args()) {
317-
if (F.getAttributes().hasAttributeAtIndex(AttributeList::FirstArgIndex +
318-
Arg.getArgNo(),
319-
"amdgpu-hidden-argument"))
317+
if (Arg.hasAttribute("amdgpu-hidden-argument"))
320318
continue;
321319

322320
const bool IsByRef = Arg.hasByRefAttr();

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

+1-4
Original file line numberDiff line numberDiff line change
@@ -2536,10 +2536,7 @@ void SITargetLowering::allocatePreloadKernArgSGPRs(
25362536
alignTo(ArgLoc.getLocVT().getFixedSizeInBits(), 32) / 32;
25372537

25382538
// Add padding SPGR to fix alignment for hidden arguments.
2539-
if (!AlignedForImplictArgs &&
2540-
F.getAttributes().hasAttributeAtIndex(AttributeList::FirstArgIndex +
2541-
Arg.getArgNo(),
2542-
"amdgpu-hidden-argument")) {
2539+
if (!AlignedForImplictArgs && Arg.hasAttribute("amdgpu-work-group-id")) {
25432540
unsigned OffsetBefore = LastExplicitArgOffset;
25442541
LastExplicitArgOffset = alignTo(
25452542
LastExplicitArgOffset, Subtarget->getAlignmentForImplicitArgPtr());

llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-IR-lowering.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-attributor -amdgpu-lower-kernel-arguments -S < %s | FileCheck -check-prefix=NO-PRELOAD %s
3-
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-attributor -amdgpu-lower-kernel-arguments -amdgpu-kernarg-preload-count=16 -S < %s | FileCheck -check-prefix=PRELOAD %s
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -S < %s | FileCheck -check-prefix=NO-PRELOAD %s
3+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -amdgpu-kernarg-preload-count=16 -S < %s | FileCheck -check-prefix=PRELOAD %s
44

55
define amdgpu_kernel void @preload_block_count_x(ptr addrspace(1) %out) {
66
; NO-PRELOAD-LABEL: define amdgpu_kernel void @preload_block_count_x(

llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll

+32
Original file line numberDiff line numberDiff line change
@@ -594,4 +594,36 @@ define amdgpu_kernel void @preloadremainder_xyz(ptr addrspace(1) inreg %out) #0
594594
ret void
595595
}
596596

597+
define amdgpu_kernel void @no_free_sgprs_preloadremainder_z(ptr addrspace(1) inreg %out) {
598+
; GFX940-LABEL: no_free_sgprs_preloadremainder_z:
599+
; GFX940: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
600+
; GFX940-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
601+
; GFX940-NEXT: ; %bb.0:
602+
; GFX940-NEXT: s_load_dword s0, s[2:3], 0x1c
603+
; GFX940-NEXT: v_mov_b32_e32 v0, 0
604+
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
605+
; GFX940-NEXT: s_lshr_b32 s0, s0, 16
606+
; GFX940-NEXT: v_mov_b32_e32 v1, s0
607+
; GFX940-NEXT: global_store_dword v0, v1, s[6:7] sc0 sc1
608+
; GFX940-NEXT: s_endpgm
609+
;
610+
; GFX90a-LABEL: no_free_sgprs_preloadremainder_z:
611+
; GFX90a: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
612+
; GFX90a-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
613+
; GFX90a-NEXT: ; %bb.0:
614+
; GFX90a-NEXT: s_load_dword s0, s[6:7], 0x1c
615+
; GFX90a-NEXT: v_mov_b32_e32 v0, 0
616+
; GFX90a-NEXT: s_waitcnt lgkmcnt(0)
617+
; GFX90a-NEXT: s_lshr_b32 s0, s0, 16
618+
; GFX90a-NEXT: v_mov_b32_e32 v1, s0
619+
; GFX90a-NEXT: global_store_dword v0, v1, s[10:11]
620+
; GFX90a-NEXT: s_endpgm
621+
%imp_arg_ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
622+
%gep = getelementptr i8, ptr addrspace(4) %imp_arg_ptr, i32 22
623+
%load = load i16, ptr addrspace(4) %gep
624+
%conv = zext i16 %load to i32
625+
store i32 %conv, ptr addrspace(1) %out
626+
ret void
627+
}
628+
597629
attributes #0 = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }

0 commit comments

Comments
 (0)