Skip to content

Commit 60a227c

Browse files
committed
[AMDGPU] Use inreg for hint to preload kernel arguments
This patch is the first in a series that adds support for pre-loading kernel arguments into SGPRs. The command-line option 'amdgpu-kernarg-preload-count' specifies how many arguments, counted sequentially from the first, we should attempt to preload; the default is 0. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D156852
1 parent a50486f commit 60a227c

File tree

2 files changed

+286
-1
lines changed

2 files changed

+286
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ void initializeCycleInfoWrapperPassPass(PassRegistry &);
2828

2929
using namespace llvm;
3030

31+
// Command-line cap on how many kernel arguments, counted from the first one,
// are given the preload hint. Initialized to 0, in which case no argument is
// hinted.
static cl::opt<unsigned>
    KernargPreloadCount("amdgpu-kernarg-preload-count",
                        cl::desc("How many kernel arguments to preload onto SGPRs"),
                        cl::init(0));
34+
3135
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
3236

3337
enum ImplicitArgumentPositions {
@@ -914,6 +918,21 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
914918
llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
915919
}
916920

921+
/// Tag the leading arguments of \p F with the `inreg` attribute, which serves
/// as the hint that those kernel arguments should be preloaded.
///
/// At most min(amdgpu-kernarg-preload-count, ST.getMaxNumUserSGPRs())
/// arguments are tagged, walking the argument list from the front. The walk
/// ends at the first argument that carries `byref` or `nest`; that argument
/// and every argument after it are left untouched.
///
/// \param F  Function whose arguments receive the hint.
/// \param TM Target machine used to look up the GCN subtarget for \p F.
static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

  // The cap does not depend on the argument being visited, so compute it once.
  const unsigned MaxHints =
      std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());

  unsigned NumHinted = 0;
  for (Argument &KernArg : F.args()) {
    if (NumHinted == MaxHints)
      return;

    // byref and nest are treated as incompatible with the hint; stop at the
    // first such argument rather than skipping over it.
    const bool Incompatible = KernArg.hasByRefAttr() || KernArg.hasNestAttr();
    if (Incompatible)
      return;

    KernArg.addAttr(Attribute::InReg);
    ++NumHinted;
  }
}
935+
917936
class AMDGPUAttributor : public ModulePass {
918937
public:
919938
AMDGPUAttributor() : ModulePass(ID) {}
@@ -960,9 +979,12 @@ class AMDGPUAttributor : public ModulePass {
960979
if (!F.isIntrinsic()) {
961980
A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
962981
A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
963-
if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
982+
CallingConv::ID CC = F.getCallingConv();
983+
if (!AMDGPU::isEntryFunctionCC(CC)) {
964984
A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
965985
A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
986+
} else if (CC == CallingConv::AMDGPU_KERNEL) {
987+
addPreloadKernArgHint(F, *TM);
966988
}
967989
}
968990
}
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-attributor -S < %s | FileCheck -check-prefix=NO-PRELOAD %s
3+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=1 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-1 %s
4+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=3 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-3 %s
5+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=16 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-16 %s
6+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=20 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-20 %s
7+
8+
define amdgpu_kernel void @test_preload_hint_kernel_1(ptr %0) #0 {
9+
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
10+
; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
11+
; NO-PRELOAD-NEXT: ret void
12+
;
13+
; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
14+
; PRELOAD-1-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
15+
; PRELOAD-1-NEXT: ret void
16+
;
17+
; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
18+
; PRELOAD-3-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
19+
; PRELOAD-3-NEXT: ret void
20+
;
21+
; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
22+
; PRELOAD-16-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
23+
; PRELOAD-16-NEXT: ret void
24+
;
25+
; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
26+
; PRELOAD-20-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
27+
; PRELOAD-20-NEXT: ret void
28+
;
29+
ret void
30+
}
31+
32+
define amdgpu_kernel void @test_preload_hint_kernel_2(i32 %0, i64 %1) #0 {
33+
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
34+
; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
35+
; NO-PRELOAD-NEXT: ret void
36+
;
37+
; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
38+
; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
39+
; PRELOAD-1-NEXT: ret void
40+
;
41+
; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
42+
; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
43+
; PRELOAD-3-NEXT: ret void
44+
;
45+
; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
46+
; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
47+
; PRELOAD-16-NEXT: ret void
48+
;
49+
; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
50+
; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
51+
; PRELOAD-20-NEXT: ret void
52+
;
53+
ret void
54+
}
55+
56+
define amdgpu_kernel void @test_preload_hint_kernel_4(i32 %0, i64 %1, <2 x float> %2, ptr %3) #0 {
57+
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
58+
; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] {
59+
; NO-PRELOAD-NEXT: ret void
60+
;
61+
; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
62+
; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] {
63+
; PRELOAD-1-NEXT: ret void
64+
;
65+
; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
66+
; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] {
67+
; PRELOAD-3-NEXT: ret void
68+
;
69+
; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
70+
; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]]) #[[ATTR0]] {
71+
; PRELOAD-16-NEXT: ret void
72+
;
73+
; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
74+
; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]]) #[[ATTR0]] {
75+
; PRELOAD-20-NEXT: ret void
76+
;
77+
ret void
78+
}
79+
80+
define amdgpu_kernel void @test_preload_hint_kernel_18(i32 %0, i64 %1, <2 x float> %2, ptr %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16, i32 %17) #0 {
81+
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
82+
; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
83+
; NO-PRELOAD-NEXT: ret void
84+
;
85+
; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
86+
; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
87+
; PRELOAD-1-NEXT: ret void
88+
;
89+
; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
90+
; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
91+
; PRELOAD-3-NEXT: ret void
92+
;
93+
; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
94+
; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]], i32 inreg [[TMP4:%.*]], i32 inreg [[TMP5:%.*]], i32 inreg [[TMP6:%.*]], i32 inreg [[TMP7:%.*]], i32 inreg [[TMP8:%.*]], i32 inreg [[TMP9:%.*]], i32 inreg [[TMP10:%.*]], i32 inreg [[TMP11:%.*]], i32 inreg [[TMP12:%.*]], i32 inreg [[TMP13:%.*]], i32 inreg [[TMP14:%.*]], i32 inreg [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
95+
; PRELOAD-16-NEXT: ret void
96+
;
97+
; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
98+
; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]], i32 inreg [[TMP4:%.*]], i32 inreg [[TMP5:%.*]], i32 inreg [[TMP6:%.*]], i32 inreg [[TMP7:%.*]], i32 inreg [[TMP8:%.*]], i32 inreg [[TMP9:%.*]], i32 inreg [[TMP10:%.*]], i32 inreg [[TMP11:%.*]], i32 inreg [[TMP12:%.*]], i32 inreg [[TMP13:%.*]], i32 inreg [[TMP14:%.*]], i32 inreg [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
99+
; PRELOAD-20-NEXT: ret void
100+
;
101+
ret void
102+
}
103+
104+
define void @test_preload_hint_non_kernel_2(i32 %0, i64 %1) #0 {
105+
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
106+
; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
107+
; NO-PRELOAD-NEXT: ret void
108+
;
109+
; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
110+
; PRELOAD-1-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
111+
; PRELOAD-1-NEXT: ret void
112+
;
113+
; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
114+
; PRELOAD-3-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
115+
; PRELOAD-3-NEXT: ret void
116+
;
117+
; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
118+
; PRELOAD-16-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
119+
; PRELOAD-16-NEXT: ret void
120+
;
121+
; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
122+
; PRELOAD-20-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
123+
; PRELOAD-20-NEXT: ret void
124+
;
125+
ret void
126+
}
127+
128+
define amdgpu_kernel void @test_preload_hint_kernel_1_call_func(ptr %0) #0 {
129+
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
130+
; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
131+
; NO-PRELOAD-NEXT: call void @func(ptr [[TMP0]])
132+
; NO-PRELOAD-NEXT: ret void
133+
;
134+
; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
135+
; PRELOAD-1-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
136+
; PRELOAD-1-NEXT: call void @func(ptr [[TMP0]])
137+
; PRELOAD-1-NEXT: ret void
138+
;
139+
; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
140+
; PRELOAD-3-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
141+
; PRELOAD-3-NEXT: call void @func(ptr [[TMP0]])
142+
; PRELOAD-3-NEXT: ret void
143+
;
144+
; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
145+
; PRELOAD-16-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
146+
; PRELOAD-16-NEXT: call void @func(ptr [[TMP0]])
147+
; PRELOAD-16-NEXT: ret void
148+
;
149+
; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
150+
; PRELOAD-20-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
151+
; PRELOAD-20-NEXT: call void @func(ptr [[TMP0]])
152+
; PRELOAD-20-NEXT: ret void
153+
;
154+
call void @func(ptr %0)
155+
ret void
156+
}
157+
158+
define amdgpu_kernel void @test_preload_hint_kernel_1_call_intrinsic(i16 %0) #0 {
159+
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
160+
; NO-PRELOAD-SAME: (i16 [[TMP0:%.*]]) #[[ATTR2]] {
161+
; NO-PRELOAD-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
162+
; NO-PRELOAD-NEXT: ret void
163+
;
164+
; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
165+
; PRELOAD-1-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] {
166+
; PRELOAD-1-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
167+
; PRELOAD-1-NEXT: ret void
168+
;
169+
; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
170+
; PRELOAD-3-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] {
171+
; PRELOAD-3-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
172+
; PRELOAD-3-NEXT: ret void
173+
;
174+
; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
175+
; PRELOAD-16-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] {
176+
; PRELOAD-16-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
177+
; PRELOAD-16-NEXT: ret void
178+
;
179+
; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
180+
; PRELOAD-20-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] {
181+
; PRELOAD-20-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
182+
; PRELOAD-20-NEXT: ret void
183+
;
184+
call void @llvm.amdgcn.set.prio(i16 %0)
185+
ret void
186+
}
187+
188+
define spir_kernel void @test_preload_hint_kernel_1_spir_cc(ptr %0) #0 {
189+
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
190+
; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
191+
; NO-PRELOAD-NEXT: ret void
192+
;
193+
; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
194+
; PRELOAD-1-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
195+
; PRELOAD-1-NEXT: ret void
196+
;
197+
; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
198+
; PRELOAD-3-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
199+
; PRELOAD-3-NEXT: ret void
200+
;
201+
; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
202+
; PRELOAD-16-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
203+
; PRELOAD-16-NEXT: ret void
204+
;
205+
; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
206+
; PRELOAD-20-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
207+
; PRELOAD-20-NEXT: ret void
208+
;
209+
ret void
210+
}
211+
212+
define amdgpu_kernel void @test_preload_hint_kernel_2_preexisting(i32 inreg %0, i64 %1) #0 {
213+
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
214+
; NO-PRELOAD-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
215+
; NO-PRELOAD-NEXT: ret void
216+
;
217+
; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
218+
; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
219+
; PRELOAD-1-NEXT: ret void
220+
;
221+
; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
222+
; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
223+
; PRELOAD-3-NEXT: ret void
224+
;
225+
; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
226+
; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
227+
; PRELOAD-16-NEXT: ret void
228+
;
229+
; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
230+
; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
231+
; PRELOAD-20-NEXT: ret void
232+
;
233+
ret void
234+
}
235+
236+
define amdgpu_kernel void @test_preload_hint_kernel_incompatible_attributes(ptr addrspace(4) byref(i32) %0, ptr nest %1) {
237+
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
238+
; NO-PRELOAD-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
239+
; NO-PRELOAD-NEXT: ret void
240+
;
241+
; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
242+
; PRELOAD-1-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
243+
; PRELOAD-1-NEXT: ret void
244+
;
245+
; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
246+
; PRELOAD-3-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
247+
; PRELOAD-3-NEXT: ret void
248+
;
249+
; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
250+
; PRELOAD-16-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
251+
; PRELOAD-16-NEXT: ret void
252+
;
253+
; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
254+
; PRELOAD-20-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
255+
; PRELOAD-20-NEXT: ret void
256+
;
257+
ret void
258+
}
259+
260+
declare void @func(ptr) #0
261+
declare void @llvm.amdgcn.set.prio(i16)
262+
263+
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)