Skip to content

Commit f15a9b2

Browse files
bokrzesiigcbot
authored and committed
[LLVM16][StatelessToStateful] Case where BUFFER_OFFSET doesn't seem to be 0
There's this assumption: "If m_hasPositivePointerOffset is true, BUFFER_OFFSET are assumed to be **zero**". However, I've found a case on LLVM 16 with opaque pointers where this transformation causes the following test to fail: https://github.com/intel/llvm/ => DeviceLib/string_test.cpp
This:
  %15 = add i32 %bufferOffset1, 1
  %16 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 1 // unused
  %17 = inttoptr i32 %bindlessOffset2 to ptr addrspace(2490368) //
1 parent dbae9c5 commit f15a9b2

File tree

3 files changed

+211
-11
lines changed

3 files changed

+211
-11
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/StatelessToStateful/StatelessToStateful.cpp

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -527,15 +527,35 @@ static alignment_t determinePointerAlignment(Value *Ptr, const DataLayout &DL, A
527527
// 1) Examine uses: look for loads/stores (which may carry explicit
528528
// alignment) or a GEP that reveals an ABI alignment from its element
529529
// type.
530+
531+
// We're saving users into the queue
532+
// in order to track bitcasts
533+
std::queue<Instruction *> queue;
534+
530535
for (User *U : Ptr->users()) {
536+
if (auto *I = dyn_cast<Instruction>(U)) {
537+
queue.push(I);
538+
}
539+
}
540+
541+
while (queue.size() > 0) {
542+
auto U = queue.front();
543+
queue.pop();
544+
531545
if (auto *LI = dyn_cast<LoadInst>(U)) {
532546
// Load has an explicit alignment.
533547
alignment_t LdAlign = LI->getAlign().value();
534548
if (LdAlign > BestAlign)
535549
BestAlign = LdAlign;
550+
} else if (auto *cast = dyn_cast<BitCastInst>(U)) {
551+
for (User *U : cast->users()) {
552+
if (auto *I = dyn_cast<Instruction>(U)) {
553+
queue.push(I);
554+
}
555+
}
536556
} else if (auto *SI = dyn_cast<StoreInst>(U)) {
537557
// Store sets alignment only if the pointer we store into is Ptr.
538-
if (SI->getPointerOperand() == Ptr) {
558+
if (SI->getPointerOperand()->stripPointerCasts() == Ptr) {
539559
alignment_t StAlign = SI->getAlign().value();
540560
if (StAlign > BestAlign)
541561
BestAlign = StAlign;
@@ -1100,6 +1120,12 @@ void StatelessToStateful::finalizeArgInitialValue(Function *F) {
11001120
return;
11011121
}
11021122

1123+
#if LLVM_VERSION_MAJOR >= 16
1124+
// Disabling this transformation/optimization because the assumption about BufferOffset being 0 doesn't seem to be true
1125+
// More details in PR / blame.
1126+
return;
1127+
#endif
1128+
11031129
Module *M = F->getParent();
11041130
Type *int32Ty = Type::getInt32Ty(M->getContext());
11051131
Value *ZeroValue = ConstantInt::get(int32Ty, 0);
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; REQUIRES: regkeys, llvm-16-plus
10+
; RUN: igc_opt --typed-pointers --regkey EnableOptionalBufferOffset=1 --regkey EnableSupportBufferOffset=1 -igc-stateless-to-stateful-resolution -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,%LLVM_DEPENDENT_CHECK_PREFIX%
11+
; RUN: igc_opt --opaque-pointers --regkey EnableOptionalBufferOffset=1 --regkey EnableSupportBufferOffset=1 -igc-stateless-to-stateful-resolution -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,%LLVM_DEPENDENT_CHECK_PREFIX%
12+
; ------------------------------------------------
13+
; StatelessToStateful - check if determinePointerAlignment handles bitcasts correctly:
14+
; the alignment found is the largest alignment among the loads / stores, so 4.
15+
; ------------------------------------------------
16+
17+
18+
; CHECK-LLVM-16: [[add1:%.*]] = add i32 %bufferOffset1, 1
19+
; CHECK-LLVM-16: [[inttoptr1:%.*]] = inttoptr i32 [[add1]] to {{ptr|i8}} addrspace({{[0-9]+}}){{.*}}
20+
21+
; CHECK-LLVM-14: [[inttoptr1:%.*]] = inttoptr i32 1 to i8
22+
; CHECK-LLVM-15: [[inttoptr1:%.*]] = inttoptr i32 1 to i8
23+
24+
; CHECK: [[load1:%.*]] = load i8, {{ptr|i8}} addrspace({{[0-9]+}}){{.*}} [[inttoptr1]], align 1
25+
26+
; CHECK-LLVM-16: [[add2:%.*]] = add i32 %bufferOffset, 1
27+
; CHECK-LLVM-16: [[inttoptr2:%.*]] = inttoptr i32 [[add2]] to {{ptr|i8}} addrspace({{[0-9]+}}){{.*}}
28+
29+
; CHECK-LLVM-14: [[inttoptr2:%.*]] = inttoptr i32 1 to i8
30+
; CHECK-LLVM-15: [[inttoptr2:%.*]] = inttoptr i32 1 to i8
31+
32+
; CHECK: store i8 [[load1]], {{ptr|i8}} addrspace({{[0-9]+}}){{.*}} [[inttoptr2]], align 1
33+
34+
; Function Attrs: convergent nounwind
35+
define spir_kernel void @_ZTS16KernelTestMemcpy(i8 addrspace(1)* align 1 %0, i8 addrspace(1)* readonly align 1 %1, <8 x i32> %r0, <3 x i32> %globalOffset, i32 %bufferOffset, i32 %bufferOffset1) #0 {
36+
%3 = addrspacecast i8 addrspace(1)* %0 to i8 addrspace(4)*
37+
%4 = addrspacecast i8 addrspace(1)* %1 to i8 addrspace(4)*
38+
39+
%5 = getelementptr inbounds i8, i8 addrspace(1)* %1, i64 1
40+
%6 = load i8, i8 addrspace(1)* %5, align 1
41+
42+
%7 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
43+
store i8 %6, i8 addrspace(1)* %7, align 1
44+
45+
%8 = bitcast i8 addrspace(1)* %1 to i32 addrspace(1)*
46+
%9 = bitcast i8 addrspace(1)* %0 to i32 addrspace(1)*
47+
48+
%10 = load i32, i32 addrspace(1)* %8, align 4
49+
store i32 %10, i32 addrspace(1)* %9, align 4
50+
51+
ret void
52+
}
53+
54+
attributes #0 = { convergent nounwind "less-precise-fpmad"="true" }
55+
56+
!igc.functions = !{!3}
57+
!IGCMetadata = !{!0}
58+
59+
!3 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, <8 x i32>, <3 x i32>, i32, i32)* @_ZTS16KernelTestMemcpy, !4}
60+
!4 = !{!5, !6}
61+
!5 = !{!"function_type", i32 0}
62+
!6 = !{!"implicit_arg_desc", !7, !8, !9, !11}
63+
!7 = !{i32 0}
64+
!8 = !{i32 2}
65+
!9 = !{i32 15, !10}
66+
!10 = !{!"explicit_arg_num", i32 0}
67+
!11 = !{i32 15, !12}
68+
!12 = !{!"explicit_arg_num", i32 1}
69+
70+
!0 = !{!"ModuleMD", !160, !100, !101}
71+
!100 = !{!"isPrecise", i1 false}
72+
73+
!101 = !{!"compOpt"}
74+
75+
!160 = !{!"FuncMD", !161, !162}
76+
!161 = !{!"FuncMDMap[0]", void (i8 addrspace(1)*, i8 addrspace(1)*, <8 x i32>, <3 x i32>, i32, i32)* @_ZTS16KernelTestMemcpy}
77+
!162 = !{!"FuncMDValue[0]", !163, !164, !168, !169, !170, !191, !206, !207, !208, !209, !210, !211, !212, !213, !214, !215, !216, !217, !218, !219, !220, !221, !222, !223, !224, !227, !230, !233, !236, !239, !242, !243}
78+
79+
!163 = !{!"localOffsets"}
80+
!164 = !{!"workGroupWalkOrder", !165, !166, !167}
81+
!165 = !{!"dim0", i32 0}
82+
!166 = !{!"dim1", i32 1}
83+
!167 = !{!"dim2", i32 2}
84+
!168 = !{!"funcArgs"}
85+
!169 = !{!"functionType", !"KernelFunction"}
86+
!170 = !{!"rtInfo", !171, !172, !173, !174, !175, !176, !177, !178, !179, !180, !181, !182, !183, !184, !185, !186, !188, !189, !190}
87+
!171 = !{!"callableShaderType", !"NumberOfCallableShaderTypes"}
88+
!172 = !{!"isContinuation", i1 false}
89+
!173 = !{!"hasTraceRayPayload", i1 false}
90+
!174 = !{!"hasHitAttributes", i1 false}
91+
!175 = !{!"hasCallableData", i1 false}
92+
!176 = !{!"ShaderStackSize", i32 0}
93+
!177 = !{!"ShaderHash", i64 0}
94+
!178 = !{!"ShaderName", !""}
95+
!179 = !{!"ParentName", !""}
96+
!180 = !{!"SlotNum", i1* null}
97+
!181 = !{!"NOSSize", i32 0}
98+
!182 = !{!"globalRootSignatureSize", i32 0}
99+
!183 = !{!"Entries"}
100+
!184 = !{!"SpillUnions"}
101+
!185 = !{!"CustomHitAttrSizeInBytes", i32 0}
102+
!186 = !{!"Types", !187}
103+
!187 = !{!"FullFrameTys"}
104+
!188 = !{!"Aliases"}
105+
!189 = !{!"numSyncRTStacks", i32 0}
106+
!190 = !{!"NumCoherenceHintBits", i32 0}
107+
!191 = !{!"resAllocMD", !192, !193, !194, !195, !205}
108+
!192 = !{!"uavsNumType", i32 0}
109+
!193 = !{!"srvsNumType", i32 0}
110+
!194 = !{!"samplersNumType", i32 0}
111+
!195 = !{!"argAllocMDList", !196, !200, !201, !202, !203, !204}
112+
!196 = !{!"argAllocMDListVec[0]", !197, !198, !199}
113+
!197 = !{!"type", i32 0}
114+
!198 = !{!"extensionType", i32 -1}
115+
!199 = !{!"indexType", i32 -1}
116+
!200 = !{!"argAllocMDListVec[1]", !197, !198, !199}
117+
!201 = !{!"argAllocMDListVec[2]", !197, !198, !199}
118+
!202 = !{!"argAllocMDListVec[3]", !197, !198, !199}
119+
!203 = !{!"argAllocMDListVec[4]", !197, !198, !199}
120+
!204 = !{!"argAllocMDListVec[5]", !197, !198, !199}
121+
!205 = !{!"inlineSamplersMD"}
122+
!206 = !{!"maxByteOffsets"}
123+
!207 = !{!"IsInitializer", i1 false}
124+
!208 = !{!"IsFinalizer", i1 false}
125+
!209 = !{!"CompiledSubGroupsNumber", i32 0}
126+
!210 = !{!"hasInlineVmeSamplers", i1 false}
127+
!211 = !{!"localSize", i32 0}
128+
!212 = !{!"localIDPresent", i1 false}
129+
!213 = !{!"groupIDPresent", i1 false}
130+
!214 = !{!"privateMemoryPerWI", i32 0}
131+
!215 = !{!"prevFPOffset", i32 0}
132+
!216 = !{!"globalIDPresent", i1 false}
133+
!217 = !{!"hasSyncRTCalls", i1 false}
134+
!218 = !{!"hasPrintfCalls", i1 false}
135+
!219 = !{!"hasIndirectCalls", i1 false}
136+
!220 = !{!"hasNonKernelArgLoad", i1 false}
137+
!221 = !{!"hasNonKernelArgStore", i1 false}
138+
!222 = !{!"hasNonKernelArgAtomic", i1 false}
139+
!223 = !{!"UserAnnotations"}
140+
!224 = !{!"m_OpenCLArgAddressSpaces", !225, !226}
141+
!225 = !{!"m_OpenCLArgAddressSpacesVec[0]", i32 1}
142+
!226 = !{!"m_OpenCLArgAddressSpacesVec[1]", i32 1}
143+
!227 = !{!"m_OpenCLArgAccessQualifiers", !228, !229}
144+
!228 = !{!"m_OpenCLArgAccessQualifiersVec[0]", !"none"}
145+
!229 = !{!"m_OpenCLArgAccessQualifiersVec[1]", !"none"}
146+
!230 = !{!"m_OpenCLArgTypes", !231, !232}
147+
!231 = !{!"m_OpenCLArgTypesVec[0]", !"char*"}
148+
!232 = !{!"m_OpenCLArgTypesVec[1]", !"char*"}
149+
!233 = !{!"m_OpenCLArgBaseTypes", !234, !235}
150+
!234 = !{!"m_OpenCLArgBaseTypesVec[0]", !"char*"}
151+
!235 = !{!"m_OpenCLArgBaseTypesVec[1]", !"char*"}
152+
!236 = !{!"m_OpenCLArgTypeQualifiers", !237, !238}
153+
!237 = !{!"m_OpenCLArgTypeQualifiersVec[0]", !""}
154+
!238 = !{!"m_OpenCLArgTypeQualifiersVec[1]", !""}
155+
!239 = !{!"m_OpenCLArgNames", !240, !241}
156+
!240 = !{!"m_OpenCLArgNamesVec[0]", !""}
157+
!241 = !{!"m_OpenCLArgNamesVec[1]", !""}
158+
!242 = !{!"m_OpenCLArgScalarAsPointers"}
159+
!243 = !{!"m_OptsToDisablePerFunc", !244, !245, !246}
160+
!244 = !{!"m_OptsToDisablePerFuncSet[0]", !"IGC-AddressArithmeticSinking"}
161+
!245 = !{!"m_OptsToDisablePerFuncSet[1]", !"IGC-AllowSimd32Slicing"}
162+
!246 = !{!"m_OptsToDisablePerFuncSet[2]", !"IGC-SinkLoadOpt"}

IGC/Compiler/tests/StatelessToStateful/Bindful/has_non_kernel_arg_LdSt.ll

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
;============================ end_copyright_notice =============================
99
;
1010
; REQUIRES: regkeys
11-
; RUN: igc_opt --typed-pointers --regkey DumpHasNonKernelArgLdSt=1 --regkey EnableOptionalBufferOffset=1 --regkey EnableSupportBufferOffset=1 -enable-debugify -igc-stateless-to-stateful-resolution -igc-serialize-metadata -S < %s 2>&1 | FileCheck %s
11+
; RUN: igc_opt --typed-pointers --regkey DumpHasNonKernelArgLdSt=1 --regkey EnableOptionalBufferOffset=1 --regkey EnableSupportBufferOffset=1 -enable-debugify -igc-stateless-to-stateful-resolution -igc-serialize-metadata -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,%LLVM_DEPENDENT_CHECK_PREFIX%
12+
; RUN: igc_opt --opaque-pointers --regkey DumpHasNonKernelArgLdSt=1 --regkey EnableOptionalBufferOffset=1 --regkey EnableSupportBufferOffset=1 -enable-debugify -igc-stateless-to-stateful-resolution -igc-serialize-metadata -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,%LLVM_DEPENDENT_CHECK_PREFIX%
1213
; ------------------------------------------------
1314
; StatelessToStateful
1415
; ------------------------------------------------
@@ -18,18 +19,29 @@
1819
; CHECK-NOT: WARNING
1920
; CHECK: CheckModuleDebugify: PASS
2021

21-
2222
define spir_kernel void @func_b(i32 %n, i32 addrspace(1)* %r, <8 x i32> %r0, <8 x i32> %payloadHeader, i8* %privateBase, i8 addrspace(1)* %s2, i8 addrspace(1)* %s3, i32 %s4, i32 %s5, i32 %bufferOffset) #0 {
2323
; CHECK-LABEL: @func_b(
2424
; CHECK-NEXT: entry:
25-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[R:%.*]], i32 16, !dbg [[DBG104:![0-9]+]]
26-
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 addrspace(1)* [[TMP0]], metadata [[META100:![0-9]+]], metadata !DIExpression()), !dbg [[DBG104]]
27-
; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 64 to i32 addrspace(131072)*, !dbg [[DBG105:![0-9]+]]
28-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32 addrspace(131072)* [[TMP1]], align 4, !dbg [[DBG105]]
29-
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP2]], metadata [[META102:![0-9]+]], metadata !DIExpression()), !dbg [[DBG105]]
30-
; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i32 64 to i32 addrspace(131072)*, !dbg [[DBG106:![0-9]+]]
31-
; CHECK-NEXT: store i32 [[N:%.*]], i32 addrspace(131072)* [[TMP3]], align 4, !dbg [[DBG106]]
32-
; CHECK-NEXT: ret void, !dbg [[DBG107:![0-9]+]]
25+
26+
; CHECK-LLVM-16: %0 = add i32 %bufferOffset, 64
27+
; CHECK-LLVM-16: %1 = add i32 %bufferOffset, 64
28+
29+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, {{ptr|i32}} addrspace(1){{.*}} [[R:%.*]], i32 16, !dbg [[DBG104:![0-9]+]]
30+
; CHECK-NEXT: call void @llvm.dbg.value(metadata {{ptr|i32}} addrspace(1){{.*}} [[TMP0]], metadata [[META100:![0-9]+]], metadata !DIExpression()), !dbg [[DBG104]]
31+
32+
; CHECK-LLVM-14: [[TMP1:%.*]] = inttoptr i32 64 to {{ptr|i32}} addrspace({{[0-9]+}}){{.*}}, !dbg [[DBG105:![0-9]+]]
33+
; CHECK-LLVM-15: [[TMP1:%.*]] = inttoptr i32 64 to {{ptr|i32}} addrspace({{[0-9]+}}){{.*}}, !dbg [[DBG105:![0-9]+]]
34+
; CHECK-LLVM-16: [[TMP1:%.*]] = inttoptr i32 %0 to {{ptr|i32}} addrspace({{[0-9]+}}){{.*}}, !dbg [[DBG105:![0-9]+]]
35+
36+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, {{ptr|i32}} addrspace({{[0-9]+}}){{.*}} [[TMP1]], align 4, !dbg [[DBG105]]
37+
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP2]], metadata [[META102:![0-9]+]], metadata !DIExpression()), !dbg [[DBG105]]
38+
39+
; CHECK-LLVM-14: [[TMP3:%.*]] = inttoptr i32 64 to {{ptr|i32}} addrspace({{[0-9]+}}){{.*}}, !dbg [[DBG106:![0-9]+]]
40+
; CHECK-LLVM-15: [[TMP3:%.*]] = inttoptr i32 64 to {{ptr|i32}} addrspace({{[0-9]+}}){{.*}}, !dbg [[DBG106:![0-9]+]]
41+
; CHECK-LLVM-16: [[TMP3:%.*]] = inttoptr i32 %1 to {{ptr|i32}} addrspace({{[0-9]+}}){{.*}}, !dbg [[DBG106:![0-9]+]]
42+
43+
; CHECK-NEXT: store i32 [[N:%.*]], {{ptr|i32}} addrspace({{[0-9]+}}){{.*}} [[TMP3]], align 4, !dbg [[DBG106]]
44+
; CHECK-NEXT: ret void, !dbg [[DBG107:![0-9]+]]
3345
;
3446
entry:
3547
%0 = getelementptr i32, i32 addrspace(1)* %r, i32 16

0 commit comments

Comments
 (0)