Skip to content

Commit 2ae441e

Browse files
committed
expand buffer load and store + tests
1 parent 8a44cd7 commit 2ae441e

File tree

5 files changed

+269
-0
lines changed

5 files changed

+269
-0
lines changed

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,15 @@ static bool isIntrinsicExpansion(Function &F) {
7070
case Intrinsic::vector_reduce_add:
7171
case Intrinsic::vector_reduce_fadd:
7272
return true;
73+
case Intrinsic::dx_resource_load_typedbuffer: // want to transform double and
74+
// double2
75+
return F.getReturnType()
76+
->getStructElementType(0)
77+
->getScalarType()
78+
->isDoubleTy();
79+
case Intrinsic::dx_resource_store_typedbuffer: // want to transform double and
80+
// double2
81+
return F.getFunctionType()->getParamType(2)->getScalarType()->isDoubleTy();
7382
}
7483
return false;
7584
}
@@ -532,6 +541,80 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
532541
return Builder.CreateFMul(X, PiOver180);
533542
}
534543

544+
/// Expand a typed buffer load whose element type is double or double2.
///
/// The load is re-issued as a load of <2 x i32> (scalar double) or <4 x i32>
/// (vector of doubles), and each adjacent pair of i32 lanes is recombined into
/// a double with dx.asdouble.
///
/// Every extractvalue user of \p Orig is rewritten and erased:
///   - index 0 (the loaded data) is replaced by the rebuilt double value;
///   - index 1 (the i1 element of the result struct) is replaced by the
///     matching element of the new load.
///
/// \p Orig is left in place without uses; erasing it is the caller's job.
static void expandTypedBufferLoadIntrinsic(CallInst *Orig) {
  IRBuilder<> Builder(Orig);

  unsigned ExtractNum =
      Orig->getType()->getStructElementType(0)->isVectorTy() ? 4 : 2;
  Type *Ty = VectorType::get(Builder.getInt32Ty(), ExtractNum, false);

  Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
  auto *Load =
      Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer,
                              {Orig->getOperand(0), Orig->getOperand(1)});

  // Pull the i32 vector out of the new load's result struct.
  Value *Extract = Builder.CreateExtractValue(Load, {0});

  SmallVector<Value *> ExtractElements;
  for (unsigned I = 0; I < ExtractNum; ++I)
    ExtractElements.push_back(
        Builder.CreateExtractElement(Extract, static_cast<uint64_t>(I)));

  // Combine each pair of i32 lanes into a double.
  Value *Result =
      PoisonValue::get(VectorType::get(Builder.getDoubleTy(), 2, false));
  for (unsigned I = 0; I < ExtractNum; I += 2) {
    Value *Dbl =
        Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
                                {ExtractElements[I], ExtractElements[I + 1]});
    if (ExtractNum == 4)
      Result =
          Builder.CreateInsertElement(Result, Dbl, static_cast<uint64_t>(I / 2));
    else
      Result = Dbl;
  }

  // Rewrite all users, not just one: a load may have both its data and its i1
  // element extracted, or may have no users at all. Erasing each extractvalue
  // removes it from Orig's use list, so the loop terminates.
  Value *CheckBit = nullptr;
  while (!Orig->use_empty()) {
    auto *OldExtract = dyn_cast<ExtractValueInst>(Orig->user_back());
    if (!OldExtract)
      llvm_unreachable("TypedBufferLoad's only users should be ExtractValueInst");
    assert(OldExtract->getNumIndices() == 1 &&
           "Expected a single-index extractvalue on TypedBufferLoad");

    unsigned Index = *OldExtract->idx_begin();
    if (Index == 0) {
      OldExtract->replaceAllUsesWith(Result);
    } else {
      assert(Index == 1 && "Unexpected struct index for TypedBufferLoad");
      // Created lazily so loads that never read the i1 emit no extra IR.
      if (!CheckBit)
        CheckBit = Builder.CreateExtractValue(Load, {1});
      OldExtract->replaceAllUsesWith(CheckBit);
    }
    OldExtract->eraseFromParent();
  }
}
585+
586+
/// Expand a typed buffer store whose value operand is a double or a vector of
/// doubles.
///
/// The value (operand 2 of \p Orig) is split with dx.splitdouble into low and
/// high 32-bit halves, which are packed into an i32 vector in
/// {lo0, hi0[, lo1, hi1]} order. A new store of that i32 vector is emitted
/// before \p Orig with the same first two operands.
///
/// \p Orig is left in place; erasing it is the caller's job.
static void expandTypedBufferStoreIntrinsic(CallInst *Orig) {
  IRBuilder<> Builder(Orig);

  const bool IsVector =
      Orig->getFunctionType()->getParamType(2)->isVectorTy();

  // dx.splitdouble returns {lo, hi}; each half is i32 for a scalar double and
  // <2 x i32> for a vector operand.
  Type *SplitElementTy = Builder.getInt32Ty();
  if (IsVector)
    SplitElementTy = VectorType::get(SplitElementTy, 2, false);

  // Split our double(s).
  auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
  Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
                                         Orig->getOperand(2));

  // Build the i32 vector that is actually stored.
  Value *LowBits = Builder.CreateExtractValue(Split, 0);
  Value *HighBits = Builder.CreateExtractValue(Split, 1);
  Value *Val;
  if (IsVector) {
    // Interleave <lo0, lo1> and <hi0, hi1> into <lo0, hi0, lo1, hi1>.
    Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
  } else {
    Val = PoisonValue::get(VectorType::get(SplitElementTy, 2, false));
    Val = Builder.CreateInsertElement(Val, LowBits, static_cast<uint64_t>(0));
    Val = Builder.CreateInsertElement(Val, HighBits, static_cast<uint64_t>(1));
  }

  Builder.CreateIntrinsic(Builder.getVoidTy(),
                          Intrinsic::dx_resource_store_typedbuffer,
                          {Orig->getOperand(0), Orig->getOperand(1), Val});
}
617+
535618
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
536619
if (ClampIntrinsic == Intrinsic::dx_uclamp)
537620
return Intrinsic::umax;
@@ -660,6 +743,14 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
660743
case Intrinsic::dx_radians:
661744
Result = expandRadiansIntrinsic(Orig);
662745
break;
746+
case Intrinsic::dx_resource_load_typedbuffer:
747+
expandTypedBufferLoadIntrinsic(Orig);
748+
Orig->eraseFromParent();
749+
return true;
750+
case Intrinsic::dx_resource_store_typedbuffer:
751+
expandTypedBufferStoreIntrinsic(Orig);
752+
Orig->eraseFromParent();
753+
return true;
663754
case Intrinsic::usub_sat:
664755
Result = expandUsubSat(Orig);
665756
break;

llvm/test/CodeGen/DirectX/BufferLoad.ll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,4 +197,36 @@ define void @loadv4i16() {
197197
ret void
198198
}
199199

200+
; Lowering check for a typed buffer of double: the input reads the buffer as
; { <2 x i32>, i1 }, and the CHECK lines expect createHandleFromBinding,
; annotateHandle and a dx.op.bufferLoad.i32 call, in that order.
define void @loadf64() {
201+
; show dxil op lower can handle typedbuffer load where target is double but load type is <2 x i32>
202+
; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
203+
%buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
204+
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
205+
i32 0, i32 1, i32 1, i32 0, i1 false)
206+
207+
; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 266 }) #0
208+
%load = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
209+
target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
210+
211+
; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
212+
%val = extractvalue { <2 x i32>, i1 } %load, 0
213+
ret void
214+
}
215+
216+
; Lowering check for a typed buffer of <2 x double>: the input reads the buffer
; as { <4 x i32>, i1 }, and the CHECK lines expect createHandleFromBinding,
; annotateHandle and a dx.op.bufferLoad.i32 call, in that order.
define void @loadv2f64() {
217+
; show dxil op lower can handle typedbuffer load where target is double2 but load type is <4 x i32>
218+
; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
219+
%buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
220+
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
221+
i32 0, i32 1, i32 1, i32 0, i1 false)
222+
223+
; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 522 }) #0
224+
%load = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
225+
target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
226+
227+
; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
228+
%val = extractvalue { <4 x i32>, i1 } %load, 0
229+
ret void
230+
}
231+
200232
; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
2+
3+
target triple = "dxil-pc-shadermodel6.6-compute"
4+
5+
; Expansion check, scalar case: a {double, i1} typed buffer load is expected to
; become a { <2 x i32>, i1 } load whose two i32 lanes feed llvm.dx.asdouble.
define void @loadf64() {
6+
; check the handle from binding is unchanged
7+
; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
8+
; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
9+
; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
10+
%buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
11+
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
12+
i32 0, i32 1, i32 1, i32 0, i1 false)
13+
14+
; check we load a <2 x i32> instead of a double
15+
; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
16+
; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
17+
; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)
18+
%load0 = call {double, i1} @llvm.dx.resource.load.typedbuffer(
19+
target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
20+
21+
; check we extract the two i32 and construct a double
22+
; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
23+
; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i64 0
24+
; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i64 1
25+
; CHECK: call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
26+
%data0 = extractvalue {double, i1} %load0, 0
27+
ret void
28+
}
29+
30+
; Expansion check, vector case: a { <2 x double>, i1 } typed buffer load is
; expected to become a { <4 x i32>, i1 } load; lanes (0,1) and (2,3) each feed
; llvm.dx.asdouble and the two doubles are inserted into a <2 x double>.
define void @loadv2f64() {
31+
; check the handle from binding is unchanged
32+
; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
33+
; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
34+
; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
35+
%buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
36+
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
37+
i32 0, i32 1, i32 1, i32 0, i1 false)
38+
39+
; check we load a <4 x i32> instead of a double2
40+
; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 }
41+
; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2f64_1_0_0t(
42+
; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0)
43+
%load0 = call { <2 x double>, i1 } @llvm.dx.resource.load.typedbuffer(
44+
target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
45+
46+
; check we extract the 4 i32 and construct a <2 x double>
47+
; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0
48+
; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i64 0
49+
; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i64 1
50+
; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i64 2
51+
; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i64 3
52+
; CHECK: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]])
53+
; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i64 0
54+
; CHECK: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]])
55+
; CHECK: insertelement <2 x double> [[Vec]], double [[Dbl2]], i64 1
56+
%data0 = extractvalue { <2 x double>, i1 } %load0, 0
57+
ret void
58+
}

llvm/test/CodeGen/DirectX/BufferStore.ll

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,44 @@ define void @store_scalarized_floats(float %data0, float %data1, float %data2, f
161161

162162
ret void
163163
}
164+
165+
; Lowering check for a typed buffer of double that is stored as <2 x i32>:
; both lanes are extracted and passed to dx.op.bufferStore.i32. The CHECK line
; expects the first extracted lane to be repeated in the third and fourth
; value slots.
define void @storef64(<2 x i32> %0) {
  ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
  ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]

  %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
          i32 0, i32 0, i32 1, i32 0, i1 false)

  ; The temporary casts should all have been cleaned up
  ; CHECK-NOT: %dx.resource.casthandle

  ; Match the stored operands through the captured extracts rather than
  ; hard-coded value numbers, so the test does not depend on value numbering.
  ; CHECK: [[D0:%.*]] = extractelement <2 x i32> %0, i32 0
  ; CHECK: [[D1:%.*]] = extractelement <2 x i32> %0, i32 1
  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 [[D0]], i32 [[D1]], i32 [[D0]], i32 [[D0]], i8 15)
  call void @llvm.dx.resource.store.typedbuffer(
      target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0, <2 x i32> %0)
  ret void
}
183+
184+
; Lowering check for a typed buffer of <2 x double> that is stored as
; <4 x i32>: all four lanes are extracted and passed, in order, to
; dx.op.bufferStore.i32.
define void @storev2f64(<4 x i32> %0) {
185+
; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
186+
; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]
187+
188+
%buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
189+
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
190+
i32 0, i32 0, i32 1, i32 0, i1 false)
191+
192+
; The temporary casts should all have been cleaned up
193+
; CHECK-NOT: %dx.resource.casthandle
194+
195+
; CHECK: [[D0:%.*]] = extractelement <4 x i32> %0, i32 0
196+
; CHECK: [[D1:%.*]] = extractelement <4 x i32> %0, i32 1
197+
; CHECK: [[D2:%.*]] = extractelement <4 x i32> %0, i32 2
198+
; CHECK: [[D3:%.*]] = extractelement <4 x i32> %0, i32 3
199+
; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 [[D0]], i32 [[D1]], i32 [[D2]], i32 [[D3]], i8 15)
200+
call void @llvm.dx.resource.store.typedbuffer(
201+
target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
202+
<4 x i32> %0)
203+
ret void
204+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
2+
3+
target triple = "dxil-pc-shadermodel6.6-compute"
4+
5+
; Expansion check, scalar case: a typed buffer store of a double is expected to
; become llvm.dx.splitdouble followed by a store of the <2 x i32> built from
; the low half (lane 0) and the high half (lane 1).
define void @storef64(double %0) {
6+
; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", double, 1, 0, 0)
7+
; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
8+
; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false)
9+
%buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
10+
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
11+
i32 0, i32 0, i32 1, i32 0, i1 false)
12+
13+
; check we split the double and store the lo and hi bits
14+
; CHECK: [[SD:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double %0)
15+
; CHECK: [[Lo:%.*]] = extractvalue { i32, i32 } [[SD]], 0
16+
; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1
17+
; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i64 0
18+
; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i64 1
19+
; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_f64_1_0_0t.v2i32(
20+
; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec2]])
21+
call void @llvm.dx.resource.store.typedbuffer(
22+
target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0,
23+
double %0)
24+
ret void
25+
}
26+
27+
28+
; Expansion check, vector case: a typed buffer store of a <2 x double> is
; expected to become llvm.dx.splitdouble followed by a store of a <4 x i32>
; that interleaves the low and high halves with shuffle mask <0, 2, 1, 3>.
define void @storev2f64(<2 x double> %0) {
29+
; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
30+
; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
31+
; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false)
32+
%buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
33+
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
34+
i32 0, i32 0, i32 1, i32 0, i1 false)
35+
36+
; CHECK: [[SD:%.*]] = call { <2 x i32>, <2 x i32> }
37+
; CHECK-SAME: @llvm.dx.splitdouble.v2i32(<2 x double> %0)
38+
; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0
39+
; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1
40+
; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
41+
; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v4i32(
42+
; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec]])
43+
call void @llvm.dx.resource.store.typedbuffer(
44+
target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
45+
<2 x double> %0)
46+
ret void
47+
}

0 commit comments

Comments
 (0)