InferAddressSpaces: Handle masked load and store intrinsics #102007
Conversation
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-transforms

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/102007.diff

2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
- (added) llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
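In short, the change teaches InferAddressSpaces to look through the pointer operand of llvm.masked.load (operand 0) and llvm.masked.store (operand 1), re-declaring the intrinsic for the inferred pointer type. A minimal before/after sketch, mirroring the global-to-flat test added below:

; Input: a masked load through a flat pointer that is really a global pointer.
%cast = addrspacecast ptr addrspace(1) %ptr to ptr
%load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)

; After -passes=infer-address-spaces: the addrspacecast is dropped and the
; intrinsic is re-declared for addrspace(1).
%load = call <32 x i32> @llvm.masked.load.v32i32.p1(ptr addrspace(1) %ptr, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)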
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 87b885447cc02..2ddf24be67702 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -369,13 +369,13 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
Value *OldV,
Value *NewV) const {
Module *M = II->getParent()->getParent()->getParent();
-
- switch (II->getIntrinsicID()) {
- case Intrinsic::objectsize: {
+ Intrinsic::ID IID = II->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::objectsize:
+ case Intrinsic::masked_load: {
Type *DestTy = II->getType();
Type *SrcTy = NewV->getType();
- Function *NewDecl =
- Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
+ Function *NewDecl = Intrinsic::getDeclaration(M, IID, {DestTy, SrcTy});
II->setArgOperand(0, NewV);
II->setCalledFunction(NewDecl);
return true;
@@ -386,12 +386,12 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
case Intrinsic::masked_gather: {
Type *RetTy = II->getType();
Type *NewPtrTy = NewV->getType();
- Function *NewDecl =
- Intrinsic::getDeclaration(M, II->getIntrinsicID(), {RetTy, NewPtrTy});
+ Function *NewDecl = Intrinsic::getDeclaration(M, IID, {RetTy, NewPtrTy});
II->setArgOperand(0, NewV);
II->setCalledFunction(NewDecl);
return true;
}
+ case Intrinsic::masked_store:
case Intrinsic::masked_scatter: {
Type *ValueTy = II->getOperand(0)->getType();
Type *NewPtrTy = NewV->getType();
@@ -429,11 +429,13 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
PostorderStack, Visited);
break;
+ case Intrinsic::masked_load:
case Intrinsic::masked_gather:
case Intrinsic::prefetch:
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
PostorderStack, Visited);
break;
+ case Intrinsic::masked_store:
case Intrinsic::masked_scatter:
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1),
PostorderStack, Visited);
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
new file mode 100644
index 0000000000000..e14dfd055cbe8
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
+
+define <32 x i32> @masked_load_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p1(ptr addrspace(1) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(1) %ptr to ptr
+ %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+ ret <32 x i32> %load
+}
+define <32 x i32> @masked_load_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_local_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p3(ptr addrspace(3) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(3) %ptr to ptr
+ %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+ ret <32 x i32> %load
+}
+
+define <32 x i32> @masked_load_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_private_to_flat(
+; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p5(ptr addrspace(5) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(5) %ptr to ptr
+ %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+ ret <32 x i32> %load
+}
+
+define void @masked_store_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p1(<32 x i32> zeroinitializer, ptr addrspace(1) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+ %cast = addrspacecast ptr addrspace(1) %ptr to ptr
+ tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+ ret void
+}
+
+define void @masked_store_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_local_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p3(<32 x i32> zeroinitializer, ptr addrspace(3) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+ %cast = addrspacecast ptr addrspace(3) %ptr to ptr
+ tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+ ret void
+}
+
+define void @masked_store_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_private_to_flat(
+; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p5(<32 x i32> zeroinitializer, ptr addrspace(5) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+ %cast = addrspacecast ptr addrspace(5) %ptr to ptr
+ tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+ ret void
+}
+
LGTM