-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot #69567
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Rana Pratap Reddy (ranapratap55) ChangesCurrently __builtin_read_exec_hi lowers to llvm.read_register, this patch lowers it to use amdgcn_ballot. Full diff: https://github.com/llvm/llvm-project/pull/69567.diff 2 Files Affected:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index db9f354fa8386d3..d60826f293f0c46 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7997,14 +7997,26 @@ enum SpecialRegisterAccessKind {
static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
llvm::Type *RegisterType,
- llvm::Type *ValueType) {
+ llvm::Type *ValueType, bool isExecHi) {
CodeGen::CGBuilderTy &Builder = CGF.Builder;
CodeGen::CodeGenModule &CGM = CGF.CGM;
llvm::Type *ResultType = CGF.ConvertType(E->getType());
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
- llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
- return Call;
+ llvm::Value *Call;
+ Function *F;
+
+ if (isExecHi) {
+ F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
+ Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+ Value *C1 = llvm::ConstantInt::get(ValueType, 32);
+ Value *Rt2 = Builder.CreateLShr(Call, C1);
+ Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty);
+ return Rt2;
+ } else {
+ F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+ Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+ return Call;
+ }
}
// Generates the IR for the read/write special register builtin,
@@ -17837,10 +17849,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
}
case AMDGPU::BI__builtin_amdgcn_read_exec:
+ return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
- case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
- return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
- }
+ return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+ case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+ return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 8938642e3b19f8c..0bc9a54682d3e31 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) {
// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
// CHECK-LABEL: @test_read_exec_hi(
-// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
void test_read_exec_hi(global uint* out) {
*out = __builtin_amdgcn_read_exec_hi();
}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
340e633
to
1958244
Compare
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
Call = Builder.CreateCall(F, {Builder.getInt1(true)}); | ||
Value *C1 = llvm::ConstantInt::get(ValueType, 32); | ||
Value *Rt2 = Builder.CreateLShr(Call, C1); | ||
Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should just be CreateTrunc, the result should always be i32
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated in the latest patch.
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); | ||
Call = Builder.CreateCall(F, {Builder.getInt1(true)}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can make this part common
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated.
@@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) { | |||
// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] | |||
|
|||
// CHECK-LABEL: @test_read_exec_hi( | |||
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) | |||
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
needs wave32 and wave64 tests. These ballot tests belong in builtins-amdgcn-wave32.cl and builtins-amdgcn-wave64.cl
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
moved to respected wave32.cl and wave64.cl tests.
c2ed656
to
65c806a
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with a nit
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
|
||
if (isExecHi) { | ||
Value *C1 = llvm::ConstantInt::get(ValueType, 32); | ||
Value *Rt2 = Builder.CreateLShr(Call, C1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a CreateLShr overload that takes the constant integer, you don't need the ConstantInt::get
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed ConstantInt::get.
} | ||
|
||
// CHECK-LABEL: @test_read_exec_hi( | ||
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this codegen on wave32? Or should this directly emit 0?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, this is generated codegen on wave32.
65c806a
to
8a5dfe6
Compare
8a5dfe6
to
ebc8ebd
Compare
ebc8ebd
to
bd6e144
Compare
…vm#69567) Currently __builtin_amdgcn_read_exec_hi lowers to llvm.read_register, this patch lowers it to use amdgcn_ballot.
Currently __builtin_amdgcn_read_exec_hi lowers to llvm.read_register, this patch lowers it to use amdgcn_ballot.