Skip to content

Commit 13ea114

Browse files
authored
[AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (#69567)
Currently, __builtin_amdgcn_read_exec_hi lowers to llvm.read_register; this patch lowers it to amdgcn_ballot instead.
1 parent 68d993e commit 13ea114

File tree

4 files changed

+65
-7
lines changed

4 files changed

+65
-7
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

+15-6
Original file line numberDiff line numberDiff line change
@@ -7995,15 +7995,23 @@ enum SpecialRegisterAccessKind {
79957995
Write,
79967996
};
79977997

7998+
// Generates the IR for __builtin_amdgcn_read_exec_*.
7999+
// Lowers the builtin to the amdgcn_ballot intrinsic.
79988000
static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
79998001
llvm::Type *RegisterType,
8000-
llvm::Type *ValueType) {
8002+
llvm::Type *ValueType, bool isExecHi) {
80018003
CodeGen::CGBuilderTy &Builder = CGF.Builder;
80028004
CodeGen::CodeGenModule &CGM = CGF.CGM;
80038005

8004-
llvm::Type *ResultType = CGF.ConvertType(E->getType());
8005-
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
8006+
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
80068007
llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8008+
8009+
if (isExecHi) {
8010+
Value *Rt2 = Builder.CreateLShr(Call, 32);
8011+
Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8012+
return Rt2;
8013+
}
8014+
80078015
return Call;
80088016
}
80098017

@@ -17857,10 +17865,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1785717865
return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
1785817866
}
1785917867
case AMDGPU::BI__builtin_amdgcn_read_exec:
17868+
return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
1786017869
case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
17861-
case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
17862-
return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
17863-
}
17870+
return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
17871+
case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
17872+
return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
1786417873
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
1786517874
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
1786617875
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:

clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl

+24
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b)
1313
*out = __builtin_amdgcn_ballot_w32(a == b);
1414
}
1515

16+
// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
17+
1618
// CHECK-LABEL: @test_ballot_wave32_target_attr(
1719
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
1820
__attribute__((target("wavefrontsize32")))
@@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b)
2123
*out = __builtin_amdgcn_ballot_w32(a == b);
2224
}
2325

26+
// CHECK-LABEL: @test_read_exec(
27+
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
28+
void test_read_exec(global uint* out) {
29+
*out = __builtin_amdgcn_read_exec();
30+
}
31+
32+
// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
33+
34+
// CHECK-LABEL: @test_read_exec_lo(
35+
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
36+
void test_read_exec_lo(global uint* out) {
37+
*out = __builtin_amdgcn_read_exec_lo();
38+
}
39+
40+
// CHECK-LABEL: @test_read_exec_hi(
41+
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
42+
// CHECK: lshr i64 [[A:%.*]], 32
43+
// CHECK: trunc i64 [[B:%.*]] to i32
44+
void test_read_exec_hi(global uint* out) {
45+
*out = __builtin_amdgcn_read_exec_hi();
46+
}
47+
2448
#if __AMDGCN_WAVEFRONT_SIZE != 32
2549
#error Wrong wavesize detected
2650
#endif

clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl

+23
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b)
1313
*out = __builtin_amdgcn_ballot_w64(a == b);
1414
}
1515

16+
// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
17+
1618
// CHECK-LABEL: @test_ballot_wave64_target_attr(
1719
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
1820
__attribute__((target("wavefrontsize64")))
@@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b)
2123
*out = __builtin_amdgcn_ballot_w64(a == b);
2224
}
2325

26+
// CHECK-LABEL: @test_read_exec(
27+
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
28+
void test_read_exec(global ulong* out) {
29+
*out = __builtin_amdgcn_read_exec();
30+
}
31+
32+
// CHECK-LABEL: @test_read_exec_lo(
33+
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
34+
void test_read_exec_lo(global ulong* out) {
35+
*out = __builtin_amdgcn_read_exec_lo();
36+
}
37+
38+
// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
39+
40+
// CHECK-LABEL: @test_read_exec_hi(
41+
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
42+
// CHECK: lshr i64 [[A:%.*]], 32
43+
void test_read_exec_hi(global ulong* out) {
44+
*out = __builtin_amdgcn_read_exec_hi();
45+
}
46+
2447
#if __AMDGCN_WAVEFRONT_SIZE != 64
2548
#error Wrong wavesize detected
2649
#endif

clang/test/CodeGenOpenCL/builtins-amdgcn.cl

+3-1
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) {
526526
// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
527527

528528
// CHECK-LABEL: @test_read_exec_hi(
529-
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
529+
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
530+
// CHECK: lshr i64 [[A:%.*]], 32
531+
// CHECK: trunc i64 [[B:%.*]] to i32
530532
void test_read_exec_hi(global uint* out) {
531533
*out = __builtin_amdgcn_read_exec_hi();
532534
}

0 commit comments

Comments
 (0)