Skip to content

[AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot #69567

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7995,15 +7995,23 @@ enum SpecialRegisterAccessKind {
Write,
};

// Generates the IR for the __builtin_amdgcn_read_exec* builtins
// (read_exec, read_exec_lo and read_exec_hi) by lowering them to a call to
// the amdgcn_ballot intrinsic with a constant-true i1 argument.
//
// When isExecHi is false, the ballot call itself is returned. When isExecHi
// is true, the ballot result is shifted right by 32 bits and truncated to
// i32 before being returned.
static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
llvm::Type *RegisterType,
llvm::Type *ValueType) {
llvm::Type *ValueType, bool isExecHi) {
CodeGen::CGBuilderTy &Builder = CGF.Builder;
CodeGen::CodeGenModule &CGM = CGF.CGM;

llvm::Type *ResultType = CGF.ConvertType(E->getType());
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});

// read_exec_hi: shift the ballot result right by 32 bits, then narrow the
// shifted value to i32.
if (isExecHi) {
Value *Rt2 = Builder.CreateLShr(Call, 32);
Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
return Rt2;
}

return Call;
}

Expand Down Expand Up @@ -17857,10 +17865,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
}
case AMDGPU::BI__builtin_amdgcn_read_exec:
return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
}
return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
Expand Down
24 changes: 24 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b)
*out = __builtin_amdgcn_ballot_w32(a == b);
}

// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]

// CHECK-LABEL: @test_ballot_wave32_target_attr(
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
__attribute__((target("wavefrontsize32")))
Expand All @@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b)
*out = __builtin_amdgcn_ballot_w32(a == b);
}

// CHECK-LABEL: @test_read_exec(
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
void test_read_exec(global uint* out) {
*out = __builtin_amdgcn_read_exec();
}

// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]

// CHECK-LABEL: @test_read_exec_lo(
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
void test_read_exec_lo(global uint* out) {
*out = __builtin_amdgcn_read_exec_lo();
}

// CHECK-LABEL: @test_read_exec_hi(
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this codegen on wave32? Or should this directly emit 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is the code generated on wave32.

// CHECK: lshr i64 [[A:%.*]], 32
// CHECK: trunc i64 [[B:%.*]] to i32
void test_read_exec_hi(global uint* out) {
*out = __builtin_amdgcn_read_exec_hi();
}

#if __AMDGCN_WAVEFRONT_SIZE != 32
#error Wrong wavesize detected
#endif
23 changes: 23 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b)
*out = __builtin_amdgcn_ballot_w64(a == b);
}

// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]

// CHECK-LABEL: @test_ballot_wave64_target_attr(
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
__attribute__((target("wavefrontsize64")))
Expand All @@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b)
*out = __builtin_amdgcn_ballot_w64(a == b);
}

// CHECK-LABEL: @test_read_exec(
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
void test_read_exec(global ulong* out) {
*out = __builtin_amdgcn_read_exec();
}

// CHECK-LABEL: @test_read_exec_lo(
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
void test_read_exec_lo(global ulong* out) {
*out = __builtin_amdgcn_read_exec_lo();
}

// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]

// CHECK-LABEL: @test_read_exec_hi(
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
// CHECK: lshr i64 [[A:%.*]], 32
void test_read_exec_hi(global ulong* out) {
*out = __builtin_amdgcn_read_exec_hi();
}

#if __AMDGCN_WAVEFRONT_SIZE != 64
#error Wrong wavesize detected
#endif
4 changes: 3 additions & 1 deletion clang/test/CodeGenOpenCL/builtins-amdgcn.cl
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) {
// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]

// CHECK-LABEL: @test_read_exec_hi(
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needs wave32 and wave64 tests. These ballot tests belong in builtins-amdgcn-wave32.cl and builtins-amdgcn-wave64.cl

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved to the respective wave32.cl and wave64.cl tests.

// CHECK: lshr i64 [[A:%.*]], 32
// CHECK: trunc i64 [[B:%.*]] to i32
void test_read_exec_hi(global uint* out) {
*out = __builtin_amdgcn_read_exec_hi();
}
Expand Down