Skip to content

Commit 2650375

Browse files
authored
[OpenMP] Add amdgpu-num-work-groups attribute to OpenMP kernels (#87695)
Summary: This new attribute was introduced recently. We already do this for NVPTX kernels, so we should apply it for AMDGPU as well. This patch simply applies this metadata in cases where a lower bound is known.
1 parent 3b961d1 commit 2650375

File tree

2 files changed

+37
-0
lines changed

2 files changed

+37
-0
lines changed
+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Test target codegen - host bc file has to be created first.
2+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc
3+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
4+
// expected-no-diagnostics
5+
6+
#ifndef HEADER
7+
#define HEADER
8+
9+
void foo(int N) { // Each pragma below yields one offload kernel; the FileCheck patterns name them by pragma line (l10, l13, l16, l19), so keep line positions fixed.
10+
#pragma omp target teams distribute parallel for simd
11+
for (int i = 0; i < N; ++i)
12+
; // No clauses: the matching kernel should carry "amdgpu-flat-work-group-size"="1,256".
13+
#pragma omp target teams distribute parallel for simd thread_limit(4)
14+
for (int i = 0; i < N; ++i)
15+
; // thread_limit(4): the matching kernel should carry "amdgpu-flat-work-group-size"="1,4".
16+
#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42))))
17+
for (int i = 0; i < N; ++i)
18+
; // launch_bounds(42, 42): should carry "amdgpu-flat-work-group-size"="1,42" and "amdgpu-max-num-workgroups"="42,1,1".
19+
#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42)))) num_threads(22)
20+
for (int i = 0; i < N; ++i)
21+
; // num_threads(22) with the same launch_bounds: should carry "amdgpu-flat-work-group-size"="1,22" and "amdgpu-max-num-workgroups"="42,1,1".
22+
}
23+
24+
#endif
25+
26+
// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l10({{.*}}) #[[ATTR1:.+]] {
27+
// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l13({{.*}}) #[[ATTR2:.+]] {
28+
// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l16({{.*}}) #[[ATTR3:.+]] {
29+
// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l19({{.*}}) #[[ATTR4:.+]] {
30+
31+
// CHECK: attributes #[[ATTR1]] = { {{.*}} "amdgpu-flat-work-group-size"="1,256" {{.*}} }
32+
// CHECK: attributes #[[ATTR2]] = { {{.*}} "amdgpu-flat-work-group-size"="1,4" {{.*}} }
33+
// CHECK: attributes #[[ATTR3]] = { {{.*}} "amdgpu-flat-work-group-size"="1,42" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} }
34+
// CHECK: attributes #[[ATTR4]] = { {{.*}} "amdgpu-flat-work-group-size"="1,22" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} }

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -4791,6 +4791,9 @@ void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel,
47914791
updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true);
47924792
updateNVPTXMetadata(Kernel, "minctasm", LB, false);
47934793
}
4794+
if (T.isAMDGPU())
4795+
Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1");
4796+
47944797
Kernel.addFnAttr("omp_target_num_teams", std::to_string(LB));
47954798
}
47964799

0 commit comments

Comments
 (0)