Skip to content

Commit d995b2e

Browse files
authored
[Clang][AMDGPU] Accept builtins in lambda declarations (#135027)
`Sema::getCurFunctionDecl(AllowLambda = false)` returns a nullptr when the lambda declaration is outside a function (for example, when assigning a lambda to a static constexpr variable). This triggered an assertion in `SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall`. Using `Sema::getCurFunctionDecl(AllowLambda = true)` returns the declaration of the enclosing lambda. Stumbled with this issue when refactoring some code in CK.
1 parent 68b7cba commit d995b2e

File tree

3 files changed

+88
-1
lines changed

3 files changed

+88
-1
lines changed

clang/lib/Sema/SemaAMDGPU.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
2727
// position of memory order and scope arguments in the builtin
2828
unsigned OrderIndex, ScopeIndex;
2929

30-
const auto *FD = SemaRef.getCurFunctionDecl();
30+
const auto *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
3131
assert(FD && "AMDGPU builtins should not be used outside of a function");
3232
llvm::StringMap<bool> CallerFeatureMap;
3333
getASTContext().getFunctionFeatureMap(CallerFeatureMap, FD);
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// REQUIRES: amdgpu-registered-target
2+
// RUN: %clang_cc1 -std=c++20 -triple amdgcn -target-cpu tahiti -emit-llvm -fcuda-is-device -verify=no-memrealtime -o - %s
3+
// RUN: %clang_cc1 -std=c++20 -triple amdgcn -target-cpu gfx950 -emit-llvm -fcuda-is-device -o - %s
4+
5+
#define __device__ __attribute__((device))
6+
#define __shared__ __attribute__((shared))
7+
8+
struct S {
9+
static constexpr auto memrealtime_lambda = []() {
10+
__builtin_amdgcn_s_memrealtime(); // no-memrealtime-error{{'__builtin_amdgcn_s_memrealtime' needs target feature s-memrealtime}}
11+
};
12+
};
13+
14+
__attribute__((target("s-memrealtime")))
15+
__device__ void test_target_dependant_builtin_attr_fail() {
16+
S::memrealtime_lambda();
17+
}
18+
19+
constexpr auto memrealtime_lambda = []() {
20+
__builtin_amdgcn_s_memrealtime(); // no-memrealtime-error{{'__builtin_amdgcn_s_memrealtime' needs target feature s-memrealtime}}
21+
};
22+
23+
__attribute__((target("s-memrealtime")))
24+
__device__ void global_test_target_dependant_builtin_attr_fail() {
25+
memrealtime_lambda();
26+
}
27+
28+
__attribute__((target("s-memrealtime")))
29+
__device__ void local_test_target_dependant_builtin_attr_fail() {
30+
static constexpr auto f = []() {
31+
__builtin_amdgcn_s_memrealtime(); // no-memrealtime-error{{'__builtin_amdgcn_s_memrealtime' needs target feature s-memrealtime}}
32+
};
33+
f();
34+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// RUN: %clang_cc1 -std=c++20 -triple amdgcn -target-cpu gfx90a -fsyntax-only -fcuda-is-device -verify=gfx90a -o - %s
2+
// RUN: %clang_cc1 -std=c++20 -triple amdgcn -target-cpu gfx950 -fsyntax-only -fcuda-is-device -o - %s
3+
4+
#define __device__ __attribute__((device))
5+
#define __shared__ __attribute__((shared))
6+
7+
struct S {
8+
static constexpr auto make_buffer_rsrc_lambda = [](void *p, short stride, int num, int flags) {
9+
return __builtin_amdgcn_make_buffer_rsrc(p, stride, num, flags);
10+
};
11+
12+
static constexpr auto global_load_lds_lambda = [](void* src, __shared__ void *dst) {
13+
__builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
14+
};
15+
};
16+
17+
__device__ __amdgpu_buffer_rsrc_t test_simple_builtin(void *p, short stride, int num, int flags) {
18+
return S::make_buffer_rsrc_lambda(p, stride, num, flags);
19+
}
20+
21+
__device__ void test_target_dependant_builtin(void *src, __shared__ void *dst) {
22+
S::global_load_lds_lambda(src, dst);
23+
}
24+
25+
constexpr auto make_buffer_rsrc_lambda = [](void *p, short stride, int num, int flags) {
26+
return __builtin_amdgcn_make_buffer_rsrc(p, stride, num, flags);
27+
};
28+
29+
constexpr auto global_load_lds_lambda = [](void* src, __shared__ void *dst) {
30+
__builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
31+
};
32+
33+
__device__ __amdgpu_buffer_rsrc_t global_test_simple_builtin(void *p, short stride, int num, int flags) {
34+
return make_buffer_rsrc_lambda(p, stride, num, flags);
35+
}
36+
37+
__device__ void global_test_target_dependant_builtin(void *src, __shared__ void *dst) {
38+
global_load_lds_lambda(src, dst);
39+
}
40+
41+
__device__ __amdgpu_buffer_rsrc_t local_test_simple_builtin(void *p, short stride, int num, int flags) {
42+
constexpr auto f = [](void *p, short stride, int num, int flags) {
43+
return __builtin_amdgcn_make_buffer_rsrc(p, stride, num, flags);
44+
};
45+
return f(p, stride, num, flags);
46+
}
47+
48+
__device__ void local_test_target_dependant_builtin(void *src, __shared__ void *dst) {
49+
constexpr auto f = [](void* src, __shared__ void *dst) {
50+
__builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
51+
};
52+
f(src, dst);
53+
}

0 commit comments

Comments
 (0)