Skip to content

Commit 7788782

Browse files
Fznamznonbader
authored andcommitted
[SYCL] Implement OpenCL kernel function generation
Summary: All SYCL memory objects shared between host and device (buffers/images, these objects map to OpenCL buffers and images) must be accessed through special accessor classes. The "device" side implementation of these classes contain pointers to the device memory. As there is no way in OpenCL to pass structures with pointers inside as kernel arguments, all memory objects shared between host and device must be passed to the kernel as raw pointers. SYCL also has a special mechanism for passing kernel arguments from host to the device. In OpenCL kernel arguments are set by calling `clSetKernelArg` function for each kernel argument, meanwhile in SYCL all the kernel arguments are fields of "SYCL kernel function" which can be defined as a lambda function or a named function object and passed as an argument to SYCL function for invoking kernels (such as `parallel_for` or `single_task`). To facilitate the mapping of SYCL kernel data members to OpenCL kernel arguments and overcome OpenCL limitations we added the generation of an OpenCL kernel function inside the compiler. An OpenCL kernel function contains the body of the SYCL kernel function, receives OpenCL-like parameters and additionally does some manipulation to initialize SYCL kernel data members with these parameters. In some pseudo code the OpenCL kernel function can look like this: ``` // SYCL kernel is defined in SYCL headers: template <typename KernelName, typename KernelType/*, ...*/> __attribute__((sycl_kernel)) void sycl_kernel_function(KernelType KernelFuncObj) { // ... KernelFuncObj(); } // Generated OpenCL kernel function __kernel KernelName(global int* a) { KernelType KernelFuncObj; // Actually kernel function object declaration // doesn't have a name in AST. // Let the kernel function object have one captured field - accessor A. // We need to init it with global pointer from arguments: KernelFuncObj.A.__init(a); // Body of the SYCL kernel from SYCL headers: { KernelFuncObj(); } } ``` OpenCL kernel function is generated by the compiler inside the Sema using AST nodes. Reviewers: bader, Naghasan, ABataev, keryell Subscribers: agozillon, mgorny, yaxunl, jfb, ebevhan, Anastasia, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D71016
1 parent 371290d commit 7788782

18 files changed

+1010
-18
lines changed

clang/include/clang/Sema/Sema.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13116,6 +13116,19 @@ class Sema final {
1311613116
/// Adds Callee to DeviceCallGraph if we don't know if its caller will be
1311713117
/// codegen'ed yet.
1311813118
bool checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee);
13119+
13120+
private:
13121+
/// Contains generated OpenCL kernel functions for SYCL.
13122+
SmallVector<Decl *, 4> SYCLKernels;
13123+
13124+
public:
13125+
void addSYCLKernel(Decl *D) { SYCLKernels.push_back(D); }
13126+
/// Access to SYCL kernels.
13127+
SmallVectorImpl<Decl *> &getSYCLKernels() { return SYCLKernels; }
13128+
13129+
/// Constructs an OpenCL kernel using the KernelCaller function and adds it to
13130+
/// the SYCL device code.
13131+
void constructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC);
1311913132
};
1312013133

1312113134
/// RAII object that enters a new expression evaluation context.

clang/lib/AST/ASTContext.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11072,6 +11072,10 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) {
1107211072
if (D->hasAttr<AliasAttr>() || D->hasAttr<UsedAttr>())
1107311073
return true;
1107411074

11075+
// If SYCL, only kernels are required.
11076+
if (LangOpts.SYCLIsDevice && !(D->hasAttr<OpenCLKernelAttr>()))
11077+
return false;
11078+
1107511079
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
1107611080
// Forward declarations aren't required.
1107711081
if (!FD->doesThisDeclarationHaveABody())

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2960,6 +2960,12 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
29602960
}
29612961
}
29622962

2963+
if (LangOpts.SYCLIsDevice && Global->hasAttr<OpenCLKernelAttr>() &&
2964+
MustBeEmitted(Global)) {
2965+
addDeferredDeclToEmit(GD);
2966+
return;
2967+
}
2968+
29632969
// Ignore declarations, they will be emitted on their first use.
29642970
if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
29652971
// Forward declarations are emitted lazily on first use.

clang/lib/Parse/ParseAST.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,10 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) {
168168
for (Decl *D : S.WeakTopLevelDecls())
169169
Consumer->HandleTopLevelDecl(DeclGroupRef(D));
170170

171+
if (S.getLangOpts().SYCLIsDevice)
172+
for (Decl *D : S.getSYCLKernels())
173+
Consumer->HandleTopLevelDecl(DeclGroupRef(D));
174+
171175
Consumer->HandleTranslationUnit(S.getASTContext());
172176

173177
// Finalize the template instantiation observer chain.

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 443 additions & 0 deletions
Large diffs are not rendered by default.

clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "clang/AST/DependentDiagnostic.h"
1919
#include "clang/AST/Expr.h"
2020
#include "clang/AST/ExprCXX.h"
21+
#include "clang/AST/Mangle.h"
2122
#include "clang/AST/PrettyDeclStackTrace.h"
2223
#include "clang/AST/TypeLoc.h"
2324
#include "clang/Basic/SourceManager.h"
@@ -6270,6 +6271,8 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
62706271
/// instantiations we have seen until this point.
62716272
void Sema::PerformPendingInstantiations(bool LocalOnly) {
62726273
std::deque<PendingImplicitInstantiation> delayedPCHInstantiations;
6274+
std::unique_ptr<MangleContext> MangleCtx(
6275+
getASTContext().createMangleContext());
62736276
while (!PendingLocalImplicitInstantiations.empty() ||
62746277
(!LocalOnly && !PendingInstantiations.empty())) {
62756278
PendingImplicitInstantiation Inst;
@@ -6288,17 +6291,25 @@ void Sema::PerformPendingInstantiations(bool LocalOnly) {
62886291
TSK_ExplicitInstantiationDefinition;
62896292
if (Function->isMultiVersion()) {
62906293
getASTContext().forEachMultiversionedFunctionVersion(
6291-
Function, [this, Inst, DefinitionRequired](FunctionDecl *CurFD) {
6294+
Function, [this, Inst, DefinitionRequired,
6295+
MangleCtx = move(MangleCtx)](FunctionDecl *CurFD) {
62926296
InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, CurFD, true,
62936297
DefinitionRequired, true);
6294-
if (CurFD->isDefined())
6298+
if (CurFD->isDefined()) {
62956299
CurFD->setInstantiationIsPending(false);
6300+
if (getLangOpts().SYCLIsDevice &&
6301+
CurFD->hasAttr<SYCLKernelAttr>())
6302+
constructOpenCLKernel(CurFD, *MangleCtx);
6303+
}
62966304
});
62976305
} else {
62986306
InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, Function, true,
62996307
DefinitionRequired, true);
6300-
if (Function->isDefined())
6308+
if (Function->isDefined()) {
6309+
if (getLangOpts().SYCLIsDevice && Function->hasAttr<SYCLKernelAttr>())
6310+
constructOpenCLKernel(Function, *MangleCtx);
63016311
Function->setInstantiationIsPending(false);
6312+
}
63026313
}
63036314
// Definition of a PCH-ed template declaration may be available only in the TU.
63046315
if (!LocalOnly && LangOpts.PCHInstantiateTemplates &&
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#pragma once
2+
3+
inline namespace cl {
4+
namespace sycl {
5+
namespace access {
6+
7+
enum class target {
8+
global_buffer = 2014,
9+
constant_buffer,
10+
local,
11+
image,
12+
host_buffer,
13+
host_image,
14+
image_array
15+
};
16+
17+
enum class mode {
18+
read = 1024,
19+
write,
20+
read_write,
21+
discard_write,
22+
discard_read_write,
23+
atomic
24+
};
25+
26+
enum class placeholder {
27+
false_t,
28+
true_t
29+
};
30+
31+
enum class address_space : int {
32+
private_space = 0,
33+
global_space,
34+
constant_space,
35+
local_space
36+
};
37+
} // namespace access
38+
39+
template <int dim>
40+
struct id {
41+
template <typename... T>
42+
id(T... args) {} // fake constructor
43+
private:
44+
// Some fake field added to see using of id arguments in the
45+
// kernel wrapper
46+
int Data;
47+
};
48+
49+
template <int dim>
50+
struct range {
51+
template <typename... T>
52+
range(T... args) {} // fake constructor
53+
private:
54+
// Some fake field added to see using of range arguments in the
55+
// kernel wrapper
56+
int Data;
57+
};
58+
59+
template <int dim>
60+
struct _ImplT {
61+
range<dim> AccessRange;
62+
range<dim> MemRange;
63+
id<dim> Offset;
64+
};
65+
66+
template <typename dataT, int dimensions, access::mode accessmode,
67+
access::target accessTarget = access::target::global_buffer,
68+
access::placeholder isPlaceholder = access::placeholder::false_t>
69+
class accessor {
70+
71+
public:
72+
void use(void) const {}
73+
template <typename... T>
74+
void use(T... args) {}
75+
template <typename... T>
76+
void use(T... args) const {}
77+
_ImplT<dimensions> impl;
78+
79+
private:
80+
void __init(__attribute__((opencl_global)) dataT *Ptr,
81+
range<dimensions> AccessRange,
82+
range<dimensions> MemRange, id<dimensions> Offset) {}
83+
};
84+
85+
} // namespace sycl
86+
} // namespace cl

clang/test/CodeGenSYCL/address-space-conversions.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -I%S/Inputs -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
22
void bar(int &Data) {}
33
// CHECK-DAG: define{{.*}} spir_func void @[[RAW_REF:[a-zA-Z0-9_]+]](i32 addrspace(4)* align 4 dereferenceable(4) %
44
void bar2(int &Data) {}
@@ -136,3 +136,15 @@ void usages() {
136136
// CHECK-DAG: define linkonce_odr spir_func void @_Z4tmplIPU3AS3iEvT_(i32 addrspace(3)* %
137137
// CHECK-DAG: define linkonce_odr spir_func void @_Z4tmplIPU3AS0iEvT_(i32* %
138138
// CHECK-DAG: define linkonce_odr spir_func void @_Z4tmplIPiEvT_(i32 addrspace(4)* %
139+
140+
#include "sycl.hpp"
141+
142+
int main() {
143+
cl::sycl::queue Q;
144+
Q.submit([&](cl::sycl::handler &cgh) {
145+
cgh.single_task<class test_kernel>([=]() {
146+
usages();
147+
});
148+
});
149+
return 0;
150+
}

clang/test/CodeGenSYCL/address-space-deduction.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2-
// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
2+
// RUN: %clang_cc1 -I%S/Inputs -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
33

4-
// CHECK-LABEL: @_Z4testv(
4+
// Validates SYCL deduction rules compliance.
5+
// See clang/docs/SYCLSupport.rst#address-space-handling for the details.
6+
7+
// CHECK-LABEL: define {{.*}} @_Z4testv(
58
// CHECK-NEXT: entry:
69
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
710
// CHECK-NEXT: [[PPTR:%.*]] = alloca i32 addrspace(4)*, align 8
@@ -87,7 +90,8 @@
8790
// CHECK-NEXT: store i8 addrspace(4)* getelementptr inbounds ([21 x i8], [21 x i8] addrspace(4)* addrspacecast ([21 x i8] addrspace(1)* @.str.1 to [21 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* [[SELECT_STR_TRIVIAL2_ASCAST]], align 8
8891
// CHECK-NEXT: ret void
8992
//
90-
void test() {
93+
void test() {
94+
9195
static const int foo = 0x42;
9296

9397

@@ -127,3 +131,15 @@ void test() {
127131
const char *select_str_trivial2 = false ? str : "Another hello world!";
128132
(void)select_str_trivial2;
129133
}
134+
135+
#include "sycl.hpp"
136+
137+
int main() {
138+
cl::sycl::queue Q;
139+
Q.submit([&](cl::sycl::handler &cgh) {
140+
cgh.single_task<class test_kernel>([=]() {
141+
test();
142+
});
143+
});
144+
return 0;
145+
}
Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=SPIR
2-
// RUN: %clang_cc1 -triple x86_64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=X86
1+
// RUN: %clang_cc1 -I%S/Inputs -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=SPIR
2+
// RUN: %clang_cc1 -I%S/Inputs -triple x86_64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=X86
33

44
// REQUIRES: x86-registered-target
55

@@ -8,15 +8,15 @@ void foo(__attribute__((opencl_local)) int *);
88
void foo(__attribute__((opencl_private)) int *);
99
void foo(int *);
1010

11-
// SPIR: declare spir_func void @_Z3fooPU3AS1i(i32 addrspace(1)*) #1
12-
// SPIR: declare spir_func void @_Z3fooPU3AS3i(i32 addrspace(3)*) #1
13-
// SPIR: declare spir_func void @_Z3fooPU3AS0i(i32*) #1
14-
// SPIR: declare spir_func void @_Z3fooPi(i32 addrspace(4)*) #1
11+
// SPIR: declare spir_func void @_Z3fooPU3AS1i(i32 addrspace(1)*)
12+
// SPIR: declare spir_func void @_Z3fooPU3AS3i(i32 addrspace(3)*)
13+
// SPIR: declare spir_func void @_Z3fooPU3AS0i(i32*)
14+
// SPIR: declare spir_func void @_Z3fooPi(i32 addrspace(4)*)
1515

16-
// X86: declare void @_Z3fooPU8SYglobali(i32*) #1
17-
// X86: declare void @_Z3fooPU7SYlocali(i32*) #1
18-
// X86: declare void @_Z3fooPU9SYprivatei(i32*) #1
19-
// X86: declare void @_Z3fooPi(i32*) #1
16+
// X86: declare void @_Z3fooPU8SYglobali(i32*)
17+
// X86: declare void @_Z3fooPU7SYlocali(i32*)
18+
// X86: declare void @_Z3fooPU9SYprivatei(i32*)
19+
// X86: declare void @_Z3fooPi(i32*)
2020

2121
void test() {
2222
__attribute__((opencl_global)) int *glob;
@@ -28,3 +28,15 @@ void test() {
2828
foo(priv);
2929
foo(def);
3030
}
31+
32+
#include "sycl.hpp"
33+
34+
int main() {
35+
cl::sycl::queue Q;
36+
Q.submit([&](cl::sycl::handler &cgh) {
37+
cgh.single_task<class test_kernel>([=]() {
38+
test();
39+
});
40+
});
41+
return 0;
42+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
2+
3+
// This test checks that compiler generates correct kernel wrapper for basic
4+
// case.
5+
6+
#include "Inputs/sycl.hpp"
7+
8+
template <typename name, typename Func>
9+
__attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
10+
kernelFunc();
11+
}
12+
13+
int main() {
14+
cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write> accessorA;
15+
kernel<class kernel_function>(
16+
[=]() {
17+
accessorA.use();
18+
});
19+
return 0;
20+
}
21+
22+
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_function
23+
// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG:%[a-zA-Z0-9_]+]],
24+
// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE:%[a-zA-Z0-9_]+_1]],
25+
// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE:%[a-zA-Z0-9_]+_2]],
26+
// CHECK-SAME: %"struct.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET:%[a-zA-Z0-9_]+]])
27+
// Check alloca for pointer argument
28+
// CHECK: [[MEM_ARG]].addr = alloca i32 addrspace(1)*
29+
// CHECK: [[ANONALLOCA:%[0-9]+]] = alloca %class.anon
30+
// CHECK: [[ARANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range"
31+
// CHECK: [[MRANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range"
32+
// CHECK: [[OIDA:%agg.tmp.*]] = alloca %"struct.cl::sycl::id"
33+
// CHECK: [[ANON:%[0-9]+]] = addrspacecast %class.anon* [[ANONALLOCA]] to %class.anon addrspace(4)*
34+
// CHECK: [[ARANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[ARANGEA]] to %"struct.cl::sycl::range" addrspace(4)*
35+
// CHECK: [[MRANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[MRANGEA]] to %"struct.cl::sycl::range" addrspace(4)*
36+
// CHECK: [[OIDT:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::id"* [[OIDA]] to %"struct.cl::sycl::id" addrspace(4)*
37+
//
38+
// Check store of kernel pointer argument to alloca
39+
// CHECK: store i32 addrspace(1)* [[MEM_ARG]], i32 addrspace(1)* addrspace(4)* [[MEM_ARG]].addr.ascast, align 8
40+
41+
// Check for default constructor of accessor
42+
// CHECK: call spir_func {{.*}}accessor
43+
44+
// Check accessor GEP
45+
// CHECK: [[ACCESSOR:%[a-zA-Z0-9_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* [[ANON]], i32 0, i32 0
46+
47+
// Check load from kernel pointer argument alloca
48+
// CHECK: [[MEM_LOAD:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[MEM_ARG]].addr.ascast
49+
50+
// Check accessor __init method call
51+
// CHECK: [[ARANGE:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range" addrspace(4)* [[ARANGET]] to %"struct.cl::sycl::range"*
52+
// CHECK: [[MRANGE:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range" addrspace(4)* [[MRANGET]] to %"struct.cl::sycl::range"*
53+
// CHECK: [[OID:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::id" addrspace(4)* [[OIDT]] to %"struct.cl::sycl::id"*
54+
// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.cl::sycl::accessor" addrspace(4)* {{[^,]*}} [[ACCESSOR]], i32 addrspace(1)* [[MEM_LOAD]], %"struct.cl::sycl::range"* byval({{.*}}) align 4 [[ARANGE]], %"struct.cl::sycl::range"* byval({{.*}}) align 4 [[MRANGE]], %"struct.cl::sycl::id"* byval({{.*}}) align 4 [[OID]])
55+
56+
// Check lambda "()" operator call
57+
// CHECK: call spir_func void @{{.*}}(%class.anon addrspace(4)* {{[^,]*}})
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -S -emit-llvm %s -o - | FileCheck %s
2+
3+
template <typename T>
4+
T bar(T arg);
5+
6+
void foo() {
7+
int a = 1 + 1 + bar(1);
8+
}
9+
10+
template <typename T>
11+
T bar(T arg) {
12+
return arg;
13+
}
14+
15+
template <typename name, typename Func>
16+
__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) {
17+
kernelFunc();
18+
}
19+
20+
// Make sure that definitions for the types not used in SYCL kernels are not
21+
// emitted
22+
// CHECK-NOT: %struct.A
23+
// CHECK-NOT: @a = {{.*}} %struct.A
24+
struct A {
25+
int x = 10;
26+
} a;
27+
28+
int main() {
29+
a.x = 8;
30+
kernel_single_task<class test_kernel>([]() { foo(); });
31+
return 0;
32+
}
33+
34+
// baz is not called from the SYCL kernel, so it must not be emitted
35+
// CHECK-NOT: define {{.*}} @{{.*}}baz
36+
void baz() {}
37+
38+
// CHECK-LABEL: define dso_local spir_kernel void @{{.*}}test_kernel
39+
// CHECK-LABEL: define internal spir_func void @_ZZ4mainENKUlvE_clEv(%class.anon addrspace(4)* {{.*}}%this)
40+
// CHECK-LABEL: define dso_local spir_func void @{{.*}}foo
41+
// CHECK-LABEL: define linkonce_odr spir_func i32 @{{.*}}bar

0 commit comments

Comments
 (0)