Skip to content

Commit f562cdf

Browse files
committed
Merge remote-tracking branch 'origin/sycl' into link_devicelib_by_default_in_aot_style
2 parents 640ba0d + e00ab74 commit f562cdf

File tree

81 files changed

+1872
-496
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

81 files changed

+1872
-496
lines changed

buildbot/dependency.conf

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ ocl_cpu_rt_ver=2020.11.8.0.27
44
# https://github.com/intel/llvm/releases/download/2020-WW36/win-oclcpuexp-2020.11.8.0.27_rel.zip
55
ocl_cpu_rt_ver_win=2020.11.8.0.27
66
# Same GPU driver supports Level Zero and OpenCL:
7-
# https://github.com/intel/compute-runtime/releases/tag/20.29.17408
8-
ocl_gpu_rt_ver=20.29.17408
7+
# https://github.com/intel/compute-runtime/releases/tag/20.34.17727
8+
ocl_gpu_rt_ver=20.34.17727
99
# Same GPU driver supports Level Zero and OpenCL:
10-
# https://downloadmirror.intel.com/29674/a08/igfx_win10_100.8336.zip
11-
ocl_gpu_rt_ver_win=27.20.100.8336
10+
# https://downloadmirror.intel.com/29817/a08/igfx_win10_100.8673.zip
11+
ocl_gpu_rt_ver_win=27.20.100.8673
1212
intel_sycl_ver=build
1313
# https://github.com/oneapi-src/oneTBB/releases/download/v2021.1-beta08/oneapi-tbb-2021.1-beta08-lin.tgz
1414
tbb_ver=2021.1.9.636
@@ -24,8 +24,8 @@ fpga_ver_win=20200811_000006
2424
[DRIVER VERSIONS]
2525
cpu_driver_lin=2020.11.8.0.27
2626
cpu_driver_win=2020.11.8.0.27
27-
gpu_driver_lin=20.29.17408
28-
gpu_driver_win=27.20.100.8336
27+
gpu_driver_lin=20.34.17727
28+
gpu_driver_win=27.20.100.8673
2929
fpga_driver_lin=2020.11.8.0.27
3030
fpga_driver_win=2020.11.8.0.27
3131
# NVidia CUDA driver

clang/include/clang/Basic/Attr.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1299,7 +1299,6 @@ def LoopUnrollHint : InheritableAttr {
12991299

13001300
def IntelReqdSubGroupSize: InheritableAttr {
13011301
let Spellings = [GNU<"intel_reqd_sub_group_size">,
1302-
CXX11<"cl", "intel_reqd_sub_group_size">,
13031302
CXX11<"intel", "reqd_sub_group_size">];
13041303
let Args = [ExprArgument<"SubGroupSize">];
13051304
let Subjects = SubjectList<[Function, CXXMethod], ErrorDiag>;

clang/include/clang/Basic/AttrDocs.td

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3476,30 +3476,16 @@ code. See `cl_intel_required_subgroup_size
34763476
for details.
34773477

34783478
SYCL documentation:
3479-
The [[cl::intel_reqd_sub_group_size(n)]] and [[intel::reqd_sub_group_size(n)]]
3480-
attribute indicates that the kernel must be compiled and executed with a
3481-
sub-group of size n. The value of n must be set to a sub-group size supported
3482-
by the device, or device compilation will fail.
3479+
The [[intel::reqd_sub_group_size(n)]] attribute indicates that the kernel must
3480+
be compiled and executed with a sub-group of size n. The value of n must be set
3481+
to a sub-group size supported by the device, or device compilation will fail.
34833482

34843483
In addition to device functions, the required sub-group size attribute may also
34853484
be specified in the definition of a named functor object and lambda functions,
34863485
as in the examples below:
34873486

34883487
.. code-block:: c++
34893488

3490-
class Functor
3491-
{
3492-
void operator()(item<1> item) [[cl::intel_reqd_sub_group_size(16)]]
3493-
{
3494-
/* kernel code */
3495-
}
3496-
}
3497-
3498-
kernel<class kernel_name>(
3499-
[]() [[cl::intel_reqd_sub_group_size(n)]] {
3500-
/* kernel code */
3501-
});
3502-
35033489
class Functor
35043490
{
35053491
[[intel::reqd_sub_group_size(16)]] void operator()(item<1> item)

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10999,11 +10999,9 @@ def err_sycl_restrict : Error<
1099910999
"|use a const static or global variable that is neither zero-initialized "
1100011000
"nor constant-initialized"
1100111001
"}0">;
11002-
def warn_sycl_kernel_too_many_args : Warning<
11003-
"kernel argument count (%0) exceeds supported maximum of %1 on GPU">,
11004-
InGroup<SyclStrict>;
11005-
def note_sycl_kernel_args_count : Note<"array elements and fields of a "
11006-
"class/struct may be counted separately">;
11002+
def warn_sycl_kernel_too_big_args : Warning<
11003+
"size of kernel arguments (%0 bytes) exceeds supported maximum of %1 bytes "
11004+
"on GPU">, InGroup<SyclStrict>;
1100711005
def err_sycl_virtual_types : Error<
1100811006
"No class with a vtable can be used in a SYCL kernel or any code included in the kernel">;
1100911007
def note_sycl_recursive_function_declared_here: Note<"function implemented using recursion declared here">;
@@ -11070,11 +11068,6 @@ def err_ivdep_declrefexpr_arg : Error<
1107011068
def warn_ivdep_redundant : Warning <"ignoring redundant Intel FPGA loop "
1107111069
"attribute 'ivdep': safelen %select{INF|%1}0 >= safelen %select{INF|%3}2">,
1107211070
InGroup<IgnoredAttributes>;
11073-
def warn_attribute_spelling_deprecated : Warning<
11074-
"attribute %0 is deprecated">,
11075-
InGroup<DeprecatedAttributes>;
11076-
def note_spelling_suggestion : Note<
11077-
"did you mean to use %0 instead?">;
1107811071

1107911072
// errors of expect.with.probability
1108011073
def err_probability_not_constant_float : Error<

clang/lib/Driver/Driver.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6225,8 +6225,9 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
62256225
}
62266226

62276227
// For /P, preprocess to file named after BaseInput.
6228-
if (C.getArgs().hasArg(options::OPT__SLASH_P)) {
6229-
assert(AtTopLevel && isa<PreprocessJobAction>(JA));
6228+
if (C.getArgs().hasArg(options::OPT__SLASH_P) &&
6229+
((AtTopLevel && isa<PreprocessJobAction>(JA)) ||
6230+
isa<OffloadBundlingJobAction>(JA))) {
62306231
StringRef BaseName = llvm::sys::path::filename(BaseInput);
62316232
StringRef NameArg;
62326233
if (Arg *A = C.getArgs().getLastArg(options::OPT__SLASH_Fi))

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4200,8 +4200,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
42004200
SYCLStdArg->render(Args, CmdArgs);
42014201
CmdArgs.push_back("-fsycl-std-layout-kernel-params");
42024202
} else {
4203-
// Ensure the default version in SYCL mode is 1.2.1 (aka 2017)
4204-
CmdArgs.push_back("-sycl-std=2017");
4203+
// Ensure the default version in SYCL mode is 2020
4204+
CmdArgs.push_back("-sycl-std=2020");
42054205
}
42064206
}
42074207

@@ -5995,10 +5995,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
59955995
options::OPT_fno_gnu_inline_asm, true))
59965996
CmdArgs.push_back("-fno-gnu-inline-asm");
59975997

5998+
bool EnableSYCLEarlyOptimizations =
5999+
Args.hasFlag(options::OPT_fsycl_early_optimizations,
6000+
options::OPT_fno_sycl_early_optimizations,
6001+
Triple.getSubArch() != llvm::Triple::SPIRSubArch_fpga);
6002+
59986003
// Enable vectorization per default according to the optimization level
59996004
// selected. For optimization levels that want vectorization we use the alias
60006005
// option to simplify the hasFlag logic.
60016006
bool EnableVec = shouldEnableVectorizerAtOLevel(Args, false);
6007+
if (UseSYCLTriple && EnableSYCLEarlyOptimizations)
6008+
EnableVec = false; // But disable vectorization for SYCL device code
60026009
OptSpecifier VectorizeAliasOption =
60036010
EnableVec ? options::OPT_O_Group : options::OPT_fvectorize;
60046011
if (Args.hasFlag(options::OPT_fvectorize, VectorizeAliasOption,
@@ -6007,6 +6014,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
60076014

60086015
// -fslp-vectorize is enabled based on the optimization level selected.
60096016
bool EnableSLPVec = shouldEnableVectorizerAtOLevel(Args, true);
6017+
if (UseSYCLTriple && EnableSYCLEarlyOptimizations)
6018+
EnableSLPVec = false; // But disable vectorization for SYCL device code
60106019
OptSpecifier SLPVectAliasOption =
60116020
EnableSLPVec ? options::OPT_O_Group : options::OPT_fslp_vectorize;
60126021
if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption,

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,13 @@ void SYCL::Linker::ConstructJob(Compilation &C, const JobAction &JA,
205205
SpirvInputs);
206206
}
207207

208+
static const char *makeExeName(Compilation &C, StringRef Name) {
209+
llvm::SmallString<8> ExeName(Name);
210+
if (C.getDriver().IsCLMode())
211+
ExeName.append(".exe");
212+
return C.getArgs().MakeArgString(ExeName);
213+
}
214+
208215
void SYCL::fpga::BackendCompiler::ConstructJob(Compilation &C,
209216
const JobAction &JA,
210217
const InputInfo &Output,
@@ -313,7 +320,8 @@ void SYCL::fpga::BackendCompiler::ConstructJob(Compilation &C,
313320
CmdArgs.push_back(Args.MakeArgString(A->getAsString(Args)));
314321
}
315322

316-
SmallString<128> ExecPath(getToolChain().GetProgramPath("aoc"));
323+
SmallString<128> ExecPath(
324+
getToolChain().GetProgramPath(makeExeName(C, "aoc")));
317325
const char *Exec = C.getArgs().MakeArgString(ExecPath);
318326
auto Cmd = std::make_unique<Command>(
319327
JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None);
@@ -350,7 +358,8 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
350358
static_cast<const toolchains::SYCLToolChain &>(getToolChain());
351359
TC.TranslateBackendTargetArgs(Args, CmdArgs);
352360
TC.TranslateLinkerTargetArgs(Args, CmdArgs);
353-
SmallString<128> ExecPath(getToolChain().GetProgramPath("ocloc"));
361+
SmallString<128> ExecPath(
362+
getToolChain().GetProgramPath(makeExeName(C, "ocloc")));
354363
const char *Exec = C.getArgs().MakeArgString(ExecPath);
355364
auto Cmd = std::make_unique<Command>(
356365
JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None);
@@ -383,7 +392,8 @@ void SYCL::x86_64::BackendCompiler::ConstructJob(Compilation &C,
383392

384393
TC.TranslateBackendTargetArgs(Args, CmdArgs);
385394
TC.TranslateLinkerTargetArgs(Args, CmdArgs);
386-
SmallString<128> ExecPath(getToolChain().GetProgramPath("opencl-aot"));
395+
SmallString<128> ExecPath(
396+
getToolChain().GetProgramPath(makeExeName(C, "opencl-aot")));
387397
const char *Exec = C.getArgs().MakeArgString(ExecPath);
388398
auto Cmd = std::make_unique<Command>(
389399
JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None);

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3013,13 +3013,6 @@ static void handleSubGroupSize(Sema &S, Decl *D, const ParsedAttr &AL) {
30133013
if (D->getAttr<IntelReqdSubGroupSizeAttr>())
30143014
S.Diag(AL.getLoc(), diag::warn_duplicate_attribute) << AL;
30153015

3016-
if (AL.getAttributeSpellingListIndex() ==
3017-
IntelReqdSubGroupSizeAttr::CXX11_cl_intel_reqd_sub_group_size) {
3018-
S.Diag(AL.getLoc(), diag::warn_attribute_spelling_deprecated) << AL;
3019-
S.Diag(AL.getLoc(), diag::note_spelling_suggestion)
3020-
<< "'intel::reqd_sub_group_size'";
3021-
}
3022-
30233016
S.addIntelReqdSubGroupSizeAttr(D, AL, E);
30243017
}
30253018

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ enum KernelInvocationKind {
5656

5757
const static std::string InitMethodName = "__init";
5858
const static std::string FinalizeMethodName = "__finalize";
59-
constexpr unsigned GPUMaxKernelArgsNum = 2000;
59+
constexpr unsigned GPUMaxKernelArgsSize = 2048;
6060

6161
namespace {
6262

@@ -1656,32 +1656,35 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
16561656
using SyclKernelFieldHandler::leaveStruct;
16571657
};
16581658

1659-
class SyclKernelNumArgsChecker : public SyclKernelFieldHandler {
1659+
class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler {
16601660
SourceLocation KernelLoc;
1661-
unsigned NumOfParams = 0;
1661+
unsigned SizeOfParams = 0;
1662+
1663+
void addParam(QualType ArgTy) {
1664+
SizeOfParams +=
1665+
SemaRef.getASTContext().getTypeSizeInChars(ArgTy).getQuantity();
1666+
}
16621667

16631668
bool handleSpecialType(QualType FieldTy) {
16641669
const CXXRecordDecl *RecordDecl = FieldTy->getAsCXXRecordDecl();
16651670
assert(RecordDecl && "The accessor/sampler must be a RecordDecl");
16661671
CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName);
16671672
assert(InitMethod && "The accessor/sampler must have the __init method");
1668-
NumOfParams += InitMethod->getNumParams();
1673+
for (const ParmVarDecl *Param : InitMethod->parameters())
1674+
addParam(Param->getType());
16691675
return true;
16701676
}
16711677

16721678
public:
1673-
SyclKernelNumArgsChecker(Sema &S, SourceLocation Loc)
1679+
SyclKernelArgsSizeChecker(Sema &S, SourceLocation Loc)
16741680
: SyclKernelFieldHandler(S), KernelLoc(Loc) {}
16751681

1676-
~SyclKernelNumArgsChecker() {
1682+
~SyclKernelArgsSizeChecker() {
16771683
if (SemaRef.Context.getTargetInfo().getTriple().getSubArch() ==
1678-
llvm::Triple::SPIRSubArch_gen) {
1679-
if (NumOfParams > GPUMaxKernelArgsNum) {
1680-
SemaRef.Diag(KernelLoc, diag::warn_sycl_kernel_too_many_args)
1681-
<< NumOfParams << GPUMaxKernelArgsNum;
1682-
SemaRef.Diag(KernelLoc, diag::note_sycl_kernel_args_count);
1683-
}
1684-
}
1684+
llvm::Triple::SPIRSubArch_gen)
1685+
if (SizeOfParams > GPUMaxKernelArgsSize)
1686+
SemaRef.Diag(KernelLoc, diag::warn_sycl_kernel_too_big_args)
1687+
<< SizeOfParams << GPUMaxKernelArgsSize;
16851688
}
16861689

16871690
bool handleSyclAccessorType(FieldDecl *FD, QualType FieldTy) final {
@@ -1703,12 +1706,12 @@ class SyclKernelNumArgsChecker : public SyclKernelFieldHandler {
17031706
}
17041707

17051708
bool handlePointerType(FieldDecl *FD, QualType FieldTy) final {
1706-
NumOfParams++;
1709+
addParam(FieldTy);
17071710
return true;
17081711
}
17091712

17101713
bool handleScalarType(FieldDecl *FD, QualType FieldTy) final {
1711-
NumOfParams++;
1714+
addParam(FieldTy);
17121715
return true;
17131716
}
17141717

@@ -1717,17 +1720,17 @@ class SyclKernelNumArgsChecker : public SyclKernelFieldHandler {
17171720
}
17181721

17191722
bool handleSyclHalfType(FieldDecl *FD, QualType FieldTy) final {
1720-
NumOfParams++;
1723+
addParam(FieldTy);
17211724
return true;
17221725
}
17231726

17241727
bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final {
1725-
NumOfParams++;
1728+
addParam(FieldTy);
17261729
return true;
17271730
}
17281731
bool handleSyclStreamType(const CXXRecordDecl *, const CXXBaseSpecifier &,
17291732
QualType FieldTy) final {
1730-
NumOfParams++;
1733+
addParam(FieldTy);
17311734
return true;
17321735
}
17331736
using SyclKernelFieldHandler::handleSyclHalfType;
@@ -2468,7 +2471,7 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,
24682471

24692472
SyclKernelFieldChecker FieldChecker(*this);
24702473
SyclKernelUnionChecker UnionChecker(*this);
2471-
SyclKernelNumArgsChecker NumArgsChecker(*this, Args[0]->getExprLoc());
2474+
SyclKernelArgsSizeChecker ArgsSizeChecker(*this, Args[0]->getExprLoc());
24722475
// check that calling kernel conforms to spec
24732476
QualType KernelParamTy = KernelFunc->getParamDecl(0)->getType();
24742477
if (KernelParamTy->isReferenceType()) {
@@ -2488,9 +2491,9 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,
24882491
KernelObjVisitor Visitor{*this};
24892492
DiagnosingSYCLKernel = true;
24902493
Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker,
2491-
NumArgsChecker);
2494+
ArgsSizeChecker);
24922495
Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker,
2493-
NumArgsChecker);
2496+
ArgsSizeChecker);
24942497
DiagnosingSYCLKernel = false;
24952498
if (!FieldChecker.isValid() || !UnionChecker.isValid())
24962499
KernelFunc->setInvalidDecl();

clang/test/CodeGenSYCL/reqd-sub-group-size.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
class Functor16 {
44
public:
5-
[[cl::intel_reqd_sub_group_size(16)]] void operator()() const {}
5+
[[intel::reqd_sub_group_size(16)]] void operator()() const {}
66
};
77

8-
[[cl::intel_reqd_sub_group_size(8)]] void foo() {}
8+
[[intel::reqd_sub_group_size(8)]] void foo() {}
99

1010
class Functor {
1111
public:
@@ -17,7 +17,7 @@ class Functor {
1717
template <int SIZE>
1818
class Functor5 {
1919
public:
20-
[[cl::intel_reqd_sub_group_size(SIZE)]] void operator()() const {}
20+
[[intel::reqd_sub_group_size(SIZE)]] void operator()() const {}
2121
};
2222

2323
template <typename name, typename Func>
@@ -33,7 +33,7 @@ void bar() {
3333
kernel<class kernel_name2>(f);
3434

3535
kernel<class kernel_name3>(
36-
[]() [[cl::intel_reqd_sub_group_size(4)]] {});
36+
[]() [[intel::reqd_sub_group_size(4)]]{});
3737

3838
Functor5<2> f5;
3939
kernel<class kernel_name4>(f5);

0 commit comments

Comments
 (0)