diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 523f85b57c823..638a3330ada8e 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10642,7 +10642,9 @@ def err_sycl_restrict : Error< "|allocate storage" "|use inline assembly" "|call a dllimport function" - "|call a variadic function}0">; + "|call a variadic function" + "|call an undefined function without SYCL_EXTERNAL attribute" + "}0">; def err_sycl_virtual_types : Error< "No class with a vtable can be used in a SYCL kernel or any code included in the kernel">; def note_sycl_used_here : Note<"used here">; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index c2d8663aad0cd..353ca10894b92 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12298,11 +12298,13 @@ class Sema final { KernelAllocateStorage, KernelUseAssembly, KernelCallDllimportFunction, - KernelCallVariadicFunction - }; + KernelCallVariadicFunction, + KernelCallUndefinedFunction + }; + bool isKnownGoodSYCLDecl(const Decl *D); void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC); - void MarkDevice(void); + void MarkDevice(); /// Creates a DeviceDiagBuilder that emits the diagnostic if the current /// context is "used as device code". @@ -12323,10 +12325,25 @@ class Sema final { /// SYCLDiagIfDeviceCode(Loc, diag::err_thread_unsupported); DeviceDiagBuilder SYCLDiagIfDeviceCode(SourceLocation Loc, unsigned DiagID); - /// Checks if Callee function is a device function and emits - /// diagnostics if it is known that it is a device function, adds this - /// function to the DeviceCallGraph otherwise. - void checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee); + /// Check whether we're allowed to call Callee from the current context. + /// + /// - If the call is never allowed in a semantically-correct program + /// emits an error and returns false. + /// + /// - If the call is allowed in semantically-correct programs, but only if + /// it's never codegen'ed, creates a deferred diagnostic to be emitted if + /// and when the caller is codegen'ed, and returns true. + /// + /// - Otherwise, returns true without emitting any diagnostics. + /// + /// Adds Callee to DeviceCallGraph if we don't know if its caller will be + /// codegen'ed yet. + bool checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee); + + /// Emit diagnostic that can't be emitted with deferred diagnostics mechanism. + /// At this step we imply that all device functions are marked with + /// sycl_device attribute. + void finalizeSYCLDelayedAnalysis(); }; template diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 27baeb6f9630e..c7006cb04229b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4124,6 +4124,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } else if (IsSYCL) { // Ensure the default version in SYCL mode is 1.2.1 CmdArgs.push_back("-sycl-std=1.2.1"); + // The user had not pass SYCL version, thus we'll employ no-sycl-strict + // to allow address-space unqualified pointers in function params/return + // along with marking the same function with explicit SYCL_EXTERNAL + CmdArgs.push_back("-Wno-sycl-strict"); } if (IsOpenMPDevice) { diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 2eabeed850fc4..9ab9cd7af27db 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -974,6 +974,7 @@ void Sema::ActOnEndOfTranslationUnitFragment(TUFragmentKind Kind) { if (SyclIntHeader != nullptr) SyclIntHeader->emit(getLangOpts().SYCLIntHeader); MarkDevice(); + finalizeSYCLDelayedAnalysis(); } // Finalize analysis of OpenMP-specific constructs. diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index e0c53dc4aae5e..414c18b492816 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -18016,6 +18016,12 @@ Decl *Sema::getObjCDeclContext() const { } Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD) { + // Due to SYCL functions are template we check if they have appropriate + // attribute prior to checking if it is a template + if (LangOpts.SYCLIsDevice && + (FD->hasAttr() || FD->hasAttr())) + return FunctionEmissionStatus::Emitted; + // Templates are emitted when they're instantiated. if (FD->isDependentContext()) return FunctionEmissionStatus::TemplateDiscarded; @@ -18080,6 +18086,23 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD) { return FunctionEmissionStatus::Emitted; } + if (getLangOpts().SYCLIsDevice) { + if (!FD->hasAttr() && !FD->hasAttr()) + return FunctionEmissionStatus::Unknown; + + // Check whether this function is externally visible -- if so, it's + // known-emitted. + // + // We have to check the GVA linkage of the function's *definition* -- if we + // only have a declaration, we don't know whether or not the function will + // be emitted, because (say) the definition could include "inline". + FunctionDecl *Def = FD->getDefinition(); + + if (Def && + !isDiscardableGVALinkage(getASTContext().GetGVALinkageForFunction(Def))) + return FunctionEmissionStatus::Emitted; + } + // Otherwise, the function is known-emitted if it's in our set of // known-emitted functions. return (DeviceKnownEmittedFns.count(FD) > 0) diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 5b13eb44169e1..4c99faeac2959 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -14684,8 +14684,9 @@ Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType, MarkFunctionReferenced(ConstructLoc, Constructor); if (getLangOpts().CUDA && !CheckCUDACall(ConstructLoc, Constructor)) return ExprError(); - if (getLangOpts().SYCLIsDevice) - checkSYCLDeviceFunction(ConstructLoc, Constructor); + if (getLangOpts().SYCLIsDevice && + !checkSYCLDeviceFunction(ConstructLoc, Constructor)) + return ExprError(); return CXXConstructExpr::Create( Context, DeclInitType, ConstructLoc, Constructor, Elidable, diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 68d220690cf20..e7d7b889f6634 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -301,8 +301,8 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef Locs, if (getLangOpts().CUDA && !CheckCUDACall(Loc, FD)) return true; - if (getLangOpts().SYCLIsDevice) - checkSYCLDeviceFunction(Loc, FD); + if (getLangOpts().SYCLIsDevice && !checkSYCLDeviceFunction(Loc, FD)) + return true; } if (auto *MD = dyn_cast(D)) { diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 771a95bac7069..26cc8916fae5e 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -537,6 +537,7 @@ class MarkDeviceFunction : public RecursiveASTVisitor { } return true; } + Sema &SemaRef; }; @@ -1410,18 +1411,6 @@ void Sema::MarkDevice(void) { // SYCL device specific diagnostics implementation // ----------------------------------------------------------------------------- -// Do we know that we will eventually codegen the given function? -static bool isKnownEmitted(Sema &S, FunctionDecl *FD) { - assert(FD && "Given function may not be null."); - - if (FD->hasAttr() || FD->hasAttr()) - return true; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return S.DeviceKnownEmittedFns.count(FD) > 0; -} - Sema::DeviceDiagBuilder Sema::SYCLDiagIfDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(getLangOpts().SYCLIsDevice && @@ -1430,29 +1419,104 @@ Sema::DeviceDiagBuilder Sema::SYCLDiagIfDeviceCode(SourceLocation Loc, DeviceDiagBuilder::Kind DiagKind = [this, FD] { if (ConstructingOpenCLKernel || !FD) return DeviceDiagBuilder::K_Nop; - if (isKnownEmitted(*this, FD)) + if (getEmissionStatus(FD) == Sema::FunctionEmissionStatus::Emitted) return DeviceDiagBuilder::K_ImmediateWithCallStack; return DeviceDiagBuilder::K_Deferred; }(); return DeviceDiagBuilder(DiagKind, Loc, DiagID, FD, *this); } -void Sema::checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee) { +bool Sema::checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee) { + assert(getLangOpts().SYCLIsDevice && + "Should only be called during SYCL compilation"); assert(Callee && "Callee may not be null."); // Errors in unevaluated context don't need to be generated, // so we can safely skip them. - if (isUnevaluatedContext()) - return; + if (isUnevaluatedContext() || isConstantEvaluated()) + return true; FunctionDecl *Caller = dyn_cast(getCurLexicalContext()); + if (!Caller) + return true; + + bool CallerKnownEmitted = + getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted; + // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. - if (Caller && isKnownEmitted(*this, Caller)) - markKnownEmitted(*this, Caller, Callee, Loc, isKnownEmitted); - else if (Caller) + if (CallerKnownEmitted) + markKnownEmitted(*this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) { + return S.getEmissionStatus(FD) == Sema::FunctionEmissionStatus::Emitted; + }); + else DeviceCallGraph[Caller].insert({Callee, Loc}); + + DeviceDiagBuilder::Kind DiagKind = DeviceDiagBuilder::K_Nop; + + // TODO Set DiagKind to K_Immediate/K_Deferred to emit diagnostics for Callee + + DeviceDiagBuilder(DiagKind, Loc, diag::err_sycl_restrict, Caller, *this) + << Sema::KernelCallUndefinedFunction; + DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, + Caller, *this) + << Callee; + + return DiagKind != DeviceDiagBuilder::K_Immediate && + DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack; +} + +static void emitCallToUndefinedFnDiag(Sema &SemaRef, const FunctionDecl *Callee, + const FunctionDecl *Caller, + const SourceLocation &Loc) { + // Somehow an unspecialized template appears to be in callgraph or list of + // device functions. We don't want to emit diagnostic here. + if (Callee->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate) + return; + + // Don't emit diagnostic for functions not called from device code + if (!Caller->hasAttr() && !Caller->hasAttr()) + return; + + bool RedeclHasAttr = false; + + for (const Decl *Redecl : Callee->redecls()) { + if (const FunctionDecl *FD = dyn_cast_or_null(Redecl)) { + if ((FD->hasAttr() && + !FD->getAttr()->isImplicit()) || + FD->hasAttr()) { + RedeclHasAttr = true; + break; + } + } + } + + // Disallow functions with neither definition nor SYCL_EXTERNAL mark + bool NotDefinedNoAttr = !Callee->isDefined() && !RedeclHasAttr; + + if (NotDefinedNoAttr && !Callee->getBuiltinID()) { + SemaRef.Diag(Loc, diag::err_sycl_restrict) + << Sema::KernelCallUndefinedFunction; + SemaRef.Diag(Callee->getLocation(), diag::note_previous_decl) << Callee; + SemaRef.Diag(Caller->getLocation(), diag::note_called_by) << Caller; + } +} + +void Sema::finalizeSYCLDelayedAnalysis() { + assert(getLangOpts().SYCLIsDevice && + "Should only be called during SYCL compilation"); + + llvm::DenseSet Checked; + + for (const auto &EmittedWithLoc : DeviceKnownEmittedFns) { + const FunctionDecl *Caller = EmittedWithLoc.getSecond().FD; + const SourceLocation &Loc = EmittedWithLoc.getSecond().Loc; + const FunctionDecl *Callee = EmittedWithLoc.getFirst(); + + if (Checked.insert(Callee).second) + emitCallToUndefinedFnDiag(*this, Callee, Caller, Loc); + } } // ----------------------------------------------------------------------------- diff --git a/clang/test/CodeGenSYCL/address-space-new.cpp b/clang/test/CodeGenSYCL/address-space-new.cpp index 3ba027bdef088..77a8cd297181a 100644 --- a/clang/test/CodeGenSYCL/address-space-new.cpp +++ b/clang/test/CodeGenSYCL/address-space-new.cpp @@ -10,7 +10,7 @@ struct HasX { struct Y : SpaceWaster, HasX {}; -void bar(HasX &hx); +SYCL_EXTERNAL void bar(HasX &hx); void baz(Y &y) { bar(y); diff --git a/clang/test/CodeGenSYCL/bool-vectors.cpp b/clang/test/CodeGenSYCL/bool-vectors.cpp index 9db276fab1ddd..6e81b160b4d76 100644 --- a/clang/test/CodeGenSYCL/bool-vectors.cpp +++ b/clang/test/CodeGenSYCL/bool-vectors.cpp @@ -11,17 +11,17 @@ using bool4 = bool __attribute__((ext_vector_type(4))); using bool8 = bool __attribute__((ext_vector_type(8))); using bool16 = bool __attribute__((ext_vector_type(16))); -extern bool1 foo1(); +extern SYCL_EXTERNAL bool1 foo1(); // CHECK-DAG: declare spir_func zeroext i1 @[[FOO1:[a-zA-Z0-9_]+]]() -extern bool2 foo2(); +extern SYCL_EXTERNAL bool2 foo2(); // CHECK-DAG: declare spir_func <2 x i1> @[[FOO2:[a-zA-Z0-9_]+]]() -extern bool3 foo3(); +extern SYCL_EXTERNAL bool3 foo3(); // CHECK-DAG: declare spir_func <3 x i1> @[[FOO3:[a-zA-Z0-9_]+]]() -extern bool4 foo4(); +extern SYCL_EXTERNAL bool4 foo4(); // CHECK-DAG: declare spir_func <4 x i1> @[[FOO4:[a-zA-Z0-9_]+]]() -extern bool8 foo8(); +extern SYCL_EXTERNAL bool8 foo8(); // CHECK-DAG: declare spir_func <8 x i1> @[[FOO8:[a-zA-Z0-9_]+]]() -extern bool16 foo16(); +extern SYCL_EXTERNAL bool16 foo16(); // CHECK-DAG: declare spir_func <16 x i1> @[[FOO16:[a-zA-Z0-9_]+]]() void bar (bool1 b) {}; diff --git a/clang/test/CodeGenSYCL/fpga_pipes.cpp b/clang/test/CodeGenSYCL/fpga_pipes.cpp index 2eddd5fe7383b..555571ee1a8c7 100644 --- a/clang/test/CodeGenSYCL/fpga_pipes.cpp +++ b/clang/test/CodeGenSYCL/fpga_pipes.cpp @@ -3,10 +3,10 @@ // CHECK: %opencl.pipe_ro_t using WPipeTy = __attribute__((pipe("write_only"))) const int; -WPipeTy WPipeCreator(); +SYCL_EXTERNAL WPipeTy WPipeCreator(); using RPipeTy = __attribute__((pipe("read_only"))) const int; -RPipeTy RPipeCreator(); +SYCL_EXTERNAL RPipeTy RPipeCreator(); template void foo(PipeTy Pipe) {} @@ -24,7 +24,7 @@ template constexpr PipeStorageTy TempStorage __attribute__((io_pipe_id(N))) = {2}; -void boo(PipeStorageTy PipeStorage); +SYCL_EXTERNAL void boo(PipeStorageTy PipeStorage); template struct ethernet_pipe { diff --git a/clang/test/CodeGenSYCL/unique-stable-name.cpp b/clang/test/CodeGenSYCL/unique-stable-name.cpp index 22f5071a27c46..4c6a83274dcf3 100644 --- a/clang/test/CodeGenSYCL/unique-stable-name.cpp +++ b/clang/test/CodeGenSYCL/unique-stable-name.cpp @@ -8,7 +8,7 @@ // CHECK: @[[LAMBDA_IN_DEP_INT:[^\w]+]] = private unnamed_addr constant [[DEP_INT_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ28lambda_in_dependent_functionIiEvvEUlvE23->12\00", // CHECK: @[[LAMBDA_IN_DEP_X:[^\w]+]] = private unnamed_addr constant [[DEP_LAMBDA_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ28lambda_in_dependent_functionIZZ4mainENKUlvE42->5clEvEUlvE46->16EvvEUlvE23->12\00", -extern "C" void printf(const char*); +extern "C" void printf(const char *) {} template void template_param() { diff --git a/clang/test/SemaSYCL/call-to-undefined-function.cpp b/clang/test/SemaSYCL/call-to-undefined-function.cpp new file mode 100644 index 0000000000000..44cfcad131ccf --- /dev/null +++ b/clang/test/SemaSYCL/call-to-undefined-function.cpp @@ -0,0 +1,169 @@ +// RUN: %clang_cc1 -fsycl-is-device -verify -fsyntax-only %s + +void defined() { +} + +void undefined(); +// expected-note@-1 {{'undefined' declared here}} + +SYCL_EXTERNAL void undefinedExternal(); + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + +template +void definedTpl() { +} + +template +void undefinedTpl(); +// expected-note@-1 {{'undefinedTpl' declared here}} + +template +extern SYCL_EXTERNAL void undefinedExternalTpl(); + +template +void definedPartialTpl() { +} + +template <> +void definedPartialTpl() { +} + +template +struct Tpl { + void defined() { + } +}; + +template +struct Tpl { + void defined() { + } +}; + +template +struct TplWithTplMethod { + template + void defined() { + } +}; + +template +struct TplWithTplMethod { + template + void defined() { + } +}; + +template +struct TplWithTplMethod2 { + template + void defined() { + } + + template <> + void defined() { + } +}; + +template +struct TplWithTplMethod2 { + template + void defined() { + } + + template <> + void defined() { + } +}; + +void forwardDeclFn(); +void forwardDeclFn2(); + +void useFwDeclFn() { + forwardDeclFn(); + forwardDeclFn2(); +} + +void forwardDeclFn() { +} + +int main() { + kernel_single_task([]() { + // expected-note@-1 {{called by 'operator()'}} + // expected-note@-2 {{called by 'operator()'}} + + // simple functions + defined(); + undefinedExternal(); + undefined(); + // expected-error@-1 {{SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute}} + + // templated functions + definedTpl(); + undefinedExternalTpl(); + undefinedTpl(); + // expected-error@-1 {{SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute}} + + // partially specialized template function + definedPartialTpl(); + definedPartialTpl(); + definedPartialTpl(); + definedPartialTpl(); + + // template class with specialization + { + Tpl tpl; + tpl.defined(); + } + + { + Tpl tpl; + tpl.defined(); + } + + // template class with template method, both have specializations. + { + TplWithTplMethod tpl; + tpl.defined(); + tpl.defined(); + tpl.defined(); + tpl.defined(); + } + + { + TplWithTplMethod tpl; + tpl.defined(); + tpl.defined(); + tpl.defined(); + tpl.defined(); + } + + { + TplWithTplMethod2 tpl; + tpl.defined(); + tpl.defined(); + tpl.defined(); + tpl.defined(); + } + + { + TplWithTplMethod2 tpl; + tpl.defined(); + tpl.defined(); + tpl.defined(); + tpl.defined(); + } + + // forward-declared function + useFwDeclFn(); + forwardDeclFn(); + forwardDeclFn2(); + }); +} + +void forwardDeclFn2() { +} diff --git a/clang/test/SemaSYCL/deferred-diagnostics-emit.cpp b/clang/test/SemaSYCL/deferred-diagnostics-emit.cpp index 030985832f3cb..a95c8c9051a55 100644 --- a/clang/test/SemaSYCL/deferred-diagnostics-emit.cpp +++ b/clang/test/SemaSYCL/deferred-diagnostics-emit.cpp @@ -16,7 +16,7 @@ __attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { } // namespace cl //variadic functions from SYCL kernels emit a deferred diagnostic -void variadic(int, ...); +void variadic(int, ...) {} int calledFromKernel(int a) { // expected-error@+1 {{zero-length arrays are not permitted in C++}} diff --git a/clang/test/SemaSYCL/markfunction-astconsumer.cpp b/clang/test/SemaSYCL/markfunction-astconsumer.cpp index 05b8617d1d1be..70dac892e47b3 100644 --- a/clang/test/SemaSYCL/markfunction-astconsumer.cpp +++ b/clang/test/SemaSYCL/markfunction-astconsumer.cpp @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -fcxx-exceptions -fsycl-is-device -Wno-return-type -verify -fsyntax-only -std=c++17 %s void bar(); -template -void usage(T func ) { +template +void usage(T func) { bar(); } @@ -11,17 +11,17 @@ __attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } -void foo(); - // expected-error@+2 {{SYCL kernel cannot call a recursive function}} - // expected-note@+1 2{{function implemented using recursion declared here}} +SYCL_EXTERNAL void foo(); +// expected-error@+2 {{SYCL kernel cannot call a recursive function}} +// expected-note@+1 2{{function implemented using recursion declared here}} void fum() { return fum(); }; int main() { - kernel_single_task([]() { usage(foo ); }); + kernel_single_task([]() { usage(foo); }); } -template +template void templ_func() { // expected-error@+1 {{SYCL kernel cannot call a recursive function}} fum(); foo(); } -void bar(){ templ_func(); } +void bar() { templ_func(); } diff --git a/clang/test/SemaSYCL/restrict-recursion3.cpp b/clang/test/SemaSYCL/restrict-recursion3.cpp index 83b26972325ea..44862e58d68dc 100644 --- a/clang/test/SemaSYCL/restrict-recursion3.cpp +++ b/clang/test/SemaSYCL/restrict-recursion3.cpp @@ -1,19 +1,23 @@ -// RUN: %clang_cc1 -fcxx-exceptions -fsycl-is-device -Wno-return-type -verify -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fcxx-exceptions -fsycl-is-device -Wno-return-type -Wno-sycl-strict -verify -fsyntax-only -std=c++17 %s // This recursive function is not called from sycl kernel, // so it should not be diagnosed. -int fib(int n) -{ - if (n <= 1) - return n; - return fib(n-1) + fib(n-2); +int fib(int n) { + if (n <= 1) + return n; + return fib(n - 1) + fib(n - 2); } void kernel3(void) { ; } -using myFuncDef = int(int,int); +using myFuncDef = int(int, int); + +typedef __typeof__(sizeof(int)) size_t; + +SYCL_EXTERNAL +void *operator new(size_t); void usage3(myFuncDef functionPtr) { // expected-error@+1 {{SYCL kernel cannot allocate storage}} @@ -22,11 +26,11 @@ void usage3(myFuncDef functionPtr) { } int addInt(int n, int m) { - return n+m; + return n + m; } template - // expected-note@+1 2{{function implemented using recursion declared here}} +// expected-note@+1 2{{function implemented using recursion declared here}} __attribute__((sycl_kernel)) void kernel_single_task2(Func kernelFunc) { // expected-note@+1 {{called by 'kernel_single_task2}} kernelFunc(); @@ -36,6 +40,6 @@ __attribute__((sycl_kernel)) void kernel_single_task2(Func kernelFunc) { int main() { // expected-note@+1 {{called by 'operator()'}} - kernel_single_task2([]() { usage3( &addInt ); }); + kernel_single_task2([]() { usage3(&addInt); }); return fib(5); } diff --git a/clang/test/SemaSYCL/restrict-recursion4.cpp b/clang/test/SemaSYCL/restrict-recursion4.cpp index cad0b9aff7273..b3be1560901ff 100644 --- a/clang/test/SemaSYCL/restrict-recursion4.cpp +++ b/clang/test/SemaSYCL/restrict-recursion4.cpp @@ -1,21 +1,25 @@ -// RUN: %clang_cc1 -fcxx-exceptions -fsycl-is-device -Wno-return-type -verify -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fcxx-exceptions -fsycl-is-device -Wno-return-type -Wno-sycl-strict -verify -fsyntax-only -std=c++17 %s // This recursive function is not called from sycl kernel, // so it should not be diagnosed. -int fib(int n) -{ - if (n <= 1) - return n; - return fib(n-1) + fib(n-2); +int fib(int n) { + if (n <= 1) + return n; + return fib(n - 1) + fib(n - 2); } - // expected-note@+1 2{{function implemented using recursion declared here}} +// expected-note@+1 2{{function implemented using recursion declared here}} void kernel2(void) { // expected-error@+1 {{SYCL kernel cannot call a recursive function}} kernel2(); } -using myFuncDef = int(int,int); +using myFuncDef = int(int, int); + +typedef __typeof__(sizeof(int)) size_t; + +SYCL_EXTERNAL +void *operator new(size_t); void usage2(myFuncDef functionPtr) { // expected-error@+1 {{SYCL kernel cannot allocate storage}} @@ -25,7 +29,7 @@ void usage2(myFuncDef functionPtr) { } int addInt(int n, int m) { - return n+m; + return n + m; } template @@ -36,6 +40,6 @@ __attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { int main() { // expected-note@+1 {{called by 'operator()'}} - kernel_single_task([]() {usage2(&addInt);}); + kernel_single_task([]() { usage2(&addInt); }); return fib(5); } diff --git a/clang/test/SemaSYCL/sycl-dllimport-dllexport.cpp b/clang/test/SemaSYCL/sycl-dllimport-dllexport.cpp index 54a9f8986f3a9..47650cbb4f224 100644 --- a/clang/test/SemaSYCL/sycl-dllimport-dllexport.cpp +++ b/clang/test/SemaSYCL/sycl-dllimport-dllexport.cpp @@ -1,16 +1,16 @@ // RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -fms-extensions \ // RUN: -aux-triple x86_64-unknown-linux-gnu -fsycl-is-device -fsyntax-only \ -// RUN: -DWARNCHECK %s -o /dev/null 2>&1 | FileCheck %s +// RUN: -DWARNCHECK %s -o /dev/null 2>&1 | FileCheck %s // check random triple aux-triple with sycl-device // RUN: %clang_cc1 -triple spir64-unknown-windows-sycldevice -fsyntax-only \ -// RUN: -fms-extensions -DWARNCHECK %s -o /dev/null 2>&1 | FileCheck %s +// RUN: -fms-extensions -DWARNCHECK %s -o /dev/null 2>&1 | FileCheck %s // check without -aux-triple but sycl-device // RUN: %clang_cc1 -triple spir64-unknown-windows-sycldevice -fsycl-is-device \ // RUN: -aux-triple x86_64-pc-windows-msvc -fms-extensions -fsyntax-only \ // RUN: -DWARNCHECK %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes CHECKALL -// check -aux-tripe without sycl-device +// check -aux-tripe without sycl-device // RUN: %clang_cc1 -triple spir64-unknown-windows-sycldevice -fsyntax-only \ // RUN: -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -fms-extensions \ @@ -40,7 +40,7 @@ int __declspec(dllexport) foo(int a) { return a; } // expected-note@+1 {{'bar' declared here}} -int __declspec(dllimport) bar(); +SYCL_EXTERNAL int __declspec(dllimport) bar(); // expected-note@+2 {{previous attribute is here}} // expected-note@+1 {{previous declaration is here}} int __declspec(dllimport) foobar(); diff --git a/clang/test/SemaSYCL/sycl-restrict.cpp b/clang/test/SemaSYCL/sycl-restrict.cpp index 8c8f862fcf42d..028b932572624 100644 --- a/clang/test/SemaSYCL/sycl-restrict.cpp +++ b/clang/test/SemaSYCL/sycl-restrict.cpp @@ -6,6 +6,13 @@ namespace std { class type_info; typedef __typeof__(sizeof(int)) size_t; } // namespace std + +// we're testing a restricted mode, thus just provide a stub implementation for +// function with address-space-unspecified pointers. +void *operator new(std::size_t) { + return reinterpret_cast(1); +} + namespace Check_User_Operators { class Fraction { // expected-error@+2 {{SYCL kernel cannot call a recursive function}} diff --git a/clang/test/SemaSYCL/sycl-varargs-cconv.cpp b/clang/test/SemaSYCL/sycl-varargs-cconv.cpp index 35140b0e6967c..75a7904ac8d19 100644 --- a/clang/test/SemaSYCL/sycl-varargs-cconv.cpp +++ b/clang/test/SemaSYCL/sycl-varargs-cconv.cpp @@ -1,13 +1,15 @@ -// RUN: %clang_cc1 -fsycl-is-device -verify -fsyntax-only %s -// RUN: %clang_cc1 -fsycl-is-device -verify -fsyntax-only -DPRINTF_INVALID_DEF %s -// RUN: %clang_cc1 -fsycl-is-device -verify -fsyntax-only -DPRINTF_INVALID_DECL %s -// RUN: %clang_cc1 -fsycl-is-device -verify -fsyntax-only -DPRINTF_VALID1 %s -// RUN: %clang_cc1 -fsycl-is-device -verify -fsyntax-only -DPRINTF_VALID2 %s +// The following runs use -Wno-sycl-strict to bypass SYCL_EXTERNAL applied to +// funtion with raw pointer parameter +// RUN: %clang_cc1 -fsycl-is-device -verify -Wno-sycl-strict -fsyntax-only %s +// RUN: %clang_cc1 -fsycl-is-device -verify -Wno-sycl-strict -fsyntax-only -DPRINTF_INVALID_DEF %s +// RUN: %clang_cc1 -fsycl-is-device -verify -fsyntax-only -Wno-sycl-strict -DPRINTF_INVALID_DECL %s +// RUN: %clang_cc1 -fsycl-is-device -verify -fsyntax-only -Wno-sycl-strict -DPRINTF_VALID1 %s +// RUN: %clang_cc1 -fsycl-is-device -verify -fsyntax-only -Wno-sycl-strict -DPRINTF_VALID2 %s #if defined(PRINTF_INVALID_DECL) -extern "C" int __spirv_ocl_printf(const char *__format, ...); +extern "C" SYCL_EXTERNAL int __spirv_ocl_printf(const char *__format, ...); namespace A { - int __spirv_ocl_printf(const char *__format, ...); +SYCL_EXTERNAL int __spirv_ocl_printf(const char *__format, ...); } #elif defined(PRINTF_INVALID_DEF) int __spirv_ocl_printf(const char *__format, ...) { @@ -17,23 +19,26 @@ int __spirv_ocl_printf(const char *__format, ...) { class A { friend int __spirv_ocl_printf(const char *__format, ...); }; +SYCL_EXTERNAL int __spirv_ocl_printf(const char *__format, ...); #elif defined(PRINTF_VALID2) extern "C" { - extern "C++" { - int __spirv_ocl_printf(const char *__format, ...); - } +extern "C++" { +SYCL_EXTERNAL +int __spirv_ocl_printf(const char *__format, ...); +} } #else -int __spirv_ocl_printf(const char *__format, ...); +SYCL_EXTERNAL +int __spirv_ocl_printf(const char *, ...); #endif -int __cdecl foo(int, ...); // expected-no-error +SYCL_EXTERNAL int __cdecl foo(int, ...); // expected-no-error float bar(float f, ...) { return ++f; } // expected-no-error void bar() { - foo(5); // expected-no-error + foo(5); // expected-no-error bar(7.0f); // expected-no-error } diff --git a/clang/test/SemaSYCL/variadic-func-call.cpp b/clang/test/SemaSYCL/variadic-func-call.cpp index 515c0ce68991e..875c2b557b3cf 100644 --- a/clang/test/SemaSYCL/variadic-func-call.cpp +++ b/clang/test/SemaSYCL/variadic-func-call.cpp @@ -1,13 +1,13 @@ // RUN: %clang_cc1 -triple spir64-unknown-unknown -fsycl-is-device -fsyntax-only -verify %s -void variadic(int, ...); +void variadic(int, ...) {} namespace NS { -void variadic(int, ...); +void variadic(int, ...) {} } struct S { - S(int, ...); - void operator()(int, ...); + S(int, ...) {} + void operator()(int, ...) {} }; void foo() { @@ -15,8 +15,8 @@ void foo() { x(5, 10); //expected-error{{SYCL kernel cannot call a variadic function}} } -void overloaded(int, int); -void overloaded(int, ...); +void overloaded(int, int) {} +void overloaded(int, ...) {} template __attribute__((sycl_kernel)) void task(Func KF) { KF(); // expected-note 2 {{called by 'task}} diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp index 24e355780b622..38bcba20d8a27 100644 --- a/sycl/include/CL/__spirv/spirv_ops.hpp +++ b/sycl/include/CL/__spirv/spirv_ops.hpp @@ -8,29 +8,40 @@ #pragma once #include +#include #include #include #include #ifdef __SYCL_DEVICE_ONLY__ +template +extern SYCL_EXTERNAL RetT __spirv_ImageQueryFormat(ImageT); + +template +extern SYCL_EXTERNAL RetT __spirv_ImageQueryOrder(ImageT); + +template +extern SYCL_EXTERNAL RetT __spirv_ImageQuerySize(ImageT); + template -extern void __spirv_ImageWrite(ImageT, CoordT, ValT); +extern SYCL_EXTERNAL void __spirv_ImageWrite(ImageT, CoordT, ValT); -template -extern ReTTT __spirv_ImageRead(ImageT, TempArgT); +template +extern SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT); template -extern SampledType __spirv_SampledImage(ImageT, __ocl_sampler_t); +extern SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT, __ocl_sampler_t); template -extern TempRetT __spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, - float); +extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType, + TempArgT, int, + float); -#ifdef __SYCL_NVPTX__ +#ifdef __SYCL_NVPTX__ // -// This a workaround to avoid a SPIR-V ABI issue. +// This a workaround to avoid a SPIR-V ABI issue. // template @@ -60,13 +71,13 @@ __ocl_event_t __spirv_GroupAsyncCopy(__spv::Scope Execution, } #else template -extern __ocl_event_t __spirv_GroupAsyncCopy( +extern SYCL_EXTERNAL __ocl_event_t __spirv_GroupAsyncCopy( __spv::Scope Execution, __attribute__((opencl_local)) dataT *Dest, - __attribute__((opencl_global)) dataT *Src, size_t NumElements, size_t Stride, - __ocl_event_t E) noexcept; + __attribute__((opencl_global)) dataT *Src, size_t NumElements, + size_t Stride, __ocl_event_t E) noexcept; template -extern __ocl_event_t __spirv_GroupAsyncCopy( +extern SYCL_EXTERNAL __ocl_event_t __spirv_GroupAsyncCopy( __spv::Scope Execution, __attribute__((opencl_global)) dataT *Dest, __attribute__((opencl_local)) dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t E) noexcept; @@ -77,45 +88,45 @@ extern __ocl_event_t __spirv_GroupAsyncCopy( // Atomic SPIR-V builtins #define __SPIRV_ATOMIC_LOAD(AS, Type) \ - extern Type __spirv_AtomicLoad(AS const Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O); + extern SYCL_EXTERNAL Type __spirv_AtomicLoad( \ + AS const Type *P, __spv::Scope S, __spv::MemorySemanticsMask O); #define __SPIRV_ATOMIC_STORE(AS, Type) \ - extern void __spirv_AtomicStore(AS Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O, Type V); + extern SYCL_EXTERNAL void __spirv_AtomicStore( \ + AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask O, Type V); #define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \ - extern Type __spirv_AtomicExchange(AS Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O, Type V); + extern SYCL_EXTERNAL Type __spirv_AtomicExchange( \ + AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask O, Type V); #define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ - extern Type __spirv_AtomicCompareExchange( \ + extern SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \ AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask E, \ __spv::MemorySemanticsMask U, Type V, Type C); #define __SPIRV_ATOMIC_IADD(AS, Type) \ - extern Type __spirv_AtomicIAdd(AS Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O, Type V); + extern SYCL_EXTERNAL Type __spirv_AtomicIAdd( \ + AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask O, Type V); #define __SPIRV_ATOMIC_ISUB(AS, Type) \ - extern Type __spirv_AtomicISub(AS Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O, Type V); + extern SYCL_EXTERNAL Type __spirv_AtomicISub( \ + AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask O, Type V); #define __SPIRV_ATOMIC_SMIN(AS, Type) \ - extern Type __spirv_AtomicSMin(AS Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O, Type V); + extern SYCL_EXTERNAL Type __spirv_AtomicSMin( \ + AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask O, Type V); #define __SPIRV_ATOMIC_UMIN(AS, Type) \ - extern Type __spirv_AtomicUMin(AS Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O, Type V); + extern SYCL_EXTERNAL Type __spirv_AtomicUMin( \ + AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask O, Type V); #define __SPIRV_ATOMIC_SMAX(AS, Type) \ - extern Type __spirv_AtomicSMax(AS Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O, Type V); + extern SYCL_EXTERNAL Type __spirv_AtomicSMax( \ + AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask O, Type V); #define __SPIRV_ATOMIC_UMAX(AS, Type) \ - extern Type __spirv_AtomicUMax(AS Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O, Type V); + extern SYCL_EXTERNAL Type __spirv_AtomicUMax( \ + AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask O, Type V); #define __SPIRV_ATOMIC_AND(AS, Type) \ - extern Type __spirv_AtomicAnd(AS Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O, Type V); + extern SYCL_EXTERNAL Type __spirv_AtomicAnd( \ + AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask O, Type V); #define __SPIRV_ATOMIC_OR(AS, Type) \ - extern Type __spirv_AtomicOr(AS Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O, Type V); + extern SYCL_EXTERNAL Type __spirv_AtomicOr( \ + AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask O, Type V); #define __SPIRV_ATOMIC_XOR(AS, Type) \ - extern Type __spirv_AtomicXor(AS Type *P, __spv::Scope S, \ - __spv::MemorySemanticsMask O, Type V); + extern SYCL_EXTERNAL Type __spirv_AtomicXor( \ + AS Type *P, __spv::Scope S, __spv::MemorySemanticsMask O, Type V); #define __SPIRV_ATOMIC_FLOAT(AS, Type) \ __SPIRV_ATOMIC_LOAD(AS, Type) \ @@ -157,7 +168,9 @@ extern __ocl_event_t __spirv_GroupAsyncCopy( return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \ } -#define __SPIRV_ATOMICS(macro, Arg) macro(__attribute__((opencl_global)), Arg) macro(__attribute__((opencl_local)), Arg) +#define __SPIRV_ATOMICS(macro, Arg) \ + macro(__attribute__((opencl_global)), Arg) \ + macro(__attribute__((opencl_local)), Arg) __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, float) __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, int) @@ -169,107 +182,359 @@ __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long) __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min) __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max) -extern bool __spirv_GroupAll(__spv::Scope Execution, bool Predicate) noexcept; +extern SYCL_EXTERNAL bool __spirv_GroupAll(__spv::Scope Execution, + bool Predicate) noexcept; -extern bool __spirv_GroupAny(__spv::Scope Execution, bool Predicate) noexcept; +extern SYCL_EXTERNAL bool __spirv_GroupAny(__spv::Scope Execution, + bool Predicate) noexcept; template -extern dataT __spirv_GroupBroadcast(__spv::Scope Execution, dataT Value, - size_t LocalId) noexcept; +extern SYCL_EXTERNAL dataT __spirv_GroupBroadcast(__spv::Scope Execution, + dataT Value, + uint32_t LocalId) noexcept; template -extern dataT __spirv_GroupBroadcast(__spv::Scope Execution, dataT Value, - __ocl_vec_t LocalId) noexcept; +extern SYCL_EXTERNAL dataT +__spirv_GroupBroadcast(__spv::Scope Execution, dataT Value, + __ocl_vec_t LocalId) noexcept; template -extern dataT __spirv_GroupBroadcast(__spv::Scope Execution, dataT Value, - __ocl_vec_t LocalId) noexcept; +extern SYCL_EXTERNAL dataT +__spirv_GroupBroadcast(__spv::Scope Execution, dataT Value, + __ocl_vec_t LocalId) noexcept; template -extern dataT __spirv_GroupIAdd(__spv::Scope Execution, __spv::GroupOperation Op, - dataT Value) noexcept; +extern SYCL_EXTERNAL dataT __spirv_GroupIAdd(__spv::Scope Execution, + __spv::GroupOperation Op, + dataT Value) noexcept; template -extern dataT __spirv_GroupFAdd(__spv::Scope Execution, __spv::GroupOperation Op, - dataT Value) noexcept; +extern SYCL_EXTERNAL dataT __spirv_GroupFAdd(__spv::Scope Execution, + __spv::GroupOperation Op, + dataT Value) noexcept; template -extern dataT __spirv_GroupUMin(__spv::Scope Execution, __spv::GroupOperation Op, - dataT Value) noexcept; +extern SYCL_EXTERNAL dataT __spirv_GroupUMin(__spv::Scope Execution, + __spv::GroupOperation Op, + dataT Value) noexcept; template -extern dataT __spirv_GroupSMin(__spv::Scope Execution, __spv::GroupOperation Op, - dataT Value) noexcept; +extern SYCL_EXTERNAL dataT __spirv_GroupSMin(__spv::Scope Execution, + __spv::GroupOperation Op, + dataT Value) noexcept; template -extern dataT __spirv_GroupFMin(__spv::Scope Execution, __spv::GroupOperation Op, - dataT Value) noexcept; +extern SYCL_EXTERNAL dataT __spirv_GroupFMin(__spv::Scope Execution, + __spv::GroupOperation Op, + dataT Value) noexcept; template -extern dataT __spirv_GroupUMax(__spv::Scope Execution, __spv::GroupOperation Op, - dataT Value) noexcept; +extern SYCL_EXTERNAL dataT __spirv_GroupUMax(__spv::Scope Execution, + __spv::GroupOperation Op, + dataT Value) noexcept; template -extern dataT __spirv_GroupSMax(__spv::Scope Execution, __spv::GroupOperation Op, - dataT Value) noexcept; +extern SYCL_EXTERNAL dataT __spirv_GroupSMax(__spv::Scope Execution, + __spv::GroupOperation Op, + dataT Value) noexcept; template -extern dataT __spirv_GroupFMax(__spv::Scope Execution, __spv::GroupOperation Op, - dataT Value) noexcept; +extern SYCL_EXTERNAL dataT __spirv_GroupFMax(__spv::Scope Execution, + __spv::GroupOperation Op, + dataT Value) noexcept; template -extern dataT __spirv_SubgroupShuffleINTEL(dataT Data, - uint32_t InvocationId) noexcept; +extern SYCL_EXTERNAL dataT +__spirv_SubgroupShuffleINTEL(dataT Data, uint32_t InvocationId) noexcept; template -extern dataT __spirv_SubgroupShuffleDownINTEL(dataT Current, dataT Next, - uint32_t Delta) noexcept; +extern SYCL_EXTERNAL dataT __spirv_SubgroupShuffleDownINTEL( + dataT Current, dataT Next, uint32_t Delta) noexcept; template -extern dataT __spirv_SubgroupShuffleUpINTEL(dataT Previous, dataT Current, - uint32_t Delta) noexcept; +extern SYCL_EXTERNAL dataT __spirv_SubgroupShuffleUpINTEL( + dataT Previous, dataT Current, uint32_t Delta) noexcept; template -extern dataT __spirv_SubgroupShuffleXorINTEL(dataT Data, - uint32_t Value) noexcept; +extern SYCL_EXTERNAL dataT +__spirv_SubgroupShuffleXorINTEL(dataT Data, uint32_t Value) noexcept; template -extern dataT -__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) - uint8_t *Ptr) noexcept; +extern SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL( + const __attribute__((opencl_global)) uint8_t *Ptr) noexcept; template -extern void __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) - uint8_t *Ptr, - dataT Data) noexcept; +extern SYCL_EXTERNAL void +__spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint8_t *Ptr, + dataT Data) noexcept; template -extern dataT -__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) uint16_t *Ptr) noexcept; +extern SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL( + const __attribute__((opencl_global)) uint16_t *Ptr) noexcept; template -extern void __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint16_t *Ptr, - dataT Data) noexcept; +extern SYCL_EXTERNAL void +__spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint16_t *Ptr, + dataT Data) noexcept; template -extern dataT -__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) uint32_t *Ptr) noexcept; +extern SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL( + const __attribute__((opencl_global)) uint32_t *Ptr) noexcept; template -extern void __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint32_t *Ptr, - dataT Data) noexcept; +extern SYCL_EXTERNAL void +__spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint32_t *Ptr, + dataT Data) noexcept; template -extern int32_t __spirv_ReadPipe(RPipeTy Pipe, dataT *Data, - int32_t Size, int32_t Alignment) noexcept; +extern SYCL_EXTERNAL int32_t __spirv_ReadPipe(RPipeTy Pipe, dataT *Data, + int32_t Size, + int32_t Alignment) noexcept; template -extern int32_t __spirv_WritePipe(WPipeTy Pipe, const dataT *Data, - int32_t Size, int32_t Alignment) noexcept; +extern SYCL_EXTERNAL int32_t __spirv_WritePipe(WPipeTy Pipe, + const dataT *Data, int32_t Size, + int32_t Alignment) noexcept; template -extern void __spirv_ReadPipeBlockingINTEL(RPipeTy Pipe, dataT *Data, - int32_t Size, - int32_t Alignment) noexcept; +extern SYCL_EXTERNAL void +__spirv_ReadPipeBlockingINTEL(RPipeTy Pipe, dataT *Data, int32_t Size, + int32_t Alignment) noexcept; template -extern void __spirv_WritePipeBlockingINTEL(WPipeTy Pipe, - const dataT *Data, int32_t Size, - int32_t Alignment) noexcept; +extern SYCL_EXTERNAL void +__spirv_WritePipeBlockingINTEL(WPipeTy Pipe, const dataT *Data, + int32_t Size, int32_t Alignment) noexcept; template -extern RPipeTy __spirv_CreatePipeFromPipeStorage_read( +extern SYCL_EXTERNAL RPipeTy __spirv_CreatePipeFromPipeStorage_read( const ConstantPipeStorage *Storage) noexcept; template -extern WPipeTy __spirv_CreatePipeFromPipeStorage_write( +extern SYCL_EXTERNAL WPipeTy __spirv_CreatePipeFromPipeStorage_write( const ConstantPipeStorage *Storage) noexcept; -extern void __spirv_ocl_prefetch(const __attribute__((opencl_global)) char *Ptr, - size_t NumBytes) noexcept; +extern SYCL_EXTERNAL void +__spirv_ocl_prefetch(const __attribute__((opencl_global)) char *Ptr, + size_t NumBytes) noexcept; + +extern SYCL_EXTERNAL int +__spirv_ocl_printf(const __attribute__((opencl_constant)) char *fmt, ...); + +#define __SPIRV_COMPARISON(Order, Cmp) \ + template \ + extern SYCL_EXTERNAL RetT __spirv_F##Order##Cmp(T, T); + +#define __SPIRV_ALL_COMPARISON(Order) \ + __SPIRV_COMPARISON(Order, Equal) \ + __SPIRV_COMPARISON(Order, NotEqual) \ + __SPIRV_COMPARISON(Order, LessThan) \ + __SPIRV_COMPARISON(Order, GreaterThan) \ + __SPIRV_COMPARISON(Order, LessThanEqual) \ + __SPIRV_COMPARISON(Order, GreaterThanEqual) + +__SPIRV_ALL_COMPARISON(Unord) +__SPIRV_ALL_COMPARISON(Ord) + +#undef __SPIRV_COMPARISON +#undef __SPIRV_ALL_COMPARISON + +#define __SPIRV_COMPARISON(Cmp) \ + template \ + extern SYCL_EXTERNAL RetT __spirv_##Cmp(T, T); + +__SPIRV_COMPARISON(IEqual) +__SPIRV_COMPARISON(INotEqual) + +__SPIRV_COMPARISON(ULessThan) +__SPIRV_COMPARISON(UGreaterThanEqual) +__SPIRV_COMPARISON(ULessThanEqual) +__SPIRV_COMPARISON(UGreaterThan) + +__SPIRV_COMPARISON(SLessThan) +__SPIRV_COMPARISON(SGreaterThanEqual) +__SPIRV_COMPARISON(SLessThanEqual) +__SPIRV_COMPARISON(SGreaterThan) + +__SPIRV_COMPARISON(LessOrGreater) + +#undef __SPIRV_COMPARISON + +template extern SYCL_EXTERNAL RetT __spirv_Any(T); + +template extern SYCL_EXTERNAL RetT __spirv_All(T); + +template +extern SYCL_EXTERNAL RetT __spirv_IsFinite(T); + +template extern SYCL_EXTERNAL RetT __spirv_IsInf(T); + +template extern SYCL_EXTERNAL RetT __spirv_IsNan(T); + +template +extern SYCL_EXTERNAL RetT __spirv_IsNormal(T); + +template +extern SYCL_EXTERNAL RetT __spirv_SignBitSet(T); + +template +extern SYCL_EXTERNAL RetT __spirv_Ordered(T, T); + +template +extern SYCL_EXTERNAL RetT __spirv_Unordered(T, T); + +template +extern SYCL_EXTERNAL RetT __spirv_Dot(T, T); + +template extern SYCL_EXTERNAL T __spirv_FMul(T, T); + +#define __SPIRV_DECLARE_OCL1(name) \ + template \ + extern SYCL_EXTERNAL RetT __spirv_ocl_##name(T); + +#define __SPIRV_DECLARE_OCL2(name) \ + template \ + extern SYCL_EXTERNAL RetT __spirv_ocl_##name(T1, T2); + +#define __SPIRV_DECLARE_OCL3(name) \ + template \ + extern SYCL_EXTERNAL RetT __spirv_ocl_##name(T1, T2, T3); + +__SPIRV_DECLARE_OCL1(acos) +__SPIRV_DECLARE_OCL1(acosh) +__SPIRV_DECLARE_OCL1(acospi) +__SPIRV_DECLARE_OCL1(asin) +__SPIRV_DECLARE_OCL1(asinh) +__SPIRV_DECLARE_OCL1(asinpi) +__SPIRV_DECLARE_OCL1(atan) +__SPIRV_DECLARE_OCL2(atan2) +__SPIRV_DECLARE_OCL1(atanh) +__SPIRV_DECLARE_OCL1(atanpi) +__SPIRV_DECLARE_OCL2(atan2pi) +__SPIRV_DECLARE_OCL1(cbrt) +__SPIRV_DECLARE_OCL1(ceil) +__SPIRV_DECLARE_OCL2(copysign) +__SPIRV_DECLARE_OCL1(cos) +__SPIRV_DECLARE_OCL1(cosh) +__SPIRV_DECLARE_OCL1(cospi) +__SPIRV_DECLARE_OCL1(erfc) +__SPIRV_DECLARE_OCL1(erf) +__SPIRV_DECLARE_OCL1(exp) +__SPIRV_DECLARE_OCL1(exp2) +__SPIRV_DECLARE_OCL1(exp10) +__SPIRV_DECLARE_OCL1(expm1) +__SPIRV_DECLARE_OCL1(fabs) +__SPIRV_DECLARE_OCL2(fdim) +__SPIRV_DECLARE_OCL1(floor) +__SPIRV_DECLARE_OCL3(fma) +__SPIRV_DECLARE_OCL2(fmax) +__SPIRV_DECLARE_OCL2(fmin) +__SPIRV_DECLARE_OCL2(fmod) +__SPIRV_DECLARE_OCL2(fract) +__SPIRV_DECLARE_OCL2(frexp) +__SPIRV_DECLARE_OCL2(hypot) +__SPIRV_DECLARE_OCL1(ilogb) +__SPIRV_DECLARE_OCL2(ldexp) +__SPIRV_DECLARE_OCL1(lgamma) +__SPIRV_DECLARE_OCL2(lgamma_r) +__SPIRV_DECLARE_OCL1(log) +__SPIRV_DECLARE_OCL1(log2) +__SPIRV_DECLARE_OCL1(log10) +__SPIRV_DECLARE_OCL1(log1p) +__SPIRV_DECLARE_OCL1(logb) +__SPIRV_DECLARE_OCL3(mad) +__SPIRV_DECLARE_OCL2(maxmag) +__SPIRV_DECLARE_OCL2(minmag) +__SPIRV_DECLARE_OCL2(modf) +__SPIRV_DECLARE_OCL1(nan) +__SPIRV_DECLARE_OCL2(nextafter) +__SPIRV_DECLARE_OCL2(pow) +__SPIRV_DECLARE_OCL2(pown) +__SPIRV_DECLARE_OCL2(powr) +__SPIRV_DECLARE_OCL2(remainder) +__SPIRV_DECLARE_OCL3(remquo) +__SPIRV_DECLARE_OCL1(rint) +__SPIRV_DECLARE_OCL2(rootn) +__SPIRV_DECLARE_OCL1(round) +__SPIRV_DECLARE_OCL1(rsqrt) +__SPIRV_DECLARE_OCL1(sin) +__SPIRV_DECLARE_OCL2(sincos) +__SPIRV_DECLARE_OCL1(sinh) +__SPIRV_DECLARE_OCL1(sinpi) +__SPIRV_DECLARE_OCL1(sqrt) +__SPIRV_DECLARE_OCL1(tan) +__SPIRV_DECLARE_OCL1(tanh) +__SPIRV_DECLARE_OCL1(tanpi) +__SPIRV_DECLARE_OCL1(tgamma) +__SPIRV_DECLARE_OCL1(trunc) +__SPIRV_DECLARE_OCL1(native_cos) +__SPIRV_DECLARE_OCL2(native_divide) +__SPIRV_DECLARE_OCL1(native_exp) +__SPIRV_DECLARE_OCL1(native_exp2) +__SPIRV_DECLARE_OCL1(native_exp10) +__SPIRV_DECLARE_OCL1(native_log) +__SPIRV_DECLARE_OCL1(native_log2) +__SPIRV_DECLARE_OCL1(native_log10) +__SPIRV_DECLARE_OCL2(native_powr) +__SPIRV_DECLARE_OCL1(native_recip) +__SPIRV_DECLARE_OCL1(native_rsqrt) +__SPIRV_DECLARE_OCL1(native_sin) +__SPIRV_DECLARE_OCL1(native_sqrt) +__SPIRV_DECLARE_OCL1(native_tan) +__SPIRV_DECLARE_OCL1(half_cos) +__SPIRV_DECLARE_OCL2(half_divide) +__SPIRV_DECLARE_OCL1(half_exp) +__SPIRV_DECLARE_OCL1(half_exp2) +__SPIRV_DECLARE_OCL1(half_exp10) +__SPIRV_DECLARE_OCL1(half_log) +__SPIRV_DECLARE_OCL1(half_log2) +__SPIRV_DECLARE_OCL1(half_log10) +__SPIRV_DECLARE_OCL2(half_powr) +__SPIRV_DECLARE_OCL1(half_recip) +__SPIRV_DECLARE_OCL1(half_rsqrt) +__SPIRV_DECLARE_OCL1(half_sin) +__SPIRV_DECLARE_OCL1(half_sqrt) +__SPIRV_DECLARE_OCL1(half_tan) +__SPIRV_DECLARE_OCL1(s_abs) +__SPIRV_DECLARE_OCL1(u_abs) +__SPIRV_DECLARE_OCL2(s_abs_diff) +__SPIRV_DECLARE_OCL2(u_abs_diff) +__SPIRV_DECLARE_OCL2(s_add_sat) +__SPIRV_DECLARE_OCL2(u_add_sat) +__SPIRV_DECLARE_OCL2(s_hadd) +__SPIRV_DECLARE_OCL2(u_hadd) +__SPIRV_DECLARE_OCL2(s_rhadd) +__SPIRV_DECLARE_OCL2(u_rhadd) +__SPIRV_DECLARE_OCL3(s_clamp) +__SPIRV_DECLARE_OCL3(u_clamp) +__SPIRV_DECLARE_OCL1(clz) +__SPIRV_DECLARE_OCL1(ctz) +__SPIRV_DECLARE_OCL3(s_mad_hi) +__SPIRV_DECLARE_OCL3(u_mad_hi) +__SPIRV_DECLARE_OCL3(u_mad_sat) +__SPIRV_DECLARE_OCL3(s_mad_sat) +__SPIRV_DECLARE_OCL2(s_max) +__SPIRV_DECLARE_OCL2(u_max) +__SPIRV_DECLARE_OCL2(s_min) +__SPIRV_DECLARE_OCL2(u_min) +__SPIRV_DECLARE_OCL2(s_mul_hi) +__SPIRV_DECLARE_OCL2(u_mul_hi) +__SPIRV_DECLARE_OCL2(rotate) +__SPIRV_DECLARE_OCL2(s_sub_sat) +__SPIRV_DECLARE_OCL2(u_sub_sat) +__SPIRV_DECLARE_OCL2(u_upsample) +__SPIRV_DECLARE_OCL2(s_upsample) +__SPIRV_DECLARE_OCL1(popcount) +__SPIRV_DECLARE_OCL3(s_mad24) +__SPIRV_DECLARE_OCL3(u_mad24) +__SPIRV_DECLARE_OCL2(s_mul24) +__SPIRV_DECLARE_OCL2(u_mul24) +__SPIRV_DECLARE_OCL3(fclamp) +__SPIRV_DECLARE_OCL1(degrees) +__SPIRV_DECLARE_OCL2(fmax_common) +__SPIRV_DECLARE_OCL2(fmin_common) +__SPIRV_DECLARE_OCL3(mix) +__SPIRV_DECLARE_OCL1(radians) +__SPIRV_DECLARE_OCL2(step) +__SPIRV_DECLARE_OCL3(smoothstep) +__SPIRV_DECLARE_OCL1(sign) +__SPIRV_DECLARE_OCL2(cross) +__SPIRV_DECLARE_OCL2(distance) +__SPIRV_DECLARE_OCL1(length) +__SPIRV_DECLARE_OCL1(normalize) +__SPIRV_DECLARE_OCL2(fast_distance) +__SPIRV_DECLARE_OCL1(fast_length) +__SPIRV_DECLARE_OCL1(fast_normalize) +__SPIRV_DECLARE_OCL3(bitselect) +__SPIRV_DECLARE_OCL3(select) // select + +#undef __SPIRV_DECLARE_OCL1 +#undef __SPIRV_DECLARE_OCL2 +#undef __SPIRV_DECLARE_OCL3 + #else // if !__SYCL_DEVICE_ONLY__ template @@ -300,12 +565,13 @@ extern void __spirv_ocl_prefetch(const char *Ptr, size_t NumBytes) noexcept; #endif // !__SYCL_DEVICE_ONLY__ -extern void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, - uint32_t Semantics) noexcept; - -extern void __spirv_MemoryBarrier(__spv::Scope Memory, - uint32_t Semantics) noexcept; +extern SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, + __spv::Scope Memory, + uint32_t Semantics) noexcept; -extern void __spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents, - __ocl_event_t *WaitEvents) noexcept; +extern SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, + uint32_t Semantics) noexcept; +extern SYCL_EXTERNAL void +__spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents, + __ocl_event_t *WaitEvents) noexcept; diff --git a/sycl/include/CL/__spirv/spirv_vars.hpp b/sycl/include/CL/__spirv/spirv_vars.hpp index 728bc05104d93..ff85b98482c49 100644 --- a/sycl/include/CL/__spirv/spirv_vars.hpp +++ b/sycl/include/CL/__spirv/spirv_vars.hpp @@ -10,42 +10,38 @@ #ifdef __SYCL_DEVICE_ONLY__ -size_t __spirv_GlobalInvocationId_x(); -size_t __spirv_GlobalInvocationId_y(); -size_t __spirv_GlobalInvocationId_z(); - -size_t __spirv_GlobalSize_x(); -size_t __spirv_GlobalSize_y(); -size_t __spirv_GlobalSize_z(); - -size_t __spirv_GlobalInvocationId_x(); -size_t __spirv_GlobalInvocationId_y(); -size_t __spirv_GlobalInvocationId_z(); - -size_t __spirv_GlobalOffset_x(); -size_t __spirv_GlobalOffset_y(); -size_t __spirv_GlobalOffset_z(); - -size_t __spirv_NumWorkgroups_x(); -size_t __spirv_NumWorkgroups_y(); -size_t __spirv_NumWorkgroups_z(); - -size_t __spirv_WorkgroupSize_x(); -size_t __spirv_WorkgroupSize_y(); -size_t __spirv_WorkgroupSize_z(); - -size_t __spirv_WorkgroupId_x(); -size_t __spirv_WorkgroupId_y(); -size_t __spirv_WorkgroupId_z(); - -size_t __spirv_LocalInvocationId_x(); -size_t __spirv_LocalInvocationId_y(); -size_t __spirv_LocalInvocationId_z(); - -#define DEFINE_FUNC_ID_TO_XYZ_CONVERTER(POSTFIX) \ - template static inline size_t get##POSTFIX(); \ - template <> size_t get##POSTFIX<0>() { return __spirv_##POSTFIX##_x(); } \ - template <> size_t get##POSTFIX<1>() { return __spirv_##POSTFIX##_y(); } \ +SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_x(); +SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_y(); +SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_z(); + +SYCL_EXTERNAL size_t __spirv_GlobalSize_x(); +SYCL_EXTERNAL size_t __spirv_GlobalSize_y(); +SYCL_EXTERNAL size_t __spirv_GlobalSize_z(); + +SYCL_EXTERNAL size_t __spirv_GlobalOffset_x(); +SYCL_EXTERNAL size_t __spirv_GlobalOffset_y(); +SYCL_EXTERNAL size_t __spirv_GlobalOffset_z(); + +SYCL_EXTERNAL size_t __spirv_NumWorkgroups_x(); +SYCL_EXTERNAL size_t __spirv_NumWorkgroups_y(); +SYCL_EXTERNAL size_t __spirv_NumWorkgroups_z(); + +SYCL_EXTERNAL size_t __spirv_WorkgroupSize_x(); +SYCL_EXTERNAL size_t __spirv_WorkgroupSize_y(); +SYCL_EXTERNAL size_t __spirv_WorkgroupSize_z(); + +SYCL_EXTERNAL size_t __spirv_WorkgroupId_x(); +SYCL_EXTERNAL size_t __spirv_WorkgroupId_y(); +SYCL_EXTERNAL size_t __spirv_WorkgroupId_z(); + +SYCL_EXTERNAL size_t __spirv_LocalInvocationId_x(); +SYCL_EXTERNAL size_t __spirv_LocalInvocationId_y(); +SYCL_EXTERNAL size_t __spirv_LocalInvocationId_z(); + +#define DEFINE_FUNC_ID_TO_XYZ_CONVERTER(POSTFIX) \ + template static inline size_t get##POSTFIX(); \ + template <> size_t get##POSTFIX<0>() { return __spirv_##POSTFIX##_x(); } \ + template <> size_t get##POSTFIX<1>() { return __spirv_##POSTFIX##_y(); } \ template <> size_t get##POSTFIX<2>() { return __spirv_##POSTFIX##_z(); } namespace __spirv { diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 8d8b28248aa5b..76765a61e4ba4 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -1542,4 +1542,18 @@ detail::enable_if_t::value, T> tan(T x) __NOEXC { } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) +#ifdef __SYCL_DEVICE_ONLY__ +#if defined(__GNUC__) || defined(__clang__) +extern "C" { +extern SYCL_EXTERNAL void __assert_fail(const char *expr, const char *file, + unsigned int line, const char *func); +} +#elif defined(_MSC_VER) +extern "C" { +extern SYCL_EXTERNAL void _wassert(const wchar_t *wexpr, const wchar_t *wfile, + unsigned line); +} +#endif // defined(_MSC_VER_) +#endif // __SYCL_DEVICE_ONLY__ + #undef __NOEXC diff --git a/sycl/include/CL/sycl/detail/builtins.hpp b/sycl/include/CL/sycl/detail/builtins.hpp index 28b61d208f718..b4e80fdbb8fb6 100644 --- a/sycl/include/CL/sycl/detail/builtins.hpp +++ b/sycl/include/CL/sycl/detail/builtins.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include @@ -17,52 +18,98 @@ #define __NOEXC /*noexcept*/ #ifdef __SYCL_DEVICE_ONLY__ -#define __FUNC_PREFIX_OCL __spirv_ocl_ -#define __FUNC_PREFIX_CORE __spirv_ +#define __FUNC_PREFIX_OCL __spirv_ocl_ +#define __FUNC_PREFIX_CORE __spirv_ +#define __SYCL_EXTERN_IT1(Ret, prefix, call, Arg1) +#define __SYCL_EXTERN_IT2(Ret, prefix, call, Arg1, Arg2) +#define __SYCL_EXTERN_IT2_SAME(Ret, prefix, call, Arg) +#define __SYCL_EXTERN_IT3(Ret, prefix, call, Arg1, Arg2, Arg3) +#define __SYCL_TPARAMS0(Type) +#define __SYCL_TPARAMS1(Ret, A1) +#define __SYCL_TPARAMS2(Ret, A1, A2) +#define __SYCL_TPARAMS3(Ret, A1, A2, A3) #else #define __FUNC_PREFIX_OCL #define __FUNC_PREFIX_CORE +#define __SYCL_EXTERN_IT1(Ret, prefix, call, Arg) \ + extern Ret __SYCL_PPCAT(prefix, call)(Arg) +#define __SYCL_EXTERN_IT2_SAME(Ret, prefix, call, Arg) \ + extern Ret __SYCL_PPCAT(prefix, call)(Arg, Arg) +#define __SYCL_EXTERN_IT2(Ret, prefix, call, Arg1, Arg2) \ + extern Ret __SYCL_PPCAT(prefix, call)(Arg1, Arg2) +#define __SYCL_EXTERN_IT3(Ret, prefix, call, Arg1, Arg2, Arg3) \ + extern Ret __SYCL_PPCAT(prefix, call)(Arg1, Arg2, Arg3) +#define __SYCL_TPARAMS0(Type) +#define __SYCL_TPARAMS1(Ret, A1) +#define __SYCL_TPARAMS2(Ret, A1, A2) +#define __SYCL_TPARAMS3(Ret, A1, A2, A3) #endif -#define PPCAT_NX(A, B) A ## B -#define PPCAT(A, B) PPCAT_NX(A, B) +#define __SYCL_PPCAT_NX(A, B) A##B +#define __SYCL_PPCAT(A, B) __SYCL_PPCAT_NX(A, B) -#define MAKE_CALL_ARG1(call, prefix) \ +#define __SYCL_MAKE_CALL_ARG1(call, prefix) \ template \ inline ALWAYS_INLINE R __invoke_##call(T1 t1) __NOEXC { \ using Ret = cl::sycl::detail::ConvertToOpenCLType_t; \ using Arg1 = cl::sycl::detail::ConvertToOpenCLType_t; \ - extern Ret PPCAT(prefix, call)(Arg1); \ + __SYCL_EXTERN_IT1(Ret, prefix, call, Arg1); \ Arg1 arg1 = cl::sycl::detail::convertDataToType(t1); \ - Ret ret = PPCAT(prefix, call)(arg1); \ + Ret ret = __SYCL_PPCAT(prefix, call) __SYCL_TPARAMS1(Ret, Arg1)(arg1); \ return cl::sycl::detail::convertDataToType(ret); \ } -#define MAKE_CALL_ARG2(call, prefix) \ +#define __SYCL_MAKE_CALL_ARG2(call, prefix) \ template \ inline ALWAYS_INLINE R __invoke_##call(T1 t1, T2 t2) __NOEXC { \ using Ret = cl::sycl::detail::ConvertToOpenCLType_t; \ using Arg1 = cl::sycl::detail::ConvertToOpenCLType_t; \ using Arg2 = cl::sycl::detail::ConvertToOpenCLType_t; \ - extern Ret PPCAT(prefix, call)(Arg1, Arg2); \ + __SYCL_EXTERN_IT2(Ret, prefix, call, Arg1, Arg2); \ Arg1 arg1 = cl::sycl::detail::convertDataToType(t1); \ Arg2 arg2 = cl::sycl::detail::convertDataToType(t2); \ - Ret ret = PPCAT(prefix, call)(arg1, arg2); \ + Ret ret = __SYCL_PPCAT(prefix, call) \ + __SYCL_TPARAMS2(Ret, Arg1, Arg2)(arg1, arg2); \ return cl::sycl::detail::convertDataToType(ret); \ } -#define MAKE_CALL_ARG3(call, prefix) \ +#define __SYCL_MAKE_CALL_ARG2_SAME(call, prefix) \ + template \ + inline ALWAYS_INLINE R __invoke_##call(T t1, T t2) __NOEXC { \ + using Ret = cl::sycl::detail::ConvertToOpenCLType_t; \ + using Arg = cl::sycl::detail::ConvertToOpenCLType_t; \ + __SYCL_EXTERN_IT2_SAME(Ret, prefix, call, Arg); \ + Arg arg1 = cl::sycl::detail::convertDataToType(t1); \ + Arg arg2 = cl::sycl::detail::convertDataToType(t2); \ + Ret ret = \ + __SYCL_PPCAT(prefix, call) __SYCL_TPARAMS1(Ret, Arg)(arg1, arg2); \ + return cl::sycl::detail::convertDataToType(ret); \ + } + +#define __SYCL_MAKE_CALL_ARG2_SAME_RESULT(call, prefix) \ + template \ + inline ALWAYS_INLINE T __invoke_##call(T v1, T v2) __NOEXC { \ + using Type = cl::sycl::detail::ConvertToOpenCLType_t; \ + __SYCL_EXTERN_IT2_SAME(Type, prefix, call, Type); \ + Type arg1 = cl::sycl::detail::convertDataToType(v1); \ + Type arg2 = cl::sycl::detail::convertDataToType(v2); \ + Type ret = __SYCL_PPCAT(prefix, call) __SYCL_TPARAMS0(Type)(arg1, arg2); \ + return cl::sycl::detail::convertDataToType(ret); \ + } + +#define __SYCL_MAKE_CALL_ARG3(call, prefix) \ template \ inline ALWAYS_INLINE R __invoke_##call(T1 t1, T2 t2, T3 t3) __NOEXC { \ using Ret = cl::sycl::detail::ConvertToOpenCLType_t; \ using Arg1 = cl::sycl::detail::ConvertToOpenCLType_t; \ using Arg2 = cl::sycl::detail::ConvertToOpenCLType_t; \ using Arg3 = cl::sycl::detail::ConvertToOpenCLType_t; \ - extern Ret PPCAT(prefix, call)(Arg1, Arg2, Arg3); \ + __SYCL_EXTERN_IT3(Ret, prefix, call, Arg1, Arg2, Arg3); \ Arg1 arg1 = cl::sycl::detail::convertDataToType(t1); \ Arg2 arg2 = cl::sycl::detail::convertDataToType(t2); \ Arg3 arg3 = cl::sycl::detail::convertDataToType(t3); \ - Ret ret = PPCAT(prefix, call)(arg1, arg2, arg3); \ + Ret ret = __SYCL_PPCAT(prefix, call) \ + __SYCL_TPARAMS3(Ret, Arg1, Arg2, Arg3)(arg1, arg2, arg3); \ return cl::sycl::detail::convertDataToType(ret); \ } @@ -71,185 +118,195 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace __host_std { #endif // __SYCL_DEVICE_ONLY__ /* ----------------- 4.13.3 Math functions. ---------------------------------*/ -MAKE_CALL_ARG1(acos, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(acosh, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(acospi, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(asin, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(asinh, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(asinpi, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(atan, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(atan2, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(atanh, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(atanpi, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(atan2pi, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(cbrt, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(ceil, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(copysign, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(cos, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(cosh, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(cospi, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(erfc, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(erf, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(exp, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(exp2, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(exp10, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(expm1, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(fabs, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(fdim, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(floor, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(fma, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(fmax, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(fmin, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(fmod, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(fract, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(frexp, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(hypot, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(ilogb, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(ldexp, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(lgamma, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(lgamma_r, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(log, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(log2, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(log10, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(log1p, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(logb, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(mad, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(maxmag, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(minmag, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(modf, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(nan, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(nextafter, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(pow, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(pown, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(powr, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(remainder, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(remquo, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(rint, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(rootn, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(round, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(rsqrt, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(sin, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(sincos, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(sinh, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(sinpi, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(sqrt, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(tan, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(tanh, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(tanpi, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(tgamma, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(trunc, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_cos, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(native_divide, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_exp, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_exp2, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_exp10, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_log, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_log2, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_log10, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(native_powr, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_recip, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_rsqrt, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_sin, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_sqrt, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(native_tan, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_cos, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(half_divide, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_exp, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_exp2, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_exp10, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_log, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_log2, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_log10, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(half_powr, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_recip, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_rsqrt, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_sin, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_sqrt, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(half_tan, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(acos, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(acosh, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(acospi, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(asin, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(asinh, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(asinpi, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(atan, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(atan2, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(atanh, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(atanpi, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(atan2pi, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(cbrt, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(ceil, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(copysign, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(cos, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(cosh, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(cospi, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(erfc, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(erf, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(exp, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(exp2, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(exp10, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(expm1, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(fabs, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(fdim, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(floor, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(fma, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(fmax, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(fmin, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(fmod, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(fract, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(frexp, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(hypot, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(ilogb, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(ldexp, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(lgamma, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(lgamma_r, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(log, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(log2, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(log10, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(log1p, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(logb, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(mad, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(maxmag, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(minmag, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(modf, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(nan, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(nextafter, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(pow, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(pown, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(powr, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(remainder, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(remquo, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(rint, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(rootn, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(round, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(rsqrt, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(sin, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(sincos, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(sinh, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(sinpi, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(sqrt, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(tan, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(tanh, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(tanpi, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(tgamma, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(trunc, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_cos, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(native_divide, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_exp, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_exp2, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_exp10, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_log, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_log2, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_log10, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(native_powr, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_recip, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_rsqrt, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_sin, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_sqrt, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(native_tan, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_cos, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(half_divide, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_exp, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_exp2, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_exp10, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_log, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_log2, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_log10, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(half_powr, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_recip, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_rsqrt, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_sin, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_sqrt, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(half_tan, __FUNC_PREFIX_OCL) /* --------------- 4.13.4 Integer functions. --------------------------------*/ -MAKE_CALL_ARG1(s_abs, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(u_abs, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(s_abs_diff, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(u_abs_diff, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(s_add_sat, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(u_add_sat, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(s_hadd, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(u_hadd, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(s_rhadd, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(u_rhadd, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(s_clamp, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(u_clamp, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(clz, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(ctz, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(s_mad_hi, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(u_mad_hi, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(u_mad_sat, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(s_mad_sat, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(s_max, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(u_max, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(s_min, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(u_min, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(s_mul_hi, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(u_mul_hi, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(rotate, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(s_sub_sat, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(u_sub_sat, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(u_upsample, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(s_upsample, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(popcount, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(s_mad24, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(u_mad24, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(s_mul24, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(u_mul24, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(s_abs, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(u_abs, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(s_abs_diff, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(u_abs_diff, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(s_add_sat, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(u_add_sat, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(s_hadd, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(u_hadd, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(s_rhadd, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(u_rhadd, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(s_clamp, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(u_clamp, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(clz, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(ctz, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(s_mad_hi, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(u_mad_hi, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(u_mad_sat, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(s_mad_sat, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(s_max, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(u_max, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(s_min, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(u_min, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(s_mul_hi, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(u_mul_hi, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(rotate, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(s_sub_sat, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(u_sub_sat, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(u_upsample, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(s_upsample, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(popcount, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(s_mad24, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(u_mad24, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(s_mul24, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(u_mul24, __FUNC_PREFIX_OCL) /* --------------- 4.13.5 Common functions. ---------------------------------*/ -MAKE_CALL_ARG3(fclamp, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(degrees, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(fmax_common, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(fmin_common, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(mix, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(radians, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(step, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(smoothstep, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(sign, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(fclamp, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(degrees, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(fmax_common, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(fmin_common, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(mix, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(radians, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(step, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(smoothstep, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(sign, __FUNC_PREFIX_OCL) /* --------------- 4.13.6 Geometric Functions. ------------------------------*/ -MAKE_CALL_ARG2(cross, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(Dot, __FUNC_PREFIX_CORE) // dot -MAKE_CALL_ARG2(FMul, __FUNC_PREFIX_CORE) // dot -MAKE_CALL_ARG2(distance, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(length, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(normalize, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG2(fast_distance, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(fast_length, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG1(fast_normalize, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(cross, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2_SAME(Dot, __FUNC_PREFIX_CORE) // dot +__SYCL_MAKE_CALL_ARG2_SAME_RESULT(FMul, __FUNC_PREFIX_CORE) // dot +__SYCL_MAKE_CALL_ARG2(distance, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(length, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(normalize, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG2(fast_distance, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(fast_length, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG1(fast_normalize, __FUNC_PREFIX_OCL) /* --------------- 4.13.7 Relational functions. -----------------------------*/ -MAKE_CALL_ARG2(FOrdEqual, __FUNC_PREFIX_CORE) // isequal -MAKE_CALL_ARG2(FUnordNotEqual, __FUNC_PREFIX_CORE) // isnotequal -MAKE_CALL_ARG2(FOrdGreaterThan, __FUNC_PREFIX_CORE) // isgreater -MAKE_CALL_ARG2(FOrdGreaterThanEqual, __FUNC_PREFIX_CORE) // isgreaterequal -MAKE_CALL_ARG2(FOrdLessThan, __FUNC_PREFIX_CORE) // isless -MAKE_CALL_ARG2(FOrdLessThanEqual, __FUNC_PREFIX_CORE) // islessequal -MAKE_CALL_ARG2(LessOrGreater, __FUNC_PREFIX_CORE) // islessgreater -MAKE_CALL_ARG1(IsFinite, __FUNC_PREFIX_CORE) // isfinite -MAKE_CALL_ARG1(IsInf, __FUNC_PREFIX_CORE) // isinf -MAKE_CALL_ARG1(IsNan, __FUNC_PREFIX_CORE) // isnan -MAKE_CALL_ARG1(IsNormal, __FUNC_PREFIX_CORE) // isnormal -MAKE_CALL_ARG2(Ordered, __FUNC_PREFIX_CORE) // isordered -MAKE_CALL_ARG2(Unordered, __FUNC_PREFIX_CORE) // isunordered -MAKE_CALL_ARG1(SignBitSet, __FUNC_PREFIX_CORE) // signbit -MAKE_CALL_ARG1(Any, __FUNC_PREFIX_CORE) // any -MAKE_CALL_ARG1(All, __FUNC_PREFIX_CORE) // all -MAKE_CALL_ARG3(bitselect, __FUNC_PREFIX_OCL) -MAKE_CALL_ARG3(select, __FUNC_PREFIX_OCL) // select +__SYCL_MAKE_CALL_ARG2_SAME(FOrdEqual, __FUNC_PREFIX_CORE) // isequal +__SYCL_MAKE_CALL_ARG2_SAME(FUnordNotEqual, __FUNC_PREFIX_CORE) // isnotequal +__SYCL_MAKE_CALL_ARG2_SAME(FOrdGreaterThan, __FUNC_PREFIX_CORE) // isgreater +__SYCL_MAKE_CALL_ARG2_SAME(FOrdGreaterThanEqual, + __FUNC_PREFIX_CORE) // isgreaterequal +__SYCL_MAKE_CALL_ARG2_SAME(FOrdLessThan, __FUNC_PREFIX_CORE) // isless +__SYCL_MAKE_CALL_ARG2_SAME(FOrdLessThanEqual, __FUNC_PREFIX_CORE) // islessequal +__SYCL_MAKE_CALL_ARG2_SAME(LessOrGreater, __FUNC_PREFIX_CORE) // islessgreater +__SYCL_MAKE_CALL_ARG1(IsFinite, __FUNC_PREFIX_CORE) // isfinite +__SYCL_MAKE_CALL_ARG1(IsInf, __FUNC_PREFIX_CORE) // isinf +__SYCL_MAKE_CALL_ARG1(IsNan, __FUNC_PREFIX_CORE) // isnan +__SYCL_MAKE_CALL_ARG1(IsNormal, __FUNC_PREFIX_CORE) // isnormal +__SYCL_MAKE_CALL_ARG2_SAME(Ordered, __FUNC_PREFIX_CORE) // isordered +__SYCL_MAKE_CALL_ARG2_SAME(Unordered, __FUNC_PREFIX_CORE) // isunordered +__SYCL_MAKE_CALL_ARG1(SignBitSet, __FUNC_PREFIX_CORE) // signbit +__SYCL_MAKE_CALL_ARG1(Any, __FUNC_PREFIX_CORE) // any +__SYCL_MAKE_CALL_ARG1(All, __FUNC_PREFIX_CORE) // all +__SYCL_MAKE_CALL_ARG3(bitselect, __FUNC_PREFIX_OCL) +__SYCL_MAKE_CALL_ARG3(select, __FUNC_PREFIX_OCL) // select #ifndef __SYCL_DEVICE_ONLY__ } // namespace __host_std } // __SYCL_INLINE_NAMESPACE(cl) #endif #undef __NOEXC -#undef MAKE_CALL_ARG1 -#undef MAKE_CALL_ARG2 -#undef MAKE_CALL_ARG3 -#undef PPCAT_NX -#undef PPCAT +#undef __SYCL_MAKE_CALL_ARG1 +#undef __SYCL_MAKE_CALL_ARG2 +#undef __SYCL_MAKE_CALL_ARG2_SAME +#undef __SYCL_MAKE_CALL_ARG3 +#undef __SYCL_PPCAT_NX +#undef __SYCL_PPCAT #undef __FUNC_PREFIX_OCL #undef __FUNC_PREFIX_CORE +#undef __SYCL_TPARAMS0 +#undef __SYCL_TPARAMS1 +#undef __SYCL_TPARAMS2 +#undef __SYCL_TPARAMS3 +#undef __SYCL_EXTERN_IT1 +#undef __SYCL_EXTERN_IT2 +#undef __SYCL_EXTERN_IT2_SAME +#undef __SYCL_EXTERN_IT3 diff --git a/sycl/include/CL/sycl/detail/defines.hpp b/sycl/include/CL/sycl/detail/defines.hpp index c9e9d25e0d06d..29dc311fe6808 100644 --- a/sycl/include/CL/sycl/detail/defines.hpp +++ b/sycl/include/CL/sycl/detail/defines.hpp @@ -33,11 +33,11 @@ #endif #if __cplusplus >= 201402 - #define __SYCL_DEPRECATED__ \ - [[deprecated("Replaced by in_order queue property")]] +#define __SYCL_DEPRECATED__ \ + [[deprecated("Replaced by in_order queue property")]] #elif !defined _MSC_VER - #define __SYCL_DEPRECATED__ __attribute__ \ - ((deprecated("Replaced by in_order queue property"))) +#define __SYCL_DEPRECATED__ \ + __attribute__((deprecated("Replaced by in_order queue property"))) #else - #define __SYCL_DEPRECATED__ +#define __SYCL_DEPRECATED__ #endif diff --git a/sycl/include/CL/sycl/detail/image_ocl_types.hpp b/sycl/include/CL/sycl/detail/image_ocl_types.hpp index 83ea63914a8bd..bd569523da2a5 100644 --- a/sycl/include/CL/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/CL/sycl/detail/image_ocl_types.hpp @@ -31,13 +31,14 @@ #include #include +#include + #define INVOKE_SPIRV_CALL_ARG1(call) \ - template inline R __invoke_##call(T1 ParT1) { \ + template inline R __invoke_##call(T1 ParT1) { \ using Ret = cl::sycl::detail::ConvertToOpenCLType_t; \ - extern Ret __spirv_##call(T1); \ - T1 Arg1 = ParT1; \ - Ret RetVar = __spirv_##call(Arg1); \ - return cl::sycl::detail::convertDataToType(RetVar); \ + T1 Arg1 = ParT1; \ + Ret RetVar = __spirv_##call(Arg1); \ + return cl::sycl::detail::convertDataToType(RetVar); \ } // The macro defines the function __invoke_ImageXXXX, diff --git a/sycl/include/CL/sycl/intel/builtins.hpp b/sycl/include/CL/sycl/intel/builtins.hpp index a9c93393e360c..a59258a2290ba 100644 --- a/sycl/include/CL/sycl/intel/builtins.hpp +++ b/sycl/include/CL/sycl/intel/builtins.hpp @@ -8,12 +8,10 @@ #pragma once +#include #ifdef __SYCL_DEVICE_ONLY__ #define CONSTANT_AS __attribute__((opencl_constant)) -// Note: __format string is declared in constant address space to be compatible -// with OpenCL C -extern int __spirv_ocl_printf(const CONSTANT_AS char *__format, ...); #else #define CONSTANT_AS #endif diff --git a/sycl/source/detail/devicelib/device_math.h b/sycl/source/detail/devicelib/device_math.h index 3651b4f852f02..89ddcdc37f4b3 100644 --- a/sycl/source/detail/devicelib/device_math.h +++ b/sycl/source/detail/devicelib/device_math.h @@ -9,278 +9,278 @@ #ifndef __SYCL_CMATH_WRAPPER_H__ #define __SYCL_CMATH_WRAPPER_H__ -double __spirv_ocl_log(double); -double __spirv_ocl_sin(double); -double __spirv_ocl_cos(double); -double __spirv_ocl_sinh(double); -double __spirv_ocl_cosh(double); -double __spirv_ocl_tanh(double); -double __spirv_ocl_exp(double); -double __spirv_ocl_sqrt(double); -bool __spirv_IsInf(double); -bool __spirv_IsFinite(double); -bool __spirv_IsNan(double); -bool __spirv_IsNormal(double); -bool __spirv_SignBitSet(double); -double __spirv_ocl_hypot(double, double); -double __spirv_ocl_atan2(double, double); -double __spirv_ocl_pow(double, double); -double __spirv_ocl_ldexp(double, int); -double __spirv_ocl_copysign(double, double); -double __spirv_ocl_fmax(double, double); -double __spirv_ocl_fabs(double); -double __spirv_ocl_tan(double); -double __spirv_ocl_acos(double); -double __spirv_ocl_asin(double); -double __spirv_ocl_atan(double); -double __spirv_ocl_atan2(double, double); -double __spirv_ocl_cosh(double); -double __spirv_ocl_sinh(double); -double __spirv_ocl_tanh(double); -double __spirv_ocl_acosh(double); -double __spirv_ocl_asinh(double); -double __spirv_ocl_atanh(double); -double __spirv_ocl_frexp(double, int *); -double __spirv_ocl_log10(double); -double __spirv_ocl_modf(double, double *); -double __spirv_ocl_exp2(double); -double __spirv_ocl_expm1(double); -int __spirv_ocl_ilogb(double); -double __spriv_ocl_log1p(double); -double __spirv_ocl_log2(double); -double __spirv_ocl_logb(double); -double __spirv_ocl_sqrt(double); -double __spirv_ocl_cbrt(double); -double __spirv_ocl_hypot(double); -double __spirv_ocl_erf(double); -double __spirv_ocl_erfc(double); -double __spirv_ocl_tgamma(double); -double __spirv_ocl_lgamma(double); -double __spirv_ocl_fmod(double, double); -double __spirv_ocl_remainder(double, double); -double __spirv_ocl_remquo(double, double, int*); -double __spirv_ocl_nextafter(double, double); -double __spirv_ocl_fdim(double, double); -double __spirv_ocl_fma(double, double, double); - -float __spirv_ocl_log(float); -float __spirv_ocl_logb(float); -float __spirv_ocl_sin(float); -float __spirv_ocl_cos(float); -float __spirv_ocl_sinh(float); -float __spirv_ocl_cosh(float); -float __spirv_ocl_tanh(float); -float __spirv_ocl_exp(float); -float __spirv_ocl_sqrt(float); -bool __spirv_IsInf(float); -bool __spirv_IsFinite(float); -bool __spirv_IsNan(float); -bool __spirv_IsNormal(double); -bool __spirv_SignBitSet(float); -float __spirv_ocl_hypot(float, float); -float __spirv_ocl_atan2(float, float); -float __spirv_ocl_pow(float, float); -float __spirv_ocl_ldexp(float, int); -float __spirv_ocl_copysign(float, float); -float __spirv_ocl_fmax(float, float); -float __spirv_ocl_fabs(float); -float __spirv_ocl_tan(float); -float __spirv_ocl_acos(float); -float __spirv_ocl_asin(float); -float __spirv_ocl_atan(float); -float __spirv_ocl_atan2(float, float); -float __spirv_ocl_cosh(float); -float __spirv_ocl_sinh(float); -float __spirv_ocl_tanh(float); -float __spirv_ocl_acosh(float); -float __spirv_ocl_asinh(float); -float __spirv_ocl_atanh(float); -float __spirv_ocl_frexp(float, int *); -float __spirv_ocl_log10(float); -float __spirv_ocl_modf(float, float *); -float __spirv_ocl_exp2(float); -float __spirv_ocl_expm1(float); -int __spirv_ocl_ilogb(float); -float __spirv_ocl_log1p(float); -float __spirv_ocl_log2(float); -float __spirv_ocl_sqrt(float); -float __spirv_ocl_cbrt(float); -float __spirv_ocl_hypot(float); -float __spirv_ocl_erf(float); -float __spirv_ocl_erfc(float); -float __spirv_ocl_tgamma(float); -float __spirv_ocl_lgamma(float); -float __spirv_ocl_fmod(float, float); -float __spirv_ocl_remainder(float, float); -float __spirv_ocl_remquo(float, float, int*); -float __spirv_ocl_nextafter(float, float); -float __spirv_ocl_fdim(float, float); -float __spirv_ocl_fma(float, float, float); +SYCL_EXTERNAL double __spirv_ocl_log(double); +SYCL_EXTERNAL double __spirv_ocl_sin(double); +SYCL_EXTERNAL double __spirv_ocl_cos(double); +SYCL_EXTERNAL double __spirv_ocl_sinh(double); +SYCL_EXTERNAL double __spirv_ocl_cosh(double); +SYCL_EXTERNAL double __spirv_ocl_tanh(double); +SYCL_EXTERNAL double __spirv_ocl_exp(double); +SYCL_EXTERNAL double __spirv_ocl_sqrt(double); +SYCL_EXTERNAL bool __spirv_IsInf(double); +SYCL_EXTERNAL bool __spirv_IsFinite(double); +SYCL_EXTERNAL bool __spirv_IsNan(double); +SYCL_EXTERNAL bool __spirv_IsNormal(double); +SYCL_EXTERNAL bool __spirv_SignBitSet(double); +SYCL_EXTERNAL double __spirv_ocl_hypot(double, double); +SYCL_EXTERNAL double __spirv_ocl_atan2(double, double); +SYCL_EXTERNAL double __spirv_ocl_pow(double, double); +SYCL_EXTERNAL double __spirv_ocl_ldexp(double, int); +SYCL_EXTERNAL double __spirv_ocl_copysign(double, double); +SYCL_EXTERNAL double __spirv_ocl_fmax(double, double); +SYCL_EXTERNAL double __spirv_ocl_fabs(double); +SYCL_EXTERNAL double __spirv_ocl_tan(double); +SYCL_EXTERNAL double __spirv_ocl_acos(double); +SYCL_EXTERNAL double __spirv_ocl_asin(double); +SYCL_EXTERNAL double __spirv_ocl_atan(double); +SYCL_EXTERNAL double __spirv_ocl_atan2(double, double); +SYCL_EXTERNAL double __spirv_ocl_cosh(double); +SYCL_EXTERNAL double __spirv_ocl_sinh(double); +SYCL_EXTERNAL double __spirv_ocl_tanh(double); +SYCL_EXTERNAL double __spirv_ocl_acosh(double); +SYCL_EXTERNAL double __spirv_ocl_asinh(double); +SYCL_EXTERNAL double __spirv_ocl_atanh(double); +SYCL_EXTERNAL double __spirv_ocl_frexp(double, int *); +SYCL_EXTERNAL double __spirv_ocl_log10(double); +SYCL_EXTERNAL double __spirv_ocl_modf(double, double *); +SYCL_EXTERNAL double __spirv_ocl_exp2(double); +SYCL_EXTERNAL double __spirv_ocl_expm1(double); +SYCL_EXTERNAL int __spirv_ocl_ilogb(double); +SYCL_EXTERNAL double __spriv_ocl_log1p(double); +SYCL_EXTERNAL double __spirv_ocl_log2(double); +SYCL_EXTERNAL double __spirv_ocl_logb(double); +SYCL_EXTERNAL double __spirv_ocl_sqrt(double); +SYCL_EXTERNAL double __spirv_ocl_cbrt(double); +SYCL_EXTERNAL double __spirv_ocl_hypot(double); +SYCL_EXTERNAL double __spirv_ocl_erf(double); +SYCL_EXTERNAL double __spirv_ocl_erfc(double); +SYCL_EXTERNAL double __spirv_ocl_tgamma(double); +SYCL_EXTERNAL double __spirv_ocl_lgamma(double); +SYCL_EXTERNAL double __spirv_ocl_fmod(double, double); +SYCL_EXTERNAL double __spirv_ocl_remainder(double, double); +SYCL_EXTERNAL double __spirv_ocl_remquo(double, double, int *); +SYCL_EXTERNAL double __spirv_ocl_nextafter(double, double); +SYCL_EXTERNAL double __spirv_ocl_fdim(double, double); +SYCL_EXTERNAL double __spirv_ocl_fma(double, double, double); + +SYCL_EXTERNAL float __spirv_ocl_log(float); +SYCL_EXTERNAL float __spirv_ocl_logb(float); +SYCL_EXTERNAL float __spirv_ocl_sin(float); +SYCL_EXTERNAL float __spirv_ocl_cos(float); +SYCL_EXTERNAL float __spirv_ocl_sinh(float); +SYCL_EXTERNAL float __spirv_ocl_cosh(float); +SYCL_EXTERNAL float __spirv_ocl_tanh(float); +SYCL_EXTERNAL float __spirv_ocl_exp(float); +SYCL_EXTERNAL float __spirv_ocl_sqrt(float); +SYCL_EXTERNAL bool __spirv_IsInf(float); +SYCL_EXTERNAL bool __spirv_IsFinite(float); +SYCL_EXTERNAL bool __spirv_IsNan(float); +SYCL_EXTERNAL bool __spirv_IsNormal(double); +SYCL_EXTERNAL bool __spirv_SignBitSet(float); +SYCL_EXTERNAL float __spirv_ocl_hypot(float, float); +SYCL_EXTERNAL float __spirv_ocl_atan2(float, float); +SYCL_EXTERNAL float __spirv_ocl_pow(float, float); +SYCL_EXTERNAL float __spirv_ocl_ldexp(float, int); +SYCL_EXTERNAL float __spirv_ocl_copysign(float, float); +SYCL_EXTERNAL float __spirv_ocl_fmax(float, float); +SYCL_EXTERNAL float __spirv_ocl_fabs(float); +SYCL_EXTERNAL float __spirv_ocl_tan(float); +SYCL_EXTERNAL float __spirv_ocl_acos(float); +SYCL_EXTERNAL float __spirv_ocl_asin(float); +SYCL_EXTERNAL float __spirv_ocl_atan(float); +SYCL_EXTERNAL float __spirv_ocl_atan2(float, float); +SYCL_EXTERNAL float __spirv_ocl_cosh(float); +SYCL_EXTERNAL float __spirv_ocl_sinh(float); +SYCL_EXTERNAL float __spirv_ocl_tanh(float); +SYCL_EXTERNAL float __spirv_ocl_acosh(float); +SYCL_EXTERNAL float __spirv_ocl_asinh(float); +SYCL_EXTERNAL float __spirv_ocl_atanh(float); +SYCL_EXTERNAL float __spirv_ocl_frexp(float, int *); +SYCL_EXTERNAL float __spirv_ocl_log10(float); +SYCL_EXTERNAL float __spirv_ocl_modf(float, float *); +SYCL_EXTERNAL float __spirv_ocl_exp2(float); +SYCL_EXTERNAL float __spirv_ocl_expm1(float); +SYCL_EXTERNAL int __spirv_ocl_ilogb(float); +SYCL_EXTERNAL float __spirv_ocl_log1p(float); +SYCL_EXTERNAL float __spirv_ocl_log2(float); +SYCL_EXTERNAL float __spirv_ocl_sqrt(float); +SYCL_EXTERNAL float __spirv_ocl_cbrt(float); +SYCL_EXTERNAL float __spirv_ocl_hypot(float); +SYCL_EXTERNAL float __spirv_ocl_erf(float); +SYCL_EXTERNAL float __spirv_ocl_erfc(float); +SYCL_EXTERNAL float __spirv_ocl_tgamma(float); +SYCL_EXTERNAL float __spirv_ocl_lgamma(float); +SYCL_EXTERNAL float __spirv_ocl_fmod(float, float); +SYCL_EXTERNAL float __spirv_ocl_remainder(float, float); +SYCL_EXTERNAL float __spirv_ocl_remquo(float, float, int *); +SYCL_EXTERNAL float __spirv_ocl_nextafter(float, float); +SYCL_EXTERNAL float __spirv_ocl_fdim(float, float); +SYCL_EXTERNAL float __spirv_ocl_fma(float, float, float); SYCL_EXTERNAL extern "C" double __devicelib_log(double x); SYCL_EXTERNAL -extern "C" float __devicelib_logf(float x); +extern "C" float __devicelib_logf(float x); SYCL_EXTERNAL extern "C" double __devicelib_sin(double x); SYCL_EXTERNAL -extern "C" float __devicelib_sinf(float x); +extern "C" float __devicelib_sinf(float x); SYCL_EXTERNAL extern "C" double __devicelib_cos(double x); SYCL_EXTERNAL -extern "C" float __devicelib_cosf(float x); +extern "C" float __devicelib_cosf(float x); SYCL_EXTERNAL extern "C" double __devicelib_tan(double x); SYCL_EXTERNAL -extern "C" float __devicelib_tanf(float x); +extern "C" float __devicelib_tanf(float x); SYCL_EXTERNAL extern "C" double __devicelib_acos(double x); SYCL_EXTERNAL -extern "C" float __devicelib_acosf(float x); +extern "C" float __devicelib_acosf(float x); SYCL_EXTERNAL extern "C" double __devicelib_pow(double x, double y); SYCL_EXTERNAL -extern "C" float __devicelib_powf(float x, float y); +extern "C" float __devicelib_powf(float x, float y); SYCL_EXTERNAL extern "C" double __devicelib_sqrt(double x); SYCL_EXTERNAL -extern "C" float __devicelib_sqrtf(float x); +extern "C" float __devicelib_sqrtf(float x); SYCL_EXTERNAL extern "C" double __devicelib_cbrt(double x); SYCL_EXTERNAL -extern "C" float __devicelib_cbrtf(float x); +extern "C" float __devicelib_cbrtf(float x); SYCL_EXTERNAL extern "C" double __devicelib_hypot(double x, double y); SYCL_EXTERNAL -extern "C" float __devicelib_hypotf(float x, float y); +extern "C" float __devicelib_hypotf(float x, float y); SYCL_EXTERNAL extern "C" double __devicelib_erf(double x); SYCL_EXTERNAL -extern "C" float __devicelib_erff(float x); +extern "C" float __devicelib_erff(float x); SYCL_EXTERNAL extern "C" double __devicelib_erfc(double x); SYCL_EXTERNAL -extern "C" float __devicelib_erfcf(float x); +extern "C" float __devicelib_erfcf(float x); SYCL_EXTERNAL extern "C" double __devicelib_tgamma(double x); SYCL_EXTERNAL -extern "C" float __devicelib_tgammaf(float x); +extern "C" float __devicelib_tgammaf(float x); SYCL_EXTERNAL extern "C" double __devicelib_lgamma(double x); SYCL_EXTERNAL -extern "C" float __devicelib_lgammaf(float x); +extern "C" float __devicelib_lgammaf(float x); SYCL_EXTERNAL extern "C" double __devicelib_fmod(double x, double y); SYCL_EXTERNAL -extern "C" float __devicelib_fmodf(float x, float y); +extern "C" float __devicelib_fmodf(float x, float y); SYCL_EXTERNAL extern "C" double __devicelib_remainder(double x, double y); SYCL_EXTERNAL -extern "C" float __devicelib_remainderf(float x, float y); +extern "C" float __devicelib_remainderf(float x, float y); SYCL_EXTERNAL extern "C" double __devicelib_remquo(double x, double y, int *q); SYCL_EXTERNAL -extern "C" float __devicelib_remquof(float x, float y, int *q); +extern "C" float __devicelib_remquof(float x, float y, int *q); SYCL_EXTERNAL extern "C" double __devicelib_nextafter(double x, double y); SYCL_EXTERNAL -extern "C" float __devicelib_nextafterf(float x, float y); +extern "C" float __devicelib_nextafterf(float x, float y); SYCL_EXTERNAL extern "C" double __devicelib_fdim(double x, double y); SYCL_EXTERNAL -extern "C" float __devicelib_fdimf(float x, float y); +extern "C" float __devicelib_fdimf(float x, float y); SYCL_EXTERNAL extern "C" double __devicelib_fma(double x, double y, double z); SYCL_EXTERNAL -extern "C" float __devicelib_fmaf(float x, float y, float z); +extern "C" float __devicelib_fmaf(float x, float y, float z); SYCL_EXTERNAL -extern "C" float __devicelib_asinf(float x); +extern "C" float __devicelib_asinf(float x); SYCL_EXTERNAL extern "C" double __devicelib_asin(double x); SYCL_EXTERNAL -extern "C" float __devicelib_atanf(float x); +extern "C" float __devicelib_atanf(float x); SYCL_EXTERNAL extern "C" double __devicelib_atan(double x); SYCL_EXTERNAL -extern "C" float __devicelib_atan2f(float x, float y); +extern "C" float __devicelib_atan2f(float x, float y); SYCL_EXTERNAL extern "C" double __devicelib_atan2(double x, double y); SYCL_EXTERNAL -extern "C" float __devicelib_coshf(float x); +extern "C" float __devicelib_coshf(float x); SYCL_EXTERNAL extern "C" double __devicelib_cosh(double x); SYCL_EXTERNAL -extern "C" float __devicelib_sinhf(float x); +extern "C" float __devicelib_sinhf(float x); SYCL_EXTERNAL extern "C" double __devicelib_sinh(double x); SYCL_EXTERNAL -extern "C" float __devicelib_tanhf(float x); +extern "C" float __devicelib_tanhf(float x); SYCL_EXTERNAL extern "C" double __devicelib_tanh(double x); SYCL_EXTERNAL -extern "C" float __devicelib_acoshf(float x); +extern "C" float __devicelib_acoshf(float x); SYCL_EXTERNAL extern "C" double __devicelib_acosh(double x); SYCL_EXTERNAL -extern "C" float __devicelib_asinhf(float x); +extern "C" float __devicelib_asinhf(float x); SYCL_EXTERNAL extern "C" double __devicelib_asinh(double x); SYCL_EXTERNAL -extern "C" float __devicelib_atanhf(float x); +extern "C" float __devicelib_atanhf(float x); SYCL_EXTERNAL extern "C" double __devicelib_atanh(double x); @@ -289,68 +289,68 @@ SYCL_EXTERNAL extern "C" double __devicelib_frexp(double x, int *exp); SYCL_EXTERNAL -extern "C" float __devicelib_frexpf(float x, int *exp); +extern "C" float __devicelib_frexpf(float x, int *exp); SYCL_EXTERNAL extern "C" double __devicelib_ldexp(double x, int exp); SYCL_EXTERNAL -extern "C" float __devicelib_ldexpf(float x, int exp); +extern "C" float __devicelib_ldexpf(float x, int exp); SYCL_EXTERNAL extern "C" double __devicelib_log10(double x); SYCL_EXTERNAL -extern "C" float __devicelib_log10f(float x); +extern "C" float __devicelib_log10f(float x); SYCL_EXTERNAL extern "C" double __devicelib_modf(double x, double *intpart); SYCL_EXTERNAL -extern "C" float __devicelib_modff(float x, float *intpart); +extern "C" float __devicelib_modff(float x, float *intpart); SYCL_EXTERNAL extern "C" double __devicelib_exp(double x); SYCL_EXTERNAL -extern "C" float __devicelib_expf(float x); +extern "C" float __devicelib_expf(float x); SYCL_EXTERNAL extern "C" double __devicelib_exp2(double x); SYCL_EXTERNAL -extern "C" float __devicelib_exp2f(float x); +extern "C" float __devicelib_exp2f(float x); SYCL_EXTERNAL extern "C" double __devicelib_expm1(double x); SYCL_EXTERNAL -extern "C" float __devicelib_expm1f(float x); +extern "C" float __devicelib_expm1f(float x); SYCL_EXTERNAL -extern "C" int __devicelib_ilogb(double x); +extern "C" int __devicelib_ilogb(double x); SYCL_EXTERNAL -extern "C" int __devicelib_ilogbf(float x); +extern "C" int __devicelib_ilogbf(float x); SYCL_EXTERNAL extern "C" double __devicelib_log1p(double x); SYCL_EXTERNAL -extern "C" float __devicelib_log1pf(float x); +extern "C" float __devicelib_log1pf(float x); SYCL_EXTERNAL extern "C" double __devicelib_log2(double x); SYCL_EXTERNAL -extern "C" float __devicelib_log2f(float x); +extern "C" float __devicelib_log2f(float x); SYCL_EXTERNAL extern "C" double __devicelib_logb(double x); SYCL_EXTERNAL -extern "C" float __devicelib_logbf(float x); +extern "C" float __devicelib_logbf(float x); SYCL_EXTERNAL -extern "C" float __devicelib_scalbnf(float x, int n); +extern "C" float __devicelib_scalbnf(float x, int n); #endif diff --git a/sycl/source/detail/devicelib/fallback-cassert.cpp b/sycl/source/detail/devicelib/fallback-cassert.cpp index d8e9ddd0a079d..f1b255a17a887 100644 --- a/sycl/source/detail/devicelib/fallback-cassert.cpp +++ b/sycl/source/detail/devicelib/fallback-cassert.cpp @@ -8,9 +8,7 @@ #include "wrapper.h" -// __attribute((format(...))) enables compiler checks for a format string. -int __spirv_ocl_printf(const __attribute__((opencl_constant)) char *fmt, ...) - __attribute__((format(printf, 1, 2))); +#include static const __attribute__((opencl_constant)) char assert_fmt[] = "%s:%d: %s: global id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] "