diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 40ad35cc84576..acac611ae43bf 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -314,6 +314,7 @@ class SYCLIntegrationHeader { kind_accessor = kind_first, kind_std_layout, kind_sampler, + kind_stream, kind_pointer, kind_last = kind_pointer }; diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 631f81fe4a9ab..67ae17e7b5a5a 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -880,26 +880,6 @@ class KernelObjVisitor { VisitRecordFields(Owner, handlers...); } - // FIXME: Can this be refactored/handled some other way? - template - void VisitStreamRecord(const CXXRecordDecl *Owner, ParentTy &Parent, - CXXRecordDecl *Wrapper, Handlers &... handlers) { - (void)std::initializer_list{ - (handlers.enterStruct(Owner, Parent), 0)...}; - for (const auto &Field : Wrapper->fields()) { - QualType FieldTy = Field->getType(); - (void)std::initializer_list{ - (handlers.enterField(Wrapper, Field), 0)...}; - // Required to initialize accessors inside streams. - if (Util::isSyclAccessorType(FieldTy)) - KF_FOR_EACH(handleSyclAccessorType, Field, FieldTy); - (void)std::initializer_list{ - (handlers.leaveField(Wrapper, Field), 0)...}; - } - (void)std::initializer_list{ - (handlers.leaveStruct(Owner, Parent), 0)...}; - } - template void VisitRecordBases(const CXXRecordDecl *KernelFunctor, Handlers &... handlers) { @@ -924,12 +904,9 @@ class KernelObjVisitor { KF_FOR_EACH(handleSyclHalfType, Field, FieldTy); else if (Util::isSyclSpecConstantType(FieldTy)) KF_FOR_EACH(handleSyclSpecConstantType, Field, FieldTy); - else if (Util::isSyclStreamType(FieldTy)) { - CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl(); - // Handle accessors in stream class. - VisitStreamRecord(Owner, Field, RD, handlers...); + else if (Util::isSyclStreamType(FieldTy)) KF_FOR_EACH(handleSyclStreamType, Field, FieldTy); - } else if (FieldTy->isStructureOrClassType()) { + else if (FieldTy->isStructureOrClassType()) { if (KF_FOR_EACH(handleStructType, Field, FieldTy)) { CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl(); VisitRecord(Owner, Field, RD, handlers...); @@ -1297,8 +1274,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { } bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final { - addParam(FD, FieldTy); - return true; + return handleSpecialType(FD, FieldTy); } bool handleSyclStreamType(const CXXBaseSpecifier &, QualType FieldTy) final { @@ -1515,6 +1491,13 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { createSpecialMethodCall(MemberExprBases.back(), InitMethod, FD); BodyStmts.push_back(InitCall); } + CXXMethodDecl *FinalizeMethod = + getMethodByName(RecordDecl, FinalizeMethodName); + if (FinalizeMethod) { + CXXMemberCallExpr *FinalizeCall = + createSpecialMethodCall(MemberExprBases.back(), FinalizeMethod, FD); + FinalizeStmts.push_back(FinalizeCall); + } return true; } @@ -1537,6 +1520,13 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { createSpecialMethodCall(MemberExprBases.back(), InitMethod, nullptr); BodyStmts.push_back(InitCall); } + CXXMethodDecl *FinalizeMethod = + getMethodByName(RecordDecl, FinalizeMethodName); + if (FinalizeMethod) { + CXXMemberCallExpr *FinalizeCall = createSpecialMethodCall( + MemberExprBases.back(), FinalizeMethod, nullptr); + FinalizeStmts.push_back(FinalizeCall); + } return true; } @@ -1583,23 +1573,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { } bool handleSyclStreamType(FieldDecl *FD, QualType Ty) final { - const auto *StreamDecl = Ty->getAsCXXRecordDecl(); - createExprForStructOrScalar(FD); - size_t NumBases = MemberExprBases.size(); - CXXMethodDecl *InitMethod = getMethodByName(StreamDecl, InitMethodName); - if (InitMethod) { - CXXMemberCallExpr *InitCall = - createSpecialMethodCall(MemberExprBases.back(), InitMethod, FD); - BodyStmts.push_back(InitCall); - } - CXXMethodDecl *FinalizeMethod = - getMethodByName(StreamDecl, FinalizeMethodName); - if (FinalizeMethod) { - CXXMemberCallExpr *FinalizeCall = createSpecialMethodCall( - MemberExprBases[NumBases - 2], FinalizeMethod, FD); - FinalizeStmts.push_back(FinalizeCall); - } - return true; + return handleSpecialType(FD, Ty); } bool handleSyclStreamType(const CXXBaseSpecifier &BS, QualType Ty) final { @@ -1666,18 +1640,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { const CXXRecordDecl *RD = FD->getType()->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); - // Initializers for accessors inside stream not added. - if (!Util::isSyclStreamType(FD->getType())) - addStructInit(RD); - // Pop out unused initializers created in handleSyclAccesorType - // for accessors inside stream class. - else { - for (const auto &Field : RD->fields()) { - QualType FieldTy = Field->getType(); - if (Util::isSyclAccessorType(FieldTy)) - InitExprs.pop_back(); - } - } + addStructInit(RD); return true; } @@ -1831,7 +1794,7 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { } bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final { - addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); + addParam(FD, FieldTy, SYCLIntegrationHeader::kind_stream); return true; } @@ -2211,6 +2174,7 @@ static const char *paramKind2Str(KernelParamKind K) { CASE(accessor); CASE(std_layout); CASE(sampler); + CASE(stream); CASE(pointer); default: return ""; diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 3184c58edcbfc..06145b8914fc2 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -141,6 +141,7 @@ class accessor { private: void __init(__attribute__((opencl_global)) dataT *Ptr, range AccessRange, range MemRange, id Offset) {} + friend class stream; }; template @@ -314,10 +315,22 @@ class stream { public: stream(unsigned long BufferSize, unsigned long MaxStatementSize, handler &CGH) {} +#ifdef __SYCL_DEVICE_ONLY__ + // Default constructor for objects later initialized with __init member. + stream() = default; +#endif - void __init() {} + void __init(__attribute((opencl_global)) char *Ptr, range<1> AccessRange, + range<1> MemRange, id<1> Offset, int _FlushBufferSize) { + Acc.__init(Ptr, AccessRange, MemRange, Offset); + FlushBufferSize = _FlushBufferSize; + } void __finalize() {} + +private: + cl::sycl::accessor Acc; + int FlushBufferSize; }; template diff --git a/clang/test/CodeGenSYCL/stream.cpp b/clang/test/CodeGenSYCL/stream.cpp index 8620f83c5fabf..3af57a2d6f5d5 100644 --- a/clang/test/CodeGenSYCL/stream.cpp +++ b/clang/test/CodeGenSYCL/stream.cpp @@ -1,8 +1,16 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -I %S/Inputs -disable-llvm-passes -emit-llvm %s -o %t.ll // RUN: FileCheck < %t.ll --enable-var-scope %s // -// CHECK: define spir_kernel void @"{{.*}}StreamTester"(%"{{.*}}cl::sycl::stream"* byval(%"{{.*}}cl::sycl::stream") {{.*}}){{.*}} -// CHECK: call spir_func void @{{.*}}__init{{.*}}(%{{.*}}cl::sycl::stream{{.*}} addrspace(4)* %{{[0-9]+}}) +// CHECK: %[[RANGE_TYPE:"struct.*cl::sycl::range"]] +// CHECK: %[[ID_TYPE:"struct.*cl::sycl::id"]] +// CHECK: define spir_kernel void @{{.*}}StreamTester +// CHECK-SAME: i8 addrspace(1)* [[ACC_DATA:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %[[ID_TYPE]]* byval(%[[ID_TYPE]]) align 4 [[ACC_ID:%[a-zA-Z0-9_]+]], +// CHECK-SAME: i32 [[ARG_INT:%[a-zA-Z0-9_]+]]) + +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%{{.*}}cl::sycl::stream{{.*}} addrspace(4)* %{{[0-9]+}}, i8 addrspace(1)* %5, %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) {{.*}} %{{.*}} // CHECK: call spir_func void @{{.*}}__finalize{{.*}}(%{{.*}}cl::sycl::stream{{.*}} addrspace(4)* %{{[0-9]+}}) // diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 8631cee6ab640..0f4f4a1b2b7e7 100755 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -199,6 +199,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +class stream; namespace intel { namespace gpu { // Forward declare a "back-door" access class to support ESIMD. @@ -886,6 +887,7 @@ class accessor : private: friend class sycl::intel::gpu::AccessorPrivateProxy; + friend class sycl::stream; public: using value_type = DataT; diff --git a/sycl/include/CL/sycl/detail/kernel_desc.hpp b/sycl/include/CL/sycl/detail/kernel_desc.hpp index 120fb9dc4e96c..62996a406a609 100644 --- a/sycl/include/CL/sycl/detail/kernel_desc.hpp +++ b/sycl/include/CL/sycl/detail/kernel_desc.hpp @@ -27,6 +27,7 @@ enum class kernel_param_kind_t { kind_accessor, kind_std_layout, // standard layout object parameters kind_sampler, + kind_stream, kind_pointer }; diff --git a/sycl/include/CL/sycl/stream.hpp b/sycl/include/CL/sycl/stream.hpp index 20cdef064b2ef..bca427a001e93 100644 --- a/sycl/include/CL/sycl/stream.hpp +++ b/sycl/include/CL/sycl/stream.hpp @@ -63,11 +63,24 @@ using GlobalBufAccessorT = accessor; +constexpr static access::address_space GlobalBufAS = + TargetToAS::AS; +using GlobalBufPtrType = + typename detail::PtrValueType::type *; +constexpr static int GlobalBufDim = 1; + using GlobalOffsetAccessorT = accessor; +constexpr static access::address_space GlobalOffsetAS = + TargetToAS::AS; +using GlobalOffsetPtrType = + typename detail::PtrValueType::type *; +constexpr static int GlobalOffsetDim = 1; + + inline void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, unsigned &Offset, const char *Str, unsigned Len, unsigned Padding = 0) { @@ -697,6 +710,12 @@ inline __width_manipulator__ setw(int Width) { /// \ingroup sycl_api class __SYCL_EXPORT stream { public: + +#ifdef __SYCL_DEVICE_ONLY__ + // Default constructor for objects later initialized with __init member. + stream() = default; +#endif + stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH); size_t get_size() const; @@ -810,7 +829,28 @@ class __SYCL_EXPORT stream { } #ifdef __SYCL_DEVICE_ONLY__ - void __init() { + void __init(detail::GlobalBufPtrType GlobalBufPtr, + range GlobalBufAccRange, + range GlobalBufMemRange, + id GlobalBufId, + detail::GlobalOffsetPtrType GlobalOffsetPtr, + range GlobalOffsetAccRange, + range GlobalOffsetMemRange, + id GlobalOffsetId, + detail::GlobalBufPtrType GlobalFlushPtr, + range GlobalFlushAccRange, + range GlobalFlushMemRange, + id GlobalFlushId, + size_t _FlushBufferSize) { +#ifndef __SYCL_EXPLICIT_SIMD__ + GlobalBuf.__init(GlobalBufPtr, GlobalBufAccRange, GlobalBufMemRange, + GlobalBufId); + GlobalOffset.__init(GlobalOffsetPtr, GlobalOffsetAccRange, + GlobalOffsetMemRange, GlobalOffsetId); + GlobalFlushBuf.__init(GlobalFlushPtr, GlobalFlushAccRange, + GlobalFlushMemRange, GlobalFlushId); +#endif + FlushBufferSize = _FlushBufferSize; // Calculate work item's global id, this should be done once, that // is why this is done in _init method, call to __init method is generated // by frontend. As a result each work item will write to its own section @@ -834,6 +874,8 @@ class __SYCL_EXPORT stream { } #endif + friend class handler; + friend const stream &operator<<(const stream &, const char); friend const stream &operator<<(const stream &, const char *); template diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f21d39d8efc3d..e0798c89de34a 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1652,6 +1652,8 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch( const detail::plugin &Plugin = MQueue->getPlugin(); for (ArgDesc &Arg : ExecKernel->MArgs) { switch (Arg.MType) { + case kernel_param_kind_t::kind_stream: + break; case kernel_param_kind_t::kind_accessor: { Requirement *Req = (Requirement *)(Arg.MPtr); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 5a54760e813e7..74fc220f050be 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,41 @@ void handler::associateWithHandler(detail::AccessorBaseHost *AccBase, /*index*/ 0); } +static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, + const size_t Index, size_t &IndexShift, + const int Size, + bool IsKernelCreatedFromSource, + size_t GlobalSize, + vector_class &Args) { + using detail::kernel_param_kind_t; + if (AccImpl->PerWI) + AccImpl->resize(GlobalSize); + + Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size, + Index + IndexShift); + + // TODO ESIMD currently does not suport offset, memory and access ranges - + // accessor::init for ESIMD-mode accessor has a single field, translated + // to a single kernel argument set above. + if (!AccImpl->MIsESIMDAcc && !IsKernelCreatedFromSource) { + // Dimensionality of the buffer is 1 when dimensionality of the + // accessor is 0. + const size_t SizeAccField = + sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MAccessRange[0], SizeAccField, + Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MMemoryRange[0], SizeAccField, + Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MOffset[0], SizeAccField, Index + IndexShift); + } +} + void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource) { @@ -141,6 +177,40 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift); break; } + case kernel_param_kind_t::kind_stream: { + // Stream contains several accessors inside. + stream *S = static_cast(Ptr); + + detail::AccessorBaseHost *GBufBase = + (detail::AccessorBaseHost *)&S->GlobalBuf; + detail::AccessorImplPtr GBufImpl = detail::getSyclObjImpl(*GBufBase); + detail::Requirement *GBufReq = GBufImpl.get(); + addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, + MNDRDesc.GlobalSize.size(), MArgs); + ++IndexShift; + detail::AccessorBaseHost *GOffsetBase = + (detail::AccessorBaseHost *)&S->GlobalOffset; + detail::AccessorImplPtr GOfssetImpl = detail::getSyclObjImpl(*GOffsetBase); + detail::Requirement *GOffsetReq = GOfssetImpl.get(); + addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, + MNDRDesc.GlobalSize.size(), MArgs); + ++IndexShift; + detail::AccessorBaseHost *GFlushBase = + (detail::AccessorBaseHost *)&S->GlobalFlushBuf; + detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase); + detail::Requirement *GFlushReq = GFlushImpl.get(); + addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, + MNDRDesc.GlobalSize.size(), MArgs); + ++IndexShift; + MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, + &S->FlushBufferSize, sizeof(S->FlushBufferSize), + Index + IndexShift); + + break; + } case kernel_param_kind_t::kind_accessor: { // For args kind of accessor Size is information about accessor. // The first 11 bits of Size encodes the accessor target. @@ -149,37 +219,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, case access::target::global_buffer: case access::target::constant_buffer: { detail::Requirement *AccImpl = static_cast(Ptr); - - // Stream implementation creates an accessor with initial size for - // work item. Number of work items is not available during - // stream construction, that is why size of the accessor is updated here - // using information about number of work items. - if (AccImpl->PerWI) { - AccImpl->resize(MNDRDesc.GlobalSize.size()); - } - MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift); - - // TODO ESIMD currently does not suport offset, memory and access ranges - - // accessor::init for ESIMD-mode accessor has a single field, translated - // to a single kernel argument set above. - if (!AccImpl->MIsESIMDAcc && !IsKernelCreatedFromSource) { - // Dimensionality of the buffer is 1 when dimensionality of the - // accessor is 0. - const size_t SizeAccField = - sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims); - ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MAccessRange[0], SizeAccField, - Index + IndexShift); - ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MMemoryRange[0], SizeAccField, - Index + IndexShift); - ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MOffset[0], SizeAccField, - Index + IndexShift); - } + addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, + IsKernelCreatedFromSource, + MNDRDesc.GlobalSize.size(), MArgs); break; } case access::target::local: {