diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 413196a9cf076..5c1fff316dc5a 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -125,11 +125,12 @@ SYCL_ACCESSOR_IMPL(isTargetHostAccess(accessTarget) && dimensions == 0) { SYCL_ACCESSOR_IMPL(isTargetHostAccess(accessTarget) && dimensions > 0) { dataT *Data; range Range; + range BufRange; id Offset; - accessor_impl(dataT *Data, range Range, - id Offset = {}) - : Data(Data), Range(Range), Offset(Offset) {} + accessor_impl(dataT * Data, range Range, + range BufRange, id Offset = {}) + : Data(Data), Range(Range), BufRange(BufRange), Offset(Offset) {} // Returns the number of accessed elements. size_t get_count() const { return Range.size(); } @@ -146,10 +147,9 @@ SYCL_ACCESSOR_IMPL(!isTargetHostAccess(accessTarget) && // reinterpret casting while setting kernel arguments in order to get cl_mem // value from the buffer regardless of the accessor's dimensionality. #ifndef __SYCL_DEVICE_ONLY__ - detail::buffer_impl *m_Buf = nullptr; - + detail::buffer_impl> *m_Buf = nullptr; #else - char padding[sizeof(detail::buffer_impl *)]; + char padding[sizeof(detail::buffer_impl> *)]; #endif // __SYCL_DEVICE_ONLY__ dataT *Data; @@ -182,22 +182,23 @@ SYCL_ACCESSOR_IMPL(!isTargetHostAccess(accessTarget) && // reinterpret casting while setting kernel arguments in order to get cl_mem // value from the buffer regardless of the accessor's dimensionality. #ifndef __SYCL_DEVICE_ONLY__ - detail::buffer_impl *m_Buf = nullptr; + detail::buffer_impl> *m_Buf = nullptr; #else - char padding[sizeof(detail::buffer_impl *)]; + char padding[sizeof(detail::buffer_impl> *)]; #endif // __SYCL_DEVICE_ONLY__ dataT *Data; range Range; + range BufRange; id Offset; // Device accessors must be associated with a command group handler. // The handler though can be nullptr at the creation point if the // accessor is a placeholder accessor. - accessor_impl(dataT *Data, range Range, - handler *Handler = nullptr, id Offset = {}) - : Data(Data), Range(Range), Offset(Offset) - {} + accessor_impl(dataT * Data, range Range, + range BufRange, handler *Handler = nullptr, + id Offset = {}) + : Data(Data), Range(Range), BufRange(BufRange), Offset(Offset) {} // Returns the number of accessed elements. size_t get_count() const { return Range.size(); } @@ -633,8 +634,8 @@ class accessor #ifdef __SYCL_DEVICE_ONLY__ ; // This ctor can't be used in device code, so no need to define it. #else // !__SYCL_DEVICE_ONLY__ - : __impl(detail::getSyclObjImpl(bufferRef)->BufPtr, - detail::getSyclObjImpl(bufferRef)->Range, + : __impl((dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, + bufferRef.get_range(), bufferRef.get_range(), &commandGroupHandlerRef) { auto BufImpl = detail::getSyclObjImpl(bufferRef); if (BufImpl->OpenCLInterop && !BufImpl->isValidAccessToMem(accessMode)) { @@ -669,8 +670,8 @@ class accessor AccessTarget == access::target::constant_buffer))) && Dimensions > 0), buffer>::type &bufferRef) - : __impl(detail::getSyclObjImpl(bufferRef)->BufPtr, - detail::getSyclObjImpl(bufferRef)->Range) { + : __impl((dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, + bufferRef.get_range(), bufferRef.get_range()) { auto BufImpl = detail::getSyclObjImpl(bufferRef); if (AccessTarget == access::target::host_buffer) { if (BufImpl->OpenCLInterop) { @@ -701,17 +702,17 @@ class accessor access::target AccessTarget = accessTarget, access::placeholder IsPlaceholder = isPlaceholder> accessor(typename std::enable_if< - (IsPlaceholder == access::placeholder::false_t && - (AccessTarget == access::target::global_buffer || - AccessTarget == access::target::constant_buffer) && - Dimensions > 0), - buffer>::type &bufferRef, + (IsPlaceholder == access::placeholder::false_t && + (AccessTarget == access::target::global_buffer || + AccessTarget == access::target::constant_buffer) && + Dimensions > 0), + buffer>::type &bufferRef, handler &commandGroupHandlerRef) #ifdef __SYCL_DEVICE_ONLY__ ; // This ctor can't be used in device code, so no need to define it. #else - : __impl(detail::getSyclObjImpl(bufferRef)->BufPtr, - detail::getSyclObjImpl(bufferRef)->Range, + : __impl((dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, + bufferRef.get_range(), bufferRef.get_range(), &commandGroupHandlerRef) { auto BufImpl = detail::getSyclObjImpl(bufferRef); if (BufImpl->OpenCLInterop && !BufImpl->isValidAccessToMem(accessMode)) { @@ -739,20 +740,19 @@ class accessor access::target AccessTarget = accessTarget, access::placeholder IsPlaceholder = isPlaceholder> accessor(typename std::enable_if< - ((IsPlaceholder == access::placeholder::false_t && - AccessTarget == access::target::host_buffer) || - (IsPlaceholder == access::placeholder::true_t && - (AccessTarget == access::target::global_buffer || - AccessTarget == access::target::constant_buffer) && - Dimensions > 0)), - buffer>::type &bufferRef, - range Range, - id Offset = {} - ) + ((IsPlaceholder == access::placeholder::false_t && + AccessTarget == access::target::host_buffer) || + (IsPlaceholder == access::placeholder::true_t && + (AccessTarget == access::target::global_buffer || + AccessTarget == access::target::constant_buffer) && + Dimensions > 0)), + buffer>::type &bufferRef, + range Range, id Offset = {}) #ifdef __SYCL_DEVICE_ONLY__ ; // This ctor can't be used in device code, so no need to define it. -#else // !__SYCL_DEVICE_ONLY__ - : __impl(detail::getSyclObjImpl(bufferRef)->BufPtr, Range, Offset) { +#else // !__SYCL_DEVICE_ONLY__ + : __impl(detail::getSyclObjImpl(bufferRef)->BufPtr, Range, + bufferRef.get_range(), Offset) { auto BufImpl = detail::getSyclObjImpl(bufferRef); if (AccessTarget == access::target::host_buffer) { if (BufImpl->OpenCLInterop) { @@ -769,7 +769,7 @@ class accessor "interoperability buffer"); } } -#endif // !__SYCL_DEVICE_ONLY__ +#endif // !__SYCL_DEVICE_ONLY__ // buffer ctor #6: // accessor(buffer &, handler &, range Range, id Offset); @@ -784,20 +784,18 @@ class accessor access::target AccessTarget = accessTarget, access::placeholder IsPlaceholder = isPlaceholder> accessor(typename std::enable_if< - (IsPlaceholder == access::placeholder::false_t && - (AccessTarget == access::target::global_buffer || - AccessTarget == access::target::constant_buffer) && - Dimensions > 0), - buffer>::type &bufferRef, - handler &commandGroupHandlerRef, - range Range, - id Offset = {} - ) + (IsPlaceholder == access::placeholder::false_t && + (AccessTarget == access::target::global_buffer || + AccessTarget == access::target::constant_buffer) && + Dimensions > 0), + buffer>::type &bufferRef, + handler &commandGroupHandlerRef, range Range, + id Offset = {}) #ifdef __SYCL_DEVICE_ONLY__ ; // This ctor can't be used in device code, so no need to define it. -#else // !__SYCL_DEVICE_ONLY__ - : __impl(detail::getSyclObjImpl(bufferRef)->BufPtr, Range, - &commandGroupHandlerRef, Offset) { +#else // !__SYCL_DEVICE_ONLY__ + : __impl((dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, Range, + bufferRef.get_range(), &commandGroupHandlerRef, Offset) { auto BufImpl = detail::getSyclObjImpl(bufferRef); if (BufImpl->OpenCLInterop && !BufImpl->isValidAccessToMem(accessMode)) { throw cl::sycl::runtime_error( @@ -807,7 +805,7 @@ class accessor commandGroupHandlerRef.AddBufDep(*BufImpl); __impl.m_Buf = BufImpl.get(); } -#endif // !__SYCL_DEVICE_ONLY__ +#endif // !__SYCL_DEVICE_ONLY__ // TODO: // local accessor ctor #1 diff --git a/sycl/include/CL/sycl/buffer.hpp b/sycl/include/CL/sycl/buffer.hpp index f06a4aff59dfe..eac3d1fee317e 100644 --- a/sycl/include/CL/sycl/buffer.hpp +++ b/sycl/include/CL/sycl/buffer.hpp @@ -21,7 +21,7 @@ class queue; template class range; template > + typename AllocatorT = cl::sycl::buffer_allocator> class buffer { public: using value_type = T; @@ -30,9 +30,10 @@ class buffer { using allocator_type = AllocatorT; buffer(const range &bufferRange, - const property_list &propList = {}) { - impl = std::make_shared>( - bufferRange, propList); + const property_list &propList = {}) + : Range(bufferRange) { + impl = std::make_shared>( + get_count() * sizeof(T), propList); } // buffer(const range &bufferRange, AllocatorT allocator, @@ -42,9 +43,10 @@ class buffer { // } buffer(T *hostData, const range &bufferRange, - const property_list &propList = {}) { - impl = std::make_shared>( - hostData, bufferRange, propList); + const property_list &propList = {}) + : Range(bufferRange) { + impl = std::make_shared>( + hostData, get_count() * sizeof(T), propList); } // buffer(T *hostData, const range &bufferRange, @@ -54,9 +56,10 @@ class buffer { // } buffer(const T *hostData, const range &bufferRange, - const property_list &propList = {}) { - impl = std::make_shared>( - hostData, bufferRange, propList); + const property_list &propList = {}) + : Range(bufferRange) { + impl = std::make_shared>( + hostData, get_count() * sizeof(T), propList); } // buffer(const T *hostData, const range &bufferRange, @@ -74,9 +77,10 @@ class buffer { buffer(const shared_ptr_class &hostData, const range &bufferRange, - const property_list &propList = {}) { - impl = std::make_shared>( - hostData, bufferRange, propList); + const property_list &propList = {}) + : Range(bufferRange) { + impl = std::make_shared>( + hostData, get_count() * sizeof(T), propList); } // template @@ -89,9 +93,10 @@ class buffer { template > buffer(InputIterator first, InputIterator last, - const property_list &propList = {}) { - impl = std::make_shared>( - first, last, propList); + const property_list &propList = {}) + : Range(range<1>(std::distance(first, last))) { + impl = std::make_shared>( + first, last, get_count() * sizeof(T), propList); } // buffer(buffer b, const id @@ -102,7 +107,7 @@ class buffer { template > buffer(cl_mem MemObject, const context &SyclContext, event AvailableEvent = {}) { - impl = std::make_shared>( + impl = std::make_shared>( MemObject, SyclContext, AvailableEvent); } @@ -124,26 +129,27 @@ class buffer { /* -- property interface members -- */ - range get_range() const { return impl->get_range(); } + range get_range() const { return Range; } - size_t get_count() const { return impl->get_count(); } + size_t get_count() const { return Range.size(); } size_t get_size() const { return impl->get_size(); } - AllocatorT get_allocator() const { return impl->get_allocator(); } + // AllocatorT get_allocator() const { return impl->get_allocator(); } template accessor get_access(handler &commandGroupHandler) { - return impl->template get_access(*this, commandGroupHandler); + return impl->template get_access( + *this, commandGroupHandler); } template accessor get_access() { - return impl->template get_access(*this); + return impl->template get_access(*this); } // template is_sub_buffer(); } - // template - // buffer - // reinterpret(range reinterpretRange) const { - // return impl->reinterpret((reinterpretRange)); - // } + template + buffer + reinterpret(range reinterpretRange) const { + if (sizeof(ReinterpretT) * reinterpretRange.size() != get_size()) + throw cl::sycl::invalid_object_error( + "Total size in bytes represented by the type and range of the " + "reinterpreted SYCL buffer does not equal the total size in bytes " + "represented by the type and range of this SYCL buffer"); + return buffer(impl, + reinterpretRange); + } private: - shared_ptr_class> impl; + shared_ptr_class> impl; template friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); + template friend class buffer; + range Range; + + // Reinterpret contructor + buffer(shared_ptr_class> Impl, + range reinterpretRange) + : impl(Impl), Range(reinterpretRange){}; }; } // namespace sycl } // namespace cl @@ -190,8 +209,7 @@ template struct hash> { size_t operator()(const cl::sycl::buffer &b) const { - return hash>>()( + return hash>>()( cl::sycl::detail::getSyclObjImpl(b)); } }; diff --git a/sycl/include/CL/sycl/detail/buffer_impl.hpp b/sycl/include/CL/sycl/detail/buffer_impl.hpp index 73064e988538f..e1d29619f34ed 100644 --- a/sycl/include/CL/sycl/detail/buffer_impl.hpp +++ b/sycl/include/CL/sycl/detail/buffer_impl.hpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -40,77 +39,80 @@ template class id; template class range; template using buffer_allocator = std::allocator; namespace detail { -template > -class buffer_impl { +template class buffer_impl { public: - buffer_impl(const range &bufferRange, - const property_list &propList = {}) - : buffer_impl((T *)nullptr, bufferRange, propList) {} + buffer_impl(const size_t sizeInBytes, const property_list &propList) + : buffer_impl((void *)nullptr, sizeInBytes, propList) {} - buffer_impl(T *hostData, const range &bufferRange, - const property_list &propList = {}) - : Range(bufferRange), Props(propList) { + buffer_impl(void *hostData, const size_t sizeInBytes, + const property_list &propList) + : SizeInBytes(sizeInBytes), Props(propList) { if (Props.has_property()) { BufPtr = hostData; } else { BufData.resize(get_size()); - BufPtr = reinterpret_cast(BufData.data()); + BufPtr = reinterpret_cast(BufData.data()); if (hostData != nullptr) { - set_final_data(hostData); - std::copy(hostData, hostData + get_count(), BufPtr); + auto HostPtr = reinterpret_cast(hostData); + set_final_data(HostPtr); + std::copy(HostPtr, HostPtr + SizeInBytes, BufData.data()); } } } // TODO temporary solution for allowing initialisation with const data - buffer_impl(const T *hostData, const range &bufferRange, - const property_list &propList = {}) - : Range(bufferRange), Props(propList) { + buffer_impl(const void *hostData, const size_t sizeInBytes, + const property_list &propList) + : SizeInBytes(sizeInBytes), Props(propList) { if (Props.has_property()) { // TODO make this buffer read only - BufPtr = const_cast(hostData); + BufPtr = const_cast(hostData); } else { BufData.resize(get_size()); - BufPtr = reinterpret_cast(BufData.data()); + BufPtr = reinterpret_cast(BufData.data()); if (hostData != nullptr) { - std::copy(hostData, hostData + get_count(), BufPtr); + std::copy((char *)hostData, (char *)hostData + SizeInBytes, + BufData.data()); } } } - buffer_impl(const shared_ptr_class &hostData, - const range &bufferRange, - const property_list &propList = {}) - : Range(bufferRange), Props(propList) { + template + buffer_impl(const shared_ptr_class &hostData, const size_t sizeInBytes, + const property_list &propList) + : SizeInBytes(sizeInBytes), Props(propList) { if (Props.has_property()) { BufPtr = hostData.get(); } else { BufData.resize(get_size()); - BufPtr = reinterpret_cast(BufData.data()); + BufPtr = reinterpret_cast(BufData.data()); if (hostData.get() != nullptr) { weak_ptr_class hostDataWeak = hostData; set_final_data(hostDataWeak); - std::copy(hostData.get(), hostData.get() + get_count(), BufPtr); + std::copy((char *)hostData.get(), (char *)hostData.get() + SizeInBytes, + BufData.data()); } } } - template > - buffer_impl(InputIterator first, InputIterator last, - const property_list &propList = {}) - : Range(range<1>(std::distance(first, last))), Props(propList) { + template + buffer_impl(InputIterator first, InputIterator last, const size_t sizeInBytes, + const property_list &propList) + : SizeInBytes(sizeInBytes), Props(propList) { if (Props.has_property()) { + // TODO next line looks unsafe BufPtr = &*first; } else { BufData.resize(get_size()); - BufPtr = reinterpret_cast(BufData.data()); - std::copy(first, last, BufPtr); + BufPtr = reinterpret_cast(BufData.data()); + // We need cast BufPtr to pointer to the iteration type to get correct + // offset in std::copy when it will increment destination pointer. + auto *Ptr = reinterpret_cast< + typename std::iterator_traits::pointer>(BufPtr); + std::copy(first, last, Ptr); } } - template > buffer_impl(cl_mem MemObject, const context &SyclContext, event AvailableEvent = {}) : OpenCLInterop(true), AvailableEvent(AvailableEvent) { @@ -129,11 +131,7 @@ class buffer_impl { CHECK_OCL_CODE(clRetainMemObject(MemObject)); } - range get_range() const { return Range; } - - size_t get_count() const { return Range.size(); } - - size_t get_size() const { return get_count() * sizeof(T); } + size_t get_size() const { return SizeInBytes; } ~buffer_impl() { if (!OpenCLInterop) @@ -155,13 +153,14 @@ class buffer_impl { void set_final_data(std::nullptr_t) { uploadData = nullptr; } - void set_final_data(weak_ptr_class final_data) { + template void set_final_data(weak_ptr_class final_data) { if (OpenCLInterop) throw cl::sycl::runtime_error( "set_final_data could not be used with interoperability buffer"); uploadData = [this, final_data]() { if (auto finalData = final_data.lock()) { - std::copy(BufPtr, BufPtr + get_count(), finalData.get()); + T *Ptr = reinterpret_cast(BufPtr); + std::copy(Ptr, Ptr + SizeInBytes / sizeof(T), finalData.get()); } }; } @@ -174,11 +173,16 @@ class buffer_impl { "Сan not write in a constant Destination. Destination should " "not be const."); uploadData = [this, final_data]() mutable { - std::copy(BufPtr, BufPtr + get_count(), final_data); + auto *Ptr = + reinterpret_cast::pointer>( + BufPtr); + size_t ValSize = + sizeof(typename std::iterator_traits::value_type); + std::copy(Ptr, Ptr + SizeInBytes / ValSize, final_data); }; } - template accessor get_access(buffer &Buffer, @@ -187,7 +191,7 @@ class buffer_impl { Buffer, commandGroupHandler); } - template + template accessor get_access(buffer &Buffer) { @@ -205,10 +209,11 @@ class buffer_impl { void copy(QueueImplPtr Queue, std::vector DepEvents, EventImplPtr Event, simple_scheduler::BufferReqPtr SrcReq, - const int DimSrc, const size_t *const SrcRange, + const int DimSrc, const int DimDest, const size_t *const SrcRange, const size_t *const SrcOffset, const size_t *const DestOffset, - const size_t SizeTySrc, const size_t SizeSrc, - const size_t *const BuffSrcRange); + const size_t SizeTySrc, const size_t SizeTyDest, + const size_t SizeSrc, const size_t *const BuffSrcRange, + const size_t *const BuffDestRange); size_t convertSycl2OCLMode(cl::sycl::access::mode mode); @@ -242,13 +247,13 @@ class buffer_impl { bool OpenCLInterop = false; event AvailableEvent; cl_context OpenCLContext = nullptr; - T *BufPtr = nullptr; + void *BufPtr = nullptr; vector_class BufData; // TODO: enable support of cl_mem objects from multiple contexts // TODO: at the current moment, using a buffer on multiple devices // or on a device and a host simultaneously is not supported (the // implementation is incorrect). - range Range; + size_t SizeInBytes = 0; property_list Props; std::function uploadData = nullptr; template -void buffer_impl::fill( - QueueImplPtr Queue, std::vector DepEvents, - EventImplPtr Event, const void *Pattern, size_t PatternSize, int Dim, - size_t *OffsetArr, size_t *RangeArr) { +template +void buffer_impl::fill(QueueImplPtr Queue, + std::vector DepEvents, + EventImplPtr Event, const void *Pattern, + size_t PatternSize, int Dim, + size_t *OffsetArr, size_t *RangeArr) { - assert(dimensions == 1 && - "OpenCL doesn't support multidimensional fill method."); + assert(Dim == 1 && "OpenCL doesn't support multidimensional fill method."); assert(!Queue->is_host() && "Host case is handled in other place."); size_t Offset = OffsetArr[0]; @@ -287,19 +292,16 @@ void buffer_impl::fill( CHECK_OCL_CODE(clReleaseCommandQueue(CommandQueue)); } -template -void buffer_impl::copy( +template +void buffer_impl::copy( QueueImplPtr Queue, std::vector DepEvents, EventImplPtr Event, simple_scheduler::BufferReqPtr SrcReq, const int DimSrc, - const size_t *const SrcRange, const size_t *const SrcOffset, - const size_t *const DestOffset, const size_t SizeTySrc, - const size_t SizeSrc, const size_t *const BuffSrcRange) { + const int DimDest, const size_t *const SrcRange, + const size_t *const SrcOffset, const size_t *const DestOffset, + const size_t SizeTySrc, const size_t SizeTyDest, const size_t SizeSrc, + const size_t *const BuffSrcRange, const size_t *const BuffDestRange) { assert(!Queue->is_host() && "Host case is handled in other place."); - size_t *BuffDestRange = &get_range()[0]; - size_t SizeTyDest = sizeof(T); - const int DimDest = dimensions; - ContextImplPtr Context = detail::getSyclObjImpl(Queue->get_context()); cl_event &BufEvent = Event->getHandleRef(); @@ -341,8 +343,8 @@ void buffer_impl::copy( Event->setIsHostEvent(false); } -template -void buffer_impl::moveMemoryTo( +template +void buffer_impl::moveMemoryTo( QueueImplPtr Queue, std::vector DepEvents, EventImplPtr Event) { @@ -425,9 +427,9 @@ void buffer_impl::moveMemoryTo( assert(0 && "Not handled"); } -template -size_t buffer_impl::convertSycl2OCLMode( - cl::sycl::access::mode mode) { +template +size_t +buffer_impl::convertSycl2OCLMode(cl::sycl::access::mode mode) { switch (mode) { case cl::sycl::access::mode::read: return CL_MEM_READ_ONLY; @@ -442,8 +444,8 @@ size_t buffer_impl::convertSycl2OCLMode( } } -template -bool buffer_impl::isValidAccessToMem( +template +bool buffer_impl::isValidAccessToMem( cl::sycl::access::mode AccessMode) { cl_mem_flags Flags; assert(OCLState.Mem != nullptr && @@ -456,10 +458,11 @@ bool buffer_impl::isValidAccessToMem( return true; } -template -void buffer_impl::allocate( - QueueImplPtr Queue, std::vector DepEvents, - EventImplPtr Event, cl::sycl::access::mode mode) { +template +void buffer_impl::allocate(QueueImplPtr Queue, + std::vector DepEvents, + EventImplPtr Event, + cl::sycl::access::mode mode) { detail::waitEvents(DepEvents); @@ -508,8 +511,8 @@ void buffer_impl::allocate( assert(0 && "Unhandled Alloca"); } -template -cl_mem buffer_impl::getOpenCLMem() const { +template +cl_mem buffer_impl::getOpenCLMem() const { assert(nullptr != OCLState.Mem); return OCLState.Mem; } diff --git a/sycl/include/CL/sycl/detail/scheduler/commands.cpp b/sycl/include/CL/sycl/detail/scheduler/commands.cpp index 7d1ae84af1277..4679f31c20694 100644 --- a/sycl/include/CL/sycl/detail/scheduler/commands.cpp +++ b/sycl/include/CL/sycl/detail/scheduler/commands.cpp @@ -59,9 +59,9 @@ void ExecuteKernelCommand< switch (static_cast(m_KernelArgs[I].info)) { case cl::sycl::access::target::global_buffer: case cl::sycl::access::target::constant_buffer: { - auto *Ptr = - *(getParamAddress *>( - &m_HostKernel, m_KernelArgs[I].offset)); + auto *Ptr = *(getParamAddress< + cl::sycl::detail::buffer_impl> *>( + &m_HostKernel, m_KernelArgs[I].offset)); cl_mem CLBuf = Ptr->getOpenCLMem(); CHECK_OCL_CODE(clSetKernelArg(m_ClKernel, I, sizeof(cl_mem), &CLBuf)); break; diff --git a/sycl/include/CL/sycl/detail/scheduler/commands.h b/sycl/include/CL/sycl/detail/scheduler/commands.h index 78870c34f5bdd..7d18de59c9ed4 100644 --- a/sycl/include/CL/sycl/detail/scheduler/commands.h +++ b/sycl/include/CL/sycl/detail/scheduler/commands.h @@ -361,13 +361,14 @@ template class CopyCommand : public Command { public: CopyCommand(BufferReqPtr BufSrc, BufferReqPtr BufDest, QueueImplPtr Queue, range SrcRange, id SrcOffset, - id DestOffset, size_t SizeTySrc, size_t SizeSrc, - range BuffSrcRange) + id DestOffset, size_t SizeTySrc, size_t SizeTyDest, + size_t SizeSrc, range BuffSrcRange, + range BuffDestRange) : Command(Command::COPY, std::move(Queue)), m_BufSrc(std::move(BufSrc)), m_BufDest(std::move(BufDest)), m_SrcRange(std::move(SrcRange)), m_SrcOffset(std::move(SrcOffset)), m_DestOffset(std::move(DestOffset)), - m_SizeTySrc(SizeTySrc), m_SizeSrc(SizeSrc), - m_BuffSrcRange(BuffSrcRange) {} + m_SizeTySrc(SizeTySrc), m_SizeTyDest(SizeTyDest), m_SizeSrc(SizeSrc), + m_BuffSrcRange(BuffSrcRange), m_BuffDestRange(BuffDestRange) {} access::mode getAccessModeType() const { return m_BufDest->getAccessModeType(); @@ -381,8 +382,9 @@ template class CopyCommand : public Command { assert(nullptr != m_BufSrc && "m_BufSrc is nullptr"); assert(nullptr != m_BufDest && "m_BufDest is nullptr"); m_BufDest->copy(m_Queue, std::move(DepEvents), std::move(Event), m_BufSrc, - DimSrc, &m_SrcRange[0], &m_SrcOffset[0], &m_DestOffset[0], - m_SizeTySrc, m_SizeSrc, &m_BuffSrcRange[0]); + DimSrc, DimDest, &m_SrcRange[0], &m_SrcOffset[0], + &m_DestOffset[0], m_SizeTySrc, m_SizeTyDest, m_SizeSrc, + &m_BuffSrcRange[0], &m_BuffDestRange[0]); } BufferReqPtr m_BufSrc = nullptr; BufferReqPtr m_BufDest = nullptr; @@ -390,8 +392,10 @@ template class CopyCommand : public Command { id m_SrcOffset; id m_DestOffset; size_t m_SizeTySrc; + size_t m_SizeTyDest; size_t m_SizeSrc; range m_BuffSrcRange; + range m_BuffDestRange; }; } // namespace simple_scheduler diff --git a/sycl/include/CL/sycl/detail/scheduler/requirements.h b/sycl/include/CL/sycl/detail/scheduler/requirements.h index 47968905018a5..5bbf9af10fb49 100644 --- a/sycl/include/CL/sycl/detail/scheduler/requirements.h +++ b/sycl/include/CL/sycl/detail/scheduler/requirements.h @@ -19,7 +19,7 @@ namespace cl { namespace sycl { namespace detail { -template class buffer_impl; +template class buffer_impl; } // namespace detail namespace detail { @@ -70,9 +70,12 @@ class BufferRequirement { virtual void copy(QueueImplPtr Queue, std::vector DepEvents, EventImplPtr Event, BufferReqPtr SrcReq, const int DimSrc, - const size_t *const SrcRange, const size_t *const SrcOffset, + const int DimDest, const size_t *const SrcRange, + const size_t *const SrcOffset, const size_t *const DestOffset, const size_t SizeTySrc, - const size_t SizeSrc, const size_t *const BuffSrcRange) = 0; + const size_t SizeTyDest, const size_t SizeSrc, + const size_t *const BuffSrcRange, + const size_t *const BuffDestRange) = 0; access::target getTargetType() const { return m_TargetType; } @@ -93,12 +96,10 @@ class BufferRequirement { access::target m_TargetType; }; -template +template class BufferStorage : public BufferRequirement { public: - BufferStorage( - typename cl::sycl::detail::buffer_impl &Buffer) + BufferStorage(typename cl::sycl::detail::buffer_impl &Buffer) : BufferRequirement(&Buffer, Mode, Target), m_Buffer(&Buffer) {} void allocate(QueueImplPtr Queue, std::vector DepEvents, @@ -125,15 +126,18 @@ class BufferStorage : public BufferRequirement { void copy(QueueImplPtr Queue, std::vector DepEvents, EventImplPtr Event, BufferReqPtr SrcReq, const int DimSrc, - const size_t *const SrcRange, const size_t *const SrcOffset, - const size_t *const DestOffset, const size_t SizeTySrc, - const size_t SizeSrc, const size_t *const BuffSrcRange) override { + const int DimDest, const size_t *const SrcRange, + const size_t *const SrcOffset, const size_t *const DestOffset, + const size_t SizeTySrc, const size_t SizeTyDest, + const size_t SizeSrc, const size_t *const BuffSrcRange, + const size_t *const BuffDestRange) override { assert(m_Buffer != nullptr && "BufferStorage::m_Buffer is nullptr"); assert(SrcReq != nullptr && "BufferStorage::SrcReq is nullptr"); m_Buffer->copy(std::move(Queue), std::move(DepEvents), std::move(Event), - std::move(SrcReq), DimSrc, SrcRange, SrcOffset, DestOffset, - SizeTySrc, SizeSrc, BuffSrcRange); + std::move(SrcReq), DimSrc, DimDest, SrcRange, SrcOffset, + DestOffset, SizeTySrc, SizeTyDest, SizeSrc, BuffSrcRange, + BuffDestRange); } cl_mem getCLMemObject() override { @@ -142,7 +146,7 @@ class BufferStorage : public BufferRequirement { } private: - cl::sycl::detail::buffer_impl *m_Buffer = nullptr; + cl::sycl::detail::buffer_impl *m_Buffer = nullptr; }; struct classcomp { diff --git a/sycl/include/CL/sycl/detail/scheduler/scheduler.cpp b/sycl/include/CL/sycl/detail/scheduler/scheduler.cpp index 0509196da4bd0..d2ee4676af3e8 100644 --- a/sycl/include/CL/sycl/detail/scheduler/scheduler.cpp +++ b/sycl/include/CL/sycl/detail/scheduler/scheduler.cpp @@ -25,10 +25,10 @@ namespace cl { namespace sycl { namespace simple_scheduler { -template +template static BufferReqPtr getReqForBuffer(const std::set &BufReqs, - const detail::buffer_impl &Buf) { + const detail::buffer_impl &Buf) { for (const auto &Req : BufReqs) { if (Req->getUniqID() == &Buf) { return Req; @@ -38,18 +38,16 @@ getReqForBuffer(const std::set &BufReqs, } // Adds a buffer requirement for this node. -template -void Node::addBufRequirement( - detail::buffer_impl &Buf) { +template +void Node::addBufRequirement(detail::buffer_impl &Buf) { BufferReqPtr Req = getReqForBuffer(m_Bufs, Buf); // Check if there is requirement for the same buffer already. if (nullptr != Req) { Req->addAccessMode(Mode); } else { - BufferReqPtr BufStor = std::make_shared< - BufferStorage>(Buf); + BufferReqPtr BufStor = + std::make_shared>(Buf); m_Bufs.insert(BufStor); } } @@ -60,11 +58,11 @@ template &&Acc, int argIndex) { - detail::buffer_impl *buf = + detail::buffer_impl> *buf = Acc.template accessor_base::__impl() ->m_Buf; - addBufRequirement(*buf); + addBufRequirement(*buf); addInteropArg(nullptr, buf->get_size(), argIndex, getReqForBuffer(m_Bufs, *buf)); } @@ -132,7 +130,7 @@ void Node::addExplicitMemOp( isPlaceholder>::__impl(); assert(DestBase != nullptr && "Accessor should have an initialized accessor_base"); - detail::buffer_impl *Buf = DestBase->m_Buf; + detail::buffer_impl> *Buf = DestBase->m_Buf; range Range = DestBase->Range; id Offset = DestBase->Offset; @@ -163,10 +161,10 @@ void Node::addExplicitMemOp( assert(DestBase != nullptr && "Accessor should have an initialized accessor_base"); - detail::buffer_impl *SrcBuf = SrcBase->m_Buf; + detail::buffer_impl> *SrcBuf = SrcBase->m_Buf; assert(SrcBuf != nullptr && "Accessor should have an initialized buffer_impl"); - detail::buffer_impl *DestBuf = DestBase->m_Buf; + detail::buffer_impl> *DestBuf = DestBase->m_Buf; assert(DestBuf != nullptr && "Accessor should have an initialized buffer_impl"); @@ -174,7 +172,9 @@ void Node::addExplicitMemOp( id SrcOffset = SrcBase->Offset; id DestOffset = DestBase->Offset; - range BuffSrcRange = SrcBase->m_Buf->get_range(); + // Use BufRange here + range BuffSrcRange = SrcBase->BufRange; + range BuffDestRange = DestBase->BufRange; BufferReqPtr SrcReq = getReqForBuffer(m_Bufs, *SrcBuf); BufferReqPtr DestReq = getReqForBuffer(m_Bufs, *DestBuf); @@ -182,7 +182,7 @@ void Node::addExplicitMemOp( assert(!m_Kernel && "This node already contains an execution command"); m_Kernel = std::make_shared>( SrcReq, DestReq, m_Queue, SrcRange, SrcOffset, DestOffset, sizeof(T_src), - SrcBase->get_count(), BuffSrcRange); + sizeof(T_dest), SrcBase->get_count(), BuffSrcRange, BuffDestRange); } // Updates host data of the specified accessor @@ -195,28 +195,25 @@ void Scheduler::updateHost( isPlaceholder>::__impl(); assert(AccBase != nullptr && "Accessor should have an initialized accessor_base"); - detail::buffer_impl *Buf = AccBase->m_Buf; + detail::buffer_impl> *Buf = AccBase->m_Buf; updateHost(*Buf, Event); } -template -void Scheduler::copyBack(detail::buffer_impl &Buf) { +template +void Scheduler::copyBack(detail::buffer_impl &Buf) { cl::sycl::event Event; updateHost(Buf, Event); detail::getSyclObjImpl(Event)->waitInternal(); } // Updates host data of the specified buffer_impl -template -void Scheduler::updateHost(detail::buffer_impl &Buf, +template +void Scheduler::updateHost(detail::buffer_impl &Buf, cl::sycl::event &Event) { CommandPtr UpdateHostCmd; BufferReqPtr BufStor = - std::make_shared>( - Buf); + std::make_shared>(Buf); if (0 == m_BuffersEvolution.count(BufStor)) { return; @@ -236,12 +233,11 @@ void Scheduler::updateHost(detail::buffer_impl &Buf, Event = EnqueueCommand(std::move(UpdateHostCmd)); } -template -void Scheduler::removeBuffer( - detail::buffer_impl &Buf) { - BufferReqPtr BufStor = std::make_shared< - BufferStorage>(Buf); +template +void Scheduler::removeBuffer(detail::buffer_impl &Buf) { + BufferReqPtr BufStor = + std::make_shared>(Buf); if (0 == m_BuffersEvolution.count(BufStor)) { return; diff --git a/sycl/include/CL/sycl/detail/scheduler/scheduler.h b/sycl/include/CL/sycl/detail/scheduler/scheduler.h index cabbacff4bb11..f1cccf086a036 100644 --- a/sycl/include/CL/sycl/detail/scheduler/scheduler.h +++ b/sycl/include/CL/sycl/detail/scheduler/scheduler.h @@ -49,9 +49,8 @@ class Node { m_NextOCLIndex(RHS.m_NextOCLIndex) {} // Adds a buffer requirement for this node. - template - void addBufRequirement(detail::buffer_impl &Buf); + template + void addBufRequirement(detail::buffer_impl &Buf); // Adds an accessor requirement for this node. template - void copyBack(detail::buffer_impl &Buf); + template + void copyBack(detail::buffer_impl &Buf); // Updates host data of the specified buffer_impl - template - void updateHost(detail::buffer_impl &Buf, - cl::sycl::event &Event); + template + void updateHost(detail::buffer_impl &Buf, cl::sycl::event &Event); // Updates host data of the specified accessor template - void removeBuffer(detail::buffer_impl &Buf); + template + void removeBuffer(detail::buffer_impl &Buf); // Waits for the event passed. void waitForEvent(EventImplPtr Event); diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index d58de20b0c426..12f2fe459eca4 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -100,7 +100,7 @@ template class accessor_impl; -template class buffer_impl; +template class buffer_impl; // Type inference of first arg from a lambda // auto fun = [&](item a) { a; }; // lambda_arg_type value; # value type is item @@ -144,8 +144,7 @@ class handler { typename voidT> friend class detail::accessor_impl; - template - friend class detail::buffer_impl; + template friend class detail::buffer_impl; friend class detail::queue_impl; @@ -172,9 +171,8 @@ class handler { bool is_host() { return isHost; } - template - void AddBufDep(detail::buffer_impl &Buf) { + template + void AddBufDep(detail::buffer_impl &Buf) { m_Node.addBufRequirement(Buf); } @@ -577,7 +575,7 @@ class handler { getAccessorRangeHelper::getAccessorRange(src); // TODO use buffer_allocator when it is possible - buffer> Buffer( + buffer> Buffer( (shared_ptr_class)dest, Range, {property::buffer::use_host_ptr()}); accessor::getAccessorRange(dest); // TODO use buffer_allocator when it is possible - buffer> Buffer( + buffer> Buffer( (shared_ptr_class)src, Range, {property::buffer::use_host_ptr()}); accessor::getAccessorRange(src); // TODO use buffer_allocator when it is possible - buffer> Buffer( + buffer> Buffer( (T_src *)dest, Range, {property::buffer::use_host_ptr()}); accessor @@ -637,7 +635,7 @@ class handler { getAccessorRangeHelper::getAccessorRange(dest); // TODO use buffer_allocator when it is possible - buffer> Buffer( + buffer> Buffer( (T_dest *)src, Range, {property::buffer::use_host_ptr()}); accessor diff --git a/sycl/test/basic_tests/buffer/reinterpret.cpp b/sycl/test/basic_tests/buffer/reinterpret.cpp new file mode 100644 index 0000000000000..7ffe633bb3cbb --- /dev/null +++ b/sycl/test/basic_tests/buffer/reinterpret.cpp @@ -0,0 +1,83 @@ +// RUN: %clang -std=c++11 -fsycl %s -o %t.out -lstdc++ -lOpenCL -lsycl +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +//==---------- reinterpret.cpp --- SYCL buffer reinterpret basic test ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +// This tests verifies basic cases of using cl::sycl::buffer::reinterpret +// functionality - changing buffer type and range. This test checks that +// original buffer updates when we write to reinterpreted buffer and also checks +// that we can't create reinterpreted buffer when total size in bytes will be +// not same as total size in bytes of original buffer. + +int main() { + + bool failed = false; + cl::sycl::queue q; + + cl::sycl::range<1> r1(1); + cl::sycl::range<1> r2(sizeof(unsigned int) / sizeof(unsigned char)); + cl::sycl::buffer buf_i(r1); + auto buf_char = buf_i.reinterpret(r2); + q.submit([&](cl::sycl::handler &cgh) { + auto acc = buf_char.get_access(cgh); + cgh.parallel_for( + r2, [=](cl::sycl::id<1> i) { acc[i] = UCHAR_MAX; }); + }); + + { + auto acc = buf_i.get_access(); + if (acc[0] != UINT_MAX) { + std::cout << acc[0] << std::endl; + std::cout << "line: " << __LINE__ << " array[" << 0 << "] is " << acc[0] + << " expected " << UINT_MAX << std::endl; + failed = true; + } + } + + cl::sycl::range<1> r1d(9); + cl::sycl::range<2> r2d(3, 3); + cl::sycl::buffer buf_1d(r1d); + auto buf_2d = buf_1d.reinterpret(r2d); + q.submit([&](cl::sycl::handler &cgh) { + auto acc2d = buf_2d.get_access(cgh); + cgh.parallel_for(r2d, [=](cl::sycl::item<2> itemID) { + size_t i = itemID.get_id(0); + size_t j = itemID.get_id(1); + if (i == j) + acc2d[i][j] = 1; + else + acc2d[i][j] = 0; + }); + }); + + { + auto acc = buf_1d.get_access(); + for (auto i = 0u; i < r1d.size(); i++) { + size_t expected = (i % 4) ? 0 : 1; + if (acc[i] != expected) { + std::cout << "line: " << __LINE__ << " array[" << i << "] is " << acc[i] + << " expected " << expected << std::endl; + failed = true; + } + } + } + + try { + cl::sycl::buffer buf_fl(r1d); + auto buf_d = buf_1d.reinterpret(r2d); + } catch (cl::sycl::invalid_object_error e) { + std::cout << "Expected exception has been caught: " << e.what() + << std::endl; + } + + return failed; +}