From 8bc3423142504ddd57b2398ce7ae51aaccdecdc6 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 15 Aug 2022 09:44:45 -0700 Subject: [PATCH 1/2] [SYCL][ABI Break] Remove ext::oneapi::reduction This PR also move the implementation from the extension header to the regular sycl/reduction.hpp. There might be some simplifications in the implementation enabled by that but they are left for a future PR. --- sycl/CMakeLists.txt | 2 +- sycl/include/sycl/ext/oneapi/reduction.hpp | 2552 -------------------- sycl/include/sycl/handler.hpp | 68 +- sycl/include/sycl/queue.hpp | 6 +- sycl/include/sycl/reduction.hpp | 2472 ++++++++++++++++++- sycl/include/sycl/sycl.hpp | 1 - sycl/source/detail/reduction.cpp | 23 +- sycl/test/abi/sycl_symbols_linux.dump | 10 +- sycl/test/basic_tests/reduction_ctor.cpp | 53 +- 9 files changed, 2505 insertions(+), 2682 deletions(-) delete mode 100644 sycl/include/sycl/ext/oneapi/reduction.hpp diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index bae361ef99f35..b5bc80623b30c 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -30,7 +30,7 @@ set(SYCL_MINOR_VERSION 7) set(SYCL_PATCH_VERSION 0) # Don't forget to re-enable sycl_symbols_windows.dump once we leave ABI-breaking # window! -set(SYCL_DEV_ABI_VERSION 10) +set(SYCL_DEV_ABI_VERSION 11) if (SYCL_ADD_DEV_VERSION_POSTFIX) set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}") endif() diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp deleted file mode 100644 index 425e69d54ef7d..0000000000000 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ /dev/null @@ -1,2552 +0,0 @@ -//==---------------- reduction.hpp - SYCL reduction ------------*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// ===--------------------------------------------------------------------=== // - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace sycl { -__SYCL_INLINE_VER_NAMESPACE(_V1) { -namespace ext { -namespace oneapi { -namespace detail { - -/// Base non-template class which is a base class for all reduction -/// implementation classes. It is needed to detect the reduction classes. -class reduction_impl_base {}; - -/// Predicate returning true if all template type parameters except the last one -/// are reductions. -template struct AreAllButLastReductions { - static constexpr bool value = - std::is_base_of>::value && - AreAllButLastReductions::value; -}; - -/// Helper specialization of AreAllButLastReductions for one element only. -/// Returns true if the template parameter is not a reduction. -template struct AreAllButLastReductions { - static constexpr bool value = - !std::is_base_of>::value; -}; -} // namespace detail -} // namespace oneapi -} // namespace ext -} // __SYCL_INLINE_VER_NAMESPACE(_V1) -} // namespace sycl - -#if __cplusplus >= 201703L -// Entire feature is dependent on C++17. We still have to make the trait above -// available as queue shortcuts use them unconditionally, including on -// non-reduction path. -namespace sycl { -__SYCL_INLINE_VER_NAMESPACE(_V1) { -namespace ext { -namespace oneapi { -namespace detail { -template -event withAuxHandler(std::shared_ptr Queue, bool IsHost, - FunctorTy Func) { - handler AuxHandler(Queue, IsHost); - Func(AuxHandler); - return AuxHandler.finalize(); -} - -using sycl::detail::bool_constant; -using sycl::detail::enable_if_t; -using sycl::detail::queue_impl; -using sycl::detail::remove_AS; - -// This type trait is used to detect if the atomic operation BinaryOperation -// used with operands of the type T is available for using in reduction. -// The order in which the atomic operations are performed may be arbitrary and -// thus may cause different results from run to run even on the same elements -// and on same device. The macro SYCL_REDUCTION_DETERMINISTIC prohibits using -// atomic operations for reduction and helps to produce stable results. -// SYCL_REDUCTION_DETERMINISTIC is a short term solution, which perhaps become -// deprecated eventually and is replaced by a sycl property passed to reduction. -template -using IsReduOptForFastAtomicFetch = -#ifdef SYCL_REDUCTION_DETERMINISTIC - bool_constant; -#else - bool_constant<((sycl::detail::is_sgenfloat::value && sizeof(T) == 4) || - sycl::detail::is_sgeninteger::value) && - sycl::detail::IsValidAtomicType::value && - (sycl::detail::IsPlus::value || - sycl::detail::IsMinimum::value || - sycl::detail::IsMaximum::value || - sycl::detail::IsBitOR::value || - sycl::detail::IsBitXOR::value || - sycl::detail::IsBitAND::value)>; -#endif - -// This type trait is used to detect if the atomic operation BinaryOperation -// used with operands of the type T is available for using in reduction, in -// addition to the cases covered by "IsReduOptForFastAtomicFetch", if the device -// has the atomic64 aspect. This type trait should only be used if the device -// has the atomic64 aspect. Note that this type trait is currently a subset of -// IsReduOptForFastReduce. The macro SYCL_REDUCTION_DETERMINISTIC prohibits -// using the reduce_over_group() algorithm to produce stable results across same -// type devices. -template -using IsReduOptForAtomic64Op = -#ifdef SYCL_REDUCTION_DETERMINISTIC - bool_constant; -#else - bool_constant<(sycl::detail::IsPlus::value || - sycl::detail::IsMinimum::value || - sycl::detail::IsMaximum::value) && - sycl::detail::is_sgenfloat::value && sizeof(T) == 8>; -#endif - -// This type trait is used to detect if the group algorithm reduce() used with -// operands of the type T and the operation BinaryOperation is available -// for using in reduction. -// The macro SYCL_REDUCTION_DETERMINISTIC prohibits using the reduce() algorithm -// to produce stable results across same type devices. -template -using IsReduOptForFastReduce = -#ifdef SYCL_REDUCTION_DETERMINISTIC - bool_constant; -#else - bool_constant<((sycl::detail::is_sgeninteger::value && - (sizeof(T) == 4 || sizeof(T) == 8)) || - sycl::detail::is_sgenfloat::value) && - (sycl::detail::IsPlus::value || - sycl::detail::IsMinimum::value || - sycl::detail::IsMaximum::value)>; -#endif - -// std::tuple seems to be a) too heavy and b) not copyable to device now -// Thus sycl::detail::tuple is used instead. -// Switching from sycl::device::tuple to std::tuple can be done by re-defining -// the ReduTupleT type and makeReduTupleT() function below. -template using ReduTupleT = sycl::detail::tuple; -template ReduTupleT makeReduTupleT(Ts... Elements) { - return sycl::detail::make_tuple(Elements...); -} - -__SYCL_EXPORT size_t reduGetMaxWGSize(std::shared_ptr Queue, - size_t LocalMemBytesPerWorkItem); -__SYCL_EXPORT size_t reduComputeWGSize(size_t NWorkItems, size_t MaxWGSize, - size_t &NWorkGroups); -__SYCL_EXPORT size_t reduGetPreferredWGSize(std::shared_ptr &Queue, - size_t LocalMemBytesPerWorkItem); - -/// Class that is used to represent objects that are passed to user's lambda -/// functions and representing users' reduction variable. -/// The generic version of the class represents those reductions of those -/// types and operations for which the identity value is not known. -/// The View template describes whether the reducer owns its data or not: if -/// View is 'true', then the reducer does not own its data and instead provides -/// a view of data allocated elsewhere (i.e. via a reference or pointer member); -/// if View is 'false', then the reducer owns its data. With the current default -/// reduction algorithm, the top-level reducers that are passed to the user's -/// lambda contain a private copy of the reduction variable, whereas any reducer -/// created by a subscript operator contains a reference to a reduction variable -/// allocated elsewhere. The Subst parameter is an implementation detail and is -/// used to spell out restrictions using 'enable_if'. -template -class reducer; - -/// Helper class for accessing reducer-defined types in CRTP -/// May prove to be useful for other things later -template struct ReducerTraits; - -template -struct ReducerTraits> { - using type = T; - using op = BinaryOperation; - static constexpr int dims = Dims; - static constexpr size_t extent = Extent; -}; - -/// Use CRTP to avoid redefining shorthand operators in terms of combine -/// -/// Also, for many types with known identity the operation 'atomic_combine()' -/// is implemented here, which allows to use more efficient version of kernels -/// using those operations, which are based on functionality provided by -/// sycl::atomic class. -/// -/// For example, it is known that 0 is identity for sycl::plus operations -/// accepting native scalar types to which scalar 0 is convertible. -/// Also, for int32/64 types the atomic_combine() is lowered to -/// sycl::atomic::fetch_add(). -template class combiner { - using Ty = typename ReducerTraits::type; - using BinaryOp = typename ReducerTraits::op; - static constexpr int Dims = ReducerTraits::dims; - static constexpr size_t Extent = ReducerTraits::extent; - -public: - template - enable_if_t<(_Dims == 0) && sycl::detail::IsPlus<_T, BinaryOp>::value && - sycl::detail::is_geninteger<_T>::value> - operator++() { - static_cast(this)->combine(static_cast<_T>(1)); - } - - template - enable_if_t<(_Dims == 0) && sycl::detail::IsPlus<_T, BinaryOp>::value && - sycl::detail::is_geninteger<_T>::value> - operator++(int) { - static_cast(this)->combine(static_cast<_T>(1)); - } - - template - enable_if_t<(_Dims == 0) && sycl::detail::IsPlus<_T, BinaryOp>::value> - operator+=(const _T &Partial) { - static_cast(this)->combine(Partial); - } - - template - enable_if_t<(_Dims == 0) && sycl::detail::IsMultiplies<_T, BinaryOp>::value> - operator*=(const _T &Partial) { - static_cast(this)->combine(Partial); - } - - template - enable_if_t<(_Dims == 0) && sycl::detail::IsBitOR<_T, BinaryOp>::value> - operator|=(const _T &Partial) { - static_cast(this)->combine(Partial); - } - - template - enable_if_t<(_Dims == 0) && sycl::detail::IsBitXOR<_T, BinaryOp>::value> - operator^=(const _T &Partial) { - static_cast(this)->combine(Partial); - } - - template - enable_if_t<(_Dims == 0) && sycl::detail::IsBitAND<_T, BinaryOp>::value> - operator&=(const _T &Partial) { - static_cast(this)->combine(Partial); - } - -private: - template - static constexpr memory_scope getMemoryScope() { - return Space == access::address_space::local_space - ? memory_scope::work_group - : memory_scope::device; - } - - template - void atomic_combine_impl(T *ReduVarPtr, AtomicFunctor Functor) const { - auto reducer = static_cast(this); - for (size_t E = 0; E < Extent; ++E) { - auto AtomicRef = - sycl::atomic_ref(), - Space>(multi_ptr(ReduVarPtr)[E]); - Functor(AtomicRef, reducer->getElement(E)); - } - } - - template - static constexpr bool BasicCheck = - std::is_same::type, Ty>::value && - (Space == access::address_space::global_space || - Space == access::address_space::local_space); - -public: - /// Atomic ADD operation: *ReduVarPtr += MValue; - template - enable_if_t && - (IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value || - IsReduOptForAtomic64Op<_T, _BinaryOperation>::value) && - sycl::detail::IsPlus<_T, _BinaryOperation>::value> - atomic_combine(_T *ReduVarPtr) const { - atomic_combine_impl( - ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_add(Val); }); - } - - /// Atomic BITWISE OR operation: *ReduVarPtr |= MValue; - template - enable_if_t && - IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value && - sycl::detail::IsBitOR<_T, _BinaryOperation>::value> - atomic_combine(_T *ReduVarPtr) const { - atomic_combine_impl( - ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_or(Val); }); - } - - /// Atomic BITWISE XOR operation: *ReduVarPtr ^= MValue; - template - enable_if_t && - IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value && - sycl::detail::IsBitXOR<_T, _BinaryOperation>::value> - atomic_combine(_T *ReduVarPtr) const { - atomic_combine_impl( - ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_xor(Val); }); - } - - /// Atomic BITWISE AND operation: *ReduVarPtr &= MValue; - template - enable_if_t::type, _T>::value && - IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value && - sycl::detail::IsBitAND<_T, _BinaryOperation>::value && - (Space == access::address_space::global_space || - Space == access::address_space::local_space)> - atomic_combine(_T *ReduVarPtr) const { - atomic_combine_impl( - ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_and(Val); }); - } - - /// Atomic MIN operation: *ReduVarPtr = sycl::minimum(*ReduVarPtr, MValue); - template - enable_if_t && - (IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value || - IsReduOptForAtomic64Op<_T, _BinaryOperation>::value) && - sycl::detail::IsMinimum<_T, _BinaryOperation>::value> - atomic_combine(_T *ReduVarPtr) const { - atomic_combine_impl( - ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_min(Val); }); - } - - /// Atomic MAX operation: *ReduVarPtr = sycl::maximum(*ReduVarPtr, MValue); - template - enable_if_t && - (IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value || - IsReduOptForAtomic64Op<_T, _BinaryOperation>::value) && - sycl::detail::IsMaximum<_T, _BinaryOperation>::value> - atomic_combine(_T *ReduVarPtr) const { - atomic_combine_impl( - ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_max(Val); }); - } -}; - -/// Specialization of the generic class 'reducer'. It is used for reductions -/// of those types and operations for which the identity value is not known. -/// -/// It stores a copy of the identity and binary operation associated with the -/// reduction. -template -class reducer< - T, BinaryOperation, Dims, Extent, View, - enable_if_t::value>> - : public combiner< - reducer::value>>> { -public: - reducer(const T &Identity, BinaryOperation BOp) - : MValue(Identity), MIdentity(Identity), MBinaryOp(BOp) {} - - void combine(const T &Partial) { MValue = MBinaryOp(MValue, Partial); } - - T getIdentity() const { return MIdentity; } - - T &getElement(size_t) { return MValue; } - const T &getElement(size_t) const { return MValue; } - T MValue; - -private: - const T MIdentity; - BinaryOperation MBinaryOp; -}; - -/// Specialization of the generic class 'reducer'. It is used for reductions -/// of those types and operations for which the identity value is known. -/// -/// It allows to reduce the size of the 'reducer' object by not holding -/// the identity field inside it and allows to add a default constructor. -template -class reducer< - T, BinaryOperation, Dims, Extent, View, - enable_if_t::value>> - : public combiner< - reducer::value>>> { -public: - reducer() : MValue(getIdentity()) {} - reducer(const T & /* Identity */, BinaryOperation) : MValue(getIdentity()) {} - - void combine(const T &Partial) { - BinaryOperation BOp; - MValue = BOp(MValue, Partial); - } - - static T getIdentity() { - return sycl::detail::known_identity_impl::value; - } - - T &getElement(size_t) { return MValue; } - const T &getElement(size_t) const { return MValue; } - T MValue; -}; - -/// Component of 'reducer' class for array reductions, representing a single -/// element of the span (as returned by the subscript operator). -template -class reducer> - : public combiner>> { -public: - reducer(T &Ref, BinaryOperation BOp) : MElement(Ref), MBinaryOp(BOp) {} - - void combine(const T &Partial) { MElement = MBinaryOp(MElement, Partial); } - -private: - T &MElement; - BinaryOperation MBinaryOp; -}; - -/// Specialization of 'reducer' class for array reductions exposing the -/// subscript operator. -template -class reducer< - T, BinaryOperation, Dims, Extent, View, - enable_if_t::value>> - : public combiner::value>>> { -public: - reducer(const T &Identity, BinaryOperation BOp) - : MValue(Identity), MIdentity(Identity), MBinaryOp(BOp) {} - - reducer operator[](size_t Index) { - return {MValue[Index], MBinaryOp}; - } - - T getIdentity() const { return MIdentity; } - T &getElement(size_t E) { return MValue[E]; } - const T &getElement(size_t E) const { return MValue[E]; } - -private: - marray MValue; - const T MIdentity; - BinaryOperation MBinaryOp; -}; - -/// Specialization of 'reducer' class for array reductions accepting a span -/// in cases where the identity value is known. -template -class reducer< - T, BinaryOperation, Dims, Extent, View, - enable_if_t::value>> - : public combiner::value>>> { -public: - reducer() : MValue(getIdentity()) {} - reducer(const T & /* Identity */, BinaryOperation) : MValue(getIdentity()) {} - - // SYCL 2020 revision 4 says this should be const, but this is a bug - // see https://github.com/KhronosGroup/SYCL-Docs/pull/252 - reducer operator[](size_t Index) { - return {MValue[Index], BinaryOperation()}; - } - - static T getIdentity() { - return sycl::detail::known_identity_impl::value; - } - - T &getElement(size_t E) { return MValue[E]; } - const T &getElement(size_t E) const { return MValue[E]; } - -private: - marray MValue; -}; - -/// Templated class for common functionality of all reduction implementation -/// classes. -template class reduction_impl_common { -protected: - reduction_impl_common(const T &Identity, BinaryOperation BinaryOp, - bool Init = false) - : MIdentity(Identity), MBinaryOp(BinaryOp), InitializeToIdentity(Init) {} - -public: - /// Returns the statically known identity value. - template - enable_if_t::value, - _T> constexpr getIdentity() { - return sycl::detail::known_identity_impl<_BinaryOperation, _T>::value; - } - - /// Returns the identity value given by user. - template - enable_if_t::value, _T> - getIdentity() { - return MIdentity; - } - - /// Returns the binary operation associated with the reduction. - BinaryOperation getBinaryOperation() const { return MBinaryOp; } - bool initializeToIdentity() const { return InitializeToIdentity; } - -protected: - /// Identity of the BinaryOperation. - /// The result of BinaryOperation(X, MIdentity) is equal to X for any X. - const T MIdentity; - - BinaryOperation MBinaryOp; - bool InitializeToIdentity; -}; - -template struct is_rw_acc_t : public std::false_type {}; - -template -struct is_rw_acc_t> - : public std::true_type {}; - -template struct is_dw_acc_t : public std::false_type {}; - -template -struct is_dw_acc_t> - : public std::true_type {}; - -template struct is_placeholder_t : public std::false_type {}; - -template -struct is_placeholder_t> - : public std::true_type {}; - -// Used for determining dimensions for temporary storage (mainly). -template struct data_dim_t { - static constexpr int value = 1; -}; - -template -struct data_dim_t< - accessor> { - static constexpr int value = AccessorDims; -}; - -template struct get_red_t; -template struct get_red_t { - using type = T; -}; - -template -struct get_red_t< - accessor> { - using type = T; -}; - -template -class reduction_impl_algo : public reduction_impl_common { - using base = reduction_impl_common; - using self = reduction_impl_algo; - -public: - using reducer_type = reducer; - using result_type = T; - using binary_operation = BinaryOperation; - - // Buffers and accessors always describe scalar reductions (i.e. Dims == 0) - // The input buffer/accessor is allowed to have different dimensionality - // AccessorDims also determines the dimensionality of some temp storage - static constexpr int accessor_dim = data_dim_t::value; - static constexpr int buffer_dim = (accessor_dim == 0) ? 1 : accessor_dim; - static constexpr access::placeholder is_placeholder = - is_placeholder_t::value ? access::placeholder::true_t - : access::placeholder::false_t; - using rw_accessor_type = accessor>; - static constexpr bool has_float64_atomics = - IsReduOptForAtomic64Op::value; - static constexpr bool has_fast_atomics = - IsReduOptForFastAtomicFetch::value; - static constexpr bool has_fast_reduce = - IsReduOptForFastReduce::value; - - static constexpr bool is_usm = std::is_same_v; - - static constexpr bool is_rw_acc = is_rw_acc_t::value; - static constexpr bool is_dw_acc = is_dw_acc_t::value; - static constexpr bool is_acc = is_rw_acc | is_dw_acc; - static_assert(!is_rw_acc || !is_dw_acc, "Can be only one at once!"); - static_assert(!is_usm || !is_acc, "Can be only one at once!"); - - static constexpr size_t dims = Dims; - static constexpr size_t num_elements = Extent; - - reduction_impl_algo(const T &Identity, BinaryOperation BinaryOp, bool Init, - RedOutVar RedOut) - : base(Identity, BinaryOp, Init), MRedOut(std::move(RedOut)){}; - - /// Creates and returns a local accessor with the \p Size elements. - /// By default the local accessor elements are of the same type as the - /// elements processed by the reduction, but may it be altered by specifying - /// \p _T explicitly if need an accessor with elements of different type. - template - static accessor<_T, buffer_dim, access::mode::read_write, - access::target::local> - getReadWriteLocalAcc(size_t Size, handler &CGH) { - return {Size, CGH}; - } - - accessor - getReadAccToPreviousPartialReds(handler &CGH) const { - CGH.addReduction(MOutBufPtr); - return {*MOutBufPtr, CGH}; - } - - template - auto getWriteMemForPartialReds(size_t Size, handler &CGH) { - // If there is only one WG we can avoid creation of temporary buffer with - // partial sums and write directly into user's reduction variable. - // - // Current implementation doesn't allow that in case of DW accessor used for - // reduction because C++ types for it and for temporary storage don't match, - // hence the second part of the check. - if constexpr (IsOneWG && !is_dw_acc) { - return MRedOut; - } else { - MOutBufPtr = std::make_shared>(range<1>(Size)); - CGH.addReduction(MOutBufPtr); - return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); - } - } - - template - auto &getTempBuffer(size_t Size, handler &CGH) { - auto Buffer = std::make_shared>(range<1>(Size)); - CGH.addReduction(Buffer); - return *Buffer; - } - - /// Returns an accessor accessing the memory that will hold the reduction - /// partial sums. - /// If \p Size is equal to one, then the reduction result is the final and - /// needs to be written to user's read-write accessor (if there is such). - /// Otherwise, a new buffer is created and accessor to that buffer is - /// returned. - rw_accessor_type getWriteAccForPartialReds(size_t Size, handler &CGH) { - if constexpr (is_rw_acc) { - if (Size == 1) { - CGH.associateWithHandler(&MRedOut, access::target::device); - return MRedOut; - } - } - - // Create a new output buffer and return an accessor to it. - MOutBufPtr = std::make_shared>(range<1>(Size)); - CGH.addReduction(MOutBufPtr); - return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); - } - - /// If reduction is initialized with read-write accessor, which does not - /// require initialization with identity value, then return user's read-write - /// accessor. Otherwise, create global buffer with 'num_elements' initialized - /// with identity value and return an accessor to that buffer. - template - std::enable_if_t - getReadWriteAccessorToInitializedMem(handler &CGH) { - if constexpr (is_rw_acc) { - if (!base::initializeToIdentity()) - return MRedOut; - } - assert(!(is_dw_acc && !base::initializeToIdentity()) && - "Unexpected condition!"); - - // TODO: Move to T[] in C++20 to simplify handling here - // auto RWReduVal = std::make_shared(); - auto RWReduVal = std::make_shared>(); - for (int i = 0; i < num_elements; ++i) { - (*RWReduVal)[i] = base::getIdentity(); - } - CGH.addReduction(RWReduVal); - MOutBufPtr = std::make_shared>(RWReduVal.get()->data(), - range<1>(num_elements)); - MOutBufPtr->set_final_data(); - CGH.addReduction(MOutBufPtr); - return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); - } - - accessor - getReadWriteAccessorToInitializedGroupsCounter(handler &CGH) { - auto CounterMem = std::make_shared(0); - CGH.addReduction(CounterMem); - auto CounterBuf = std::make_shared>(CounterMem.get(), 1); - CounterBuf->set_final_data(); - CGH.addReduction(CounterBuf); - return {*CounterBuf, CGH}; - } - - // On discrete (vs. integrated) GPUs it's faster to initialize memory with an - // extra kernel than copy it from the host. - template auto getGroupsCounterAccDiscrete(handler &CGH) { - auto &Buf = getTempBuffer(1, CGH); - std::shared_ptr QueueCopy = CGH.MQueue; - auto Event = CGH.withAuxHandler(QueueCopy, [&](handler &InitHandler) { - auto Acc = accessor{Buf, InitHandler, sycl::write_only, sycl::no_init}; - InitHandler.single_task([=]() { Acc[0] = 0; }); - }); - CGH.depends_on(Event); - return accessor{Buf, CGH}; - } - - RedOutVar &getUserRedVar() { return MRedOut; } - - static inline result_type *getOutPointer(result_type *OutPtr) { - return OutPtr; - } - template - static inline result_type *getOutPointer(const AccessorType &OutAcc) { - return OutAcc.get_pointer().get(); - } - -private: - template - rw_accessor_type createHandlerWiredReadWriteAccessor(handler &CGH, - BufferT Buffer) { - // TODO: - // SYCL 2020: The accessor template parameter IsPlaceholder is allowed to be - // specified, but it has no bearing on whether the accessor instance is a - // placeholder. This is determined solely by the constructor used to create - // the instance. The associated type access::placeholder is also deprecated. - if constexpr (is_placeholder == access::placeholder::true_t) { - rw_accessor_type Acc(Buffer); - CGH.require(Acc); - return Acc; - } else { - return {Buffer, CGH}; - } - } - - std::shared_ptr> MOutBufPtr; - - /// User's accessor/USM pointer to where the reduction must be written. - RedOutVar MRedOut; -}; -/// This class encapsulates the reduction variable/accessor, -/// the reduction operator and an optional operator identity. -template -class reduction_impl - : private reduction_impl_base, - public reduction_impl_algo { -private: - using algo = reduction_impl_algo; - using self = reduction_impl; - - static constexpr bool is_known_identity = - sycl::detail::IsKnownIdentityOp::value; - - // TODO: Do we also need chooseBinOp? - static constexpr T chooseIdentity(const T &Identity) { - // For now the implementation ignores the identity value given by user - // when the implementation knows the identity. - // The SPEC could prohibit passing identity parameter to operations with - // known identity, but that could have some bad consequences too. - // For example, at some moment the implementation may NOT know the identity - // for COMPLEX-PLUS reduction. User may create a program that would pass - // COMPLEX value (0,0) as identity for PLUS reduction. At some later moment - // when the implementation starts handling COMPLEX-PLUS as known operation - // the existing user's program remains compilable and working correctly. - // I.e. with this constructor here, adding more reduction operations to the - // list of known operations does not break the existing programs. - if constexpr (is_known_identity) { - (void)Identity; - return reducer_type::getIdentity(); - - } else { - return Identity; - } - } - -public: - using algo::is_acc; - using algo::is_dw_acc; - using algo::is_rw_acc; - using algo::is_usm; - - using reducer_type = typename algo::reducer_type; - using rw_accessor_type = typename algo::rw_accessor_type; - - // Only scalar and 1D array reductions are supported by SYCL 2020. - static_assert(Dims <= 1, "Multi-dimensional reductions are not supported."); - - /// Constructs reduction_impl when the identity value is statically known. - template * = nullptr> - reduction_impl(RedOutVar &Acc) - : algo(reducer_type::getIdentity(), BinaryOperation(), is_dw_acc, Acc) { - if (Acc.size() != 1) - throw sycl::runtime_error(errc::invalid, - "Reduction variable must be a scalar.", - PI_ERROR_INVALID_VALUE); - } - - /// Constructs reduction_impl when the identity value is statically known. - /// The \param VarPtr is a USM pointer to memory, to where the computed - /// reduction value is added using BinaryOperation, i.e. it is expected that - /// the memory is pre-initialized with some meaningful value. - template * = nullptr> - reduction_impl(RedOutVar VarPtr, bool InitializeToIdentity = false) - : algo(reducer_type::getIdentity(), BinaryOperation(), - InitializeToIdentity, VarPtr) {} - - /// SYCL-2020. - /// Constructs reduction_impl when the identity value is statically known. - template * = - nullptr> - reduction_impl(RedOutVar &Acc, handler &CGH, bool InitializeToIdentity) - : algo(reducer_type::getIdentity(), BinaryOperation(), - InitializeToIdentity, Acc) { - associateWithHandler(CGH, &Acc, access::target::device); - if (Acc.size() != 1) - throw sycl::runtime_error(errc::invalid, - "Reduction variable must be a scalar.", - PI_ERROR_INVALID_VALUE); - } - - /// Constructs reduction_impl when the identity value is unknown. - template * = nullptr> - reduction_impl(RedOutVar &Acc, const T &Identity, BinaryOperation BOp) - : algo(chooseIdentity(Identity), BOp, is_dw_acc, Acc) { - if (Acc.size() != 1) - throw sycl::runtime_error(errc::invalid, - "Reduction variable must be a scalar.", - PI_ERROR_INVALID_VALUE); - } - - /// The \param VarPtr is a USM pointer to memory, to where the computed - /// reduction value is added using BinaryOperation, i.e. it is expected that - /// the memory is pre-initialized with some meaningful value. - template * = nullptr> - reduction_impl(RedOutVar VarPtr, const T &Identity, BinaryOperation BOp, - bool InitializeToIdentity = false) - : algo(chooseIdentity(Identity), BOp, InitializeToIdentity, VarPtr) {} - - /// For placeholder accessor. - template * = nullptr> - reduction_impl(RedOutVar &Acc, handler &CGH, const T &Identity, - BinaryOperation BOp, bool InitializeToIdentity) - : algo(chooseIdentity(Identity), BOp, InitializeToIdentity, Acc) { - associateWithHandler(CGH, &Acc, access::target::device); - if (Acc.size() != 1) - throw sycl::runtime_error(errc::invalid, - "Reduction variable must be a scalar.", - PI_ERROR_INVALID_VALUE); - } -}; - -template -auto make_reduction(RedOutVar RedVar, RestTy &&...Rest) { - return reduction_impl::type, BinaryOp, Dims, - Extent, RedOutVar>{RedVar, - std::forward(Rest)...}; -} - -/// A helper to pass undefined (sycl::detail::auto_name) names unmodified. We -/// must do that to avoid name collisions. -template