diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index bcb7fccfb61d0..2a175d5d753bb 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -13,12 +13,13 @@ #include #include +#include #include +#include #include #include #include #include -#include #include @@ -222,8 +223,8 @@ template class combiner { auto reducer = static_cast(this); for (size_t E = 0; E < Extent; ++E) { auto AtomicRef = - atomic_ref(), Space>( - multi_ptr(ReduVarPtr)[E]); + sycl::atomic_ref(), + Space>(multi_ptr(ReduVarPtr)[E]); Functor(AtomicRef, reducer->getElement(E)); } } @@ -312,13 +313,15 @@ template class combiner { /// reduction. template -class reducer::value>> +class reducer< + T, BinaryOperation, Dims, Extent, Algorithm, View, + enable_if_t::value>> : public combiner< reducer::value>>> { + !sycl::detail::IsKnownIdentityOp< + T, BinaryOperation>::value>>> { public: reducer(const T &Identity, BinaryOperation BOp) : MValue(Identity), MIdentity(Identity), MBinaryOp(BOp) {} @@ -343,13 +346,15 @@ class reducer -class reducer::value>> +class reducer< + T, BinaryOperation, Dims, Extent, Algorithm, View, + enable_if_t::value>> : public combiner< reducer::value>>> { + sycl::detail::IsKnownIdentityOp< + T, BinaryOperation>::value>>> { public: reducer() : MValue(getIdentity()) {} reducer(const T & /* Identity */, BinaryOperation) : MValue(getIdentity()) {} @@ -360,7 +365,7 @@ class reducer::value; + return sycl::detail::known_identity_impl::value; } T &getElement(size_t) { return MValue; } @@ -390,13 +395,14 @@ class reducer -class reducer::value>> - : public combiner< - reducer::value>>> { +class reducer< + T, BinaryOperation, Dims, Extent, Algorithm, View, + enable_if_t::value>> + : public combiner::value>>> { public: reducer(const T &Identity, BinaryOperation BOp) : MValue(Identity), MIdentity(Identity), MBinaryOp(BOp) {} @@ -420,13 +426,14 @@ class reducer -class reducer::value>> - : public combiner< - reducer::value>>> { +class reducer< + T, BinaryOperation, Dims, Extent, Algorithm, View, + enable_if_t::value>> + : public combiner::value>>> { public: reducer() : MValue(getIdentity()) {} reducer(const T & /* Identity */, BinaryOperation) : MValue(getIdentity()) {} @@ -439,7 +446,7 @@ class reducer::value; + return sycl::detail::known_identity_impl::value; } T &getElement(size_t E) { return MValue[E]; } @@ -464,14 +471,14 @@ template class reduction_impl_common { public: /// Returns the statically known identity value. template - enable_if_t::value, + enable_if_t::value, _T> constexpr getIdentity() { - return known_identity_impl<_BinaryOperation, _T>::value; + return sycl::detail::known_identity_impl<_BinaryOperation, _T>::value; } /// Returns the identity value given by user. template - enable_if_t::value, _T> + enable_if_t::value, _T> getIdentity() { return MIdentity; } @@ -752,8 +759,8 @@ class reduction_impl /// SYCL-2020. /// Constructs reduction_impl when the identity value is statically known. template ::value> * = - nullptr> + std::enable_if_t::value> * = nullptr> reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH, bool InitializeToIdentity) : algo(reducer_type::getIdentity(), BinaryOperation(), @@ -766,9 +773,8 @@ class reduction_impl } /// Constructs reduction_impl when the identity value is statically known. - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(rw_accessor_type &Acc) : algo(reducer_type::getIdentity(), BinaryOperation(), false, std::make_shared(Acc)) { @@ -779,9 +785,8 @@ class reduction_impl } /// Constructs reduction_impl when the identity value is statically known. - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(dw_accessor_type &Acc) : algo(reducer_type::getIdentity(), BinaryOperation(), true, std::make_shared(Acc)) { @@ -796,7 +801,8 @@ class reduction_impl /// and user still passed the identity value. template < typename _T, typename AllocatorT, - enable_if_t::value> * = nullptr> + enable_if_t::value> + * = nullptr> reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH, const T & /*Identity*/, BinaryOperation, bool InitializeToIdentity) @@ -822,9 +828,8 @@ class reduction_impl /// Constructs reduction_impl when the identity value is statically known, /// and user still passed the identity value. - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(rw_accessor_type &Acc, const T & /*Identity*/, BinaryOperation) : algo(reducer_type::getIdentity(), BinaryOperation(), false, std::make_shared(Acc)) { @@ -847,9 +852,8 @@ class reduction_impl /// Constructs reduction_impl when the identity value is statically known, /// and user still passed the identity value. - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(dw_accessor_type &Acc, const T & /*Identity*/, BinaryOperation) : algo(reducer_type::getIdentity(), BinaryOperation(), true, std::make_shared(Acc)) { @@ -874,7 +878,8 @@ class reduction_impl /// Constructs reduction_impl when the identity value is NOT known statically. template < typename _T, typename AllocatorT, - enable_if_t::value> * = nullptr> + enable_if_t::value> + * = nullptr> reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH, const T &Identity, BinaryOperation BOp, bool InitializeToIdentity) @@ -888,9 +893,8 @@ class reduction_impl } /// Constructs reduction_impl when the identity value is unknown. - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(rw_accessor_type &Acc, const T &Identity, BinaryOperation BOp) : algo(Identity, BOp, false, std::make_shared(Acc)) { if (Acc.size() != 1) @@ -900,9 +904,8 @@ class reduction_impl } /// Constructs reduction_impl when the identity value is unknown. - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(dw_accessor_type &Acc, const T &Identity, BinaryOperation BOp) : algo(Identity, BOp, true, std::make_shared(Acc)) { if (Acc.size() != 1) @@ -915,9 +918,8 @@ class reduction_impl /// The \param VarPtr is a USM pointer to memory, to where the computed /// reduction value is added using BinaryOperation, i.e. it is expected that /// the memory is pre-initialized with some meaningful value. - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(T *VarPtr, bool InitializeToIdentity = false) : algo(reducer_type::getIdentity(), BinaryOperation(), InitializeToIdentity, VarPtr) {} @@ -927,9 +929,8 @@ class reduction_impl /// The \param VarPtr is a USM pointer to memory, to where the computed /// reduction value is added using BinaryOperation, i.e. it is expected that /// the memory is pre-initialized with some meaningful value. - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(T *VarPtr, const T &Identity, BinaryOperation, bool InitializeToIdentity = false) : algo(Identity, BinaryOperation(), InitializeToIdentity, VarPtr) { @@ -950,35 +951,31 @@ class reduction_impl /// The \param VarPtr is a USM pointer to memory, to where the computed /// reduction value is added using BinaryOperation, i.e. it is expected that /// the memory is pre-initialized with some meaningful value. - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(T *VarPtr, const T &Identity, BinaryOperation BOp, bool InitializeToIdentity = false) : algo(Identity, BOp, InitializeToIdentity, VarPtr) {} /// Constructs reduction_impl when the identity value is statically known - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(span<_T, Extent> Span, bool InitializeToIdentity = false) : algo(reducer_type::getIdentity(), BinaryOperation(), InitializeToIdentity, Span.data()) {} /// Constructs reduction_impl when the identity value is statically known /// and user passed an identity value anyway - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(span<_T, Extent> Span, const T & /* Identity */, BinaryOperation BOp, bool InitializeToIdentity = false) : algo(reducer_type::getIdentity(), BOp, InitializeToIdentity, Span.data()) {} /// Constructs reduction_impl when the identity value is not statically known - template < - typename _T = T, - enable_if_t::value> * = nullptr> + template ::value> * = nullptr> reduction_impl(span Span, const T &Identity, BinaryOperation BOp, bool InitializeToIdentity = false) : algo(Identity, BOp, InitializeToIdentity, Span.data()) {} @@ -1105,8 +1102,8 @@ void reduCGFuncForRangeFastReduce(handler &CGH, KernelType KernelFunc, // Signal this work-group has finished after all values are reduced if (LID == 0) { auto NFinished = - atomic_ref( + sycl::atomic_ref( NWorkGroupsFinished[0]); DoReducePartialSumsInLastWG[0] = ++NFinished == NWorkGroups && NWorkGroups > 1; @@ -1204,8 +1201,8 @@ void reduCGFuncForRangeBasic(handler &CGH, KernelType KernelFunc, // Signal this work-group has finished after all values are reduced if (LID == 0) { auto NFinished = - atomic_ref( + sycl::atomic_ref( NWorkGroupsFinished[0]); DoReducePartialSumsInLastWG[0] = ++NFinished == NWorkGroups && NWorkGroups > 1; @@ -1302,7 +1299,7 @@ void reduCGFuncForNDRangeBothFastReduceAndAtomics( typename Reduction::binary_operation BOp; for (int E = 0; E < NElements; ++E) { Reducer.getElement(E) = - ext::oneapi::reduce(NDIt.get_group(), Reducer.getElement(E), BOp); + reduce_over_group(NDIt.get_group(), Reducer.getElement(E), BOp); } if (NDIt.get_local_linear_id() == 0) Reducer.atomic_combine(Reduction::getOutPointer(Out)); @@ -1426,7 +1423,7 @@ void reduCGFuncForNDRangeFastReduceOnly( for (int E = 0; E < NElements; ++E) { typename Reduction::result_type PSum; PSum = Reducer.getElement(E); - PSum = ext::oneapi::reduce(NDIt.get_group(), PSum, BOp); + PSum = reduce_over_group(NDIt.get_group(), PSum, BOp); if (NDIt.get_local_linear_id() == 0) { if (IsUpdateOfUserVar) PSum = BOp(Reduction::getOutPointer(Out)[E], PSum); @@ -1556,7 +1553,7 @@ void reduAuxCGFuncFastReduceImpl(handler &CGH, bool UniformWG, (UniformWG || (GID < NWorkItems)) ? In[GID * NElements + E] : Reduction::reducer_type::getIdentity(); - PSum = ext::oneapi::reduce(NDIt.get_group(), PSum, BOp); + PSum = reduce_over_group(NDIt.get_group(), PSum, BOp); if (NDIt.get_local_linear_id() == 0) { if (IsUpdateOfUserVar) PSum = BOp(Reduction::getOutPointer(Out)[E], PSum); @@ -2587,7 +2584,7 @@ reduction(accessor &Acc, /// The identity value is not passed to this version as it is statically known. template -std::enable_if_t::value, +std::enable_if_t::value, detail::reduction_impl< T, BinaryOperation, 0, 1, detail::default_reduction_algorithm>> @@ -2615,7 +2612,7 @@ reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) { /// The identity value is not passed to this version as it is statically known. template std::enable_if_t< - detail::IsKnownIdentityOp::value, + sycl::detail::IsKnownIdentityOp::value, detail::reduction_impl>>