From aef3c5b6de4b764318e7cda65e61e75514d6f771 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 23 Sep 2020 14:03:17 -0700 Subject: [PATCH 01/10] [SYCL][Doc] Add has_known_identity/known_identity These traits have been requested by developers who would like to determine at compile-time whether the reduction() interface supports their use-case, or whether they need to fall back to some other implementation. Signed-off-by: John Pennycook --- sycl/doc/extensions/Reduction/Reduction.md | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sycl/doc/extensions/Reduction/Reduction.md b/sycl/doc/extensions/Reduction/Reduction.md index 37a6748142519..5b96c04ed2f26 100644 --- a/sycl/doc/extensions/Reduction/Reduction.md +++ b/sycl/doc/extensions/Reduction/Reduction.md @@ -38,6 +38,30 @@ unspecified reduction(span var, const T& identity, BinaryOperation co The exact behavior of a reduction is specific to an implementation; the only interface exposed to the user is the set of functions above, which construct an unspecified `reduction` object encapsulating the reduction variable, an optional operator identity and the reduction operator. For user-defined binary operations, an implementation should issue a compile-time warning if an identity is not specified and this is known to negatively impact performance (e.g. as a result of the implementation choosing a different reduction algorithm). For standard binary operations (e.g. `std::plus`) on arithmetic types, the implementation must determine the correct identity automatically in order to avoid performance penalties. +Whether an implementation can identify the identity value for a given combination of accumulator type `AccumulatorT` and function object type `BinaryOperation` can be determined using the `has_known_identity` trait class: +```c++ +template +struct has_known_identity { + static constexpr bool value; +}; + +// Available if C++17 +template +inline constexpr bool has_known_identity_v = has_known_identity::value; +``` + +If `has_known_identity` returns `true` for a given combination of accumulator type and function object type, the value of the identity can be extracted using the `known_identity` trait class: +```c++ +template +struct known_identity { + static constexpr T value; +}; + +// Available if C++17 +template +inline constexpr T known_identity_v = known_identity::value; +``` + The dimensionality of the `accessor` passed to the `reduction` function specifies the dimensionality of the reduction variable: a 0-dimensional `accessor` represents a scalar reduction, and any other dimensionality represents an array reduction. Specifying an array reduction of size N is functionally equivalent to specifying N independent scalar reductions. The access mode of the accessor determines whether the reduction variable's original value is included in the reduction (i.e. for `access::mode::read_write` it is included, and for `access::mode::discard_write` it is not). Multiple reductions aliasing the same output results in undefined behavior. `T` must be trivially copyable, permitting an implementation to (optionally) use atomic operations to implement the reduction. This restriction is aligned with `std::atomic` and `std::atomic_ref`. From be643d0400b27dffbe9e3bc0722f734207873199 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 23 Sep 2020 14:25:14 -0700 Subject: [PATCH 02/10] [SYCL] Implement has_known_identity/known_identity Exposes existing functionality of reduction implementation as part of the public API for reductions. Signed-off-by: John Pennycook --- sycl/include/CL/sycl/ONEAPI/reduction.hpp | 113 +++++++++++++++------- 1 file changed, 78 insertions(+), 35 deletions(-) diff --git a/sycl/include/CL/sycl/ONEAPI/reduction.hpp b/sycl/include/CL/sycl/ONEAPI/reduction.hpp index e33db4783156b..c5de3e3389a32 100644 --- a/sycl/include/CL/sycl/ONEAPI/reduction.hpp +++ b/sycl/include/CL/sycl/ONEAPI/reduction.hpp @@ -142,6 +142,62 @@ using IsKnownIdentityOp = IsMinimumIdentityOp::value || IsMaximumIdentityOp::value>; +template +struct has_known_identity_impl { + static constexpr bool value = + IsKnownIdentityOp::value; +}; + +template +struct known_identity_impl {}; + +/// Returns zero as identity for ADD, OR, XOR operations. +template +struct known_identity_impl::value>::type> { + static constexpr AccumulatorT value = 0; +}; + +/// Returns one as identify for MULTIPLY operations. +template +struct known_identity_impl::value>::type> { + static constexpr AccumulatorT value = 1; +}; + +/// Returns bit image consisting of all ones as identity for AND operations. +template +struct known_identity_impl::value>::type> { + static constexpr AccumulatorT value = ~static_cast(0); +}; + +/// Returns maximal possible value as identity for MIN operations. +template +struct known_identity_impl::value>::type> { + static constexpr AccumulatorT value = + std::numeric_limits::has_infinity + ? std::numeric_limits::infinity() + : (std::numeric_limits::max)(); +}; + +/// Returns minimal possible value as identity for MAX operations. +template +struct known_identity_impl::value>::type> { + static constexpr AccumulatorT value = + std::numeric_limits::has_infinity + ? static_cast( + -std::numeric_limits::infinity()) + : std::numeric_limits::lowest(); +}; + /// Class that is used to represent objects that are passed to user's lambda /// functions and representing users' reduction variable. /// The generic version of the class represents those reductions of those @@ -191,43 +247,10 @@ class reducer - static enable_if_t::value, _T> - getIdentity() { - return 0; - } - - /// Returns one as identify for MULTIPLY operations. template - static enable_if_t::value, _T> + static enable_if_t::value, _T> getIdentity() { - return 1; - } - - /// Returns bit image consisting of all ones as identity for AND operations. - template - static enable_if_t::value, _T> - getIdentity() { - return ~static_cast<_T>(0); - } - - /// Returns maximal possible value as identity for MIN operations. - template - static enable_if_t::value, _T> - getIdentity() { - return std::numeric_limits<_T>::has_infinity - ? std::numeric_limits<_T>::infinity() - : (std::numeric_limits<_T>::max)(); - } - - /// Returns minimal possible value as identity for MAX operations. - template - static enable_if_t::value, _T> - getIdentity() { - return std::numeric_limits<_T>::has_infinity - ? static_cast<_T>(-std::numeric_limits<_T>::infinity()) - : std::numeric_limits<_T>::lowest(); + return known_identity_impl<_BinaryOperation, _T>::value; } template @@ -1076,6 +1099,26 @@ reduction(T *VarPtr, BinaryOperation) { access::mode::read_write>(VarPtr); } +template +struct has_known_identity : detail::has_known_identity_impl< + typename std::decay::type, + typename std::decay::type> {}; +#if __cplusplus >= 201703L +template +inline constexpr bool has_known_identity_v = + has_known_identity::value; +#endif + +template +struct known_identity + : detail::known_identity_impl::type, + typename std::decay::type> {}; +#if __cplusplus >= 201703L +template +inline constexpr AccumulatorT known_identity_v = + known_identity::value; +#endif + } // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) From c2ff9dcde43cb62a3d60dfa18fee6abae03d073d Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 24 Sep 2020 10:55:10 -0400 Subject: [PATCH 03/10] [SYCL][Doc] Fix typo: T => AccumulatorT Signed-off-by: John Pennycook --- sycl/doc/extensions/Reduction/Reduction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/Reduction/Reduction.md b/sycl/doc/extensions/Reduction/Reduction.md index 5b96c04ed2f26..aad8185051967 100644 --- a/sycl/doc/extensions/Reduction/Reduction.md +++ b/sycl/doc/extensions/Reduction/Reduction.md @@ -54,7 +54,7 @@ If `has_known_identity` returns `true` for a given combination of accumulator ty ```c++ template struct known_identity { - static constexpr T value; + static constexpr AccumulatorT value; }; // Available if C++17 From aa2222abdc047789114542b011af27158474d9bd Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 24 Sep 2020 16:41:35 -0400 Subject: [PATCH 04/10] [SYCL][Doc] Fix typo again: T => AccumulatorT Signed-off-by: John Pennycook --- sycl/doc/extensions/Reduction/Reduction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/Reduction/Reduction.md b/sycl/doc/extensions/Reduction/Reduction.md index aad8185051967..5376cc7cfe518 100644 --- a/sycl/doc/extensions/Reduction/Reduction.md +++ b/sycl/doc/extensions/Reduction/Reduction.md @@ -59,7 +59,7 @@ struct known_identity { // Available if C++17 template -inline constexpr T known_identity_v = known_identity::value; +inline constexpr AccumulatorT known_identity_v = known_identity::value; ``` The dimensionality of the `accessor` passed to the `reduction` function specifies the dimensionality of the reduction variable: a 0-dimensional `accessor` represents a scalar reduction, and any other dimensionality represents an array reduction. Specifying an array reduction of size N is functionally equivalent to specifying N independent scalar reductions. The access mode of the accessor determines whether the reduction variable's original value is included in the reduction (i.e. for `access::mode::read_write` it is included, and for `access::mode::discard_write` it is not). Multiple reductions aliasing the same output results in undefined behavior. From 0136df20a9371b9b1cea5db2c46473074af957d5 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 25 Sep 2020 13:38:54 -0400 Subject: [PATCH 05/10] [SYCL][Doc] Define has in terms of known Signed-off-by: John Pennycook --- sycl/doc/extensions/Reduction/Reduction.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sycl/doc/extensions/Reduction/Reduction.md b/sycl/doc/extensions/Reduction/Reduction.md index 5376cc7cfe518..a454b2490faf0 100644 --- a/sycl/doc/extensions/Reduction/Reduction.md +++ b/sycl/doc/extensions/Reduction/Reduction.md @@ -38,28 +38,29 @@ unspecified reduction(span var, const T& identity, BinaryOperation co The exact behavior of a reduction is specific to an implementation; the only interface exposed to the user is the set of functions above, which construct an unspecified `reduction` object encapsulating the reduction variable, an optional operator identity and the reduction operator. For user-defined binary operations, an implementation should issue a compile-time warning if an identity is not specified and this is known to negatively impact performance (e.g. as a result of the implementation choosing a different reduction algorithm). For standard binary operations (e.g. `std::plus`) on arithmetic types, the implementation must determine the correct identity automatically in order to avoid performance penalties. -Whether an implementation can identify the identity value for a given combination of accumulator type `AccumulatorT` and function object type `BinaryOperation` can be determined using the `has_known_identity` trait class: +If an implementation can identify the identity value for a given combination of accumulator type `AccumulatorT` and function object type `BinaryOperation`, the value is defined as a member of the `known_identity` trait class: ```c++ template -struct has_known_identity { - static constexpr bool value; +struct known_identity { + static constexpr AccumulatorT value; }; // Available if C++17 template -inline constexpr bool has_known_identity_v = has_known_identity::value; +inline constexpr AccumulatorT known_identity_v = known_identity::value; ``` -If `has_known_identity` returns `true` for a given combination of accumulator type and function object type, the value of the identity can be extracted using the `known_identity` trait class: +Whether `known_identity::value` exists can be tested using the `has_known_identity` trait class: + ```c++ template -struct known_identity { - static constexpr AccumulatorT value; +struct has_known_identity { + static constexpr bool value; }; // Available if C++17 template -inline constexpr AccumulatorT known_identity_v = known_identity::value; +inline constexpr bool has_known_identity_v = has_known_identity::value; ``` The dimensionality of the `accessor` passed to the `reduction` function specifies the dimensionality of the reduction variable: a 0-dimensional `accessor` represents a scalar reduction, and any other dimensionality represents an array reduction. Specifying an array reduction of size N is functionally equivalent to specifying N independent scalar reductions. The access mode of the accessor determines whether the reduction variable's original value is included in the reduction (i.e. for `access::mode::read_write` it is included, and for `access::mode::discard_write` it is not). Multiple reductions aliasing the same output results in undefined behavior. From 6ca8965b32cba6475ca2e457b4026d91b14fa20d Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 30 Sep 2020 10:24:19 -0400 Subject: [PATCH 06/10] [SYCL] Use std::integral_constant Signed-off-by: John Pennycook --- sycl/include/CL/sycl/ONEAPI/reduction.hpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sycl/include/CL/sycl/ONEAPI/reduction.hpp b/sycl/include/CL/sycl/ONEAPI/reduction.hpp index c5de3e3389a32..5c374a65d244a 100644 --- a/sycl/include/CL/sycl/ONEAPI/reduction.hpp +++ b/sycl/include/CL/sycl/ONEAPI/reduction.hpp @@ -143,10 +143,9 @@ using IsKnownIdentityOp = IsMaximumIdentityOp::value>; template -struct has_known_identity_impl { - static constexpr bool value = - IsKnownIdentityOp::value; -}; +struct has_known_identity_impl + : std::integral_constant< + bool, IsKnownIdentityOp::value> {}; template struct known_identity_impl {}; From b7910344579cdba07f7817dc44fa68edc6d7922c Mon Sep 17 00:00:00 2001 From: Roland Schulz Date: Fri, 2 Oct 2020 13:58:00 -0700 Subject: [PATCH 07/10] Add half inf/-inf support --- sycl/include/CL/sycl/half_type.hpp | 36 ++++++++++++++----- .../constexpr-fp16-numeric-limits.cpp | 2 ++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/sycl/include/CL/sycl/half_type.hpp b/sycl/include/CL/sycl/half_type.hpp index d836af37c9698..4c5dc83eb4dfd 100644 --- a/sycl/include/CL/sycl/half_type.hpp +++ b/sycl/include/CL/sycl/half_type.hpp @@ -35,8 +35,8 @@ namespace host_half_impl { class __SYCL_EXPORT half { public: half() = default; - half(const half &) = default; - half(half &&) = default; + constexpr half(const half &) = default; + constexpr half(half &&) = default; half(const float &rhs); @@ -74,13 +74,21 @@ class __SYCL_EXPORT half { return ret; } + // Operator neg + constexpr half &operator-() { + Buf ^= 0x8000; + return *this; + } + // Operator float operator float() const; template friend struct std::hash; private: + constexpr explicit half(uint16_t x) : Buf(x) {} uint16_t Buf; + friend std::numeric_limits; }; } // namespace host_half_impl @@ -136,8 +144,8 @@ class half; class half { public: half() = default; - half(const half &) = default; - half(half &&) = default; + constexpr half(const half &) = default; + constexpr half(half &&) = default; __SYCL_CONSTEXPR_ON_DEVICE half(const float &rhs) : Data(rhs) {} @@ -146,8 +154,8 @@ class half { #ifndef __SYCL_DEVICE_ONLY__ // Since StorageT and BIsRepresentationT are different on host, these two // helpers are required for 'vec' class - half(const detail::host_half_impl::half &rhs) : Data(rhs) {}; - operator detail::host_half_impl::half() const { return Data; } + constexpr half(const detail::host_half_impl::half &rhs) : Data(rhs){}; + constexpr operator detail::host_half_impl::half() const { return Data; } #endif // __SYCL_DEVICE_ONLY__ // Operator +=, -=, *=, /= @@ -193,7 +201,14 @@ class half { operator--(); return ret; } - + constexpr half &operator-() { + Data = -Data; + return *this; + } + constexpr half operator-() const { + half r = *this; + return -r; + } // Operator float operator float() const { return static_cast(Data); } @@ -327,8 +342,13 @@ template <> struct numeric_limits { return 0.5F; } - static __SYCL_CONSTEXPR_ON_DEVICE const half infinity() noexcept { + static constexpr const half infinity() noexcept { +#ifdef __SYCL_DEVICE_ONLY__ return __builtin_huge_valf(); +#else + return cl::sycl::detail::host_half_impl::half( + static_cast(0x7C00)); +#endif } static __SYCL_CONSTEXPR_ON_DEVICE const half quiet_NaN() noexcept { diff --git a/sycl/test/regression/constexpr-fp16-numeric-limits.cpp b/sycl/test/regression/constexpr-fp16-numeric-limits.cpp index feab488478683..c1e00be21b8bb 100644 --- a/sycl/test/regression/constexpr-fp16-numeric-limits.cpp +++ b/sycl/test/regression/constexpr-fp16-numeric-limits.cpp @@ -10,6 +10,8 @@ int main() { constexpr cl::sycl::half L5 = std::numeric_limits::round_error(); constexpr cl::sycl::half L6 = std::numeric_limits::infinity(); + constexpr cl::sycl::half L6n = + -std::numeric_limits::infinity(); constexpr cl::sycl::half L7 = std::numeric_limits::quiet_NaN(); constexpr cl::sycl::half L8 = From d5b3766cb30725743cfc5468cd64ea9d647843f6 Mon Sep 17 00:00:00 2001 From: Roland Schulz Date: Mon, 11 Jan 2021 14:20:29 -0800 Subject: [PATCH 08/10] Add 0/1 half known_idenitfy for host --- sycl/include/CL/sycl/ONEAPI/reduction.hpp | 14 ++++++++++++-- sycl/include/CL/sycl/half_type.hpp | 5 +++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/sycl/include/CL/sycl/ONEAPI/reduction.hpp b/sycl/include/CL/sycl/ONEAPI/reduction.hpp index b773edf354057..9bb98bc078017 100644 --- a/sycl/include/CL/sycl/ONEAPI/reduction.hpp +++ b/sycl/include/CL/sycl/ONEAPI/reduction.hpp @@ -157,7 +157,12 @@ template struct known_identity_impl::value>::type> { - static constexpr AccumulatorT value = 0; + static constexpr AccumulatorT value = +#ifdef __SYCL_DEVICE_ONLY__ + 0; +#else + cl::sycl::detail::host_half_impl::half(static_cast(0)); +#endif }; /// Returns one as identify for MULTIPLY operations. @@ -165,7 +170,12 @@ template struct known_identity_impl::value>::type> { - static constexpr AccumulatorT value = 1; + static constexpr AccumulatorT value = +#ifdef __SYCL_DEVICE_ONLY__ + 1; +#else + cl::sycl::detail::host_half_impl::half(static_cast(0x3C00)); +#endif }; /// Returns bit image consisting of all ones as identity for AND operations. diff --git a/sycl/include/CL/sycl/half_type.hpp b/sycl/include/CL/sycl/half_type.hpp index 3aa09a26a5cbc..e1beb123fb265 100644 --- a/sycl/include/CL/sycl/half_type.hpp +++ b/sycl/include/CL/sycl/half_type.hpp @@ -85,10 +85,11 @@ class __SYCL_EXPORT half { template friend struct std::hash; -private: + // Initialize underlying data constexpr explicit half(uint16_t x) : Buf(x) {} + +private: uint16_t Buf; - friend std::numeric_limits; }; } // namespace host_half_impl From bb3c4c28a1698b3b77c708d06733013185dfa79e Mon Sep 17 00:00:00 2001 From: Roland Schulz Date: Mon, 11 Jan 2021 16:40:22 -0800 Subject: [PATCH 09/10] Apply work-around for half only to half --- sycl/include/CL/sycl/ONEAPI/reduction.hpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/ONEAPI/reduction.hpp b/sycl/include/CL/sycl/ONEAPI/reduction.hpp index 9bb98bc078017..73ae2d7e5428b 100644 --- a/sycl/include/CL/sycl/ONEAPI/reduction.hpp +++ b/sycl/include/CL/sycl/ONEAPI/reduction.hpp @@ -157,7 +157,14 @@ template struct known_identity_impl::value>::type> { - static constexpr AccumulatorT value = + static constexpr AccumulatorT value = 0; +}; + +template +struct known_identity_impl::value>::type> { + static constexpr half value = #ifdef __SYCL_DEVICE_ONLY__ 0; #else @@ -170,7 +177,14 @@ template struct known_identity_impl::value>::type> { - static constexpr AccumulatorT value = + static constexpr AccumulatorT value = 1; +}; + +template +struct known_identity_impl::value>::type> { + static constexpr half value = #ifdef __SYCL_DEVICE_ONLY__ 1; #else From 93d9499b629544f996a73a8001fc7f3ec1258527 Mon Sep 17 00:00:00 2001 From: Roland Schulz Date: Mon, 11 Jan 2021 17:58:58 -0800 Subject: [PATCH 10/10] Make compile with C++11 --- sycl/include/CL/sycl/half_type.hpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sycl/include/CL/sycl/half_type.hpp b/sycl/include/CL/sycl/half_type.hpp index e1beb123fb265..6627612f79cb2 100644 --- a/sycl/include/CL/sycl/half_type.hpp +++ b/sycl/include/CL/sycl/half_type.hpp @@ -26,6 +26,11 @@ #else #define __SYCL_CONSTEXPR_ON_DEVICE #endif +#if __cplusplus >= 201402L +#define _CPP14_CONSTEXPR constexpr +#else +#define _CPP14_CONSTEXPR +#endif __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -75,7 +80,7 @@ class __SYCL_EXPORT half { } // Operator neg - constexpr half &operator-() { + _CPP14_CONSTEXPR half &operator-() { Buf ^= 0x8000; return *this; } @@ -202,11 +207,11 @@ class half { operator--(); return ret; } - constexpr half &operator-() { + _CPP14_CONSTEXPR half &operator-() { Data = -Data; return *this; } - constexpr half operator-() const { + _CPP14_CONSTEXPR half operator-() const { half r = *this; return -r; } @@ -334,3 +339,4 @@ inline std::istream &operator>>(std::istream &I, cl::sycl::half &rhs) { } #undef __SYCL_CONSTEXPR_ON_DEVICE +#undef _CPP14_CONSTEXPR