diff --git a/sycl/include/CL/sycl/INTEL/esimd/detail/esimd_sycl_util.hpp b/sycl/include/CL/sycl/INTEL/esimd/detail/esimd_sycl_util.hpp new file mode 100644 index 0000000000000..043b40cfa08a7 --- /dev/null +++ b/sycl/include/CL/sycl/INTEL/esimd/detail/esimd_sycl_util.hpp @@ -0,0 +1,86 @@ +//==------------- esimd_sycl_util.hpp - DPC++ Explicit SIMD API -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Utility functions related to interaction with generic SYCL and used for +// implementing Explicit SIMD APIs. +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace INTEL { +namespace gpu { +namespace detail { + +// Checks that given type is a SYCL accessor type. Sets its static field +// \c value accordingly. Also, if the check is succesful, sets \c mode and +// \c target static fields to the accessor type's access mode and access target +// respectively. Otherwise they are set to -1. +template struct is_sycl_accessor : public std::false_type { + static constexpr sycl::access::mode mode = + static_cast(-1); + static constexpr sycl::access::target target = + static_cast(-1); +}; + +template +struct is_sycl_accessor> + : public std::true_type { + static constexpr sycl::access::mode mode = AccessMode; + static constexpr sycl::access::target target = AccessTarget; +}; + +using accessor_mode_cap_val_t = bool; + +// Denotes an accessor's capability - whether it can read or write. +struct accessor_mode_cap { + static inline constexpr accessor_mode_cap_val_t can_read = false; + static inline constexpr accessor_mode_cap_val_t can_write = true; +}; + +template +constexpr bool accessor_mode_has_capability() { + static_assert(Cap == accessor_mode_cap::can_read || + Cap == accessor_mode_cap::can_write, + "unsupported capability"); + + if constexpr (Mode == sycl::access::mode::atomic || + Mode == sycl::access::mode::read_write || + Mode == sycl::access::mode::discard_read_write) + return true; // atomic and *read_write accessors can read/write + + return (Cap == accessor_mode_cap::can_read) == + (Mode == sycl::access::mode::read); +} + +// Checks that given type is a SYCL accessor type with given capability and +// target. +template +struct is_sycl_accessor_with + : public std::conditional_t< + accessor_mode_has_capability::mode, + Capability>() && + (is_sycl_accessor::target == AccessTarget), + std::true_type, std::false_type> {}; + +template +using EnableIfAccessor = sycl::detail::enable_if_t< + detail::is_sycl_accessor_with::value, RetT>; + +} // namespace detail +} // namespace gpu +} // namespace INTEL +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/INTEL/esimd/esimd.hpp b/sycl/include/CL/sycl/INTEL/esimd/esimd.hpp index d2e21bd77cc50..ca33a35949f96 100644 --- a/sycl/include/CL/sycl/INTEL/esimd/esimd.hpp +++ b/sycl/include/CL/sycl/INTEL/esimd/esimd.hpp @@ -11,6 +11,8 @@ #pragma once #include +#include +#include #include __SYCL_INLINE_NAMESPACE(cl) { @@ -480,6 +482,49 @@ template class simd { } } + /// @name Memory operations + /// TODO NOTE: These APIs do not support cache hint specification yet, as this + /// is WIP. Later addition of hints is not expected to break code using these + /// APIs. + /// + /// @{ + + /// Copy a contiguous block of data from memory into this simd object. + /// The amount of memory copied equals the total size of vector elements in + /// this object. + /// @param addr the memory address to copy from. Must be a pointer to the + /// global address space, otherwise behavior is undefined. + ESIMD_INLINE void copy_from(const Ty *const addr) SYCL_ESIMD_FUNCTION; + + /// Copy a contiguous block of data from memory into this simd object. + /// The amount of memory copied equals the total size of vector elements in + /// this object. + /// Source memory location is represented via a global accessor and offset. + /// @param acc accessor to copy from. + /// @param offset offset to copy from. + template + ESIMD_INLINE + detail::EnableIfAccessor + copy_from(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION; + + /// Copy all vector elements of this object into a contiguous block in memory. + /// @param addr the memory address to copy to. Must be a pointer to the + /// global address space, otherwise behavior is undefined. + ESIMD_INLINE void copy_to(Ty *addr) SYCL_ESIMD_FUNCTION; + + /// Copy all vector elements of this object into a contiguous block in memory. + /// Destination memory location is represented via a global accessor and + /// offset. + /// @param acc accessor to copy from. + /// @param offset offset to copy from. + template + ESIMD_INLINE + detail::EnableIfAccessor + copy_to(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION; + + /// @} // Memory operations private: // The underlying data for this vector. vector_type M_data; @@ -498,6 +543,88 @@ ESIMD_INLINE simd convert(simd val) { return __builtin_convertvector(val.data(), detail::vector_type_t); } +// ----------- Outlined implementations of esimd class APIs. + +template void simd::copy_from(const T *const Addr) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= detail::OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % detail::OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * detail::OperandSize::OWORD, + "block size must be at most 8 owords"); + + uintptr_t AddrVal = reinterpret_cast(Addr); + *this = + __esimd_flat_block_read_unaligned( + AddrVal); +} + +template +template +ESIMD_INLINE + detail::EnableIfAccessor + simd::copy_from(AccessorT acc, uint32_t offset) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= detail::OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % detail::OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * detail::OperandSize::OWORD, + "block size must be at most 8 owords"); +#if defined(__SYCL_DEVICE_ONLY__) + auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); + *this = __esimd_block_read(surf_ind, offset); +#else + *this = __esimd_block_read(acc, offset); +#endif // __SYCL_DEVICE_ONLY__ +} + +template void simd::copy_to(T *addr) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= detail::OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % detail::OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * detail::OperandSize::OWORD, + "block size must be at most 8 owords"); + + uintptr_t AddrVal = reinterpret_cast(addr); + __esimd_flat_block_write(AddrVal, + data()); +} + +template +template +ESIMD_INLINE + detail::EnableIfAccessor + simd::copy_to(AccessorT acc, uint32_t offset) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= detail::OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % detail::OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * detail::OperandSize::OWORD, + "block size must be at most 8 owords"); + +#if defined(__SYCL_DEVICE_ONLY__) + auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); + __esimd_block_write(surf_ind, offset >> 4, data()); +#else + __esimd_block_write(acc, offset >> 4, data()); +#endif // __SYCL_DEVICE_ONLY__ +} + } // namespace gpu } // namespace INTEL } // namespace sycl @@ -516,4 +643,5 @@ std::ostream &operator<<(std::ostream &OS, OS << "}"; return OS; } + #endif diff --git a/sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp b/sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp index a4f072c317af9..1c9e991a46fe4 100644 --- a/sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp +++ b/sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp @@ -158,14 +158,15 @@ scatter(T *p, simd vals, simd offsets, pred.data()); } -// TODO @rolandschulz -// Should follow existing std::simd naming for similar APIs - "copy_from" and -// "copy_to" to avoid confusion. -// /// Flat-address block-load. /// \ingroup sycl_esimd +// TODO normally, this function should just delegate to +// simd::copy_from for the deprecation period, but separate implementations are +// needed for now, as simd::copy_from does not support cache hints yet. +// This API, even though deprecated, can't be removed until then. template +__SYCL_DEPRECATED("use simd::copy_from.") ESIMD_INLINE ESIMD_NODEBUG simd block_load(const T *const addr) { constexpr unsigned Sz = sizeof(T) * n; static_assert(Sz >= detail::OperandSize::OWORD, @@ -184,30 +185,20 @@ ESIMD_INLINE ESIMD_NODEBUG simd block_load(const T *const addr) { /// Accessor-based block-load. /// \ingroup sycl_esimd template +__SYCL_DEPRECATED("use simd::copy_from.") ESIMD_INLINE ESIMD_NODEBUG simd block_load(AccessorTy acc, uint32_t offset) { - constexpr unsigned Sz = sizeof(T) * n; - static_assert(Sz >= detail::OperandSize::OWORD, - "block size must be at least 1 oword"); - static_assert(Sz % detail::OperandSize::OWORD == 0, - "block size must be whole number of owords"); - static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * detail::OperandSize::OWORD, - "block size must be at most 8 owords"); - -#if defined(__SYCL_DEVICE_ONLY__) - auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); - return __esimd_block_read(surf_ind, offset); -#else - return __esimd_block_read(acc, offset); -#endif // __SYCL_DEVICE_ONLY__ + simd Res; + Res.copy_from(acc, offset); + return Res; } /// Flat-address block-store. /// \ingroup sycl_esimd +// TODO the above note about cache hints applies to this API as well. template +__SYCL_DEPRECATED("use simd::copy_to.") ESIMD_INLINE ESIMD_NODEBUG void block_store(T *p, simd vals) { constexpr unsigned Sz = sizeof(T) * n; static_assert(Sz >= detail::OperandSize::OWORD, @@ -226,24 +217,10 @@ ESIMD_INLINE ESIMD_NODEBUG void block_store(T *p, simd vals) { /// Accessor-based block-store. /// \ingroup sycl_esimd template -ESIMD_INLINE ESIMD_NODEBUG void block_store(AccessorTy acc, uint32_t offset, - simd vals) { - constexpr unsigned Sz = sizeof(T) * n; - static_assert(Sz >= detail::OperandSize::OWORD, - "block size must be at least 1 oword"); - static_assert(Sz % detail::OperandSize::OWORD == 0, - "block size must be whole number of owords"); - static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * detail::OperandSize::OWORD, - "block size must be at most 8 owords"); - -#if defined(__SYCL_DEVICE_ONLY__) - auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); - __esimd_block_write(surf_ind, offset >> 4, vals.data()); -#else - __esimd_block_write(acc, offset >> 4, vals.data()); -#endif // __SYCL_DEVICE_ONLY__ +__SYCL_DEPRECATED("use simd::copy_to.") +ESIMD_INLINE ESIMD_NODEBUG + void block_store(AccessorTy acc, uint32_t offset, simd vals) { + vals.copy_to(acc, offset); } /// Accessor-based gather. diff --git a/sycl/test/esimd/block_load_store.cpp b/sycl/test/esimd/block_load_store.cpp index 8399eccfe2f85..1b0da0341b2a7 100644 --- a/sycl/test/esimd/block_load_store.cpp +++ b/sycl/test/esimd/block_load_store.cpp @@ -1,5 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify %s -// expected-no-diagnostics +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s #include #include @@ -9,12 +8,26 @@ using namespace sycl::INTEL::gpu; using namespace cl::sycl; -void kernel(accessor &buf) __attribute__((sycl_device)) { +SYCL_EXTERNAL void kernel1( + accessor + &buf) SYCL_ESIMD_FUNCTION { simd v1(0, 1); - - auto v0 = block_load(buf.get_pointer()); - + // expected-warning@+2 {{deprecated}} + // expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:188 {{}} + auto v0 = block_load(buf, 0); v0 = v0 + v1; + // expected-warning@+2 {{deprecated}} + // expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:220 {{}} + block_store(buf, 0, v0); +} - block_store(buf.get_pointer(), v0); +SYCL_EXTERNAL void kernel2(int *ptr) SYCL_ESIMD_FUNCTION { + simd v1(0, 1); + // expected-warning@+2 {{deprecated}} + // expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:169 {{}} + auto v0 = block_load(ptr); + v0 = v0 + v1; + // expected-warning@+2 {{deprecated}} + // expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:201 {{}} + block_store(ptr, v0); } diff --git a/sycl/test/esimd/simd_copy_to_copy_from.cpp b/sycl/test/esimd/simd_copy_to_copy_from.cpp new file mode 100644 index 0000000000000..fd465ff068c0e --- /dev/null +++ b/sycl/test/esimd/simd_copy_to_copy_from.cpp @@ -0,0 +1,75 @@ +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s + +// This test checks that both host and device compilers can: +// - successfully compile simd::copy_to and simd::copy_from APIs +// - emit an error if argument of an incompatible type is used +// in place of the accessor argument + +#include +#include +#include +#include + +using namespace sycl::INTEL::gpu; +using namespace cl::sycl; + +// --- Postive tests. + +SYCL_EXTERNAL void kernel1( + accessor + &buf) SYCL_ESIMD_FUNCTION { + simd v1(0, 1); + simd v0; + v0.copy_from(buf, 0); + v0 = v0 + v1; + v0.copy_to(buf, 0); +} + +SYCL_EXTERNAL void kernel2(int *ptr) SYCL_ESIMD_FUNCTION { + simd v1(0, 1); + simd v0; + v0.copy_from(ptr); + v0 = v0 + v1; + v0.copy_to(ptr); +} + +// --- Negative tests. + +// Incompatible target. +SYCL_EXTERNAL void +kernel3(accessor &buf) + SYCL_ESIMD_FUNCTION { + simd v1(0, 1); + simd v0; + // expected-error@+3 {{no matching member function for call to 'copy_from'}} + // expected-note@CL/sycl/INTEL/esimd/esimd.hpp:514 {{}} + // expected-note@CL/sycl/INTEL/esimd/esimd.hpp:509 {{}} + v0.copy_from(buf, 0); + v0 = v0 + v1; + // expected-error@+3 {{no matching member function for call to 'copy_to'}} + // expected-note@CL/sycl/INTEL/esimd/esimd.hpp:497 {{}} + // expected-note@CL/sycl/INTEL/esimd/esimd.hpp:525 {{}} + v0.copy_to(buf, 0); +} + +// Incompatible mode (write). +SYCL_EXTERNAL void kernel4( + accessor &buf) + SYCL_ESIMD_FUNCTION { + simd v; + // expected-error@+3 {{no matching member function for call to 'copy_from'}} + // expected-note@CL/sycl/INTEL/esimd/esimd.hpp:514 {{}} + // expected-note@CL/sycl/INTEL/esimd/esimd.hpp:509 {{}} + v.copy_from(buf, 0); +} + +// Incompatible mode (read). +SYCL_EXTERNAL void kernel5( + accessor &buf) + SYCL_ESIMD_FUNCTION { + simd v(0, 1); + // expected-error@+3 {{no matching member function for call to 'copy_to'}} + // expected-note@CL/sycl/INTEL/esimd/esimd.hpp:497 {{}} + // expected-note@CL/sycl/INTEL/esimd/esimd.hpp:525 {{}} + v.copy_to(buf, 0); +}