diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_dataflow_pipes.asciidoc b/sycl/doc/extensions/supported/sycl_ext_intel_dataflow_pipes.asciidoc index 76a6ff2f774c7..885cf2a780487 100644 --- a/sycl/doc/extensions/supported/sycl_ext_intel_dataflow_pipes.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_intel_dataflow_pipes.asciidoc @@ -110,9 +110,9 @@ A pipe type is a specialization of the pipe class: [source,c++,Pipe type def,linenums] ---- -template +template class pipe; ---- @@ -131,7 +131,7 @@ using pipe; The interface of a pipe is through static member functions, and instances of a pipe class cannot be instantiated. Allowing instances of pipe objects, when their type defines connectivity, would introduce an error prone secondary mechanism of reference. -The first template parameter, `name`, can be any type, and is typically expected to be a user defined class in a user namespace. The type only needs to be forward declared, and not defined. +The first template parameter, `Name`, can be any type, and is typically expected to be a user defined class in a user namespace. The type only needs to be forward declared, and not defined. Above this basic mechanism of {cpp} type being used to identify a pipe, additional layers can be built on top to contain the type in an object that can be passed by value. Because such mechanisms (e.g. `boost::hana::type`) can layer on top of the fundamental type-based mechanism described here, those mechanisms are not included in the extension specification. @@ -166,27 +166,27 @@ myQueue.submit([&](handler& cgh) { == Read/write member functions, and pipe template parameters -The pipe class exposes static member functions for writing a data word to a pipe, and for reading a data word from a pipe. A data word in this context is the data type that the pipe contains (`dataT` pipe template argument). +The pipe class exposes static member functions for writing a data word to a pipe, and for reading a data word from a pipe. A data word in this context is the data type that the pipe contains (`DataT` pipe template argument). Blocking and non-blocking forms of the read and write members are defined, with the form chosen based on overload resolution. [source,c++,Read write members,linenums] ---- -template +template class pipe { // Blocking - static dataT read(); - static void write( const dataT &data ); + static DataT read(); + static void write( const DataT &Data ); // Non-blocking - static dataT read( bool &success_code ); - static void write( const dataT &data, bool &success_code ); + static DataT read( bool &Success ); + static void write( const DataT &Data, bool &Success ); // Static members - using value_type = dataT; - size_t min_capacity = min_capacity; + using value_type = DataT; + size_t min_capacity = MinCapacity; } ---- @@ -194,9 +194,9 @@ The read and write member functions may be invoked within device code, or within The template parameters of the device type are defined as: -* `name`: Type that is the basis of pipe identification. Typically a user-defined class, in a user namespace. Forward declaration of the type is sufficient, and the type does not need to be defined. -* `dataT`: The type of data word/packet contained within a pipe. This is the data type that is read during a successful `pipe::read` operation, or written during a successful `pipe::write` operation. The type must be standard layout and trivially copyable. This template parameter can be queried by using the `value_type` type alias. -* `min_capacity`: User defined minimum number of words in units of `dataT` that the pipe must be able to store without any being read out. A minimum capacity is required in some algorithms to avoid deadlock, or for performance tuning. An implementation can include more capacity than this parameter, but not less. This template parameter can be queried by using the `min_capacity` static member. +* `Name`: Type that is the basis of pipe identification. Typically a user-defined class, in a user namespace. Forward declaration of the type is sufficient, and the type does not need to be defined. +* `DataT`: The type of data word/packet contained within a pipe. This is the data type that is read during a successful `pipe::read` operation, or written during a successful `pipe::write` operation. The type must be standard layout and trivially copyable. This template parameter can be queried by using the `value_type` type alias. +* `MinCapacity`: User defined minimum number of words in units of `DataT` that the pipe must be able to store without any being read out. A minimum capacity is required in some algorithms to avoid deadlock, or for performance tuning. An implementation can include more capacity than this parameter, but not less. This template parameter can be queried by using the `min_capacity` static member. == Pipe types and {cpp} scope @@ -260,12 +260,12 @@ Pipes expose two additional static member functions that are available within ho [source,c++,Read write members,linenums] ---- -template +template class pipe { template - static dataT* map(size_t requested_size, size_t &mapped_size); + static DataT* map(size_t requested_size, size_t &mapped_size); static size_t unmap(T* mapped_ptr, size_t size_to_unmap); } @@ -284,11 +284,11 @@ The APIs are defined as: |Function |Description |`template + dataT* map(size_t requested_size, size_t &mapped_size);` -|Returns a _dataT *_ in the host address space. The host can write data to this pointer for reading by a device pipe endpoint, if it was created with template parameter `host_writeable = true`. Alternatively, the host can read data from this pointer if it was created with template parameter `host_writeable = false`. +|Returns a DataT *_ in the host address space. The host can write data to this pointer for reading by a device pipe endpoint, if it was created with template parameter `host_writeable = true`. Alternatively, the host can read data from this pointer if it was created with template parameter `host_writeable = false`. The value returned in the mapped_size argument specifies the maximum number of bytes that the host can access. The value specified by _mapped_size_ is less than or equal to the value of the _requested_size_ argument that the caller specifies. _mapped_size_ does not impact the _min_capacity_ property of the pipe. -After writing to or reading from the returned _dataT *_, the host must execute one or more `unmap` calls on the same pipe, to signal to the runtime that data is ready for transfer to the device (on a write), and that the runtime can reclaim the memory for reuse (on a read or write). If `map` is called on a pipe before `unmap` has been used to unmap all memory mapped by a previous `map` operation, the buffer returned by the second `map` call will not overlap with that returned by the first call. +After writing to or reading from the returned DataT *_, the host must execute one or more `unmap` calls on the same pipe, to signal to the runtime that data is ready for transfer to the device (on a write), and that the runtime can reclaim the memory for reuse (on a read or write). If `map` is called on a pipe before `unmap` has been used to unmap all memory mapped by a previous `map` operation, the buffer returned by the second `map` call will not overlap with that returned by the first call. |`static size_t unmap(T* mapped_ptr, size_t size_to_unmap);` |Signals to the runtime that the host is no longer using _size_to_unmap_ bytes of the host allocation that was returned previously from a call to `map`. In the case of a writeable host pipe, calling `unmap` allows the unmapped data to become available to the kernel. If the _size_to_unmap_ value is smaller than the _mapped_size_ value specified to `map`, then multiple `unmap` function calls are necessary to unmap the full capacity of the host allocation. It is legal to perform multiple `unmap` function calls to unmap successive bytes in the buffer returned by `map`, up to the _mapped_size_ value defined in the `map` operation. @@ -382,22 +382,22 @@ The pipe class described above exposes both read and write static member functio [source,c++,iopipes,linenums] ---- -template +template class kernel_readable_io_pipe { public: - static dataT read(); // Blocking - static dataT read( bool &success_code ); // Non-blocking + static DataT read(); // Blocking + static DataT read( bool &Success ); // Non-blocking }; -template +template class kernel_writeable_io_pipe { public: - static void write( dataT data ); // Blocking - static void write( dataT data, bool &success_code ); // Non-blocking + static void write( DataT Data ); // Blocking + static void write( DataT Data, bool &Success ); // Non-blocking } ---- @@ -642,55 +642,76 @@ Automated mechanisms are possible to provide uniquification across calls, and co *NOTE*: The APIs described in this section are experimental. Future versions of this extension may change these APIs in ways that are incompatible with the versions described here. -In the experimental API version, read/write methods take template arguments, which can contain the latency control properties `latency_anchor_id` and/or `latency_constraint`. +The Intel FPGA experimental `pipe` class is implemented in `sycl/ext/intel/experimental/pipes.hpp` which is included in `sycl/ext/intel/fpga_extensions.hpp`. + +In the experimental API version, read/write methods take in a property list as function argument, which can contain the latency control properties `latency_anchor_id` and/or `latency_constraint`. * `sycl::ext::intel::experimental::latency_anchor_id`, where `N` is an integer: An ID to associate with the current read/write function call, which can then be referenced by other `latency_constraint` properties elsewhere in the program to define relative latency constaints. ID must be unique within the application, and a diagnostic is required if that condition is not met. * `sycl::ext::intel::experimental::latency_constraint`: A tuple of three values which cause the current read/write function call to act as an endpoint of a latency constraint relative to a specified `latency_anchor_id` defined by a different instruction. ** `A` is an integer: The ID of the target anchor defined on a different instruction through a `latency_anchor_id` property. -** `B` is an enum value: The type of control from the set {`type::exact`, `type::max`, `type::min`}. +** `B` is an enum value: The type of control from the set {`latency_control_type::exact`, `latency_control_type::max`, `latency_control_type::min`}. ** `C` is an integer: The relative clock cycle difference between the target anchor and the current function call, that the constraint should infer subject to the type of the control (exact, max, min). -The template arguments above don't have to be specified if user doesn't want to apply latency controls. The template arguments can be passed in arbitrary order. - -=== Implementation +=== Synopsis [source,c++] ---- // Added in version 2 of this extension. namespace sycl::ext::intel::experimental { -enum class type { +enum class latency_control_type { none, // default exact, max, min }; -template struct latency_anchor_id { - static constexpr int32_t value = _N; - static constexpr int32_t default_value = -1; +struct latency_anchor_id_key { + template + using value_t = + oneapi::experimental::property_value>; }; -template struct latency_constraint { - static constexpr std::tuple value = {_N1, _N2, _N3}; - static constexpr std::tuple default_value = { - 0, type::none, 0}; +struct latency_constraint_key { + template + using value_t = oneapi::experimental::property_value< + latency_constraint_key, std::integral_constant, + std::integral_constant, + std::integral_constant>; }; -template +template +inline constexpr latency_anchor_id_key::value_t latency_anchor_id; + +template +inline constexpr latency_constraint_key::value_t + latency_constraint; + +template class pipe { // Blocking - template - static dataT read(); - template - static void write( const dataT &data ); + static DataT read(); + + template + static DataT read( PropertiesT Properties ); + + static void write( const DataT &Data); + + template + static void write( const DataT &Data, PropertiesT Properties ); // Non-blocking - template - static dataT read( bool &success_code ); - template - static void write( const dataT &data, bool &success_code ); + static DataT read( bool &Success ); + + template + static DataT read( bool &Success, PropertiesT Properties ); + + static void write( const DataT &Data, bool &Success ); + + template + static void write( const DataT &Data, bool &Success, PropertiesT Properties ); } } // namespace sycl::ext::intel::experimental ---- @@ -709,17 +730,20 @@ using Pipe3 = ext::intel::experimental::pipe; myQueue.submit([&](handler &cgh) { cgh.single_task([=] { // The following Pipe1::read is anchor 0 - int value = Pipe1::read>(); + int value = Pipe1::read( + ext::oneapi::experimental::properties(latency_anchor_id<0>)); // The following Pipe2::write is anchor 1 // The following Pipe2::write occurs exactly 2 cycles after anchor 0 - Pipe2::write, - ext::intel::experimental::latency_constraint< - 0, ext::intel::experimental::type::exact, 2>>(value); + Pipe2::write(value, + ext::oneapi::experimental::properties( + latency_anchor_id<1>, + latency_constraint<0, latency_control_type::exact, 2>)); // The following Pipe3::write occurs at least 2 cycles after anchor 1 - Pipe3::write>(value); + Pipe3::write(value, + ext::oneapi::experimental::properties( + latency_constraint<1, latency_control_type::min, 2>)); }); }); ---- diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_fpga_lsu.md b/sycl/doc/extensions/supported/sycl_ext_intel_fpga_lsu.md index ad85bee3af4bf..512a7a381288b 100644 --- a/sycl/doc/extensions/supported/sycl_ext_intel_fpga_lsu.md +++ b/sycl/doc/extensions/supported/sycl_ext_intel_fpga_lsu.md @@ -43,16 +43,16 @@ The implementation relies on the Clang built-in `__builtin_intel_fpga_mem` when parsing the SYCL device code. The built-in uses the LLVM `ptr.annotation` intrinsic under the hood to annotate the pointer that is being accessed. ```c++ -template class lsu final { +template class lsu final { public: lsu() = delete; - template - static _T load(sycl::multi_ptr<_T, _space> Ptr) { - check_space<_space>(); + template + static T load(sycl::multi_ptr Ptr) { + check_space(); check_load(); #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem) - return *__builtin_intel_fpga_mem((_T *)Ptr, + return *__builtin_intel_fpga_mem((T *)Ptr, _burst_coalesce | _cache | _dont_statically_coalesce | _prefetch, _cache_val); @@ -61,12 +61,12 @@ public: #endif } - template - static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val) { - check_space<_space>(); + template + static void store(sycl::multi_ptr Ptr, T Val) { + check_space(); check_store(); #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem) - *__builtin_intel_fpga_mem((_T *)Ptr, + *__builtin_intel_fpga_mem((T *)Ptr, _burst_coalesce | _cache | _dont_statically_coalesce | _prefetch, _cache_val) = Val; @@ -126,8 +126,8 @@ this extension may change these APIs in ways that are incompatible with the versions described here. In the experimental API version, member functions `load()` and `store()` take -template arguments, which can contain the latency control properties -`latency_anchor_id` and/or `latency_constraint`. +in a property list as function argument, which can contain the latency control +properties `latency_anchor_id` and/or `latency_constraint`. 1. **`sycl::ext::intel::experimental::latency_anchor_id`, where `N` is an integer**: represents ID of the current function call when it performs as an anchor. The ID @@ -138,116 +138,58 @@ parameters when the current function performs as a non-anchor, where: - **`A` is an integer**: The ID of the target anchor defined on a different instruction through a `latency_anchor_id` property. - **`B` is an enum value**: The type of control from the set - {`type::exact`, `type::max`, `type::min`}. + {`latency_control_type::exact`, `latency_control_type::max`, `latency_control_type::min`}. - **`C` is an integer**: The relative clock cycle difference between the target anchor and the current function call, that the constraint should infer subject to the type of the control (exact, max, min). -The template arguments above don't have to be specified if user doesn't want to -apply latency controls. The template arguments can be passed in arbitrary order. - -### Implementation +### Synopsis ```c++ // Added in version 2 of this extension. namespace sycl::ext::intel::experimental { -enum class type { +enum class latency_control_type { none, // default exact, max, min }; -template struct latency_anchor_id { - static constexpr int32_t value = _N; - static constexpr int32_t default_value = -1; +struct latency_anchor_id_key { + template + using value_t = + oneapi::experimental::property_value>; }; -template struct latency_constraint { - static constexpr std::tuple value = {_N1, _N2, _N3}; - static constexpr std::tuple default_value = { - 0, type::none, 0}; +struct latency_constraint_key { + template + using value_t = oneapi::experimental::property_value< + latency_constraint_key, std::integral_constant, + std::integral_constant, + std::integral_constant>; }; -template class lsu final { -public: - lsu() = delete; +template +inline constexpr latency_anchor_id_key::value_t latency_anchor_id; - template - static _T load(sycl::multi_ptr<_T, _space> Ptr) { - check_space<_space>(); - check_load(); -#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem) - static constexpr auto _anchor_id = - __GetValue::value; - static constexpr auto _constraint = - __GetValue3::value; - - static constexpr int _target_anchor = std::get<0>(_constraint); - static constexpr type _control_type = std::get<1>(_constraint); - static constexpr int _cycle = std::get<2>(_constraint); - int _type; - if (_control_type == type::none) { - _type = 0; - } else if (_control_type == type::exact) { - _type = 1; - } else if (_control_type == type::max) { - _type = 2; - } else { // _control_type == type::min - _type = 3; - } - - return *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor, - _type, _cycle); -#else - return *Ptr; -#endif - } +template +inline constexpr latency_constraint_key::value_t + latency_constraint; - template - static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val) { - check_space<_space>(); - check_store(); -#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem) - static constexpr auto _anchor_id = - __GetValue::value; - static constexpr auto _constraint = - __GetValue3::value; - - static constexpr int _target_anchor = std::get<0>(_constraint); - static constexpr type _control_type = std::get<1>(_constraint); - static constexpr int _cycle = std::get<2>(_constraint); - int _type; - if (_control_type == type::none) { - _type = 0; - } else if (_control_type == type::exact) { - _type = 1; - } else if (_control_type == type::max) { - _type = 2; - } else { // _control_type == type::min - _type = 3; - } - - *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor, _type, - _cycle) = Val; -#else - *Ptr = Val; -#endif - } - ... -private: -#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem) - template - static _T *__latency_control_mem_wrapper(_T * Ptr, int32_t AnchorID, - int32_t TargetAnchor, int32_t Type, - int32_t Cycle) { - return __builtin_intel_fpga_mem(Ptr, - _burst_coalesce | _cache | - _dont_statically_coalesce | _prefetch, - _cache_val); - } -#endif - ... -} +template class lsu final { + template + static T load(sycl::multi_ptr Ptr); + + template + static T load(sycl::multi_ptr Ptr, PropertiesT Properties); + + template + static void store(sycl::multi_ptr Ptr, T Val); + + template + static void store(sycl::multi_ptr Ptr, T Val, + PropertiesT Properties); +}; } // namespace sycl::ext::intel::experimental ``` @@ -267,7 +209,6 @@ Queue.submit([&](sycl::handler &cgh) { auto input_ptr = input_accessor.get_pointer(); auto output_ptr = output_accessor.get_pointer(); - // latency controls using ExpPrefetchingLSU = sycl::ext::intel::experimental::lsu< sycl::ext::intel::experimental::prefetch, sycl::ext::intel::experimental::statically_coalesce>; @@ -277,17 +218,19 @@ Queue.submit([&](sycl::handler &cgh) { sycl::ext::intel::experimental::statically_coalesce>; // The following load is anchor 1 - int Z = ExpPrefetchingLSU::load< - sycl::ext::intel::experimental::latency_anchor_id<1>>(input_ptr + 2); + int Z = ExpPrefetchingLSU::load( + input_ptr + 2, + sycl::ext::oneapi::experimental::properties(latency_anchor_id<1>)); // The following store occurs exactly 5 cycles after the anchor 1 read - ExpBurstCoalescedLSU::store< - sycl::ext::intel::experimental::latency_constraint< - 1, sycl::ext::intel::experimental::type::exact, 5>>(output_ptr + 2, - Z); + ExpBurstCoalescedLSU::store( + output_ptr + 2, Z, + sycl::ext::oneapi::experimental::properties( + latency_constraint<1, latency_control_type::exact, 5>)); }); }); ... +} // namespace sycl::ext::intel::experimental ``` ## Feature Test Macro diff --git a/sycl/include/sycl/ext/intel/experimental/fpga_lsu.hpp b/sycl/include/sycl/ext/intel/experimental/fpga_lsu.hpp index ad3fda6729b3c..93bc8b613c83f 100644 --- a/sycl/include/sycl/ext/intel/experimental/fpga_lsu.hpp +++ b/sycl/include/sycl/ext/intel/experimental/fpga_lsu.hpp @@ -50,84 +50,108 @@ template class lsu final { public: lsu() = delete; - template - static _T load(sycl::multi_ptr<_T, _space> Ptr) { + template + static _T load(sycl::multi_ptr<_T, _space> Ptr, _propertiesT Properties) { check_space<_space>(); check_load(); #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem) - static constexpr auto _anchor_id = - _GetValue::value; - static constexpr auto _constraint = - _GetValue3::value; - - static constexpr int32_t _target_anchor = std::get<0>(_constraint); - static constexpr type _control_type = std::get<1>(_constraint); - static constexpr int32_t _cycle = std::get<2>(_constraint); - int32_t _type = 0; // Default: _control_type == type::none - if constexpr (_control_type == type::exact) { - _type = 1; - } else if constexpr (_control_type == type::max) { - _type = 2; - } else if constexpr (_control_type == type::min) { - _type = 3; + // Get latency control properties + using _latency_anchor_id_prop = typename detail::GetOrDefaultValT< + _propertiesT, latency_anchor_id_key, + detail::defaultLatencyAnchorIdProperty>::type; + using _latency_constraint_prop = typename detail::GetOrDefaultValT< + _propertiesT, latency_constraint_key, + detail::defaultLatencyConstraintProperty>::type; + + // Get latency control property values + static constexpr int32_t _anchor_id = _latency_anchor_id_prop::value; + static constexpr int32_t _target_anchor = _latency_constraint_prop::target; + static constexpr latency_control_type _control_type = + _latency_constraint_prop::type; + static constexpr int32_t _relative_cycle = _latency_constraint_prop::cycle; + + int32_t _control_type_code = 0; // latency_control_type::none is default + if constexpr (_control_type == latency_control_type::exact) { + _control_type_code = 1; + } else if constexpr (_control_type == latency_control_type::max) { + _control_type_code = 2; + } else if constexpr (_control_type == latency_control_type::min) { + _control_type_code = 3; } return *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor, - _type, _cycle); + _control_type_code, _relative_cycle); #else + (void)Properties; return *Ptr; #endif } - template - static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val) { + template + static _T load(sycl::multi_ptr<_T, _space> Ptr) { + return load<_T, _space>(Ptr, oneapi::experimental::properties{}); + } + + template + static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val, + _propertiesT Properties) { check_space<_space>(); check_store(); #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem) - static constexpr auto _anchor_id = - _GetValue::value; - static constexpr auto _constraint = - _GetValue3::value; - - static constexpr int32_t _target_anchor = std::get<0>(_constraint); - static constexpr type _control_type = std::get<1>(_constraint); - static constexpr int32_t _cycle = std::get<2>(_constraint); - int32_t _type = 0; // Default: _control_type == type::none - if constexpr (_control_type == type::exact) { - _type = 1; - } else if constexpr (_control_type == type::max) { - _type = 2; - } else if constexpr (_control_type == type::min) { - _type = 3; + // Get latency control properties + using _latency_anchor_id_prop = typename detail::GetOrDefaultValT< + _propertiesT, latency_anchor_id_key, + detail::defaultLatencyAnchorIdProperty>::type; + using _latency_constraint_prop = typename detail::GetOrDefaultValT< + _propertiesT, latency_constraint_key, + detail::defaultLatencyConstraintProperty>::type; + + // Get latency control property values + static constexpr int32_t _anchor_id = _latency_anchor_id_prop::value; + static constexpr int32_t _target_anchor = _latency_constraint_prop::target; + static constexpr latency_control_type _control_type = + _latency_constraint_prop::type; + static constexpr int32_t _relative_cycle = _latency_constraint_prop::cycle; + + int32_t _control_type_code = 0; // latency_control_type::none is default + if constexpr (_control_type == latency_control_type::exact) { + _control_type_code = 1; + } else if constexpr (_control_type == latency_control_type::max) { + _control_type_code = 2; + } else if constexpr (_control_type == latency_control_type::min) { + _control_type_code = 3; } - *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor, _type, - _cycle) = Val; + *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor, + _control_type_code, _relative_cycle) = Val; #else + (void)Properties; *Ptr = Val; #endif } + template + static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val) { + store<_T, _space>(Ptr, Val, oneapi::experimental::properties{}); + } + private: static constexpr int32_t _burst_coalesce_val = - _GetValue::value; + detail::_GetValue::value; static constexpr uint8_t _burst_coalesce = _burst_coalesce_val == 1 ? BURST_COALESCE : 0; static constexpr int32_t _cache_val = - _GetValue::value; + detail::_GetValue::value; static constexpr uint8_t _cache = (_cache_val > 0) ? CACHE : 0; static constexpr int32_t _statically_coalesce_val = - _GetValue::value; + detail::_GetValue::value; static constexpr uint8_t _dont_statically_coalesce = _statically_coalesce_val == 0 ? STATICALLY_COALESCE : 0; static constexpr int32_t _prefetch_val = - _GetValue::value; + detail::_GetValue::value; static constexpr uint8_t _prefetch = _prefetch_val ? PREFETCH : 0; static_assert(_cache_val >= 0, "cache size parameter must be non-negative"); diff --git a/sycl/include/sycl/ext/intel/experimental/fpga_utils.hpp b/sycl/include/sycl/ext/intel/experimental/fpga_utils.hpp index a1c7f5e857967..fe5d9f259c300 100644 --- a/sycl/include/sycl/ext/intel/experimental/fpga_utils.hpp +++ b/sycl/include/sycl/ext/intel/experimental/fpga_utils.hpp @@ -11,95 +11,48 @@ #include #include #include +#include #include __SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace ext { -namespace intel { -namespace experimental { +namespace sycl::ext::intel::experimental::detail { -enum class type { - none, // default - exact, - max, - min -}; - -template struct latency_anchor_id { - static constexpr int32_t value = _N; - static constexpr int32_t default_value = -1; -}; - -template struct latency_constraint { - static constexpr std::tuple value = {_N1, _N2, _N3}; - static constexpr std::tuple default_value = { - 0, type::none, 0}; -}; - -using ignoreParam_int_t = int32_t; -constexpr ignoreParam_int_t IgnoreParamInt{}; -using ignoreParam_enum_t = type; -constexpr ignoreParam_enum_t IgnoreParamEnum{}; - -template struct _ValueExtractorImp { - static constexpr auto _First = _T::value; - static constexpr auto _Second = IgnoreParamEnum; - static constexpr auto _Third = IgnoreParamInt; -}; - -template -struct _ValueExtractorImp< - const std::tuple<_VTypeFirst, _VTypeSecond, _VTypeThird>, _T> { - static constexpr auto _First = std::get<0>(_T::value); - static constexpr auto _Second = std::get<1>(_T::value); - static constexpr auto _Third = std::get<2>(_T::value); -}; - -template -struct _ValueExtractor : _ValueExtractorImp {}; - -template class _Type, - class _T> -struct _MatchType - : std::is_same< - _Type<_ValueExtractor<_T>::_First, _ValueExtractor<_T>::_Second, - _ValueExtractor<_T>::_Third>, - _T> {}; +template