diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def index 83decff7c69e8..18c1c0c3dacb9 100644 --- a/sycl/include/CL/sycl/detail/pi.def +++ b/sycl/include/CL/sycl/detail/pi.def @@ -79,6 +79,7 @@ _PI_API(piKernelRetain) _PI_API(piKernelRelease) _PI_API(piextKernelSetArgPointer) _PI_API(piKernelSetExecInfo) +_PI_API(piextKernelSetCacheConfig) // Event _PI_API(piEventCreate) _PI_API(piEventGetInfo) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 7ef7be8083f21..2db31d6bbafcb 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -325,6 +325,8 @@ typedef enum { PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE } _pi_kernel_group_info; +typedef enum { PI_LARGE_SLM = 0, PI_LARGE_DATA = 1 } _pi_kernel_cache_config; + typedef enum { PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT = CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT, PI_FP_ROUND_TO_NEAREST = CL_FP_ROUND_TO_NEAREST, @@ -544,6 +546,7 @@ using pi_queue_info = _pi_queue_info; using pi_image_info = _pi_image_info; using pi_kernel_info = _pi_kernel_info; using pi_kernel_group_info = _pi_kernel_group_info; +using pi_kernel_cache_config = _pi_kernel_cache_config; using pi_kernel_sub_group_info = _pi_kernel_sub_group_info; using pi_fp_capabilities = _pi_fp_capabilities; using pi_event_info = _pi_event_info; @@ -1133,6 +1136,13 @@ __SYCL_EXPORT pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value); +/// Sets a cache config for a kernel. +/// +/// \param kernel the kernel object +/// \param conf cache config to set +pi_result piextKernelSetCacheConfig(pi_kernel Kernel, + pi_kernel_cache_config Conf); + /// Gets the native handle of a PI program object. /// /// \param program is the PI program to get the native handle of. diff --git a/sycl/include/CL/sycl/detail/properties_traits.def b/sycl/include/CL/sycl/detail/properties_traits.def index 7f27dbf40a263..aa95fa7afd03c 100644 --- a/sycl/include/CL/sycl/detail/properties_traits.def +++ b/sycl/include/CL/sycl/detail/properties_traits.def @@ -5,6 +5,7 @@ __SYCL_PARAM_TRAITS_SPEC(sycl::property::image::use_host_ptr) __SYCL_PARAM_TRAITS_SPEC(sycl::property::image::use_mutex) __SYCL_PARAM_TRAITS_SPEC(sycl::property::image::context_bound) __SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::buffer::use_pinned_host_memory) +__SYCL_PARAM_TRAITS_SPEC(sycl::INTEL::property::kernel::gpu_cache_config) __SYCL_PARAM_TRAITS_SPEC(sycl::property::noinit) __SYCL_PARAM_TRAITS_SPEC(sycl::property::context::cuda::use_primary_context) __SYCL_PARAM_TRAITS_SPEC(sycl::property::queue::in_order) diff --git a/sycl/include/CL/sycl/detail/property_helper.hpp b/sycl/include/CL/sycl/detail/property_helper.hpp index de0e07c6a8d41..774c4a6387eeb 100644 --- a/sycl/include/CL/sycl/detail/property_helper.hpp +++ b/sycl/include/CL/sycl/detail/property_helper.hpp @@ -42,7 +42,8 @@ enum PropWithDataKind { ImageUseMutex = 2, ImageContextBound = 3, BufferMemChannel = 4, - PropWithDataKindSize = 5 + GPUCacheConfig = 5, + PropWithDataKindSize = 6 }; // Base class for dataless properties, needed to check that the type of an diff --git a/sycl/include/CL/sycl/properties/all_properties.hpp b/sycl/include/CL/sycl/properties/all_properties.hpp index 8b2e276218961..560c3675d3d38 100644 --- a/sycl/include/CL/sycl/properties/all_properties.hpp +++ b/sycl/include/CL/sycl/properties/all_properties.hpp @@ -2,5 +2,6 @@ #include #include #include +#include #include #include diff --git a/sycl/include/CL/sycl/properties/invoke_properties.hpp b/sycl/include/CL/sycl/properties/invoke_properties.hpp new file mode 100644 index 0000000000000..db0b9c231f655 --- /dev/null +++ b/sycl/include/CL/sycl/properties/invoke_properties.hpp @@ -0,0 +1,39 @@ +//==----------- kernel_properties.hpp --- SYCL kernel properties -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace INTEL { + +enum gpu_cache_config { large_slm = 0, large_data = 1 }; + +namespace property { +namespace kernel { + +class gpu_cache_config : public cl::sycl::detail::PropertyWithData< + cl::sycl::detail::GPUCacheConfig> { +public: + gpu_cache_config(cl::sycl::INTEL::gpu_cache_config Config) : Config(Config) {} + + cl::sycl::INTEL::gpu_cache_config get_gpu_cache_config() const { + return Config; + } + +private: + cl::sycl::INTEL::gpu_cache_config Config; +}; + +} // namespace kernel +} // namespace property +} // namespace INTEL +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 6e64d4e21b443..cee3e78835fbb 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -5674,6 +5674,14 @@ pi_result piextProgramSetSpecializationConstant(pi_program Prog, return PI_SUCCESS; } +pi_result piextKernelSetCacheConfig(pi_kernel Kernel, + pi_kernel_cache_config Conf) { + PI_ASSERT(Kernel, PI_INVALID_KERNEL); + ZE_CALL(zeKernelSetCacheConfig, + (Kernel->ZeKernel, pi_cast(Conf))); + return PI_SUCCESS; +} + pi_result piPluginInit(pi_plugin *PluginInit) { PI_ASSERT(PluginInit, PI_INVALID_VALUE); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 2e0e28a3538f7..fd1ce2b9c04c8 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1706,6 +1707,16 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch( const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); ReverseRangeDimensionsForKernel(NDRDesc); + if (PropList + .has_property()) { + sycl::INTEL::property::kernel::gpu_cache_config Config = + PropList + .get_property(); + Plugin.call( + Kernel, + static_cast(Config.get_gpu_cache_config())); + } + pi_result Error = Plugin.call_nocheck( MQueue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], HasLocalSize ? &NDRDesc.LocalSize[0] : nullptr, diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index f9540b8637696..e6edc76fd475a 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4125,6 +4125,7 @@ _ZNK2cl4sycl7context12get_propertyINS0_8property6buffer9use_mutexEEET_v _ZNK2cl4sycl7context12get_propertyINS0_8property6noinitEEET_v _ZNK2cl4sycl7context12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v _ZNK2cl4sycl7context12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v +_ZNK2cl4sycl7context12get_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEET_v _ZNK2cl4sycl7context12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv _ZNK2cl4sycl7context12has_propertyINS0_8property5image12use_host_ptrEEEbv _ZNK2cl4sycl7context12has_propertyINS0_8property5image13context_boundEEEbv @@ -4136,6 +4137,7 @@ _ZNK2cl4sycl7context12has_propertyINS0_8property6buffer9use_mutexEEEbv _ZNK2cl4sycl7context12has_propertyINS0_8property6noinitEEEbv _ZNK2cl4sycl7context12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK2cl4sycl7context12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv +_ZNK2cl4sycl7context12has_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEEbv _ZNK2cl4sycl7context3getEv _ZNK2cl4sycl7context7is_hostEv _ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4224EEENS3_12param_traitsIS4_XT_EE11return_typeEv @@ -4161,6 +4163,7 @@ _ZNK2cl4sycl7program12get_propertyINS0_8property6buffer9use_mutexEEET_v _ZNK2cl4sycl7program12get_propertyINS0_8property6noinitEEET_v _ZNK2cl4sycl7program12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v _ZNK2cl4sycl7program12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v +_ZNK2cl4sycl7program12get_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEET_v _ZNK2cl4sycl7program12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property5image12use_host_ptrEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property5image13context_boundEEEbv @@ -4172,6 +4175,7 @@ _ZNK2cl4sycl7program12has_propertyINS0_8property6buffer9use_mutexEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property6noinitEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv +_ZNK2cl4sycl7program12has_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEEbv _ZNK2cl4sycl7program16get_link_optionsB5cxx11Ev _ZNK2cl4sycl7program17get_build_optionsB5cxx11Ev _ZNK2cl4sycl7program19get_compile_optionsB5cxx11Ev @@ -4193,6 +4197,7 @@ _ZNK2cl4sycl7sampler12get_propertyINS0_8property6buffer9use_mutexEEET_v _ZNK2cl4sycl7sampler12get_propertyINS0_8property6noinitEEET_v _ZNK2cl4sycl7sampler12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v _ZNK2cl4sycl7sampler12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v +_ZNK2cl4sycl7sampler12get_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEET_v _ZNK2cl4sycl7sampler12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv _ZNK2cl4sycl7sampler12has_propertyINS0_8property5image12use_host_ptrEEEbv _ZNK2cl4sycl7sampler12has_propertyINS0_8property5image13context_boundEEEbv @@ -4204,6 +4209,7 @@ _ZNK2cl4sycl7sampler12has_propertyINS0_8property6buffer9use_mutexEEEbv _ZNK2cl4sycl7sampler12has_propertyINS0_8property6noinitEEEbv _ZNK2cl4sycl7sampler12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK2cl4sycl7sampler12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv +_ZNK2cl4sycl7sampler12has_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEEbv _ZNK2cl4sycl7sampler18get_filtering_modeEv _ZNK2cl4sycl7sampler19get_addressing_modeEv _ZNK2cl4sycl7sampler33get_coordinate_normalization_modeEv