Skip to content

[SYCL] Implement runtime property to set cache size for a kernel #3417

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ _PI_API(piKernelRetain)
_PI_API(piKernelRelease)
_PI_API(piextKernelSetArgPointer)
_PI_API(piKernelSetExecInfo)
_PI_API(piextKernelSetCacheConfig)
// Event
_PI_API(piEventCreate)
_PI_API(piEventGetInfo)
Expand Down
10 changes: 10 additions & 0 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,8 @@ typedef enum {
PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE
} _pi_kernel_group_info;

typedef enum { PI_LARGE_SLM = 0, PI_LARGE_DATA = 1 } _pi_kernel_cache_config;

typedef enum {
PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT = CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT,
PI_FP_ROUND_TO_NEAREST = CL_FP_ROUND_TO_NEAREST,
Expand Down Expand Up @@ -544,6 +546,7 @@ using pi_queue_info = _pi_queue_info;
using pi_image_info = _pi_image_info;
using pi_kernel_info = _pi_kernel_info;
using pi_kernel_group_info = _pi_kernel_group_info;
using pi_kernel_cache_config = _pi_kernel_cache_config;
using pi_kernel_sub_group_info = _pi_kernel_sub_group_info;
using pi_fp_capabilities = _pi_fp_capabilities;
using pi_event_info = _pi_event_info;
Expand Down Expand Up @@ -1133,6 +1136,13 @@ __SYCL_EXPORT pi_result
piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id,
size_t spec_size, const void *spec_value);

/// Sets a cache config for a kernel.
///
/// \param kernel the kernel object
/// \param conf cache config to set
pi_result piextKernelSetCacheConfig(pi_kernel Kernel,
pi_kernel_cache_config Conf);

/// Gets the native handle of a PI program object.
///
/// \param program is the PI program to get the native handle of.
Expand Down
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/detail/properties_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ __SYCL_PARAM_TRAITS_SPEC(sycl::property::image::use_host_ptr)
__SYCL_PARAM_TRAITS_SPEC(sycl::property::image::use_mutex)
__SYCL_PARAM_TRAITS_SPEC(sycl::property::image::context_bound)
__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::buffer::use_pinned_host_memory)
__SYCL_PARAM_TRAITS_SPEC(sycl::INTEL::property::kernel::gpu_cache_config)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think all these SYCL::INTEL namespaces should be sycl::ext::intel instead. For example, look at the line immediately above, which has sycl::ext::oneapi.

__SYCL_PARAM_TRAITS_SPEC(sycl::property::noinit)
__SYCL_PARAM_TRAITS_SPEC(sycl::property::context::cuda::use_primary_context)
__SYCL_PARAM_TRAITS_SPEC(sycl::property::queue::in_order)
Expand Down
3 changes: 2 additions & 1 deletion sycl/include/CL/sycl/detail/property_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ enum PropWithDataKind {
ImageUseMutex = 2,
ImageContextBound = 3,
BufferMemChannel = 4,
PropWithDataKindSize = 5
GPUCacheConfig = 5,
PropWithDataKindSize = 6
};

// Base class for dataless properties, needed to check that the type of an
Expand Down
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/properties/all_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
#include <CL/sycl/properties/buffer_properties.hpp>
#include <CL/sycl/properties/context_properties.hpp>
#include <CL/sycl/properties/image_properties.hpp>
#include <CL/sycl/properties/invoke_properties.hpp>
#include <CL/sycl/properties/queue_properties.hpp>
#include <CL/sycl/properties/reduction_properties.hpp>
39 changes: 39 additions & 0 deletions sycl/include/CL/sycl/properties/invoke_properties.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//==----------- kernel_properties.hpp --- SYCL kernel properties -----------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/sycl/detail/property_helper.hpp>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace INTEL {

enum gpu_cache_config { large_slm = 0, large_data = 1 };

namespace property {
namespace kernel {

class gpu_cache_config : public cl::sycl::detail::PropertyWithData<
cl::sycl::detail::GPUCacheConfig> {
public:
gpu_cache_config(cl::sycl::INTEL::gpu_cache_config Config) : Config(Config) {}

cl::sycl::INTEL::gpu_cache_config get_gpu_cache_config() const {
return Config;
}

private:
cl::sycl::INTEL::gpu_cache_config Config;
};

} // namespace kernel
} // namespace property
} // namespace INTEL
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
8 changes: 8 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5674,6 +5674,14 @@ pi_result piextProgramSetSpecializationConstant(pi_program Prog,
return PI_SUCCESS;
}

pi_result piextKernelSetCacheConfig(pi_kernel Kernel,
pi_kernel_cache_config Conf) {
PI_ASSERT(Kernel, PI_INVALID_KERNEL);
ZE_CALL(zeKernelSetCacheConfig,
(Kernel->ZeKernel, pi_cast<ze_cache_config_flags_t>(Conf)));
return PI_SUCCESS;
}

pi_result piPluginInit(pi_plugin *PluginInit) {
PI_ASSERT(PluginInit, PI_INVALID_VALUE);

Expand Down
11 changes: 11 additions & 0 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <CL/sycl/detail/kernel_desc.hpp>
#include <CL/sycl/detail/memory_manager.hpp>
#include <CL/sycl/program.hpp>
#include <CL/sycl/properties/all_properties.hpp>
#include <CL/sycl/sampler.hpp>
#include <detail/context_impl.hpp>
#include <detail/event_impl.hpp>
Expand Down Expand Up @@ -1706,6 +1707,16 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch(
const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0);

ReverseRangeDimensionsForKernel(NDRDesc);
if (PropList
.has_property<sycl::INTEL::property::kernel::gpu_cache_config>()) {
sycl::INTEL::property::kernel::gpu_cache_config Config =
PropList
.get_property<sycl::INTEL::property::kernel::gpu_cache_config>();
Plugin.call<PiApiKind::piextKernelSetCacheConfig>(
Kernel,
static_cast<pi_kernel_cache_config>(Config.get_gpu_cache_config()));
}

pi_result Error = Plugin.call_nocheck<PiApiKind::piEnqueueKernelLaunch>(
MQueue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0],
&NDRDesc.GlobalSize[0], HasLocalSize ? &NDRDesc.LocalSize[0] : nullptr,
Expand Down
6 changes: 6 additions & 0 deletions sycl/test/abi/sycl_symbols_linux.dump
Original file line number Diff line number Diff line change
Expand Up @@ -4125,6 +4125,7 @@ _ZNK2cl4sycl7context12get_propertyINS0_8property6buffer9use_mutexEEET_v
_ZNK2cl4sycl7context12get_propertyINS0_8property6noinitEEET_v
_ZNK2cl4sycl7context12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v
_ZNK2cl4sycl7context12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v
_ZNK2cl4sycl7context12get_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEET_v
_ZNK2cl4sycl7context12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv
_ZNK2cl4sycl7context12has_propertyINS0_8property5image12use_host_ptrEEEbv
_ZNK2cl4sycl7context12has_propertyINS0_8property5image13context_boundEEEbv
Expand All @@ -4136,6 +4137,7 @@ _ZNK2cl4sycl7context12has_propertyINS0_8property6buffer9use_mutexEEEbv
_ZNK2cl4sycl7context12has_propertyINS0_8property6noinitEEEbv
_ZNK2cl4sycl7context12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv
_ZNK2cl4sycl7context12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv
_ZNK2cl4sycl7context12has_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEEbv
_ZNK2cl4sycl7context3getEv
_ZNK2cl4sycl7context7is_hostEv
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4224EEENS3_12param_traitsIS4_XT_EE11return_typeEv
Expand All @@ -4161,6 +4163,7 @@ _ZNK2cl4sycl7program12get_propertyINS0_8property6buffer9use_mutexEEET_v
_ZNK2cl4sycl7program12get_propertyINS0_8property6noinitEEET_v
_ZNK2cl4sycl7program12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v
_ZNK2cl4sycl7program12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v
_ZNK2cl4sycl7program12get_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEET_v
_ZNK2cl4sycl7program12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv
_ZNK2cl4sycl7program12has_propertyINS0_8property5image12use_host_ptrEEEbv
_ZNK2cl4sycl7program12has_propertyINS0_8property5image13context_boundEEEbv
Expand All @@ -4172,6 +4175,7 @@ _ZNK2cl4sycl7program12has_propertyINS0_8property6buffer9use_mutexEEEbv
_ZNK2cl4sycl7program12has_propertyINS0_8property6noinitEEEbv
_ZNK2cl4sycl7program12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv
_ZNK2cl4sycl7program12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv
_ZNK2cl4sycl7program12has_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEEbv
_ZNK2cl4sycl7program16get_link_optionsB5cxx11Ev
_ZNK2cl4sycl7program17get_build_optionsB5cxx11Ev
_ZNK2cl4sycl7program19get_compile_optionsB5cxx11Ev
Expand All @@ -4193,6 +4197,7 @@ _ZNK2cl4sycl7sampler12get_propertyINS0_8property6buffer9use_mutexEEET_v
_ZNK2cl4sycl7sampler12get_propertyINS0_8property6noinitEEET_v
_ZNK2cl4sycl7sampler12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v
_ZNK2cl4sycl7sampler12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v
_ZNK2cl4sycl7sampler12get_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEET_v
_ZNK2cl4sycl7sampler12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv
_ZNK2cl4sycl7sampler12has_propertyINS0_8property5image12use_host_ptrEEEbv
_ZNK2cl4sycl7sampler12has_propertyINS0_8property5image13context_boundEEEbv
Expand All @@ -4204,6 +4209,7 @@ _ZNK2cl4sycl7sampler12has_propertyINS0_8property6buffer9use_mutexEEEbv
_ZNK2cl4sycl7sampler12has_propertyINS0_8property6noinitEEEbv
_ZNK2cl4sycl7sampler12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv
_ZNK2cl4sycl7sampler12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv
_ZNK2cl4sycl7sampler12has_propertyINS0_5INTEL8property6kernel16gpu_cache_configEEEbv
_ZNK2cl4sycl7sampler18get_filtering_modeEv
_ZNK2cl4sycl7sampler19get_addressing_modeEv
_ZNK2cl4sycl7sampler33get_coordinate_normalization_modeEv
Expand Down