diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index fd375949c8b71..356c0a1288429 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -21,6 +21,9 @@ #include #include +// Declared for unit tests only; KernelProgramCache befriends this class. +class MockKernelProgramCache; + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -79,6 +82,13 @@ class KernelProgramCache { using KernelByNameT = std::map; using KernelCacheT = std::map; + using KernelFastCacheKeyT = + std::tuple; + using KernelFastCacheValT = + std::tuple; + using KernelFastCacheT = std::map; + ~KernelProgramCache(); void setContextPtr(const ContextPtr &AContext) { MParentContext = AContext; } @@ -102,6 +112,24 @@ class KernelProgramCache { BR.MBuildCV.notify_all(); } + template + KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey) { + std::unique_lock Lock(MKernelFastCacheMutex); + auto It = MKernelFastCache.find(CacheKey); + if (It != MKernelFastCache.end()) { + return It->second; + } + return std::make_tuple(nullptr, nullptr, nullptr); + } + + template + void saveKernel(KeyT &&CacheKey, ValT &&CacheVal) { + std::unique_lock Lock(MKernelFastCacheMutex); + // If no insertion takes place, another thread has already cached an entry + // for this key; emplace() then leaves the existing entry untouched. + MKernelFastCache.emplace(CacheKey, CacheVal); + } + private: std::mutex MProgramCacheMutex; std::mutex MKernelsPerProgramCacheMutex; @@ -109,6 +137,10 @@ class KernelProgramCache { ProgramCacheT MCachedPrograms; KernelCacheT MKernelsPerProgramCache; ContextPtr MParentContext; + + std::mutex MKernelFastCacheMutex; + KernelFastCacheT MKernelFastCache; + friend class ::MockKernelProgramCache; }; } // namespace detail } // namespace sycl diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index 4ce35d44031cc..8090280afa7e0 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -439,7 +439,7 @@ 
RT::PiKernel program_impl::get_pi_kernel(const std::string &KernelName) const { RT::PiKernel Kernel = nullptr; if (is_cacheable()) { - std::tie(Kernel, std::ignore) = + std::tie(Kernel, std::ignore, std::ignore) = ProgramManager::getInstance().getOrCreateKernel( MProgramModuleHandle, detail::getSyclObjImpl(get_context()), detail::getSyclObjImpl(get_devices()[0]), KernelName, this); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index c864e47dc182c..fbf2727d9eac8 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -519,24 +519,41 @@ RT::PiProgram ProgramManager::getBuiltPIProgram( return BuildResult->Ptr.load(); } -std::pair ProgramManager::getOrCreateKernel( - OSModuleHandle M, const ContextImplPtr &Context, - const DeviceImplPtr &Device, const std::string &KernelName, - const program_impl *Prg) { +std::tuple +ProgramManager::getOrCreateKernel(OSModuleHandle M, + const ContextImplPtr &ContextImpl, + const DeviceImplPtr &DeviceImpl, + const std::string &KernelName, + const program_impl *Prg) { if (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getOrCreateKernel(" << M << ", " - << Context.get() << ", " << Device.get() << ", " << KernelName - << ")\n"; + << ContextImpl.get() << ", " << DeviceImpl.get() << ", " + << KernelName << ")\n"; } - RT::PiProgram Program = - getBuiltPIProgram(M, Context, Device, KernelName, Prg); - using PiKernelT = KernelProgramCache::PiKernelT; using KernelCacheT = KernelProgramCache::KernelCacheT; using KernelByNameT = KernelProgramCache::KernelByNameT; - KernelProgramCache &Cache = Context->getKernelProgramCache(); + KernelProgramCache &Cache = ContextImpl->getKernelProgramCache(); + + std::string CompileOpts, LinkOpts; + SerializedObj SpecConsts; + if (Prg) { + CompileOpts = Prg->get_build_options(); + Prg->stableSerializeSpecConstRegistry(SpecConsts); + } + 
applyOptionsFromEnvironment(CompileOpts, LinkOpts); + const RT::PiDevice PiDevice = DeviceImpl->getHandleRef(); + + auto key = std::make_tuple(std::move(SpecConsts), M, PiDevice, + CompileOpts + LinkOpts, KernelName); + auto ret_tuple = Cache.tryToGetKernelFast(key); + if (std::get<0>(ret_tuple)) + return ret_tuple; + + RT::PiProgram Program = + getBuiltPIProgram(M, ContextImpl, DeviceImpl, KernelName, Prg); auto AcquireF = [](KernelProgramCache &Cache) { return Cache.acquireKernelsPerProgramCache(); @@ -545,12 +562,12 @@ std::pair ProgramManager::getOrCreateKernel( [&Program](const Locked &LockedCache) -> KernelByNameT & { return LockedCache.get()[Program]; }; - auto BuildF = [&Program, &KernelName, &Context] { + auto BuildF = [&Program, &KernelName, &ContextImpl] { PiKernelT *Result = nullptr; // TODO need some user-friendly error/exception // instead of currently obscure one - const detail::plugin &Plugin = Context->getPlugin(); + const detail::plugin &Plugin = ContextImpl->getPlugin(); Plugin.call(Program, KernelName.c_str(), &Result); @@ -564,8 +581,10 @@ std::pair ProgramManager::getOrCreateKernel( auto BuildResult = getOrBuild( Cache, KernelName, AcquireF, GetF, BuildF); - return std::make_pair(BuildResult->Ptr.load(), - &(BuildResult->MBuildResultMutex)); + auto ret_val = std::make_tuple(BuildResult->Ptr.load(), + &(BuildResult->MBuildResultMutex), Program); + Cache.saveKernel(key, ret_val); + return ret_val; } RT::PiProgram diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index f8c5da4ffd7c6..7332503c94e0c 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -106,7 +106,7 @@ class ProgramManager { const property_list &PropList, bool JITCompilationIsRequired = false); - std::pair + std::tuple getOrCreateKernel(OSModuleHandle M, const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const 
std::string &KernelName, const program_impl *Prg); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index cf414082e7a44..75b14e5677996 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2009,7 +2009,7 @@ cl_int ExecCGCommand::enqueueImp() { Program = SyclProg->getHandleRef(); if (SyclProg->is_cacheable()) { RT::PiKernel FoundKernel = nullptr; - std::tie(FoundKernel, KernelMutex) = + std::tie(FoundKernel, KernelMutex, std::ignore) = detail::ProgramManager::getInstance().getOrCreateKernel( ExecKernel->MOSModuleHandle, ContextImpl, DeviceImpl, ExecKernel->MKernelName, SyclProg.get()); @@ -2017,13 +2017,10 @@ cl_int ExecCGCommand::enqueueImp() { } else KnownProgram = false; } else { - std::tie(Kernel, KernelMutex) = + std::tie(Kernel, KernelMutex, Program) = detail::ProgramManager::getInstance().getOrCreateKernel( ExecKernel->MOSModuleHandle, ContextImpl, DeviceImpl, ExecKernel->MKernelName, nullptr); - MQueue->getPlugin().call( - Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(RT::PiProgram), &Program, - nullptr); } pi_result Error = PI_SUCCESS; diff --git a/sycl/unittests/kernel-and-program/Cache.cpp b/sycl/unittests/kernel-and-program/Cache.cpp index 946f54f5de596..4204908798e91 100644 --- a/sycl/unittests/kernel-and-program/Cache.cpp +++ b/sycl/unittests/kernel-and-program/Cache.cpp @@ -434,3 +434,146 @@ TEST_F(KernelAndProgramCacheTest, KernelNegativeSource) { CtxImpl->getKernelProgramCache().acquireKernelsPerProgramCache().get(); EXPECT_EQ(Cache.size(), 0U) << "Expect empty cache for kernels"; } + +typedef KernelAndProgramCacheTest KernelAndProgramFastCacheTest; + +class MockKernelProgramCache : public detail::KernelProgramCache { +public: + static detail::KernelProgramCache::KernelFastCacheT & + getFastCache(detail::KernelProgramCache &cache) { + return (reinterpret_cast(cache)).get(); + } + + detail::KernelProgramCache::KernelFastCacheT &get() { + return 
this->MKernelFastCache; + } +}; + +// Check that kernels built without options are put in the fast cache. +TEST_F(KernelAndProgramFastCacheTest, KernelPositive) { + if (Plt.is_host() || Plt.get_backend() != backend::opencl) { + return; + } + + context Ctx{Plt}; + auto CtxImpl = detail::getSyclObjImpl(Ctx); + + globalCtx.reset(new TestCtx{CtxImpl->getHandleRef()}); + + program Prg{Ctx}; + + Prg.build_with_kernel_type(); + kernel Ker = Prg.get_kernel(); + detail::KernelProgramCache::KernelFastCacheT &Cache = + MockKernelProgramCache::getFastCache(CtxImpl->getKernelProgramCache()); + EXPECT_EQ(Cache.size(), 1U) << "Expect non-empty cache for kernels"; +} + +// Check that kernels built with options are put in the fast cache. +TEST_F(KernelAndProgramFastCacheTest, KernelPositiveBuildOpts) { + if (Plt.is_host() || Plt.get_backend() != backend::opencl) { + return; + } + + context Ctx{Plt}; + auto CtxImpl = detail::getSyclObjImpl(Ctx); + + globalCtx.reset(new TestCtx{CtxImpl->getHandleRef()}); + + program Prg{Ctx}; + + Prg.build_with_kernel_type("-g"); + + kernel Ker = Prg.get_kernel(); + detail::KernelProgramCache::KernelFastCacheT &Cache = + MockKernelProgramCache::getFastCache(CtxImpl->getKernelProgramCache()); + EXPECT_EQ(Cache.size(), 1U) << "Expect non-empty cache for kernels"; +} + +// Check that kernels built with compile options are not put in the fast cache. +TEST_F(KernelAndProgramFastCacheTest, KernelNegativeCompileOpts) { + if (Plt.is_host() || Plt.get_backend() != backend::opencl) { + return; + } + + context Ctx{Plt}; + auto CtxImpl = detail::getSyclObjImpl(Ctx); + + globalCtx.reset(new TestCtx{CtxImpl->getHandleRef()}); + + program Prg{Ctx}; + + Prg.compile_with_kernel_type("-g"); + Prg.link(); + kernel Ker = Prg.get_kernel(); + detail::KernelProgramCache::KernelFastCacheT &Cache = + MockKernelProgramCache::getFastCache(CtxImpl->getKernelProgramCache()); + EXPECT_EQ(Cache.size(), 0U) << "Expect empty cache for kernels"; +} + +// Check that kernels built with link options are not put in the fast cache. 
+TEST_F(KernelAndProgramFastCacheTest, KernelNegativeLinkOpts) { + if (Plt.is_host() || Plt.get_backend() != backend::opencl) { + return; + } + + context Ctx{Plt}; + auto CtxImpl = detail::getSyclObjImpl(Ctx); + + globalCtx.reset(new TestCtx{CtxImpl->getHandleRef()}); + + program Prg{Ctx}; + + Prg.compile_with_kernel_type(); + Prg.link("-g"); + kernel Ker = Prg.get_kernel(); + detail::KernelProgramCache::KernelFastCacheT &Cache = + MockKernelProgramCache::getFastCache(CtxImpl->getKernelProgramCache()); + EXPECT_EQ(Cache.size(), 0U) << "Expect empty cache for kernels"; +} + +// Check that kernels are not put in the fast cache if the program is created +// from multiple programs. +TEST_F(KernelAndProgramFastCacheTest, KernelNegativeLinkedProgs) { + if (Plt.is_host() || Plt.get_backend() != backend::opencl) { + return; + } + + context Ctx{Plt}; + auto CtxImpl = detail::getSyclObjImpl(Ctx); + + globalCtx.reset(new TestCtx{CtxImpl->getHandleRef()}); + + program Prg1{Ctx}; + program Prg2{Ctx}; + + Prg1.compile_with_kernel_type(); + Prg2.compile_with_kernel_type(); + program Prg({Prg1, Prg2}); + kernel Ker = Prg.get_kernel(); + + detail::KernelProgramCache::KernelFastCacheT &Cache = + MockKernelProgramCache::getFastCache(CtxImpl->getKernelProgramCache()); + EXPECT_EQ(Cache.size(), 0U) << "Expect empty cache for kernels"; +} + +// Check that kernels created from source are not put in the fast cache. +TEST_F(KernelAndProgramFastCacheTest, KernelNegativeSource) { + if (Plt.is_host() || Plt.get_backend() != backend::opencl) { + return; + } + + context Ctx{Plt}; + auto CtxImpl = detail::getSyclObjImpl(Ctx); + + globalCtx.reset(new TestCtx{CtxImpl->getHandleRef()}); + + program Prg{Ctx}; + + Prg.build_with_source(""); + kernel Ker = Prg.get_kernel("test"); + + detail::KernelProgramCache::KernelFastCacheT &Cache = + MockKernelProgramCache::getFastCache(CtxImpl->getKernelProgramCache()); + EXPECT_EQ(Cache.size(), 0U) << "Expect empty cache for kernels"; +}