diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index f005abca796bd..dc93cf8de8fb1 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -35,6 +35,14 @@ subject to change. Do not rely on these variables in production code. | SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING | Any(\*) | Disables automatic rounding-up of parallel_for invocation ranges. | | SYCL_ENABLE_PCI | Integer | When set to 1, enables obtaining the GPU PCI address when using the Level Zero backend. The default is 0. | | SYCL_HOST_UNIFIED_MEMORY | Integer | Enforce host unified memory support or lack of it for the execution graph builder. If set to 0, it is enforced as not supported by all devices. If set to 1, it is enforced as supported by all devices. | +| SYCL_CACHE_DIR | Path | Path to persistent cache root directory. Default values are `%AppData%\libsycl_cache` for Windows and `$XDG_CACHE_HOME/libsycl_cache` on Linux, if XDG_CACHE_HOME is not set then `$HOME/.cache/libsycl_cache`. | +| SYCL_CACHE_TRACE | Any(\*) | Enables printing messages to std::cerr when non-blocking failures happen (e.g. unable to access cache item file). Default values if off. | +| SYCL_CACHE_DISABLE_PERSISTENT | Any(\*) | Switches persistent cache switch off. Default value is ON. | +| SYCL_CACHE_EVICTION_DISABLE | Any(\*) | Switches cache eviction off. Default value is ON. | +| SYCL_CACHE_MAX_SIZE | Positive integer | Cache eviction is triggered once total size of cached images exceeds the value in megabytes (default - 8 192 for 8 GB). Set to 0 to disable size-based cache eviction. | +| SYCL_CACHE_THRESHOLD | Positive integer | Cache eviction threshold in days (default value is 7 for 1 week). Set to 0 for disabling time-based cache eviction. | +| SYCL_CACHE_MIN_DEVICE_IMAGE_SIZE | Positive integer | Minimum size of device code image in bytes which is reasonable to cache on disk because disk access operation may take more time than do JIT compilation for it. Default value is 0 to cache all images. | +| SYCL_CACHE_MAX_DEVICE_IMAGE_SIZE | Positive integer | Maximum size of device image in bytes which is cached. Too big kernels may overload disk too fast. Default value is 1 GB. | `(*) Note: Any means this environment variable is effective when set to any non-null value.` diff --git a/sycl/doc/KernelProgramCache.md b/sycl/doc/KernelProgramCache.md index b75f26b507fa7..7a7c7247cdd85 100644 --- a/sycl/doc/KernelProgramCache.md +++ b/sycl/doc/KernelProgramCache.md @@ -172,15 +172,8 @@ three sources of build options: ## Cache configuration -There is set of configuration parameters which can be set as environment variables or parameters in `sycl.conf` and affect cache behavior: -| Environment variable | Values | Description | -| -------------------- | ------ | ----------- | -| `SYCL_CACHE_DIR`| Path | Path to persistent cache root directory. Default values are `%AppData%\Intel\sycl_program_cache` for Windows and `$HOME/intel/sycl_program_cache` on Linux. | -| `SYCL_CACHE_ENABLED` | ON, OFF | Switches persistent cache switch on or off. Default value is ON. | -| `SYCL_CACHE_MAX_SIZE` | Positive integer | Cache eviction is triggered once total size of cached images exceeds the value in megabytes (default - 8 192 for 8 GB). Set to 0 to disable size-based cache eviction. | -| `SYCL_CACHE_THRESHOLD` | Positive integer | Cache eviction threshold in days (default value is 7 for 1 week). Set to 0 for disabling time-based cache eviction. | -| `SYCL_CACHE_MIN_DEVICE_IMAGE_SIZE` | Positive integer | Minimum size of device code image in kilobytes which is reasonable to cache on disk because disk access operation may take more time than do JIT compilation for it. Default value is 0 to cache all images. | -| `SYCL_CACHE_MAX_DEVICE_IMAGE_SIZE` | Positive integer | Maximum size of device image in megabytes which is cached. Too big kernels may overload disk too fast. Default value is 0 to cache all images. | +The environment variables which affect cache behavior are described in +[EnvironmentVariables.md](https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md). ## Implementation details diff --git a/sycl/include/CL/sycl/detail/os_util.hpp b/sycl/include/CL/sycl/detail/os_util.hpp index 271943484f85b..55b41b28b640c 100644 --- a/sycl/include/CL/sycl/detail/os_util.hpp +++ b/sycl/include/CL/sycl/detail/os_util.hpp @@ -16,6 +16,8 @@ #include #include #include +#include +#include #ifdef _WIN32 #define __SYCL_RT_OS_WINDOWS @@ -80,6 +82,20 @@ class __SYCL_EXPORT OSUtil { /// Deallocates the memory referenced by \p Ptr. static void alignedFree(void *Ptr); + + /// Make directory recursively and returns zero code on success + static int makeDir(const char *Dir); + + /// Checks if specified path is present + static inline bool isPathPresent(const std::string &Path) { +#ifdef __SYCL_RT_OS_WINDOWS + struct _stat Stat; + return !_stat(Path.c_str(), &Stat); +#else + struct stat Stat; + return !stat(Path.c_str(), &Stat); +#endif + } }; } // namespace detail diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 82aebf61eb283..0c6535f21c6de 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -137,6 +137,7 @@ set(SYCL_SOURCES "detail/queue_impl.cpp" "detail/online_compiler/online_compiler.cpp" "detail/os_util.cpp" + "detail/persistent_device_code_cache.cpp" "detail/platform_util.cpp" "detail/reduction.cpp" "detail/sampler_impl.cpp" diff --git a/sycl/source/detail/config.def b/sycl/source/detail/config.def index 6404f6508360a..c28fea915f658 100644 --- a/sycl/source/detail/config.def +++ b/sycl/source/detail/config.def @@ -20,3 +20,12 @@ CONFIG(SYCL_DEVICE_FILTER, 1024, __SYCL_DEVICE_FILTER) CONFIG(SYCL_PROGRAM_LINK_OPTIONS, 64, __SYCL_PROGRAM_LINK_OPTIONS) CONFIG(SYCL_PROGRAM_COMPILE_OPTIONS, 64, __SYCL_PROGRAM_COMPILE_OPTIONS) CONFIG(SYCL_HOST_UNIFIED_MEMORY, 1, __SYCL_HOST_UNIFIED_MEMORY) +// 260 (Windows limit) - 12 (filename) - 84 (cache directory structure) +CONFIG(SYCL_CACHE_DIR, 164, __SYCL_CACHE_DIR) +CONFIG(SYCL_CACHE_TRACE, 1, __SYCL_CACHE_TRACE) +CONFIG(SYCL_CACHE_DISABLE_PERSISTENT, 1, __SYCL_CACHE_DISABLE_PERSISTENT) +CONFIG(SYCL_CACHE_EVICTION_DISABLE, 1, __SYCL_CACHE_EVICTION_DISABLE) +CONFIG(SYCL_CACHE_MAX_SIZE, 16, __SYCL_CACHE_MAX_SIZE) +CONFIG(SYCL_CACHE_THRESHOLD, 16, __SYCL_CACHE_THRESHOLD) +CONFIG(SYCL_CACHE_MIN_DEVICE_IMAGE_SIZE, 16, __SYCL_CACHE_MIN_DEVICE_IMAGE_SIZE) +CONFIG(SYCL_CACHE_MAX_DEVICE_IMAGE_SIZE, 16, __SYCL_CACHE_MAX_DEVICE_IMAGE_SIZE) diff --git a/sycl/source/detail/os_util.cpp b/sycl/source/detail/os_util.cpp index d49fcb037d786..9fa163825aa41 100644 --- a/sycl/source/detail/os_util.cpp +++ b/sycl/source/detail/os_util.cpp @@ -24,11 +24,13 @@ #include // for dirname #include #include // for PATH_MAX +#include #include #elif defined(__SYCL_RT_OS_WINDOWS) #include +#include #include #include @@ -211,6 +213,19 @@ std::string OSUtil::getCurrentDSODir() { return Path; } +std::string OSUtil::getDirName(const char *Path) { + std::string Tmp(Path); + // Remove trailing directory separators + Tmp.erase(Tmp.find_last_not_of("/\\") + 1, std::string::npos); + + int pos = Tmp.find_last_of("/\\"); + if (pos != std::string::npos) + return Tmp.substr(0, pos); + + // If no directory separator is present return initial path like dirname does + return Tmp; +} + #elif defined(__SYCL_RT_OS_DARWIN) OSModuleHandle OSUtil::getOSModuleHandle(const void *VirtAddr) { Dl_info Res; @@ -258,6 +273,32 @@ void OSUtil::alignedFree(void *Ptr) { #endif } +/* This is temporary solution until std::filesystem is available when SYCL RT + * is moved to c++17 standard*/ + +/* Create directory recursively and return non zero code on success*/ +int OSUtil::makeDir(const char *Dir) { + assert((Dir != nullptr) && "Passed null-pointer as directory name."); + if (isPathPresent(Dir)) + return 0; + + std::string Path{Dir}, CurPath; + size_t pos = 0; + + do { + pos = Path.find_first_of("/\\", ++pos); + CurPath = Path.substr(0, pos); +#if defined(__SYCL_RT_OS_LINUX) + auto Res = mkdir(CurPath.c_str(), 0777); +#else + auto Res = _mkdir(CurPath.c_str()); +#endif + if (Res && errno != EEXIST) + return Res; + } while (pos != std::string::npos); + return 0; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/persistent_device_code_cache.cpp b/sycl/source/detail/persistent_device_code_cache.cpp new file mode 100644 index 0000000000000..49b7530c79dcb --- /dev/null +++ b/sycl/source/detail/persistent_device_code_cache.cpp @@ -0,0 +1,358 @@ +//==---------- persistent_device_code_cache.cpp -----------------*- C++-*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +#if defined(__SYCL_RT_OS_LINUX) +#include +#else +#include +#include +#endif + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +/* Lock file suffix */ +const char LockCacheItem::LockSuffix[] = ".lock"; + +LockCacheItem::LockCacheItem(const std::string &Path) + : FileName(Path + LockSuffix) { + int fd; + + /* If the lock fail is not created */ + if ((fd = open(FileName.c_str(), O_CREAT | O_EXCL, S_IWRITE)) != -1) { + close(fd); + Owned = true; + } else { + PersistentDeviceCodeCache::trace("Failed to aquire lock file: " + FileName); + } +} + +LockCacheItem::~LockCacheItem() { + if (Owned && std::remove(FileName.c_str())) + PersistentDeviceCodeCache::trace("Failed to release lock file: " + + FileName); +} + +/* Returns true if specified image should be cached on disk. It checks if + * cache is enabled, image has SPIRV type and matches thresholds. */ +bool PersistentDeviceCodeCache::isImageCached(const RTDeviceBinaryImage &Img) { + // Cache shoould be enabled and image type should be SPIR-V + if (!isEnabled() || Img.getFormat() != PI_DEVICE_BINARY_TYPE_SPIRV) + return false; + + static auto MaxImgSize = getNumParam( + DEFAULT_MAX_DEVICE_IMAGE_SIZE); + static auto MinImgSize = getNumParam( + DEFAULT_MIN_DEVICE_IMAGE_SIZE); + + // Make sure that image size is between caching thresholds if they are set. + // Zero values for threshold is treated as disabled threshold. + if ((MaxImgSize && (Img.getSize() > MaxImgSize)) || + (MinImgSize && (Img.getSize() < MinImgSize))) + return false; + + return true; +} + +/* Stores built program in persisten cache + */ +void PersistentDeviceCodeCache::putItemToDisc( + const device &Device, const RTDeviceBinaryImage &Img, + const SerializedObj &SpecConsts, const std::string &BuildOptionsString, + const RT::PiProgram &NativePrg) { + + if (!isImageCached(Img)) + return; + + auto Plugin = detail::getSyclObjImpl(Device)->getPlugin(); + std::string DirName = + getCacheItemPath(Device, Img, SpecConsts, BuildOptionsString); + + size_t i = 0; + std::string FileName; + do { + FileName = DirName + "/" + std::to_string(i++); + } while (OSUtil::isPathPresent(FileName + ".bin")); + + unsigned int DeviceNum = 0; + + Plugin.call( + NativePrg, PI_PROGRAM_INFO_NUM_DEVICES, sizeof(DeviceNum), &DeviceNum, + nullptr); + + std::vector BinarySizes(DeviceNum); + Plugin.call( + NativePrg, PI_PROGRAM_INFO_BINARY_SIZES, + sizeof(size_t) * BinarySizes.size(), BinarySizes.data(), nullptr); + + std::vector> Result; + std::vector Pointers; + for (size_t I = 0; I < BinarySizes.size(); ++I) { + Result.emplace_back(BinarySizes[I]); + Pointers.push_back(Result[I].data()); + } + + Plugin.call(NativePrg, PI_PROGRAM_INFO_BINARIES, + sizeof(char *) * Pointers.size(), + Pointers.data(), nullptr); + + try { + OSUtil::makeDir(DirName.c_str()); + LockCacheItem Lock{FileName}; + if (Lock.isOwned()) { + writeBinaryDataToFile(FileName + ".bin", Result); + writeSourceItem(FileName + ".src", Device, Img, SpecConsts, + BuildOptionsString); + } + } catch (...) { + // If a problem happens on storing cache item, do nothing + } +} + +/* Program binaries built for one or more devices are read from persistent + * cache and returned in form of vector of programs. Each binary program is + * stored in vector of chars. + */ +std::vector> PersistentDeviceCodeCache::getItemFromDisc( + const device &Device, const RTDeviceBinaryImage &Img, + const SerializedObj &SpecConsts, const std::string &BuildOptionsString, + RT::PiProgram &NativePrg) { + + if (!isImageCached(Img)) + return {}; + + std::string Path = + getCacheItemPath(Device, Img, SpecConsts, BuildOptionsString); + + if (!OSUtil::isPathPresent(Path)) + return {}; + + int i = 0; + + std::string FileName{Path + "/" + std::to_string(i)}; + while (OSUtil::isPathPresent(FileName + ".bin") || + OSUtil::isPathPresent(FileName + ".src")) { + + if (!LockCacheItem::isLocked(FileName) && + isCacheItemSrcEqual(FileName + ".src", Device, Img, SpecConsts, + BuildOptionsString)) { + try { + return readBinaryDataFromFile(FileName + ".bin"); + } catch (...) { + // If read was unsuccessfull try the next item + } + } + FileName = Path + "/" + std::to_string(++i); + } + return {}; +} + +/* Returns string value which can be used to identify different device + */ +std::string PersistentDeviceCodeCache::getDeviceIDString(const device &Device) { + return Device.get_platform().get_info() + "/" + + Device.get_info() + "/" + + Device.get_info() + "/" + + Device.get_info(); +} + +/* Write built binary to persistent cache + * Format: numImages, 1stImageSize, Image[, NthImageSize, NthImage...] + * Return on first unsuccessfull file operation + */ +void PersistentDeviceCodeCache::writeBinaryDataToFile( + const std::string &FileName, const std::vector> &Data) { + std::ofstream FileStream{FileName, std::ios::binary}; + + size_t Size = Data.size(); + FileStream.write((char *)&Size, sizeof(Size)); + + for (size_t i = 0; i < Data.size(); ++i) { + Size = Data[i].size(); + FileStream.write((char *)&Size, sizeof(Size)); + FileStream.write(Data[i].data(), Size); + } + FileStream.close(); + if (FileStream.fail()) + trace("Failed to write binary file " + FileName); +} + +/* Read built binary to persistent cache + * Format: numImages, 1stImageSize, Image[, NthImageSize, NthImage...] + */ +std::vector> +PersistentDeviceCodeCache::readBinaryDataFromFile(const std::string &FileName) { + std::ifstream FileStream{FileName, std::ios::binary}; + size_t ImgNum = 0, ImgSize = 0; + FileStream.read((char *)&ImgNum, sizeof(ImgNum)); + + std::vector> Res(ImgNum); + for (size_t i = 0; i < ImgNum; ++i) { + FileStream.read((char *)&ImgSize, sizeof(ImgSize)); + + std::vector ImgData(ImgSize); + FileStream.read(ImgData.data(), ImgSize); + + Res[i] = std::move(ImgData); + } + FileStream.close(); + + if (FileStream.fail()) { + trace("Failed to read binary file from " + FileName); + return {}; + } + + return Res; +} + +/* Writing cache item key sources to be used for reliable identification + * Format: Four pairs of [size, value] for device, build options, specialization + * constant values, device code SPIR-V image. + */ +void PersistentDeviceCodeCache::writeSourceItem( + const std::string &FileName, const device &Device, + const RTDeviceBinaryImage &Img, const SerializedObj &SpecConsts, + const std::string &BuildOptionsString) { + std::ofstream FileStream{FileName, std::ios::binary}; + + std::string DeviceString{getDeviceIDString(Device)}; + size_t Size = DeviceString.size(); + FileStream.write((char *)&Size, sizeof(Size)); + FileStream.write(DeviceString.data(), Size); + + Size = BuildOptionsString.size(); + FileStream.write((char *)&Size, sizeof(Size)); + FileStream.write(BuildOptionsString.data(), Size); + + Size = SpecConsts.size(); + FileStream.write((char *)&Size, sizeof(Size)); + FileStream.write((const char *)SpecConsts.data(), Size); + + Size = Img.getSize(); + FileStream.write((char *)&Size, sizeof(Size)); + FileStream.write((const char *)Img.getRawData().BinaryStart, Size); + FileStream.close(); + + if (FileStream.fail()) { + trace("Failed to write source file to " + FileName); + } +} + +/* Check that cache item key sources are equal to the current program. + * If file read operations fail cache item is treated as not equal. + */ +bool PersistentDeviceCodeCache::isCacheItemSrcEqual( + const std::string &FileName, const device &Device, + const RTDeviceBinaryImage &Img, const SerializedObj &SpecConsts, + const std::string &BuildOptionsString) { + std::ifstream FileStream{FileName, std::ios::binary}; + + std::string ImgString{(const char *)Img.getRawData().BinaryStart, + Img.getSize()}; + std::string SpecConstsString{(const char *)SpecConsts.data(), + SpecConsts.size()}; + + size_t Size = 0; + FileStream.read((char *)&Size, sizeof(Size)); + std::string res(Size, '\0'); + FileStream.read(&res[0], Size); + if (getDeviceIDString(Device).compare(res)) + return false; + + FileStream.read((char *)&Size, sizeof(Size)); + res.resize(Size); + FileStream.read(&res[0], Size); + if (BuildOptionsString.compare(0, Size, res.data())) + return false; + + FileStream.read((char *)&Size, sizeof(Size)); + res.resize(Size); + FileStream.read(&res[0], Size); + if (SpecConstsString.compare(res)) + return false; + + FileStream.read((char *)&Size, sizeof(Size)); + res.resize(Size); + FileStream.read(&res[0], Size); + if (ImgString.compare(res)) + return false; + + FileStream.close(); + + if (FileStream.fail()) { + trace("Failed to read source file from " + FileName); + } + + return true; +} + +/* Returns directory name to store specific kernel image for specified + * device, build options and specialization constants values. + */ +std::string PersistentDeviceCodeCache::getCacheItemPath( + const device &Device, const RTDeviceBinaryImage &Img, + const SerializedObj &SpecConsts, const std::string &BuildOptionsString) { + static std::string cache_root{getRootDir()}; + + std::string ImgString{(const char *)Img.getRawData().BinaryStart, + Img.getSize()}; + std::string DeviceString{getDeviceIDString(Device)}; + std::string SpecConstsString{(const char *)SpecConsts.data(), + SpecConsts.size()}; + std::hash StringHasher{}; + + return cache_root + "/" + std::to_string(StringHasher(DeviceString)) + "/" + + std::to_string(StringHasher(ImgString)) + "/" + + std::to_string(StringHasher(SpecConstsString)) + "/" + + std::to_string(StringHasher(BuildOptionsString)); +} + +/* Returns true if persistent cache enabled. The cache can be disabled by + * setting SYCL_CACHE_EVICTION_DISABLE environmnet variable. + */ +bool PersistentDeviceCodeCache::isEnabled() { + static const char *PersistenCacheDisabled = + SYCLConfig::get(); + return !PersistenCacheDisabled; +} + +/* Returns path for device code cache root directory + */ +std::string PersistentDeviceCodeCache::getRootDir() { + static const char *RootDir = SYCLConfig::get(); + if (RootDir) + return RootDir; + + constexpr char DeviceCodeCacheDir[] = "/libsycl_cache"; + + // Use static to calculate directory only once per program run +#if defined(__SYCL_RT_OS_LINUX) + static const char *CacheDir = std::getenv("XDG_CACHE_HOME"); + static const char *HomeDir = std::getenv("HOME"); + static std::string Res{ + std::string(CacheDir + ? CacheDir + : (HomeDir ? std::string(HomeDir) + "/.cache" : ".")) + + DeviceCodeCacheDir}; +#else + static const char *AppDataDir = std::getenv("AppData"); + static std::string Res{std::string(AppDataDir ? AppDataDir : ".") + + DeviceCodeCacheDir}; +#endif + return Res; +} + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/persistent_device_code_cache.hpp b/sycl/source/detail/persistent_device_code_cache.hpp new file mode 100644 index 0000000000000..ed7201dab9198 --- /dev/null +++ b/sycl/source/detail/persistent_device_code_cache.hpp @@ -0,0 +1,197 @@ +//==---------- persistent_device_code_cache.hpp -----------------*- C++-*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +/* This is temporary solution until std::filesystem is available when SYCL RT + * is moved to c++17 standard*/ +std::string getDirName(const char *Path); + +/* The class manages inter-process synchronization: + * - Path passed to the constructor is appended with .lock and used as lock + * file. + * - All operations are not blocking and failure ignoring (diagnostic may be + * sent to std::cerr when SYCL_CACHE_TRACE environment variable is set). + * - There are two modes of accessing shared resource: + * - write access assumes that lock is acquired (object is created and + * isOwned() method confirms that current executor owns the lock); + * - read access checks that the lock is not acquired for write by others + * with the help of isLocked() method. + */ +class LockCacheItem { +private: + const std::string FileName; + bool Owned = false; + static const char LockSuffix[]; + +public: + LockCacheItem(const std::string &Path); + + bool isOwned() { return Owned; } + static bool isLocked(const std::string &Path) { + return OSUtil::isPathPresent(Path + LockSuffix); + } + ~LockCacheItem(); +}; +/* End of temporary solution*/ + +class PersistentDeviceCodeCache { + /* The device code images are stored on file system using structure below: + * / + * / + * / + * / + * / + * .src + * .bin + * .lock + * - root directory storing cache files; + * - hash out of device information used to + * identify target device; + * - hash made out of device image used as + * input for the JIT compilation; + * - hash for specialization constants values; + * - hash for all build options; + * - sequential number of hash collisions. + * When hashes match for the specific build + * but full values don't, new cache item is + * added with incremented value(enumeration + * started from 0). + * Two files per cache item are stored on disk: + * .src - contains full values for build parameters (device information, + * specialization constant values, build options, device image) + * which is used to resolve hash collisions and analysis of + * cached items. + * .bin - contains built device code. + * .lock - cache item lock file. It is created when data is saved to + * filesystem. On read operation the absence of file is checked + * but it is not created to avoid lock. + * All filesystem operation failures are not treated as SYCL errors and + * ignored. If such errors happen warning messages are written to std::cerr + * and: + * - on cache write operation cache item is not created; + * - on cache read operation it is treated as cache miss. + */ +private: + /* Write built binary to persistent cache + * Format: numImages, 1stImageSize, Image[, NthImageSize, NthImage...] + */ + static void writeBinaryDataToFile(const std::string &FileName, + const std::vector> &Data); + + /* Read built binary to persistent cache + * Format: numImages, 1stImageSize, Image[, NthImageSize, NthImage...] + */ + static std::vector> + readBinaryDataFromFile(const std::string &FileName); + + /* Writing cache item key sources to be used for reliable identification + * Format: Four pairs of [size, value] for device, build options, + * specialization constant values, device code SPIR-V image. + */ + static void writeSourceItem(const std::string &FileName, const device &Device, + const RTDeviceBinaryImage &Img, + const SerializedObj &SpecConsts, + const std::string &BuildOptionsString); + + /* Check that cache item key sources are equal to the current program + */ + static bool isCacheItemSrcEqual(const std::string &FileName, + const device &Device, + const RTDeviceBinaryImage &Img, + const SerializedObj &SpecConsts, + const std::string &BuildOptionsString); + + /* Check if on-disk cache enabled. + */ + static bool isEnabled(); + + /* Returns the path to directory storing persistent device code cache.*/ + static std::string getRootDir(); + + /* Form string representing device version */ + static std::string getDeviceIDString(const device &Device); + + /* Returns true if specified image should be cached on disk. It checks if + * cache is enabled, image has SPIRV type and matches thresholds. */ + static bool isImageCached(const RTDeviceBinaryImage &Img); + + /* Returns value of specified parameter. Default value is used if failure + * happens during obtaining value. */ + template + static unsigned long getNumParam(unsigned long Default) { + auto Value = SYCLConfig::get(); + try { + if (Value) + return std::stol(Value); + } catch (std::exception const &) { + PersistentDeviceCodeCache::trace("Invalid value provided, use default " + + std::to_string(Default)); + } + return Default; + } + + /* Default value for minimum device code size to be cached on disk in bytes */ + static constexpr unsigned long DEFAULT_MIN_DEVICE_IMAGE_SIZE = 0; + + /* Default value for maximum device code size to be cached on disk in bytes */ + static constexpr unsigned long DEFAULT_MAX_DEVICE_IMAGE_SIZE = + 1024 * 1024 * 1024; + +public: + /* Get directory name for storing current cache item + */ + static std::string getCacheItemPath(const device &Device, + const RTDeviceBinaryImage &Img, + const SerializedObj &SpecConsts, + const std::string &BuildOptionsString); + + /* Program binaries built for one or more devices are read from persistent + * cache and returned in form of vector of programs. Each binary program is + * stored in vector of chars. + */ + static std::vector> + getItemFromDisc(const device &Device, const RTDeviceBinaryImage &Img, + const SerializedObj &SpecConsts, + const std::string &BuildOptionsString, + RT::PiProgram &NativePrg); + + /* Stores build program in persisten cache + */ + static void putItemToDisc(const device &Device, + const RTDeviceBinaryImage &Img, + const SerializedObj &SpecConsts, + const std::string &BuildOptionsString, + const RT::PiProgram &NativePrg); + + /* Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is set*/ + static void trace(const std::string &msg) { + static const char *TraceEnabled = SYCLConfig::get(); + if (TraceEnabled) + std::cerr << "*** Code caching: " << msg << std::endl; + } +}; +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 20535a9c66947..962a47d41b767 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -393,9 +394,12 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, if (LinkOptsEnv) { LinkOpts = LinkOptsEnv; } + SerializedObj SpecConsts; + if (Prg) + Prg->stableSerializeSpecConstRegistry(SpecConsts); auto BuildF = [this, &M, &KSId, &Context, &Device, Prg, &CompileOpts, - &LinkOpts, &JITCompilationIsRequired] { + &LinkOpts, &JITCompilationIsRequired, SpecConsts] { const RTDeviceBinaryImage &Img = getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired); // Update only if compile options are not overwritten by environment @@ -416,19 +420,32 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, LinkOpts += Img.getLinkOptions(); ContextImplPtr ContextImpl = getSyclObjImpl(Context); const detail::plugin &Plugin = ContextImpl->getPlugin(); - RT::PiProgram NativePrg = createPIProgram(Img, Context, Device); - if (Prg) - flushSpecConstants(*Prg, NativePrg, &Img); + RT::PiProgram NativePrg; + + auto BinProg = PersistentDeviceCodeCache::getItemFromDisc( + Device, Img, SpecConsts, CompileOpts + LinkOpts, NativePrg); + if (BinProg.size()) { + // TODO: Build for multiple devices once supported by program manager + NativePrg = createBinaryProgram(ContextImpl, Device, + (const unsigned char *)BinProg[0].data(), + BinProg[0].size()); + } else { + NativePrg = createPIProgram(Img, Context, Device); + if (Prg) + flushSpecConstants(*Prg, NativePrg, &Img); + } + ProgramPtr ProgramManaged( NativePrg, Plugin.getPiPlugin().PiFunctionTable.piProgramRelease); // Link a fallback implementation of device libraries if they are not // supported by a device compiler. - // Pre-compiled programs are supposed to be already linked. + // Pre-compiled programs (after AOT compilation or read from persitent + // cache) are supposed to be already linked. // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means // no fallback device library will be linked. uint32_t DeviceLibReqMask = 0; - if (Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV && + if (!BinProg.size() && Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV && !SYCLConfig::get()) DeviceLibReqMask = getDeviceLibReqMask(Img); @@ -441,13 +458,14 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, std::lock_guard Lock(MNativeProgramsMutex); NativePrograms[BuiltProgram.get()] = &Img; } + + // Save program to persistent cache if it is not there + if (!BinProg.size()) + PersistentDeviceCodeCache::putItemToDisc( + Device, Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get()); return BuiltProgram.release(); }; - SerializedObj SpecConsts; - if (Prg) - Prg->stableSerializeSpecConstRegistry(SpecConsts); - const RT::PiDevice PiDevice = getRawSyclObjImpl(Device)->getHandleRef(); auto BuildResult = getOrBuild( Cache, diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index d89ea44ee2ab5..07f47338ec335 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3810,6 +3810,7 @@ _ZN2cl4sycl6detail6OSUtil12alignedAllocEmm _ZN2cl4sycl6detail6OSUtil12getOSMemSizeEv _ZN2cl4sycl6detail6OSUtil16getCurrentDSODirB5cxx11Ev _ZN2cl4sycl6detail6OSUtil17getOSModuleHandleEPKv +_ZN2cl4sycl6detail6OSUtil7makeDirEPKc _ZN2cl4sycl6detail9join_implERKSt6vectorISt10shared_ptrINS1_18kernel_bundle_implEESaIS5_EE _ZN2cl4sycl6detail9link_implERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EERKS2_INS0_6deviceESaISA_EERKNS0_13property_listE _ZN2cl4sycl6device11get_devicesENS0_4info11device_typeE diff --git a/sycl/test/on-device/cache/basic.cpp b/sycl/test/on-device/cache/basic.cpp new file mode 100644 index 0000000000000..d033cdce97a26 --- /dev/null +++ b/sycl/test/on-device/cache/basic.cpp @@ -0,0 +1,18 @@ +// No JITing for host devices. +// REQUIRES: opencl || level_zero +// RUN: rm -rf %t/cache_dir +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER --check-prefixes=CHECK-CACHE +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECK-CACHE +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER --check-prefixes=CHECK-CACHE +// +// The test checks that caching works properly. +#include "basic.hpp" + +// CHECK-BUILD: piProgramBuild + +// CHECK-CACHE-NOT: piProgramBuild +// CHECK-CACHE: piProgramCreateWithBinary diff --git a/sycl/test/on-device/cache/basic.hpp b/sycl/test/on-device/cache/basic.hpp new file mode 100644 index 0000000000000..9a1d11597957b --- /dev/null +++ b/sycl/test/on-device/cache/basic.hpp @@ -0,0 +1,60 @@ +#include + +int main(int argc, char **argv) { + // Test program and kernel APIs when building a kernel. + { + cl::sycl::queue q; + int data = 0; + { + cl::sycl::buffer buf(&data, cl::sycl::range<1>(1)); + cl::sycl::program prg1(q.get_context()); + cl::sycl::program prg2(q.get_context()); + cl::sycl::program prg3(q.get_context()); + cl::sycl::program prg4(q.get_context()); + cl::sycl::program prg5(q.get_context()); + + prg1.build_with_kernel_type(); // 1 cache item + prg2.build_with_kernel_type( + "-cl-fast-relaxed-math"); // +1 cache item due to build options + prg3.build_with_kernel_type(); // program binary is + // equal to prg1 + prg4.build_with_kernel_type(); // program binary is + // equal to prg1 + cl::sycl::kernel krn = prg2.get_kernel(); + + q.submit([&](cl::sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task(krn, [=]() { acc[0] = acc[0] + 1; }); + }); + } + assert(data == 1); + } + + // Test program and kernel APIs when compiling / linking a kernel. + { + cl::sycl::queue q; + int data = 0; + { + cl::sycl::buffer buf(&data, cl::sycl::range<1>(1)); + cl::sycl::program prg6(q.get_context()); + cl::sycl::program prg7(q.get_context()); + cl::sycl::program prg8(q.get_context()); + prg6.compile_with_kernel_type(); + prg6.link(); // The binary is not cached for separate compile/link + prg7.build_with_kernel_type( + "-cl-fast-relaxed-math"); // program binary is equal to prg2 + prg8.build_with_kernel_type( + "-g"); // +1 cache item due to build options + + cl::sycl::kernel krn = prg6.get_kernel(); + + q.submit([&](cl::sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task(krn, + [=]() { acc[0] = acc[0] + 1; }); + }); + } + assert(data == 1); + } + return 0; +} diff --git a/sycl/test/on-device/cache/cache_env_vars.cpp b/sycl/test/on-device/cache/cache_env_vars.cpp new file mode 100644 index 0000000000000..39cb403362847 --- /dev/null +++ b/sycl/test/on-device/cache/cache_env_vars.cpp @@ -0,0 +1,129 @@ +// No JITing for host devices. +// REQUIRES: opencl || level_zero +// RUN: rm -rf %t/cache_dir +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -DTARGET_IMAGE=INC100 +// Build program and add item to cache +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// Ignore caching because image size is less than threshold +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 SYCL_CACHE_MIN_DEVICE_IMAGE_SIZE=100000 %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// Ignore caching because image size is more than threshold +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 SYCL_CACHE_MAX_DEVICE_IMAGE_SIZE=1000 %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// Use cache +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %t.out | FileCheck %s --check-prefixes=CHECK-CACHE +// Ignore cache because of environment variable +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 SYCL_CACHE_DISABLE_PERSISTENT=1 %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// +// The test checks environment variables which may disable caching. +// Also it can be used for benchmarking cache: +// Rough data collected on 32 core machine. +// Number of lines 1 10 100 1000 10000 +// Image Size(kB) 2 2 20 165 1700 +// Device code build time in seconds +// CPU OCL JIT 0.12 0.12 0.16 1.1 16 +// CPU OCL Cache 0.01 0.01 0.01 0.02 0.08 + +// CHECK-BUILD: piProgramBuild +// CHECK-BUILD-NOT: piProgramCreateWithBinary + +// CHECK-CACHE-NOT: piProgramBuild +// CHECK-CACHE: piProgramCreateWithBinary + +#define INC1(x) ((x) = (x) + 1); + +#define INC10(x) \ + INC1(x) \ + INC1(x) \ + INC1(x) \ + INC1(x) \ + INC1(x) \ + INC1(x) \ + INC1(x) \ + INC1(x) \ + INC1(x) \ + INC1(x) + +#define INC100(x) \ + INC10(x) \ + INC10(x) \ + INC10(x) \ + INC10(x) \ + INC10(x) \ + INC10(x) \ + INC10(x) \ + INC10(x) \ + INC10(x) \ + INC10(x) + +#define INC1000(x) \ + INC100(x) \ + INC100(x) \ + INC100(x) \ + INC100(x) \ + INC100(x) \ + INC100(x) \ + INC100(x) \ + INC100(x) \ + INC100(x) \ + INC100(x) + +#define INC10000(x) \ + INC1000(x) \ + INC1000(x) \ + INC1000(x) \ + INC1000(x) \ + INC1000(x) \ + INC1000(x) \ + INC1000(x) \ + INC1000(x) \ + INC1000(x) \ + INC1000(x) + +#define INC100000(x) \ + INC10000(x) \ + INC10000(x) \ + INC10000(x) \ + INC10000(x) \ + INC10000(x) \ + INC10000(x) \ + INC10000(x) \ + INC10000(x) \ + INC10000(x) \ + INC10000(x) + +#include +#include +#include +class Inc; +template void check_build_time(cl::sycl::queue &q) { + cl::sycl::program program(q.get_context()); + auto start = std::chrono::steady_clock::now(); + program.build_with_kernel_type(); + auto end = std::chrono::steady_clock::now(); + + std::chrono::duration elapsed_seconds = end - start; + std::cout << "elapsed build time: " << elapsed_seconds.count() << "s\n"; +} +int main(int argc, char **argv) { + auto start = std::chrono::steady_clock::now(); + // Test program and kernel APIs when building a kernel. + { + cl::sycl::queue q; + check_build_time(q); + + int data = 0; + { + cl::sycl::buffer buf(&data, cl::sycl::range<1>(1)); + cl::sycl::range<1> NumOfWorkItems{buf.get_count()}; + + q.submit([&](cl::sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.parallel_for( + NumOfWorkItems, [=](cl::sycl::id<1> WIid) { TARGET_IMAGE(acc[0]) }); + }); + } + // check_build_time(q); + auto end = std::chrono::steady_clock::now(); + std::chrono::duration elapsed_seconds = end - start; + std::cout << "elapsed kernel time: " << elapsed_seconds.count() << "s\n"; + } +} diff --git a/sycl/test/on-device/cache/spec_consts.cpp b/sycl/test/on-device/cache/spec_consts.cpp new file mode 100644 index 0000000000000..289849a815bad --- /dev/null +++ b/sycl/test/on-device/cache/spec_consts.cpp @@ -0,0 +1,21 @@ +// No JITing for host devices. +// Specialization constant values are not supported on CUDA +// REQUIRES: opencl || level_zero +// RUN: rm -rf %t/cache_dir +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER --check-prefixes=CHECK-CACHE +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECK-CACHE +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER --check-prefixes=CHECK-CACHE +// +// The test checks that caching works properly for SYCL application containing +// specialization constant values. +#include "spec_consts.hpp" + +// CHECK-BUILD: piProgramBuild +// CHECK-BUILD-NOT: piProgramCreateWithBinary + +// CHECK-CACHE-NOT: piProgramBuild +// CHECK-CACHE: piProgramCreateWithBinary diff --git a/sycl/test/on-device/cache/spec_consts.hpp b/sycl/test/on-device/cache/spec_consts.hpp new file mode 100644 index 0000000000000..9285d5b500086 --- /dev/null +++ b/sycl/test/on-device/cache/spec_consts.hpp @@ -0,0 +1,170 @@ +#include + +#include +#include + +class MyInt32Const; +class MyFloatConst; +class MyConst; + +using namespace sycl; + +class KernelAAAi; +class KernelBBBf; + +int global_val = 10; + +// Fetch a value at runtime. +int get_value() { return global_val; } + +float foo( + const cl::sycl::ONEAPI::experimental::spec_constant + &f32) { + return f32; +} + +struct SCWrapper { + SCWrapper(cl::sycl::program &p) + : SC1(p.set_spec_constant(4)), + SC2(p.set_spec_constant(2)) {} + + cl::sycl::ONEAPI::experimental::spec_constant SC1; + cl::sycl::ONEAPI::experimental::spec_constant SC2; +}; + +// MyKernel is used to test default constructor +using AccT = sycl::accessor; +using ScT = sycl::ONEAPI::experimental::spec_constant; + +struct MyKernel { + MyKernel(AccT &Acc) : Acc(Acc) {} + + void setConst(ScT Sc) { this->Sc = Sc; } + + void operator()() const { Acc[0] = Sc.get(); } + AccT Acc; + ScT Sc; +}; + +int main(int argc, char **argv) { + global_val = argc + 16; + + cl::sycl::queue q(default_selector{}, [](exception_list l) { + for (auto ep : l) { + try { + std::rethrow_exception(ep); + } catch (cl::sycl::exception &e0) { + std::cout << e0.what(); + } catch (std::exception &e1) { + std::cout << e1.what(); + } catch (...) { + std::cout << "*** catch (...)\n"; + } + } + }); + + std::cout << "Running on " << q.get_device().get_info() + << "\n"; + std::cout << "global_val = " << global_val << "\n"; + cl::sycl::program program1(q.get_context()); + cl::sycl::program program2(q.get_context()); + cl::sycl::program program3(q.get_context()); + cl::sycl::program program4(q.get_context()); + + int goldi = (int)get_value(); + // TODO make this floating point once supported by the compiler + float goldf = (float)get_value(); + + cl::sycl::ONEAPI::experimental::spec_constant i32 = + program1.set_spec_constant(goldi); + + cl::sycl::ONEAPI::experimental::spec_constant f32 = + program2.set_spec_constant(goldf); + + cl::sycl::ONEAPI::experimental::spec_constant sc = + program4.set_spec_constant(goldi); + + program1.build_with_kernel_type(); + // Use an option (does not matter which exactly) to test different internal + // SYCL RT execution path + program2.build_with_kernel_type("-cl-fast-relaxed-math"); + + SCWrapper W(program3); + program3.build_with_kernel_type(); + + program4.build_with_kernel_type(); + + int goldw = 6; + + std::vector veci(1); + std::vector vecf(1); + std::vector vecw(1); + std::vector vec(1); + try { + cl::sycl::buffer bufi(veci.data(), veci.size()); + cl::sycl::buffer buff(vecf.data(), vecf.size()); + cl::sycl::buffer bufw(vecw.data(), vecw.size()); + cl::sycl::buffer buf(vec.data(), vec.size()); + + q.submit([&](cl::sycl::handler &cgh) { + auto acci = bufi.get_access(cgh); + cgh.single_task(program1.get_kernel(), + [=]() { acci[0] = i32.get(); }); + }); + q.submit([&](cl::sycl::handler &cgh) { + auto accf = buff.get_access(cgh); + cgh.single_task(program2.get_kernel(), + [=]() { accf[0] = foo(f32); }); + }); + + q.submit([&](cl::sycl::handler &cgh) { + auto accw = bufw.get_access(cgh); + cgh.single_task( + program3.get_kernel(), + [=]() { accw[0] = W.SC1.get() + W.SC2.get(); }); + }); + // Check spec_constant default construction with subsequent initialization + q.submit([&](cl::sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + // Specialization constants specification says: + // cl::sycl::experimental::spec_constant is default constructible, + // although the object is not considered initialized until the result of + // the call to cl::sycl::program::set_spec_constant is assigned to it. + MyKernel Kernel(acc); // default construct inside MyKernel instance + Kernel.setConst(sc); // initialize to sc, returned by set_spec_constant + + cgh.single_task(program4.get_kernel(), Kernel); + }); + + } catch (cl::sycl::exception &e) { + std::cout << "*** Exception caught: " << e.what() << "\n"; + return 1; + } + bool passed = true; + int vali = veci[0]; + + if (vali != goldi) { + std::cout << "*** ERROR: " << vali << " != " << goldi << "(gold)\n"; + passed = false; + } + int valf = vecf[0]; + + if (valf != goldf) { + std::cout << "*** ERROR: " << valf << " != " << goldf << "(gold)\n"; + passed = false; + } + int valw = vecw[0]; + + if (valw != goldw) { + std::cout << "*** ERROR: " << valw << " != " << goldw << "(gold)\n"; + passed = false; + } + int val = vec[0]; + + if (val != goldi) { + std::cout << "*** ERROR: " << val << " != " << goldi << "(gold)\n"; + passed = false; + } + std::cout << (passed ? "passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/sycl/unittests/kernel-and-program/CMakeLists.txt b/sycl/unittests/kernel-and-program/CMakeLists.txt index f61498b901bd9..e5033a6568b0f 100644 --- a/sycl/unittests/kernel-and-program/CMakeLists.txt +++ b/sycl/unittests/kernel-and-program/CMakeLists.txt @@ -2,4 +2,5 @@ add_sycl_unittest_with_device(KernelAndProgramTests OBJECT Cache.cpp KernelRelease.cpp KernelInfo.cpp + PersistentDeviceCodeCache.cpp ) diff --git a/sycl/unittests/kernel-and-program/Cache.cpp b/sycl/unittests/kernel-and-program/Cache.cpp index a04134273b8af..daf532fe293c6 100644 --- a/sycl/unittests/kernel-and-program/Cache.cpp +++ b/sycl/unittests/kernel-and-program/Cache.cpp @@ -46,6 +46,13 @@ static pi_result redefinedProgramCreateWithSource(pi_context context, return PI_SUCCESS; } +static pi_result redefinedProgramCreateWithBinary( + pi_context context, pi_uint32 num_devices, const pi_device *device_list, + const size_t *lengths, const unsigned char **binaries, + pi_int32 *binary_status, pi_program *ret_program) { + return PI_SUCCESS; +} + static pi_result redefinedProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, @@ -72,6 +79,37 @@ redefinedProgramLink(pi_context context, pi_uint32 num_devices, return PI_SUCCESS; } +static pi_result redefinedProgramGetInfo(pi_program program, + pi_program_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_PROGRAM_INFO_NUM_DEVICES) { + auto value = reinterpret_cast(param_value); + *value = 1; + } + + if (param_name == PI_PROGRAM_INFO_BINARY_SIZES) { + auto value = reinterpret_cast(param_value); + value[0] = 1; + } + + if (param_name == PI_PROGRAM_INFO_BINARIES) { + auto value = reinterpret_cast(param_value); + value[0] = 1; + } + + return PI_SUCCESS; +} + +static pi_result redefinedProgramRetain(pi_program program) { + return PI_SUCCESS; +} + +static pi_result redefinedProgramRelease(pi_program program) { + return PI_SUCCESS; +} + static pi_result redefinedKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel) { @@ -119,10 +157,17 @@ class KernelAndProgramCacheTest : public ::testing::Test { Mock->redefine( redefinedProgramCreateWithSource); + Mock->redefine( + redefinedProgramCreateWithBinary); Mock->redefine( redefinedProgramCompile); Mock->redefine(redefinedProgramLink); Mock->redefine(redefinedProgramBuild); + Mock->redefine( + redefinedProgramGetInfo); + Mock->redefine(redefinedProgramRetain); + Mock->redefine( + redefinedProgramRelease); Mock->redefine(redefinedKernelCreate); Mock->redefine(redefinedKernelRetain); Mock->redefine(redefinedKernelRelease); diff --git a/sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp b/sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp new file mode 100644 index 0000000000000..2bbc4e90125e6 --- /dev/null +++ b/sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp @@ -0,0 +1,328 @@ +//==----- PersistenDeviceCodeCache.cpp --- Persistent cache tests ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file contains tests covering persistena device code cache functionality. +// Detailed description of the tests cases can be seen per test function. +#include "../thread_safety/ThreadUtils.h" +#include "detail/persistent_device_code_cache.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { +constexpr auto sycl_read_write = cl::sycl::access::mode::read_write; +using namespace cl::sycl; + +/* Vector of programs which can be used for testing + */ +std::vector> Progs = { + {128}, /*tiny program for 1 target device, 128 B long*/ + {10240}, /*small program for 1 target device, 10 kB long*/ + {1024 * 1024, 1024, 256, 1024 * 64}, /*big program for 4 target + device, ~1 MB long*/ +}; + +static unsigned char DeviceCodeID = 2; + +static pi_result redefinedProgramGetInfo(pi_program program, + pi_program_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_PROGRAM_INFO_NUM_DEVICES) { + auto value = reinterpret_cast(param_value); + *value = Progs[DeviceCodeID].size(); + } + + if (param_name == PI_PROGRAM_INFO_BINARY_SIZES) { + auto value = reinterpret_cast(param_value); + for (int i = 0; i < Progs[DeviceCodeID].size(); ++i) + value[i] = Progs[DeviceCodeID][i]; + } + + if (param_name == PI_PROGRAM_INFO_BINARIES) { + auto value = reinterpret_cast(param_value); + for (int i = 0; i < Progs[DeviceCodeID].size(); ++i) + for (int j = 0; j < Progs[DeviceCodeID][i]; ++j) + value[i][j] = i; + } + + return PI_SUCCESS; +} + +class PersistenDeviceCodeCache : public ::testing::Test { +public: +#ifdef _WIN32 + int setenv(const char *name, const char *value, int overwrite) { + int errcode = 0; + if (!overwrite) { + size_t envsize = 0; + errcode = getenv_s(&envsize, NULL, 0, name); + if (errcode || envsize) + return errcode; + } + return _putenv_s(name, value); + } +#endif + + PersistenDeviceCodeCache() : Plt{default_selector()} { + + if (Plt.is_host() || Plt.get_backend() != backend::opencl) { + std::clog << "This test is only supported on OpenCL devices\n"; + std::clog << "Current platform is " + << Plt.get_info(); + return; + } + + Mock = std::make_unique(Plt); + Dev = Plt.get_devices()[0]; + Mock->redefine( + redefinedProgramGetInfo); + } + + /* Helper function for concurent cache item read/write from diffrent number + * of threads with diffrent cache item sizes: + * ProgramID - defines program parameters to be used for testing (see Progs + * vector above. + * ThreadCount - number of parallel executors used for the test*/ + void ConcurentReadWriteCache(unsigned char ProgramID, size_t ThreadCount) { + if (Plt.is_host() || Plt.get_backend() != backend::opencl) { + return; + } + std::string BuildOptions{"--concurrent-access=" + + std::to_string(ThreadCount)}; + DeviceCodeID = ProgramID; + std::string ItemDir = detail::PersistentDeviceCodeCache::getCacheItemPath( + Dev, Img, {'S', 'p', 'e', 'c', 'C', 'o', 'n', 's', 't', ProgramID}, + BuildOptions); + llvm::sys::fs::remove_directories(ItemDir); + + Barrier b(ThreadCount); + { + auto testLambda = [&](std::size_t threadId) { + b.wait(); + detail::PersistentDeviceCodeCache::putItemToDisc( + Dev, Img, + sycl::vector_class( + {'S', 'p', 'e', 'c', 'C', 'o', 'n', 's', 't', ProgramID}), + BuildOptions, NativeProg); + auto Res = detail::PersistentDeviceCodeCache::getItemFromDisc( + Dev, Img, + sycl::vector_class( + {'S', 'p', 'e', 'c', 'C', 'o', 'n', 's', 't', ProgramID}), + BuildOptions, NativeProg); + for (int i = 0; i < Res.size(); ++i) { + for (int j = 0; j < Res[i].size(); ++j) { + assert(Res[i][j] == i && + "Corrupted image loaded from persistent cache"); + } + } + }; + + ThreadPool MPool(ThreadCount, testLambda); + } + llvm::sys::fs::remove_directories(ItemDir); + } + +protected: + detail::OSModuleHandle ModuleHandle = detail::OSUtil::ExeModuleHandle; + platform Plt; + device Dev; + pi_device_binary_struct BinStruct{/*Version*/ 1, /*Kind*/ 4, + /*Format*/ PI_DEVICE_BINARY_TYPE_SPIRV}; + pi_device_binary Bin = &BinStruct; + detail::RTDeviceBinaryImage Img{Bin, ModuleHandle}; + RT::PiProgram NativeProg; + std::unique_ptr Mock; +}; + +/* Do read/write for the same cache item to/from 300 threads for small device + * code size. Make sure that there is no data corruption or crashes. + */ +TEST_F(PersistenDeviceCodeCache, ConcurentReadWriteSmallItem) { + ConcurentReadWriteCache(0, 300); +} + +/* Do read/write for the same cache item to/from 100 threads for medium device + * code size. Make sure that there is no data corruption or crashes. + */ +TEST_F(PersistenDeviceCodeCache, ConcurentReadWriteCacheMediumItem) { + ConcurentReadWriteCache(1, 100); +} + +/* Do read/write for the same cache item to/from 20 threads from big device + * code size. Make sure that there is no data corruption or crashes. + */ +TEST_F(PersistenDeviceCodeCache, ConcurentReadWriteCacheBigItem) { + ConcurentReadWriteCache(2, 20); +} + +/* Checks that no crash happens when cache items are corrupted on cache read. + * The case when source or binary files are corrupted is treated as cache miss. + * - only source file is present; + * - only binary file is present; + * - source file is corrupted; + * - binary file is corrupted. + */ +TEST_F(PersistenDeviceCodeCache, CorruptedCacheFiles) { + if (Plt.is_host() || Plt.get_backend() != backend::opencl) { + return; + } + std::string BuildOptions{"--corrupted-file"}; + std::string ItemDir = detail::PersistentDeviceCodeCache::getCacheItemPath( + Dev, Img, {}, BuildOptions); + llvm::sys::fs::remove_directories(ItemDir); + + // Only source file is present + detail::PersistentDeviceCodeCache::putItemToDisc(Dev, Img, {}, BuildOptions, + NativeProg); + assert(!llvm::sys::fs::remove(ItemDir + "/0.bin") && + "Failed to remove binary file"); + auto Res = detail::PersistentDeviceCodeCache::getItemFromDisc( + Dev, Img, {}, BuildOptions, NativeProg); + assert(Res.size() == 0 && "Item with missed binary file was read"); + llvm::sys::fs::remove_directories(ItemDir); + + // Only binary file is present + detail::PersistentDeviceCodeCache::putItemToDisc(Dev, Img, {}, BuildOptions, + NativeProg); + assert(!llvm::sys::fs::remove(ItemDir + "/0.src") && + "Failed to remove source file"); + Res = detail::PersistentDeviceCodeCache::getItemFromDisc( + Dev, Img, {}, BuildOptions, NativeProg); + assert(Res.size() == 0 && "Item with missed source file was read"); + llvm::sys::fs::remove_directories(ItemDir); + + // Binary file is corrupted + detail::PersistentDeviceCodeCache::putItemToDisc(Dev, Img, {}, BuildOptions, + NativeProg); + std::ofstream FileStream(ItemDir + "/0.bin", + std::ofstream::out | std::ofstream::trunc); + /* Emulate binary built for 2 devices: first is OK, second is trancated + * from 23 bytes to 4 + */ + FileStream << 2 << 12 << "123456789012" << 23 << "1234"; + FileStream.close(); + assert((!FileStream.fail()) && "Failed to create trancated binary file"); + Res = detail::PersistentDeviceCodeCache::getItemFromDisc( + Dev, Img, {}, BuildOptions, NativeProg); + assert(Res.size() == 0 && "Item with corrupted binary file was read"); + + llvm::sys::fs::remove_directories(ItemDir); + + // Source file is empty + detail::PersistentDeviceCodeCache::putItemToDisc(Dev, Img, {}, BuildOptions, + NativeProg); + { + std::ofstream FileStream(ItemDir + "/0.src", + std::ofstream::out | std::ofstream::trunc); + } + Res = detail::PersistentDeviceCodeCache::getItemFromDisc( + Dev, Img, {}, BuildOptions, NativeProg); + assert(Res.size() == 0 && "Item with corrupted binary file was read"); + llvm::sys::fs::remove_directories(ItemDir); +} + +/* Checks that lock file affects cache operations as expected: + * - new cache item is created if existing one is locked on write operation; + * - cache miss happens on read operation. + */ +TEST_F(PersistenDeviceCodeCache, LockFile) { + if (Plt.is_host() || Plt.get_backend() != backend::opencl) { + return; + } + std::string BuildOptions{"--obsolete-lock"}; + std::string ItemDir = detail::PersistentDeviceCodeCache::getCacheItemPath( + Dev, Img, {}, BuildOptions); + llvm::sys::fs::remove_directories(ItemDir); + + // Create 1st cahe item + detail::PersistentDeviceCodeCache::putItemToDisc(Dev, Img, {}, BuildOptions, + NativeProg); + assert(llvm::sys::fs::exists(ItemDir + "/0.bin") && "No file created"); + std::string LockFile = ItemDir + "/0.lock"; + assert(!llvm::sys::fs::exists(LockFile) && "Cache item locked"); + + // Create lock file for the 1st cache item + { std::ofstream File{LockFile}; } + + // Cache item is locked, cache miss happens on read + auto Res = detail::PersistentDeviceCodeCache::getItemFromDisc( + Dev, Img, {}, BuildOptions, NativeProg); + assert(Res.size() == 0 && "Locked item was read"); + + // Cache item is locked - new cache item to be created + detail::PersistentDeviceCodeCache::putItemToDisc(Dev, Img, {}, BuildOptions, + NativeProg); + assert(llvm::sys::fs::exists(ItemDir + "/1.bin") && "No file created"); + + // Second cache item is locked, cache miss happens on read + { std::ofstream File{ItemDir + "/1.lock"}; } + Res = detail::PersistentDeviceCodeCache::getItemFromDisc( + Dev, Img, {}, BuildOptions, NativeProg); + assert(Res.size() == 0 && "Locked item was read"); + + // First cache item was unlocked and successfully read + std::remove(LockFile.c_str()); + Res = detail::PersistentDeviceCodeCache::getItemFromDisc( + Dev, Img, {}, BuildOptions, NativeProg); + for (int i = 0; i < Res.size(); ++i) { + for (int j = 0; j < Res[i].size(); ++j) { + assert(Res[i][j] == i && "Corrupted image loaded from persistent cache"); + } + } + llvm::sys::fs::remove_directories(ItemDir); +} + +#ifndef _WIN32 +// llvm::sys::fs::setPermissions does not make effect on Windows +/* Checks cache behavior when filesystem read/write operations fail + */ +TEST_F(PersistenDeviceCodeCache, AccessDeniedForCacheDir) { + if (Plt.is_host() || Plt.get_backend() != backend::opencl) { + return; + } + std::string BuildOptions{"--build-options"}; + std::string ItemDir = detail::PersistentDeviceCodeCache::getCacheItemPath( + Dev, Img, {}, BuildOptions); + llvm::sys::fs::remove_directories(ItemDir); + detail::PersistentDeviceCodeCache::putItemToDisc(Dev, Img, {}, BuildOptions, + NativeProg); + assert(llvm::sys::fs::exists(ItemDir + "/0.bin") && "No file created"); + llvm::sys::fs::setPermissions(ItemDir + "/0.bin", llvm::sys::fs::no_perms); + // No access to binary file new cache item to be created + detail::PersistentDeviceCodeCache::putItemToDisc(Dev, Img, {}, BuildOptions, + NativeProg); + assert(llvm::sys::fs::exists(ItemDir + "/1.bin") && "No file created"); + + llvm::sys::fs::setPermissions(ItemDir + "/1.bin", llvm::sys::fs::no_perms); + auto Res = detail::PersistentDeviceCodeCache::getItemFromDisc( + Dev, Img, {}, BuildOptions, NativeProg); + + // No image to be read due to lack of permissions fro source file + assert(Res.size() == 0); + + llvm::sys::fs::setPermissions(ItemDir + "/0.bin", llvm::sys::fs::all_perms); + llvm::sys::fs::setPermissions(ItemDir + "/1.bin", llvm::sys::fs::all_perms); + + Res = detail::PersistentDeviceCodeCache::getItemFromDisc( + Dev, Img, {}, BuildOptions, NativeProg); + // Image should be successfully read + for (int i = 0; i < Res.size(); ++i) { + for (int j = 0; j < Res[i].size(); ++j) { + assert(Res[i][j] == i && "Corrupted image loaded from persistent cache"); + } + } + llvm::sys::fs::remove_directories(ItemDir); +} +#endif //_WIN32 +} // namespace diff --git a/sycl/unittests/thread_safety/ThreadUtils.h b/sycl/unittests/thread_safety/ThreadUtils.h index 18a50bb37a7b9..dc7f75c749493 100644 --- a/sycl/unittests/thread_safety/ThreadUtils.h +++ b/sycl/unittests/thread_safety/ThreadUtils.h @@ -3,6 +3,29 @@ #include #include +/* Single use thread barrier which makes threads wait until defined number of + * threads reach it. + * std:barrier should be used instead once compiler is moved to C++20 standard. + */ +class Barrier { +public: + Barrier() = delete; + explicit Barrier(std::size_t count) : threadNum(count) {} + void wait() { + std::unique_lock lock(mutex); + if (--threadNum == 0) { + cv.notify_all(); + } else { + cv.wait(lock, [this] { return threadNum == 0; }); + } + } + +private: + std::mutex mutex; + std::condition_variable cv; + std::size_t threadNum; +}; + class ThreadPool { public: ThreadPool() = delete;