From 100ea1d6b75a0b368c2bf3a8d130d7f7807f73d7 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Tue, 10 Dec 2024 09:55:04 +0000 Subject: [PATCH 1/8] [SYCL][RTC] Experimental use of program manager to build device images Signed-off-by: Julian Oppermann --- sycl/source/detail/jit_compiler.cpp | 13 +++-- sycl/source/detail/jit_compiler.hpp | 3 +- sycl/source/detail/kernel_bundle_impl.hpp | 49 +++++++++++++++---- .../kernel_compiler/kernel_compiler_sycl.cpp | 12 +++-- .../kernel_compiler/kernel_compiler_sycl.hpp | 2 +- .../program_manager/program_manager.cpp | 6 ++- .../program_manager/program_manager.hpp | 3 +- .../kernel_compiler_sycl_jit.cpp | 20 -------- 8 files changed, 68 insertions(+), 40 deletions(-) diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index daec9af9ff6d..1a1d18ca4b9b 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -1120,13 +1120,17 @@ sycl_device_binaries jit_compiler::createPIDeviceBinary( } sycl_device_binaries jit_compiler::createDeviceBinaryImage( - const ::jit_compiler::RTCBundleInfo &BundleInfo) { + const ::jit_compiler::RTCBundleInfo &BundleInfo, + const std::string &OffloadEntryPrefix) { DeviceBinaryContainer Binary; for (const auto &Symbol : BundleInfo.SymbolTable) { - // Create an offload entry for each kernel. + // Create an offload entry for each kernel. We prepend a unique prefix to + // support reusing the same name across multiple RTC requests. The actual + // entrypoints remain unchanged. // It seems to be OK to set zero for most of the information here, at least // that is the case for compiled SPIR-V binaries. - OffloadEntryContainer Entry{Symbol.c_str(), /*Addr=*/nullptr, /*Size=*/0, + std::string PrefixedName = OffloadEntryPrefix + Symbol.c_str(); + OffloadEntryContainer Entry{PrefixedName, /*Addr=*/nullptr, /*Size=*/0, /*Flags=*/0, /*Reserved=*/0}; Binary.addOffloadEntry(std::move(Entry)); } @@ -1250,7 +1254,8 @@ sycl_device_binaries jit_compiler::compileSYCL( throw sycl::exception(sycl::errc::build, Result.getBuildLog()); } - return createDeviceBinaryImage(Result.getBundleInfo()); + return createDeviceBinaryImage(Result.getBundleInfo(), + /*OffloadEntryPrefix=*/Id + '$'); } } // namespace detail diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index b673e4d37b8f..c11b163ba6a6 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -71,7 +71,8 @@ class jit_compiler { ::jit_compiler::BinaryFormat Format); sycl_device_binaries - createDeviceBinaryImage(const ::jit_compiler::RTCBundleInfo &BundleInfo); + createDeviceBinaryImage(const ::jit_compiler::RTCBundleInfo &BundleInfo, + const std::string &OffloadEntryPrefix); std::vector encodeArgUsageMask(const ::jit_compiler::ArgUsageMask &Mask) const; diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 9366398a6a05..51fae6cce9ea 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -474,6 +474,46 @@ class kernel_bundle_impl { DeviceVec.push_back(Dev); } + if (Language == syclex::source_language::sycl_jit) { + // Experimental: Build device images via the program manager. + // TODO: Support persistent caching. + + const std::string &SourceStr = std::get(this->Source); + auto [Binaries, Id] = syclex::detail::SYCL_JIT_to_SPIRV( + SourceStr, IncludePairs, BuildOptions, LogPtr, RegisteredKernelNames); + + assert(Binaries->NumDeviceBinaries == 1); + + auto &PM = detail::ProgramManager::getInstance(); + std::unordered_set ImageIds; + PM.addImages(Binaries, &ImageIds); + auto DevImgs = PM.getSYCLDeviceImages( + MContext, MDevices, + [&ImageIds](const detail::DeviceImageImplPtr &DevImgImpl) -> bool { + return ImageIds.count( + DevImgImpl->get_bin_image_ref()->getImageID()); + }, + bundle_state::executable); + + PM.bringSYCLDeviceImagesToState(DevImgs, bundle_state::executable); + + std::vector KernelNames; + std::transform(Binaries->DeviceBinaries->EntriesBegin, + Binaries->DeviceBinaries->EntriesEnd, + std::back_inserter(KernelNames), + [PrefixLen = Id.length() + 1](auto &OffloadEntry) { + // `jit_compiler::compileSYCL` uses `Id + '$'` as name + // prefix; drop that here. + return std::string{OffloadEntry.name + PrefixLen}; + }); + + assert(DevImgs.size() == 1); + assert(!DevImgs.front().hasDeps()); + + return std::make_shared( + MContext, MDevices, DevImgs.front().getMain(), KernelNames, Language); + } + ur_program_handle_t UrProgram = nullptr; // SourceStrPtr will be null when source is Spir-V bytes. const std::string *SourceStrPtr = std::get_if(&this->Source); @@ -514,15 +554,6 @@ class kernel_bundle_impl { BuildOptions, LogPtr, RegisteredKernelNames); } - if (Language == syclex::source_language::sycl_jit) { - auto *Binaries = syclex::detail::SYCL_JIT_to_SPIRV( - *SourceStrPtr, IncludePairs, BuildOptions, LogPtr, - RegisteredKernelNames); - assert(Binaries->NumDeviceBinaries == 1 && - "Device code splitting is not yet supported"); - return std::vector(Binaries->DeviceBinaries->BinaryStart, - Binaries->DeviceBinaries->BinaryEnd); - } throw sycl::exception( make_error_code(errc::invalid), "SYCL C++, OpenCL C and SPIR-V are the only supported " diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 6362bf355cfc..09d868276fc2 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -327,6 +327,7 @@ bool SYCL_Compilation_Available() { #if SYCL_EXT_JIT_ENABLE #include "../jit_compiler.hpp" +#include #endif namespace sycl { @@ -342,15 +343,20 @@ bool SYCL_JIT_Compilation_Available() { #endif } -sycl_device_binaries SYCL_JIT_to_SPIRV( +std::pair SYCL_JIT_to_SPIRV( [[maybe_unused]] const std::string &SYCLSource, [[maybe_unused]] include_pairs_t IncludePairs, [[maybe_unused]] const std::vector &UserArgs, [[maybe_unused]] std::string *LogPtr, [[maybe_unused]] const std::vector &RegisteredKernelNames) { #if SYCL_EXT_JIT_ENABLE - return sycl::detail::jit_compiler::get_instance().compileSYCL( - "rtc", SYCLSource, IncludePairs, UserArgs, LogPtr, RegisteredKernelNames); + static std::atomic_uintptr_t CompilationId; + std::string Id = "rtc_" + std::to_string(CompilationId++); + sycl_device_binaries Binaries = + sycl::detail::jit_compiler::get_instance().compileSYCL( + Id, SYCLSource, IncludePairs, UserArgs, LogPtr, + RegisteredKernelNames); + return std::make_pair(Binaries, std::move(Id)); #else throw sycl::exception(sycl::errc::build, "kernel_compiler via sycl-jit is not available"); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp index eee0f8a78f60..8187c5373150 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp @@ -35,7 +35,7 @@ bool SYCL_Compilation_Available(); std::string userArgsAsString(const std::vector &UserArguments); -sycl_device_binaries +std::pair SYCL_JIT_to_SPIRV(const std::string &Source, include_pairs_t IncludePairs, const std::vector &UserArgs, std::string *LogPtr, const std::vector &RegisteredKernelNames); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 035ca965ce2e..efad5b4db51b 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1802,7 +1802,8 @@ ProgramManager::kernelImplicitLocalArgPos(const std::string &KernelName) const { return {}; } -void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { +void ProgramManager::addImages(sycl_device_binaries DeviceBinary, + std::unordered_set *ImageIds) { const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile; for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) { sycl_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]); @@ -1825,6 +1826,9 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { else Img = std::make_unique(RawImg); + if (ImageIds) + ImageIds->insert(Img->getImageID()); + static uint32_t SequenceID = 0; // Fill the kernel argument mask map diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index abfdb1144105..10ac1537d5f8 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -210,7 +210,8 @@ class ProgramManager { ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, const ContextImplPtr Context); - void addImages(sycl_device_binaries DeviceImages); + void addImages(sycl_device_binaries DeviceImages, + std::unordered_set *ImageIds = nullptr); void debugPrintBinaryImages() const; static std::string getProgramBuildLog(const ur_program_handle_t &Program, const ContextImplPtr Context); diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index 0835cf1116eb..ac1eed68018f 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -13,26 +13,6 @@ // RUN: %{run} %t.out 1 // RUN: %{l0_leak_check} %{run} %t.out 1 -// -- Test again, with caching. - -// DEFINE: %{cache_vars} = %{l0_leak_check} env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_TRACE=5 SYCL_CACHE_DIR=%t/cache_dir -// RUN: %if run-mode %{ rm -rf %t/cache_dir %} -// RUN: %{cache_vars} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-WRITTEN-TO-CACHE -// RUN: %{cache_vars} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-READ-FROM-CACHE - -// -- Add leak check. -// RUN: %if run-mode %{ rm -rf %t/cache_dir %} -// RUN: %{l0_leak_check} %{cache_vars} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-WRITTEN-TO-CACHE -// RUN: %{l0_leak_check} %{cache_vars} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-READ-FROM-CACHE - -// CHECK-WRITTEN-TO-CACHE: [Persistent Cache]: enabled -// CHECK-WRITTEN-TO-CACHE-NOT: [kernel_compiler Persistent Cache]: using cached binary -// CHECK-WRITTEN-TO-CACHE: [kernel_compiler Persistent Cache]: binary has been cached - -// CHECK-READ-FROM-CACHE: [Persistent Cache]: enabled -// CHECK-READ-FROM-CACHE-NOT: [kernel_compiler Persistent Cache]: binary has been cached -// CHECK-READ-FROM-CACHE: [kernel_compiler Persistent Cache]: using cached binary - #include #include #include From 4a2e36e250e9169c5e5125733672697303c39060 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Tue, 10 Dec 2024 11:09:45 +0000 Subject: [PATCH 2/8] Revert changes to PM and just use prefix to identify kernels Signed-off-by: Julian Oppermann --- sycl/source/detail/jit_compiler.cpp | 6 ++-- sycl/source/detail/jit_compiler.hpp | 2 +- sycl/source/detail/kernel_bundle_impl.hpp | 36 +++++++++---------- .../kernel_compiler/kernel_compiler_sycl.cpp | 8 ++--- .../program_manager/program_manager.cpp | 6 +--- .../program_manager/program_manager.hpp | 3 +- .../kernel_compiler_sycl_jit.cpp | 17 +++++++++ 7 files changed, 43 insertions(+), 35 deletions(-) diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index 1a1d18ca4b9b..b096d784f250 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -1210,7 +1210,7 @@ std::vector jit_compiler::encodeReqdWorkGroupSize( } sycl_device_binaries jit_compiler::compileSYCL( - const std::string &Id, const std::string &SYCLSource, + const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, const std::vector &UserArgs, std::string *LogPtr, const std::vector &RegisteredKernelNames) { @@ -1227,7 +1227,7 @@ sycl_device_binaries jit_compiler::compileSYCL( std::string FinalSource = ss.str(); - std::string SYCLFileName = Id + ".cpp"; + std::string SYCLFileName = CompilationID + ".cpp"; ::jit_compiler::InMemoryFile SourceFile{SYCLFileName.c_str(), FinalSource.c_str()}; @@ -1255,7 +1255,7 @@ sycl_device_binaries jit_compiler::compileSYCL( } return createDeviceBinaryImage(Result.getBundleInfo(), - /*OffloadEntryPrefix=*/Id + '$'); + /*OffloadEntryPrefix=*/CompilationID + '$'); } } // namespace detail diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index c11b163ba6a6..1370c5686eec 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -46,7 +46,7 @@ class jit_compiler { const std::vector &SpecConstBlob); sycl_device_binaries compileSYCL( - const std::string &Id, const std::string &SYCLSource, + const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, const std::vector &UserArgs, std::string *LogPtr, const std::vector &RegisteredKernelNames); diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 51fae6cce9ea..f47dadde08e2 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -479,34 +479,30 @@ class kernel_bundle_impl { // TODO: Support persistent caching. const std::string &SourceStr = std::get(this->Source); - auto [Binaries, Id] = syclex::detail::SYCL_JIT_to_SPIRV( + auto [Binaries, CompilationID] = syclex::detail::SYCL_JIT_to_SPIRV( SourceStr, IncludePairs, BuildOptions, LogPtr, RegisteredKernelNames); assert(Binaries->NumDeviceBinaries == 1); auto &PM = detail::ProgramManager::getInstance(); - std::unordered_set ImageIds; - PM.addImages(Binaries, &ImageIds); - auto DevImgs = PM.getSYCLDeviceImages( - MContext, MDevices, - [&ImageIds](const detail::DeviceImageImplPtr &DevImgImpl) -> bool { - return ImageIds.count( - DevImgImpl->get_bin_image_ref()->getImageID()); - }, - bundle_state::executable); - - PM.bringSYCLDeviceImagesToState(DevImgs, bundle_state::executable); + PM.addImages(Binaries); + std::vector KernelIDs; std::vector KernelNames; - std::transform(Binaries->DeviceBinaries->EntriesBegin, - Binaries->DeviceBinaries->EntriesEnd, - std::back_inserter(KernelNames), - [PrefixLen = Id.length() + 1](auto &OffloadEntry) { - // `jit_compiler::compileSYCL` uses `Id + '$'` as name - // prefix; drop that here. - return std::string{OffloadEntry.name + PrefixLen}; - }); + // `jit_compiler::compileSYCL(..)` uses `CompilationID + '$'` as prefix + // for offload entry names. + std::string Prefix = CompilationID + '$'; + for (const auto &KernelID : PM.getAllSYCLKernelIDs()) { + std::string_view KernelName{KernelID.get_name()}; + if (KernelName.find(Prefix) == 0) { + KernelIDs.push_back(KernelID); + KernelName.remove_prefix(Prefix.length()); + KernelNames.emplace_back(KernelName); + } + } + auto DevImgs = PM.getSYCLDeviceImages(MContext, MDevices, KernelIDs, + bundle_state::executable); assert(DevImgs.size() == 1); assert(!DevImgs.front().hasDeps()); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 09d868276fc2..bc91dcd1f458 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -350,13 +350,13 @@ std::pair SYCL_JIT_to_SPIRV( [[maybe_unused]] std::string *LogPtr, [[maybe_unused]] const std::vector &RegisteredKernelNames) { #if SYCL_EXT_JIT_ENABLE - static std::atomic_uintptr_t CompilationId; - std::string Id = "rtc_" + std::to_string(CompilationId++); + static std::atomic_uintptr_t CompilationCounter; + std::string CompilationID = "rtc_" + std::to_string(CompilationCounter++); sycl_device_binaries Binaries = sycl::detail::jit_compiler::get_instance().compileSYCL( - Id, SYCLSource, IncludePairs, UserArgs, LogPtr, + CompilationID, SYCLSource, IncludePairs, UserArgs, LogPtr, RegisteredKernelNames); - return std::make_pair(Binaries, std::move(Id)); + return std::make_pair(Binaries, std::move(CompilationID)); #else throw sycl::exception(sycl::errc::build, "kernel_compiler via sycl-jit is not available"); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index efad5b4db51b..035ca965ce2e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1802,8 +1802,7 @@ ProgramManager::kernelImplicitLocalArgPos(const std::string &KernelName) const { return {}; } -void ProgramManager::addImages(sycl_device_binaries DeviceBinary, - std::unordered_set *ImageIds) { +void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile; for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) { sycl_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]); @@ -1826,9 +1825,6 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary, else Img = std::make_unique(RawImg); - if (ImageIds) - ImageIds->insert(Img->getImageID()); - static uint32_t SequenceID = 0; // Fill the kernel argument mask map diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 10ac1537d5f8..abfdb1144105 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -210,8 +210,7 @@ class ProgramManager { ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, const ContextImplPtr Context); - void addImages(sycl_device_binaries DeviceImages, - std::unordered_set *ImageIds = nullptr); + void addImages(sycl_device_binaries DeviceImages); void debugPrintBinaryImages() const; static std::string getProgramBuildLog(const ur_program_handle_t &Program, const ContextImplPtr Context); diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index ac1eed68018f..88a30d2a4406 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -194,6 +194,23 @@ int test_build_and_run() { test_1(q, k, 37 + 5); // ff_cp seeds 37. AddEm will add 5 more. test_1(q, k2, 38 + 6); // ff_templated seeds 38. PlusEm adds 6 more. + // Create and compile new bundle with different header. + std::string AddEmHModified = AddEmH; + AddEmHModified[AddEmHModified.find('5')] = '7'; + syclex::include_files incFiles2{"intermediate/AddEm.h", AddEmHModified}; + incFiles2.add("intermediate/PlusEm.h", PlusEmH); + source_kb kbSrc2 = syclex::create_kernel_bundle_from_source( + ctx, syclex::source_language::sycl_jit, SYCLSource, + syclex::properties{incFiles2}); + + exe_kb kbExe3 = syclex::build(kbSrc2); + sycl::kernel k3 = kbExe3.ext_oneapi_get_kernel("ff_cp"); + test_1(q, k3, 37 + 7); + + // Can we still run the original compilation? + sycl::kernel k4 = kbExe1.ext_oneapi_get_kernel("ff_cp"); + test_1(q, k4, 37 + 5); + return 0; } From 2ab36bef22d87af3149fdfe626e477566dca8d2e Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Mon, 16 Dec 2024 13:29:49 +0000 Subject: [PATCH 3/8] Support multiple device images per RTC bundle. Signed-off-by: Julian Oppermann --- sycl-jit/common/include/Kernel.h | 14 +- sycl-jit/jit-compiler/lib/KernelFusion.cpp | 21 +-- .../lib/rtc/DeviceCompilation.cpp | 164 ++++++++++-------- .../jit-compiler/lib/rtc/DeviceCompilation.h | 4 +- .../lib/translation/KernelTranslation.cpp | 12 +- .../lib/translation/KernelTranslation.h | 4 +- sycl/source/detail/jit_compiler.cpp | 65 +++---- sycl/source/detail/jit_compiler.hpp | 3 +- sycl/source/detail/kernel_bundle_impl.hpp | 46 ++++- .../kernel_compiler_sycl_jit.cpp | 9 +- 10 files changed, 203 insertions(+), 139 deletions(-) diff --git a/sycl-jit/common/include/Kernel.h b/sycl-jit/common/include/Kernel.h index efd6e1ded305..eb5ba0f05c91 100644 --- a/sycl-jit/common/include/Kernel.h +++ b/sycl-jit/common/include/Kernel.h @@ -359,7 +359,7 @@ struct InMemoryFile { const char *Contents; }; -using RTCBundleBinaryInfo = SYCLKernelBinaryInfo; +using RTCDevImgBinaryInfo = SYCLKernelBinaryInfo; using FrozenSymbolTable = DynArray; // Note: `FrozenPropertyValue` and `FrozenPropertySet` constructors take @@ -399,16 +399,18 @@ struct FrozenPropertySet { using FrozenPropertyRegistry = DynArray; -struct RTCBundleInfo { - RTCBundleBinaryInfo BinaryInfo; +struct RTCDevImgInfo { + RTCDevImgBinaryInfo BinaryInfo; FrozenSymbolTable SymbolTable; FrozenPropertyRegistry Properties; - RTCBundleInfo() = default; - RTCBundleInfo(RTCBundleInfo &&) = default; - RTCBundleInfo &operator=(RTCBundleInfo &&) = default; + RTCDevImgInfo() = default; + RTCDevImgInfo(RTCDevImgInfo &&) = default; + RTCDevImgInfo &operator=(RTCDevImgInfo &&) = default; }; +using RTCBundleInfo = DynArray; + } // namespace jit_compiler #endif // SYCL_FUSION_COMMON_KERNEL_H diff --git a/sycl-jit/jit-compiler/lib/KernelFusion.cpp b/sycl-jit/jit-compiler/lib/KernelFusion.cpp index d7e18003d2a7..d9594332c6b4 100644 --- a/sycl-jit/jit-compiler/lib/KernelFusion.cpp +++ b/sycl-jit/jit-compiler/lib/KernelFusion.cpp @@ -266,17 +266,18 @@ compileSYCL(InMemoryFile SourceFile, View IncludeFiles, return errorTo(PostLinkResultOrError.takeError(), "Post-link phase failed"); } - RTCBundleInfo BundleInfo; - std::tie(BundleInfo, Module) = std::move(*PostLinkResultOrError); - - auto BinaryInfoOrError = - translation::KernelTranslator::translateBundleToSPIRV( - *Module, JITContext::getInstance()); - if (!BinaryInfoOrError) { - return errorTo(BinaryInfoOrError.takeError(), - "SPIR-V translation failed"); + auto [BundleInfo, Modules] = std::move(*PostLinkResultOrError); + + for (auto [DevImgInfo, Module] : llvm::zip_equal(BundleInfo, Modules)) { + auto BinaryInfoOrError = + translation::KernelTranslator::translateDevImgToSPIRV( + *Module, JITContext::getInstance()); + if (!BinaryInfoOrError) { + return errorTo(BinaryInfoOrError.takeError(), + "SPIR-V translation failed"); + } + DevImgInfo.BinaryInfo = std::move(*BinaryInfoOrError); } - BundleInfo.BinaryInfo = std::move(*BinaryInfoOrError); return RTCResult{std::move(BundleInfo), BuildLog.c_str()}; } diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index fe2c9756ce20..bca7e6335d13 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -232,8 +232,6 @@ Expected> jit_compiler::compileDeviceCode( DerivedArgList DAL{UserArgList}; const auto &OptTable = getDriverOptTable(); DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_fsycl_device_only)); - DAL.AddFlagArg(nullptr, - OptTable.getOption(OPT_fno_sycl_dead_args_optimization)); DAL.AddJoinedArg( nullptr, OptTable.getOption(OPT_resource_dir_EQ), (DPCPPRoot + "/lib/clang/" + Twine(CLANG_VERSION_MAJOR)).str()); @@ -435,15 +433,35 @@ template static bool runModulePass(llvm::Module &M) { return !Res.areAllPreserved(); } -llvm::Expected jit_compiler::performPostLink( - std::unique_ptr Module, - [[maybe_unused]] const llvm::opt::InputArgList &UserArgList) { +static IRSplitMode getDeviceCodeSplitMode(const InputArgList &UserArgList) { + // This is the (combined) logic from + // `get[NonTriple|Triple]BasedSYCLPostLinkOpts` in + // `clang/lib/Driver/ToolChains/Clang.cpp`: Default is auto mode, but the user + // can override it by specifying the `-fsycl-device-code-split=` option. The + // no-argument variant `-fsycl-device-code-split` is ignored. + if (auto *Arg = UserArgList.getLastArg(OPT_fsycl_device_code_split_EQ)) { + StringRef ArgVal{Arg->getValue()}; + if (ArgVal == "per_kernel") { + return SPLIT_PER_KERNEL; + } + if (ArgVal == "per_source") { + return SPLIT_PER_TU; + } + if (ArgVal == "off") { + return SPLIT_NONE; + } + } + return SPLIT_AUTO; +} + +Expected +jit_compiler::performPostLink(std::unique_ptr Module, + const InputArgList &UserArgList) { // This is a simplified version of `processInputModule` in // `llvm/tools/sycl-post-link.cpp`. Assertions/TODOs point to functionality // left out of the algorithm for now. - // TODO: SplitMode can be controlled by the user. - const auto SplitMode = SPLIT_NONE; + const auto SplitMode = getDeviceCodeSplitMode(UserArgList); // TODO: EmitOnlyKernelsAsEntryPoints is controlled by // `shouldEmitOnlyKernelsAsEntryPoints` in @@ -486,70 +504,83 @@ llvm::Expected jit_compiler::performPostLink( ModuleDesc{std::move(Module)}, SplitMode, /*IROutputOnly=*/false, EmitOnlyKernelsAsEntryPoints); assert(Splitter->hasMoreSplits()); - if (Splitter->remainingSplits() > 1) { - return createStringError("Device code requires splitting"); - } // TODO: Call `verifyNoCrossModuleDeviceGlobalUsage` if device globals shall // be processed. - ModuleDesc MDesc = Splitter->nextSplit(); + // TODO: This allocation assumes that there are no further splits required, + // i.e. due to mixed SYCL/ESIMD modules. + RTCBundleInfo BundleInfo{Splitter->remainingSplits()}; + SmallVector> Modules; - // TODO: Call `MDesc.fixupLinkageOfDirectInvokeSimdTargets()` when - // `invoke_simd` is supported. + auto *DevImgInfoIt = BundleInfo.begin(); + while (Splitter->hasMoreSplits()) { + assert(DevImgInfoIt != BundleInfo.end()); - SmallVector ESIMDSplits = - splitByESIMD(std::move(MDesc), EmitOnlyKernelsAsEntryPoints); - assert(!ESIMDSplits.empty()); - if (ESIMDSplits.size() > 1) { - return createStringError("Mixing SYCL and ESIMD code is unsupported"); - } - MDesc = std::move(ESIMDSplits.front()); + ModuleDesc MDesc = Splitter->nextSplit(); + RTCDevImgInfo &DevImgInfo = *DevImgInfoIt++; - if (MDesc.isESIMD()) { - // `sycl-post-link` has a `-lower-esimd` option, but there's no clang driver - // option to influence it. Rather, the driver sets it unconditionally in the - // multi-file output mode, which we are mimicking here. - lowerEsimdConstructs(MDesc, PerformOpts); - } + // TODO: Call `MDesc.fixupLinkageOfDirectInvokeSimdTargets()` when + // `invoke_simd` is supported. - MDesc.saveSplitInformationAsMetadata(); - - RTCBundleInfo BundleInfo; - BundleInfo.SymbolTable = FrozenSymbolTable{MDesc.entries().size()}; - transform(MDesc.entries(), BundleInfo.SymbolTable.begin(), - [](Function *F) { return F->getName(); }); - - // TODO: Determine what is requested. - GlobalBinImageProps PropReq{ - /*EmitKernelParamInfo=*/true, /*EmitProgramMetadata=*/true, - /*EmitExportedSymbols=*/true, /*EmitImportedSymbols=*/true, - /*DeviceGlobals=*/false}; - PropertySetRegistry Properties = - computeModuleProperties(MDesc.getModule(), MDesc.entries(), PropReq); - // TODO: Manually add `compile_target` property as in - // `saveModuleProperties`? - const auto &PropertySets = Properties.getPropSets(); - - BundleInfo.Properties = FrozenPropertyRegistry{PropertySets.size()}; - for (auto &&[KV, FrozenPropSet] : zip(PropertySets, BundleInfo.Properties)) { - const auto &PropertySetName = KV.first; - const auto &PropertySet = KV.second; - FrozenPropSet = - FrozenPropertySet{PropertySetName.str(), PropertySet.size()}; - for (auto &&[KV2, FrozenProp] : zip(PropertySet, FrozenPropSet.Values)) { - const auto &PropertyName = KV2.first; - const auto &PropertyValue = KV2.second; - FrozenProp = PropertyValue.getType() == PropertyValue::Type::UINT32 - ? FrozenPropertyValue{PropertyName.str(), - PropertyValue.asUint32()} - : FrozenPropertyValue{ - PropertyName.str(), PropertyValue.asRawByteArray(), - PropertyValue.getRawByteArraySize()}; + SmallVector ESIMDSplits = + splitByESIMD(std::move(MDesc), EmitOnlyKernelsAsEntryPoints); + assert(!ESIMDSplits.empty()); + if (ESIMDSplits.size() > 1) { + return createStringError("Mixing SYCL and ESIMD code is unsupported"); } - }; + MDesc = std::move(ESIMDSplits.front()); + + if (MDesc.isESIMD()) { + // `sycl-post-link` has a `-lower-esimd` option, but there's no clang + // driver option to influence it. Rather, the driver sets it + // unconditionally in the multi-file output mode, which we are mimicking + // here. + lowerEsimdConstructs(MDesc, PerformOpts); + } + + MDesc.saveSplitInformationAsMetadata(); + + DevImgInfo.SymbolTable = FrozenSymbolTable{MDesc.entries().size()}; + transform(MDesc.entries(), DevImgInfo.SymbolTable.begin(), + [](Function *F) { return F->getName(); }); + + // TODO: Determine what is requested. + GlobalBinImageProps PropReq{ + /*EmitKernelParamInfo=*/true, /*EmitProgramMetadata=*/true, + /*EmitExportedSymbols=*/true, /*EmitImportedSymbols=*/true, + /*DeviceGlobals=*/false}; + PropertySetRegistry Properties = + computeModuleProperties(MDesc.getModule(), MDesc.entries(), PropReq); + // TODO: Manually add `compile_target` property as in + // `saveModuleProperties`? + const auto &PropertySets = Properties.getPropSets(); + + DevImgInfo.Properties = FrozenPropertyRegistry{PropertySets.size()}; + for (auto [KV, FrozenPropSet] : + zip_equal(PropertySets, DevImgInfo.Properties)) { + const auto &PropertySetName = KV.first; + const auto &PropertySet = KV.second; + FrozenPropSet = + FrozenPropertySet{PropertySetName.str(), PropertySet.size()}; + for (auto [KV2, FrozenProp] : + zip_equal(PropertySet, FrozenPropSet.Values)) { + const auto &PropertyName = KV2.first; + const auto &PropertyValue = KV2.second; + FrozenProp = + PropertyValue.getType() == PropertyValue::Type::UINT32 + ? FrozenPropertyValue{PropertyName.str(), + PropertyValue.asUint32()} + : FrozenPropertyValue{PropertyName.str(), + PropertyValue.asRawByteArray(), + PropertyValue.getRawByteArraySize()}; + } + }; + + Modules.push_back(MDesc.releaseModulePtr()); + } - return PostLinkResult{std::move(BundleInfo), MDesc.releaseModulePtr()}; + return PostLinkResult{std::move(BundleInfo), std::move(Modules)}; } Expected @@ -606,21 +637,10 @@ jit_compiler::parseUserArgs(View UserArgs) { } } - if (auto DCSMode = AL.getLastArgValue(OPT_fsycl_device_code_split_EQ, "none"); - DCSMode != "none" && DCSMode != "auto") { - return createStringError("Device code splitting is not yet supported"); - } - if (!AL.hasFlag(OPT_fsycl_device_code_split_esimd, OPT_fno_sycl_device_code_split_esimd, true)) { return createStringError("ESIMD device code split cannot be deactivated"); } - if (AL.hasFlag(OPT_fsycl_dead_args_optimization, - OPT_fno_sycl_dead_args_optimization, false)) { - return createStringError( - "Dead argument optimization must be disabled for runtime compilation"); - } - return std::move(AL); } diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h index 0fea81bdc1d3..1c30e5a61fb4 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h @@ -12,6 +12,7 @@ #include "Kernel.h" #include "View.h" +#include #include #include #include @@ -30,7 +31,8 @@ llvm::Error linkDeviceLibraries(llvm::Module &Module, const llvm::opt::InputArgList &UserArgList, std::string &BuildLog); -using PostLinkResult = std::pair>; +using PostLinkResult = + std::pair>>; llvm::Expected performPostLink(std::unique_ptr Module, const llvm::opt::InputArgList &UserArgList); diff --git a/sycl-jit/jit-compiler/lib/translation/KernelTranslation.cpp b/sycl-jit/jit-compiler/lib/translation/KernelTranslation.cpp index 61ca038dea4c..374bc943119d 100644 --- a/sycl-jit/jit-compiler/lib/translation/KernelTranslation.cpp +++ b/sycl-jit/jit-compiler/lib/translation/KernelTranslation.cpp @@ -222,18 +222,18 @@ llvm::Error KernelTranslator::translateKernel(SYCLKernelInfo &Kernel, return Error::success(); } -llvm::Expected -KernelTranslator::translateBundleToSPIRV(llvm::Module &Mod, +llvm::Expected +KernelTranslator::translateDevImgToSPIRV(llvm::Module &Mod, JITContext &JITCtx) { llvm::Expected BinaryOrError = translateToSPIRV(Mod, JITCtx); if (auto Error = BinaryOrError.takeError()) { return Error; } KernelBinary *Binary = *BinaryOrError; - RTCBundleBinaryInfo BBI{BinaryFormat::SPIRV, - Mod.getDataLayout().getPointerSizeInBits(), - Binary->address(), Binary->size()}; - return BBI; + RTCDevImgBinaryInfo DIBI{BinaryFormat::SPIRV, + Mod.getDataLayout().getPointerSizeInBits(), + Binary->address(), Binary->size()}; + return DIBI; } llvm::Expected diff --git a/sycl-jit/jit-compiler/lib/translation/KernelTranslation.h b/sycl-jit/jit-compiler/lib/translation/KernelTranslation.h index e71aa2b0d19f..9747182e0ce7 100644 --- a/sycl-jit/jit-compiler/lib/translation/KernelTranslation.h +++ b/sycl-jit/jit-compiler/lib/translation/KernelTranslation.h @@ -27,8 +27,8 @@ class KernelTranslator { static llvm::Error translateKernel(SYCLKernelInfo &Kernel, llvm::Module &Mod, JITContext &JITCtx, BinaryFormat Format); - static llvm::Expected - translateBundleToSPIRV(llvm::Module &Mod, JITContext &JITCtx); + static llvm::Expected + translateDevImgToSPIRV(llvm::Module &Mod, JITContext &JITCtx); private: /// diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index b096d784f250..7ca4f822f559 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -1122,41 +1122,46 @@ sycl_device_binaries jit_compiler::createPIDeviceBinary( sycl_device_binaries jit_compiler::createDeviceBinaryImage( const ::jit_compiler::RTCBundleInfo &BundleInfo, const std::string &OffloadEntryPrefix) { - DeviceBinaryContainer Binary; - for (const auto &Symbol : BundleInfo.SymbolTable) { - // Create an offload entry for each kernel. We prepend a unique prefix to - // support reusing the same name across multiple RTC requests. The actual - // entrypoints remain unchanged. - // It seems to be OK to set zero for most of the information here, at least - // that is the case for compiled SPIR-V binaries. - std::string PrefixedName = OffloadEntryPrefix + Symbol.c_str(); - OffloadEntryContainer Entry{PrefixedName, /*Addr=*/nullptr, /*Size=*/0, - /*Flags=*/0, /*Reserved=*/0}; - Binary.addOffloadEntry(std::move(Entry)); - } + DeviceBinariesCollection Collection; - for (const auto &FPS : BundleInfo.Properties) { - PropertySetContainer PropSet{FPS.Name.c_str()}; - for (const auto &FPV : FPS.Values) { - if (FPV.IsUIntValue) { - PropSet.addProperty(PropertyContainer{FPV.Name.c_str(), FPV.UIntValue}); - } else { - PropSet.addProperty(PropertyContainer{ - FPV.Name.c_str(), FPV.Bytes.begin(), FPV.Bytes.size(), - sycl_property_type::SYCL_PROPERTY_TYPE_BYTE_ARRAY}); + for (const auto &DevImgInfo : BundleInfo) { + DeviceBinaryContainer Binary; + for (const auto &Symbol : DevImgInfo.SymbolTable) { + // Create an offload entry for each kernel. We prepend a unique prefix to + // support reusing the same name across multiple RTC requests. The actual + // entrypoints remain unchanged. + // It seems to be OK to set zero for most of the information here, at + // least that is the case for compiled SPIR-V binaries. + std::string PrefixedName = OffloadEntryPrefix + Symbol.c_str(); + OffloadEntryContainer Entry{PrefixedName, /*Addr=*/nullptr, /*Size=*/0, + /*Flags=*/0, /*Reserved=*/0}; + Binary.addOffloadEntry(std::move(Entry)); + } + + for (const auto &FPS : DevImgInfo.Properties) { + PropertySetContainer PropSet{FPS.Name.c_str()}; + for (const auto &FPV : FPS.Values) { + if (FPV.IsUIntValue) { + PropSet.addProperty( + PropertyContainer{FPV.Name.c_str(), FPV.UIntValue}); + } else { + PropSet.addProperty(PropertyContainer{ + FPV.Name.c_str(), FPV.Bytes.begin(), FPV.Bytes.size(), + sycl_property_type::SYCL_PROPERTY_TYPE_BYTE_ARRAY}); + } } + Binary.addProperty(std::move(PropSet)); } - Binary.addProperty(std::move(PropSet)); + + Collection.addDeviceBinary(std::move(Binary), + DevImgInfo.BinaryInfo.BinaryStart, + DevImgInfo.BinaryInfo.BinarySize, + (DevImgInfo.BinaryInfo.AddressBits == 64) + ? __SYCL_DEVICE_BINARY_TARGET_SPIRV64 + : __SYCL_DEVICE_BINARY_TARGET_SPIRV32, + SYCL_DEVICE_BINARY_TYPE_SPIRV); } - DeviceBinariesCollection Collection; - Collection.addDeviceBinary(std::move(Binary), - BundleInfo.BinaryInfo.BinaryStart, - BundleInfo.BinaryInfo.BinarySize, - (BundleInfo.BinaryInfo.AddressBits == 64) - ? __SYCL_DEVICE_BINARY_TARGET_SPIRV64 - : __SYCL_DEVICE_BINARY_TARGET_SPIRV32, - SYCL_DEVICE_BINARY_TYPE_SPIRV); JITDeviceBinaries.push_back(std::move(Collection)); return JITDeviceBinaries.back().getPIDeviceStruct(); } diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index 1370c5686eec..5b73eb81f3fb 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -23,10 +23,11 @@ enum class BinaryFormat : uint32_t; class JITContext; struct SYCLKernelInfo; struct SYCLKernelAttribute; -struct RTCBundleInfo; +struct RTCDevImgInfo; template class DynArray; using ArgUsageMask = DynArray; using JITEnvVar = DynArray; +using RTCBundleInfo = DynArray; } // namespace jit_compiler namespace sycl { diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index f47dadde08e2..667757eafa2b 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -376,6 +376,19 @@ class kernel_bundle_impl { Language = Lang; } + // oneapi_ext_kernel_compiler + // experimental program manager integration, only for sycl_jit language + kernel_bundle_impl(context Ctx, std::vector Devs, + const std::vector &KernelIDs, + std::vector KNames, std::string Pfx, + syclex::source_language Lang) + : kernel_bundle_impl(Ctx, Devs, KernelIDs, bundle_state::executable) { + assert(Lang == syclex::source_language::sycl_jit); + KernelNames = KNames; + Prefix = Pfx; + Language = Lang; + } + std::string trimXsFlags(std::string &str) { // Trim first and last quote if they exist, but no others. char EncounteredQuote = '\0'; @@ -482,8 +495,6 @@ class kernel_bundle_impl { auto [Binaries, CompilationID] = syclex::detail::SYCL_JIT_to_SPIRV( SourceStr, IncludePairs, BuildOptions, LogPtr, RegisteredKernelNames); - assert(Binaries->NumDeviceBinaries == 1); - auto &PM = detail::ProgramManager::getInstance(); PM.addImages(Binaries); @@ -501,13 +512,8 @@ class kernel_bundle_impl { } } - auto DevImgs = PM.getSYCLDeviceImages(MContext, MDevices, KernelIDs, - bundle_state::executable); - assert(DevImgs.size() == 1); - assert(!DevImgs.front().hasDeps()); - return std::make_shared( - MContext, MDevices, DevImgs.front().getMain(), KernelNames, Language); + MContext, MDevices, KernelIDs, KernelNames, Prefix, Language); } ur_program_handle_t UrProgram = nullptr; @@ -646,6 +652,29 @@ class kernel_bundle_impl { "kernel '" + AdjustedName + "' not found in kernel_bundle"); + if (Language == syclex::source_language::sycl_jit) { + auto &PM = ProgramManager::getInstance(); + auto KID = PM.getSYCLKernelID(Prefix + AdjustedName); + + for (const auto &DevImgWithDeps : MDeviceImages) { + const auto &DevImg = DevImgWithDeps.getMain(); + if (!DevImg.has_kernel(KID)) + continue; + + const auto &DevImgImpl = getSyclObjImpl(DevImg); + auto UrProgram = DevImgImpl->get_ur_program_ref(); + auto [UrKernel, CacheMutex, ArgMask] = + PM.getOrCreateKernel(MContext, AdjustedName, + /*PropList=*/{}, UrProgram); + auto KernelImpl = std::make_shared( + UrKernel, getSyclObjImpl(MContext), DevImgImpl, Self, ArgMask, + UrProgram, CacheMutex); + return createSyclObjFromImpl(KernelImpl); + } + + assert(false && "Malformed RTC kernel bundle"); + } + assert(MDeviceImages.size() > 0); const std::shared_ptr &DeviceImageImpl = detail::getSyclObjImpl(MDeviceImages[0].getMain()); @@ -923,6 +952,7 @@ class kernel_bundle_impl { const std::variant> Source; // only kernel_bundles created from source have KernelNames member. std::vector KernelNames; + std::string Prefix; include_pairs_t IncludePairs; }; diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index 88a30d2a4406..d0240bc9b896 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -203,7 +203,12 @@ int test_build_and_run() { ctx, syclex::source_language::sycl_jit, SYCLSource, syclex::properties{incFiles2}); - exe_kb kbExe3 = syclex::build(kbSrc2); + exe_kb kbExe3 = syclex::build( + kbSrc2, syclex::properties{ + syclex::build_options{"-fsycl-device-code-split=per_kernel"}, + syclex::registered_kernel_names{"ff_templated"}}); + assert(std::distance(kbExe3.begin(), kbExe3.end()) == 2 && + "Expected 2 device images"); sycl::kernel k3 = kbExe3.ext_oneapi_get_kernel("ff_cp"); test_1(q, k3, 37 + 7); @@ -322,9 +327,7 @@ int test_unsupported_options() { CheckUnsupported({"-Xsycl-target-frontend", "-fsanitize=address"}); CheckUnsupported({"-Xsycl-target-frontend=spir64", "-fsanitize=address"}); CheckUnsupported({"-Xarch_device", "-fsanitize=address"}); - CheckUnsupported({"-fsycl-device-code-split=kernel"}); CheckUnsupported({"-fno-sycl-device-code-split-esimd"}); - CheckUnsupported({"-fsycl-dead-args-optimization"}); return 0; } From 274fe3d74ca594e4786ca04e0c5f782f64bf827a Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Mon, 16 Dec 2024 20:37:06 +0000 Subject: [PATCH 4/8] Clarify comment Signed-off-by: Julian Oppermann --- sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index bca7e6335d13..ad6d20d27a81 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -509,7 +509,7 @@ jit_compiler::performPostLink(std::unique_ptr Module, // be processed. // TODO: This allocation assumes that there are no further splits required, - // i.e. due to mixed SYCL/ESIMD modules. + // i.e. there are no mixed SYCL/ESIMD modules. RTCBundleInfo BundleInfo{Splitter->remainingSplits()}; SmallVector> Modules; From a7e5f57c8ae917138a04c682f2880e27b890faf7 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 20 Dec 2024 14:10:49 +0100 Subject: [PATCH 5/8] Update sycl/source/detail/kernel_bundle_impl.hpp Co-authored-by: Lukas Sommer --- sycl/source/detail/kernel_bundle_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 667757eafa2b..a0821779cd94 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -377,7 +377,7 @@ class kernel_bundle_impl { } // oneapi_ext_kernel_compiler - // experimental program manager integration, only for sycl_jit language + // program manager integration, only for sycl_jit language kernel_bundle_impl(context Ctx, std::vector Devs, const std::vector &KernelIDs, std::vector KNames, std::string Pfx, From 818d6c7358538071cb180161e5378063d710054e Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 20 Dec 2024 14:17:43 +0100 Subject: [PATCH 6/8] Update sycl/source/detail/kernel_bundle_impl.hpp Co-authored-by: Lukas Sommer --- sycl/source/detail/kernel_bundle_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index a0821779cd94..494662624b30 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -488,7 +488,7 @@ class kernel_bundle_impl { } if (Language == syclex::source_language::sycl_jit) { - // Experimental: Build device images via the program manager. + // Build device images via the program manager. // TODO: Support persistent caching. const std::string &SourceStr = std::get(this->Source); From 7aa758ca7563b806d55365b4c72c5dfacb9825f8 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 20 Dec 2024 13:20:45 +0000 Subject: [PATCH 7/8] Drop obsolete comment. Signed-off-by: Julian Oppermann --- sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index ad6d20d27a81..d9c3134dd189 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -497,9 +497,6 @@ jit_compiler::performPostLink(std::unique_ptr Module, return createStringError("`invoke_simd` calls detected"); } - // TODO: Implement actual device code splitting. We're just using the splitter - // to obtain additional information about the module for now. - std::unique_ptr Splitter = getDeviceCodeSplitter( ModuleDesc{std::move(Module)}, SplitMode, /*IROutputOnly=*/false, EmitOnlyKernelsAsEntryPoints); From 5a729bab4b92092af0a1ce816b9043d274a7e2f7 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 8 Jan 2025 03:15:48 +0000 Subject: [PATCH 8/8] Mark bundle as interop. Signed-off-by: Julian Oppermann --- sycl/source/detail/kernel_bundle_impl.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 9fc86087d720..a8d0bf13f287 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -384,6 +384,12 @@ class kernel_bundle_impl { syclex::source_language Lang) : kernel_bundle_impl(Ctx, Devs, KernelIDs, bundle_state::executable) { assert(Lang == syclex::source_language::sycl_jit); + // Mark this bundle explicitly as "interop" to ensure that its kernels are + // enqueued with the info from the kernel object passed by the application, + // cf. `enqueueImpKernel` in `commands.cpp`. While runtime-compiled kernels + // loaded via the program manager have `kernel_id`s, they can't be looked up + // from the (unprefixed) kernel name. + MIsInterop = true; KernelNames = KNames; Prefix = Pfx; Language = Lang;