From d4dffb48aa26903dfe2e1afa576ffa3e721410db Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Mon, 20 Jul 2020 16:22:01 -0700 Subject: [PATCH 01/21] [SYCL] Implemented Device and Platform cache in L0 Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level0.cpp | 42 ++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/sycl/plugins/level_zero/pi_level0.cpp b/sycl/plugins/level_zero/pi_level0.cpp index 0fdf4e272e1d2..ee71c1cdc7c25 100644 --- a/sycl/plugins/level_zero/pi_level0.cpp +++ b/sycl/plugins/level_zero/pi_level0.cpp @@ -448,6 +448,8 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, return PI_INVALID_VALUE; } + static std::vector piPlatformsCache; + // This is a good time to initialize L0. // TODO: We can still safely recover if something goes wrong during the init. // Implement handling segfault using sigaction. @@ -470,6 +472,15 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, // L0 does not have concept of Platforms, but L0 driver is the // closest match. if (Platforms && NumEntries > 0) { + if (piPlatformsCache.size() > 0) { + // return the saved pi_platform from the cache + Platforms[0] = piPlatformsCache[0]; + if (NumPlatforms) { + *NumPlatforms = 1; + } + return PI_SUCCESS; + } + uint32_t ZeDriverCount = 0; ZE_CALL(zeDriverGet(&ZeDriverCount, nullptr)); if (ZeDriverCount == 0) { @@ -507,6 +518,8 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, Platforms[0]->ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + std::string(".") + std::to_string(ZE_MINOR_VERSION(ZeApiVersion)); + // save a copy in the cache for future uses + piPlatformsCache.push_back(Platforms[0]); } catch (const std::bad_alloc &) { return PI_OUT_OF_HOST_MEMORY; } catch (...) { @@ -596,13 +609,27 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 *NumDevices) { assert(Platform); + // save discovered pi_devices for quick return + static std::vector piDevicesCache; + ze_driver_handle_t ZeDriver = Platform->ZeDriver; // Get number of devices supporting L0 uint32_t ZeDeviceCount = 0; const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU); const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT); - ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, nullptr)); + + if (piDevicesCache.size() != 0) { + for (uint32_t i = 0; i < piDevicesCache.size(); i++) { + if (piDevicesCache[i]->Platform == Platform) { + ZeDeviceCount++; + } + } + } + if (ZeDeviceCount == 0) { + ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, nullptr)); + } + if (ZeDeviceCount == 0 || !(AskingForGPU || AskingForDefault)) { if (NumDevices) *NumDevices = 0; @@ -618,6 +645,17 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, return PI_SUCCESS; } + // if devices are already captured in cache, return them from the cache. + uint32_t count = 0; + for (uint32_t i = 0; i < piDevicesCache.size(); i++) { + if (piDevicesCache[i]->Platform == Platform) { + Devices[count++] = piDevicesCache[i]; + } + } + if (count == ZeDeviceCount) { + return PI_SUCCESS; + } + try { std::vector ZeDevices(ZeDeviceCount); ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, ZeDevices.data())); @@ -629,6 +667,8 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, if (Result != PI_SUCCESS) { return Result; } + // save a copy in the cache for future uses. + piDevicesCache.push_back(Devices[I]); } } } catch (const std::bad_alloc &) { From 81b1bbdcd16839444d9b1142778edf4b1b5b82c4 Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Sat, 25 Jul 2020 14:31:37 -0700 Subject: [PATCH 02/21] [SYCL] Implement Device and Platform cache in L0 added piPlatformRelease to invalidate platforms when platform_impl is deallocated. Signed-off-by: Byoungro So --- sycl/include/CL/sycl/detail/pi.def | 1 + sycl/include/CL/sycl/detail/pi.h | 2 + sycl/plugins/level_zero/pi_level0.cpp | 52 ++++++++++++++++------- sycl/source/detail/platform_impl.cpp | 6 +++ sycl/source/detail/platform_impl.hpp | 2 +- sycl/test/abi/pi_level0_symbol_check.dump | 2 +- 6 files changed, 47 insertions(+), 18 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def index 72332dd65113c..2f8eb06591549 100644 --- a/sycl/include/CL/sycl/detail/pi.def +++ b/sycl/include/CL/sycl/detail/pi.def @@ -19,6 +19,7 @@ _PI_API(piPlatformsGet) _PI_API(piPlatformGetInfo) _PI_API(piextPlatformGetNativeHandle) _PI_API(piextPlatformCreateWithNativeHandle) +_PI_API(piPlatformRelease) // only used for L0 now // Device _PI_API(piDevicesGet) _PI_API(piDeviceGetInfo) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index ad6410fd46def..f6e9092e8aea3 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -808,6 +808,8 @@ __SYCL_EXPORT pi_result piPlatformGetInfo(pi_platform platform, void *param_value, size_t *param_value_size_ret); +__SYCL_EXPORT pi_result piPlatformRelease(pi_platform platform); // only for L0 + /// Gets the native handle of a PI platform object. /// /// \param platform is the PI platform to get the native handle of. diff --git a/sycl/plugins/level_zero/pi_level0.cpp b/sycl/plugins/level_zero/pi_level0.cpp index ee71c1cdc7c25..a7f279b88fe31 100644 --- a/sycl/plugins/level_zero/pi_level0.cpp +++ b/sycl/plugins/level_zero/pi_level0.cpp @@ -139,6 +139,10 @@ class ReturnHelper { size_t *param_value_size_ret; }; +// save discovered pi_devices & pi_platforms for quick return +static std::vector piDevicesCache; +static std::vector piPlatformsCache; + } // anonymous namespace // TODO:: In the following 4 methods we may want to distinguish read access vs. @@ -448,8 +452,6 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, return PI_INVALID_VALUE; } - static std::vector piPlatformsCache; - // This is a good time to initialize L0. // TODO: We can still safely recover if something goes wrong during the init. // Implement handling segfault using sigaction. @@ -472,15 +474,6 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, // L0 does not have concept of Platforms, but L0 driver is the // closest match. if (Platforms && NumEntries > 0) { - if (piPlatformsCache.size() > 0) { - // return the saved pi_platform from the cache - Platforms[0] = piPlatformsCache[0]; - if (NumPlatforms) { - *NumPlatforms = 1; - } - return PI_SUCCESS; - } - uint32_t ZeDriverCount = 0; ZE_CALL(zeDriverGet(&ZeDriverCount, nullptr)); if (ZeDriverCount == 0) { @@ -493,8 +486,17 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, assert(ZeDriverCount == 1); ZE_CALL(zeDriverGet(&ZeDriverCount, &ZeDriver)); + for (uint32_t i = 0; i < piPlatformsCache.size(); i++) { + if (piPlatformsCache[i]->ZeDriver == ZeDriver) { + Platforms[0] = piPlatformsCache[i]; + if (NumPlatforms) + *NumPlatforms = 1; + + return PI_SUCCESS; + } + } + try { - // TODO: figure out how/when to release this memory *Platforms = new _pi_platform(ZeDriver); // Cache driver properties @@ -518,6 +520,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, Platforms[0]->ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + std::string(".") + std::to_string(ZE_MINOR_VERSION(ZeApiVersion)); + // save a copy in the cache for future uses piPlatformsCache.push_back(Platforms[0]); } catch (const std::bad_alloc &) { @@ -604,13 +607,24 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, return PI_SUCCESS; } +pi_result piPlatformRelease(pi_platform Platform) { + assert(Platform); + + // invalidate piDeviceCache entry + for (uint32_t i = 0; i < piPlatformsCache.size(); i++) { + if (Platform == piPlatformsCache[i]) { + piPlatformsCache.erase(piPlatformsCache.begin() + i); + break; + } + } + + return PI_SUCCESS; +} + pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 NumEntries, pi_device *Devices, pi_uint32 *NumDevices) { - assert(Platform); - // save discovered pi_devices for quick return - static std::vector piDevicesCache; ze_driver_handle_t ZeDriver = Platform->ZeDriver; @@ -691,12 +705,18 @@ pi_result piDeviceRetain(pi_device Device) { pi_result piDeviceRelease(pi_device Device) { assert(Device); - // TODO: OpenCL says root-device ref-count remains unchanged (1), // but when would we free the device's data? if (--(Device->RefCount) == 0) { // Destroy the command list used for initializations ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); + // invalidate piDeviceCache entry + for (uint32_t i = 0; i < piDevicesCache.size(); i++) { + if (Device == piDevicesCache[i]) { + piDevicesCache.erase(piDevicesCache.begin() + i); + break; + } + } delete Device; } diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index dec3ebe975d24..897f40d9dcf5c 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -287,6 +287,12 @@ pi_native_handle platform_impl::getNative() const { return Handle; } +platform_impl::~platform_impl() { + if (!MHostPlatform && MPlugin->getBackend() == backend::level0) { + MPlugin->call(MPlatform); + } +} + template typename info::param_traits::return_type platform_impl::get_info() const { diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index 16cf7ac908212..b437ba65430b9 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -42,7 +42,7 @@ class platform_impl { std::shared_ptr APlugin) : MPlatform(APlatform), MPlugin(APlugin) {} - ~platform_impl() = default; + ~platform_impl(); /// Checks if this platform supports extension. /// diff --git a/sycl/test/abi/pi_level0_symbol_check.dump b/sycl/test/abi/pi_level0_symbol_check.dump index 8967179926f78..a6f9a1aa0b50e 100644 --- a/sycl/test/abi/pi_level0_symbol_check.dump +++ b/sycl/test/abi/pi_level0_symbol_check.dump @@ -102,4 +102,4 @@ piEnqueueMemBufferWriteRect piextUSMHostAlloc piextPlatformGetNativeHandle piextPlatformCreateWithNativeHandle - +piPlatformRelease From 1cc004d554af9a8ab45dbf2cb05e685b6c1a7eff Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Tue, 28 Jul 2020 21:46:51 -0700 Subject: [PATCH 03/21] fixed platform cache lifesycle Signed-off-by: Byoungro So --- sycl/include/CL/sycl/detail/pi.def | 1 - sycl/include/CL/sycl/detail/pi.h | 2 -- sycl/plugins/level_zero/pi_level0.cpp | 18 +++--------------- sycl/source/detail/platform_impl.cpp | 6 ------ sycl/source/detail/platform_impl.hpp | 2 +- sycl/test/abi/pi_level0_symbol_check.dump | 1 - 6 files changed, 4 insertions(+), 26 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def index 2f8eb06591549..72332dd65113c 100644 --- a/sycl/include/CL/sycl/detail/pi.def +++ b/sycl/include/CL/sycl/detail/pi.def @@ -19,7 +19,6 @@ _PI_API(piPlatformsGet) _PI_API(piPlatformGetInfo) _PI_API(piextPlatformGetNativeHandle) _PI_API(piextPlatformCreateWithNativeHandle) -_PI_API(piPlatformRelease) // only used for L0 now // Device _PI_API(piDevicesGet) _PI_API(piDeviceGetInfo) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index f6e9092e8aea3..ad6410fd46def 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -808,8 +808,6 @@ __SYCL_EXPORT pi_result piPlatformGetInfo(pi_platform platform, void *param_value, size_t *param_value_size_ret); -__SYCL_EXPORT pi_result piPlatformRelease(pi_platform platform); // only for L0 - /// Gets the native handle of a PI platform object. /// /// \param platform is the PI platform to get the native handle of. diff --git a/sycl/plugins/level_zero/pi_level0.cpp b/sycl/plugins/level_zero/pi_level0.cpp index a7f279b88fe31..4464ea45b13d6 100644 --- a/sycl/plugins/level_zero/pi_level0.cpp +++ b/sycl/plugins/level_zero/pi_level0.cpp @@ -497,6 +497,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, } try { + // TODO: figure out how/when to release this memory *Platforms = new _pi_platform(ZeDriver); // Cache driver properties @@ -607,25 +608,11 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, return PI_SUCCESS; } -pi_result piPlatformRelease(pi_platform Platform) { - assert(Platform); - - // invalidate piDeviceCache entry - for (uint32_t i = 0; i < piPlatformsCache.size(); i++) { - if (Platform == piPlatformsCache[i]) { - piPlatformsCache.erase(piPlatformsCache.begin() + i); - break; - } - } - - return PI_SUCCESS; -} - pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 NumEntries, pi_device *Devices, pi_uint32 *NumDevices) { - assert(Platform); + assert(Platform); ze_driver_handle_t ZeDriver = Platform->ZeDriver; // Get number of devices supporting L0 @@ -705,6 +692,7 @@ pi_result piDeviceRetain(pi_device Device) { pi_result piDeviceRelease(pi_device Device) { assert(Device); + // TODO: OpenCL says root-device ref-count remains unchanged (1), // but when would we free the device's data? if (--(Device->RefCount) == 0) { diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index 897f40d9dcf5c..dec3ebe975d24 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -287,12 +287,6 @@ pi_native_handle platform_impl::getNative() const { return Handle; } -platform_impl::~platform_impl() { - if (!MHostPlatform && MPlugin->getBackend() == backend::level0) { - MPlugin->call(MPlatform); - } -} - template typename info::param_traits::return_type platform_impl::get_info() const { diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index b437ba65430b9..16cf7ac908212 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -42,7 +42,7 @@ class platform_impl { std::shared_ptr APlugin) : MPlatform(APlatform), MPlugin(APlugin) {} - ~platform_impl(); + ~platform_impl() = default; /// Checks if this platform supports extension. /// diff --git a/sycl/test/abi/pi_level0_symbol_check.dump b/sycl/test/abi/pi_level0_symbol_check.dump index a6f9a1aa0b50e..74051ec475c3b 100644 --- a/sycl/test/abi/pi_level0_symbol_check.dump +++ b/sycl/test/abi/pi_level0_symbol_check.dump @@ -102,4 +102,3 @@ piEnqueueMemBufferWriteRect piextUSMHostAlloc piextPlatformGetNativeHandle piextPlatformCreateWithNativeHandle -piPlatformRelease From 3491603da7e8de0ffa9336cbc689f67ed79d6a4a Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Thu, 30 Jul 2020 19:19:00 -0700 Subject: [PATCH 04/21] revert Signed-off-by: Byoungro So --- sycl/test/abi/pi_level0_symbol_check.dump | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/test/abi/pi_level0_symbol_check.dump b/sycl/test/abi/pi_level0_symbol_check.dump index 74051ec475c3b..8967179926f78 100644 --- a/sycl/test/abi/pi_level0_symbol_check.dump +++ b/sycl/test/abi/pi_level0_symbol_check.dump @@ -102,3 +102,4 @@ piEnqueueMemBufferWriteRect piextUSMHostAlloc piextPlatformGetNativeHandle piextPlatformCreateWithNativeHandle + From fcb58ce9554a8cb23a74ff08537095a8b86498c1 Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Mon, 20 Jul 2020 16:22:01 -0700 Subject: [PATCH 05/21] [SYCL] Implemented Device and Platform cache in L0 Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 42 ++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index a77719a2220ae..fc727970a76dc 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -448,6 +448,8 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, return PI_INVALID_VALUE; } + static std::vector piPlatformsCache; + // This is a good time to initialize Level Zero. // TODO: We can still safely recover if something goes wrong during the init. // Implement handling segfault using sigaction. @@ -470,6 +472,15 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, // Level Zero does not have concept of Platforms, but Level Zero driver is the // closest match. if (Platforms && NumEntries > 0) { + if (piPlatformsCache.size() > 0) { + // return the saved pi_platform from the cache + Platforms[0] = piPlatformsCache[0]; + if (NumPlatforms) { + *NumPlatforms = 1; + } + return PI_SUCCESS; + } + uint32_t ZeDriverCount = 0; ZE_CALL(zeDriverGet(&ZeDriverCount, nullptr)); if (ZeDriverCount == 0) { @@ -507,6 +518,8 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, Platforms[0]->ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + std::string(".") + std::to_string(ZE_MINOR_VERSION(ZeApiVersion)); + // save a copy in the cache for future uses + piPlatformsCache.push_back(Platforms[0]); } catch (const std::bad_alloc &) { return PI_OUT_OF_HOST_MEMORY; } catch (...) { @@ -596,13 +609,27 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 *NumDevices) { assert(Platform); + // save discovered pi_devices for quick return + static std::vector piDevicesCache; + ze_driver_handle_t ZeDriver = Platform->ZeDriver; // Get number of devices supporting Level Zero uint32_t ZeDeviceCount = 0; const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU); const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT); - ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, nullptr)); + + if (piDevicesCache.size() != 0) { + for (uint32_t i = 0; i < piDevicesCache.size(); i++) { + if (piDevicesCache[i]->Platform == Platform) { + ZeDeviceCount++; + } + } + } + if (ZeDeviceCount == 0) { + ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, nullptr)); + } + if (ZeDeviceCount == 0 || !(AskingForGPU || AskingForDefault)) { if (NumDevices) *NumDevices = 0; @@ -618,6 +645,17 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, return PI_SUCCESS; } + // if devices are already captured in cache, return them from the cache. + uint32_t count = 0; + for (uint32_t i = 0; i < piDevicesCache.size(); i++) { + if (piDevicesCache[i]->Platform == Platform) { + Devices[count++] = piDevicesCache[i]; + } + } + if (count == ZeDeviceCount) { + return PI_SUCCESS; + } + try { std::vector ZeDevices(ZeDeviceCount); ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, ZeDevices.data())); @@ -629,6 +667,8 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, if (Result != PI_SUCCESS) { return Result; } + // save a copy in the cache for future uses. + piDevicesCache.push_back(Devices[I]); } } } catch (const std::bad_alloc &) { From de69459d9330bb9aebfec67c3a6baf29fbc48e7a Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Sat, 25 Jul 2020 14:31:37 -0700 Subject: [PATCH 06/21] [SYCL] Implement Device and Platform cache in L0 added piPlatformRelease to invalidate platforms when platform_impl is deallocated. Signed-off-by: Byoungro So --- sycl/include/CL/sycl/detail/pi.def | 1 + sycl/include/CL/sycl/detail/pi.h | 2 + sycl/plugins/level_zero/pi_level_zero.cpp | 54 ++++++++++++++----- sycl/source/detail/platform_impl.cpp | 6 +++ sycl/source/detail/platform_impl.hpp | 2 +- sycl/test/abi/pi_level_zero_symbol_check.dump | 6 +++ 6 files changed, 56 insertions(+), 15 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def index 72332dd65113c..2f8eb06591549 100644 --- a/sycl/include/CL/sycl/detail/pi.def +++ b/sycl/include/CL/sycl/detail/pi.def @@ -19,6 +19,7 @@ _PI_API(piPlatformsGet) _PI_API(piPlatformGetInfo) _PI_API(piextPlatformGetNativeHandle) _PI_API(piextPlatformCreateWithNativeHandle) +_PI_API(piPlatformRelease) // only used for L0 now // Device _PI_API(piDevicesGet) _PI_API(piDeviceGetInfo) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index ad6410fd46def..f6e9092e8aea3 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -808,6 +808,8 @@ __SYCL_EXPORT pi_result piPlatformGetInfo(pi_platform platform, void *param_value, size_t *param_value_size_ret); +__SYCL_EXPORT pi_result piPlatformRelease(pi_platform platform); // only for L0 + /// Gets the native handle of a PI platform object. /// /// \param platform is the PI platform to get the native handle of. diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index fc727970a76dc..108aaf33531ac 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -139,6 +139,10 @@ class ReturnHelper { size_t *param_value_size_ret; }; +// save discovered pi_devices & pi_platforms for quick return +static std::vector piDevicesCache; +static std::vector piPlatformsCache; + } // anonymous namespace // TODO:: In the following 4 methods we may want to distinguish read access vs. @@ -448,9 +452,13 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, return PI_INVALID_VALUE; } +<<<<<<< HEAD:sycl/plugins/level_zero/pi_level_zero.cpp static std::vector piPlatformsCache; // This is a good time to initialize Level Zero. +======= + // This is a good time to initialize L0. +>>>>>>> [SYCL] Implement Device and Platform cache in L0:sycl/plugins/level_zero/pi_level0.cpp // TODO: We can still safely recover if something goes wrong during the init. // Implement handling segfault using sigaction. // TODO: We should not call zeInit multiples times ever, so @@ -472,15 +480,6 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, // Level Zero does not have concept of Platforms, but Level Zero driver is the // closest match. if (Platforms && NumEntries > 0) { - if (piPlatformsCache.size() > 0) { - // return the saved pi_platform from the cache - Platforms[0] = piPlatformsCache[0]; - if (NumPlatforms) { - *NumPlatforms = 1; - } - return PI_SUCCESS; - } - uint32_t ZeDriverCount = 0; ZE_CALL(zeDriverGet(&ZeDriverCount, nullptr)); if (ZeDriverCount == 0) { @@ -493,8 +492,17 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, assert(ZeDriverCount == 1); ZE_CALL(zeDriverGet(&ZeDriverCount, &ZeDriver)); + for (uint32_t i = 0; i < piPlatformsCache.size(); i++) { + if (piPlatformsCache[i]->ZeDriver == ZeDriver) { + Platforms[0] = piPlatformsCache[i]; + if (NumPlatforms) + *NumPlatforms = 1; + + return PI_SUCCESS; + } + } + try { - // TODO: figure out how/when to release this memory *Platforms = new _pi_platform(ZeDriver); // Cache driver properties @@ -518,6 +526,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, Platforms[0]->ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + std::string(".") + std::to_string(ZE_MINOR_VERSION(ZeApiVersion)); + // save a copy in the cache for future uses piPlatformsCache.push_back(Platforms[0]); } catch (const std::bad_alloc &) { @@ -604,13 +613,24 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, return PI_SUCCESS; } +pi_result piPlatformRelease(pi_platform Platform) { + assert(Platform); + + // invalidate piDeviceCache entry + for (uint32_t i = 0; i < piPlatformsCache.size(); i++) { + if (Platform == piPlatformsCache[i]) { + piPlatformsCache.erase(piPlatformsCache.begin() + i); + break; + } + } + + return PI_SUCCESS; +} + pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 NumEntries, pi_device *Devices, pi_uint32 *NumDevices) { - assert(Platform); - // save discovered pi_devices for quick return - static std::vector piDevicesCache; ze_driver_handle_t ZeDriver = Platform->ZeDriver; @@ -691,12 +711,18 @@ pi_result piDeviceRetain(pi_device Device) { pi_result piDeviceRelease(pi_device Device) { assert(Device); - // TODO: OpenCL says root-device ref-count remains unchanged (1), // but when would we free the device's data? if (--(Device->RefCount) == 0) { // Destroy the command list used for initializations ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); + // invalidate piDeviceCache entry + for (uint32_t i = 0; i < piDevicesCache.size(); i++) { + if (Device == piDevicesCache[i]) { + piDevicesCache.erase(piDevicesCache.begin() + i); + break; + } + } delete Device; } diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index dec3ebe975d24..897f40d9dcf5c 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -287,6 +287,12 @@ pi_native_handle platform_impl::getNative() const { return Handle; } +platform_impl::~platform_impl() { + if (!MHostPlatform && MPlugin->getBackend() == backend::level0) { + MPlugin->call(MPlatform); + } +} + template typename info::param_traits::return_type platform_impl::get_info() const { diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index 16cf7ac908212..b437ba65430b9 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -42,7 +42,7 @@ class platform_impl { std::shared_ptr APlugin) : MPlatform(APlatform), MPlugin(APlugin) {} - ~platform_impl() = default; + ~platform_impl(); /// Checks if this platform supports extension. /// diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 7c8c74b8cc77d..15a2a75def321 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -106,4 +106,10 @@ piextUSMEnqueuePrefetch piextUSMFree piextUSMGetMemAllocInfo piextUSMHostAlloc +<<<<<<< HEAD:sycl/test/abi/pi_level_zero_symbol_check.dump piextUSMSharedAlloc +======= +piextPlatformGetNativeHandle +piextPlatformCreateWithNativeHandle +piPlatformRelease +>>>>>>> [SYCL] Implement Device and Platform cache in L0:sycl/test/abi/pi_level0_symbol_check.dump From 9fd5d0b9cf80bdb147b29ae466688c351f236969 Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Tue, 28 Jul 2020 21:46:51 -0700 Subject: [PATCH 07/21] fixed platform cache lifesycle Signed-off-by: Byoungro So --- sycl/include/CL/sycl/detail/pi.def | 1 - sycl/include/CL/sycl/detail/pi.h | 2 -- sycl/plugins/level_zero/pi_level_zero.cpp | 18 +++--------------- sycl/source/detail/platform_impl.cpp | 6 ------ sycl/source/detail/platform_impl.hpp | 2 +- sycl/test/abi/pi_level_zero_symbol_check.dump | 1 - 6 files changed, 4 insertions(+), 26 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def index 2f8eb06591549..72332dd65113c 100644 --- a/sycl/include/CL/sycl/detail/pi.def +++ b/sycl/include/CL/sycl/detail/pi.def @@ -19,7 +19,6 @@ _PI_API(piPlatformsGet) _PI_API(piPlatformGetInfo) _PI_API(piextPlatformGetNativeHandle) _PI_API(piextPlatformCreateWithNativeHandle) -_PI_API(piPlatformRelease) // only used for L0 now // Device _PI_API(piDevicesGet) _PI_API(piDeviceGetInfo) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index f6e9092e8aea3..ad6410fd46def 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -808,8 +808,6 @@ __SYCL_EXPORT pi_result piPlatformGetInfo(pi_platform platform, void *param_value, size_t *param_value_size_ret); -__SYCL_EXPORT pi_result piPlatformRelease(pi_platform platform); // only for L0 - /// Gets the native handle of a PI platform object. /// /// \param platform is the PI platform to get the native handle of. diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 108aaf33531ac..be2b09bb4300e 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -503,6 +503,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, } try { + // TODO: figure out how/when to release this memory *Platforms = new _pi_platform(ZeDriver); // Cache driver properties @@ -613,25 +614,11 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, return PI_SUCCESS; } -pi_result piPlatformRelease(pi_platform Platform) { - assert(Platform); - - // invalidate piDeviceCache entry - for (uint32_t i = 0; i < piPlatformsCache.size(); i++) { - if (Platform == piPlatformsCache[i]) { - piPlatformsCache.erase(piPlatformsCache.begin() + i); - break; - } - } - - return PI_SUCCESS; -} - pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 NumEntries, pi_device *Devices, pi_uint32 *NumDevices) { - assert(Platform); + assert(Platform); ze_driver_handle_t ZeDriver = Platform->ZeDriver; // Get number of devices supporting Level Zero @@ -711,6 +698,7 @@ pi_result piDeviceRetain(pi_device Device) { pi_result piDeviceRelease(pi_device Device) { assert(Device); + // TODO: OpenCL says root-device ref-count remains unchanged (1), // but when would we free the device's data? if (--(Device->RefCount) == 0) { diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index 897f40d9dcf5c..dec3ebe975d24 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -287,12 +287,6 @@ pi_native_handle platform_impl::getNative() const { return Handle; } -platform_impl::~platform_impl() { - if (!MHostPlatform && MPlugin->getBackend() == backend::level0) { - MPlugin->call(MPlatform); - } -} - template typename info::param_traits::return_type platform_impl::get_info() const { diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index b437ba65430b9..16cf7ac908212 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -42,7 +42,7 @@ class platform_impl { std::shared_ptr APlugin) : MPlatform(APlatform), MPlugin(APlugin) {} - ~platform_impl(); + ~platform_impl() = default; /// Checks if this platform supports extension. /// diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 15a2a75def321..bee01f41c6632 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -111,5 +111,4 @@ piextUSMSharedAlloc ======= piextPlatformGetNativeHandle piextPlatformCreateWithNativeHandle -piPlatformRelease >>>>>>> [SYCL] Implement Device and Platform cache in L0:sycl/test/abi/pi_level0_symbol_check.dump From ced7e6161ce6dd503e0b595a52f706f561025699 Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Thu, 30 Jul 2020 19:19:00 -0700 Subject: [PATCH 08/21] revert Signed-off-by: Byoungro So --- sycl/test/abi/pi_level_zero_symbol_check.dump | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index bee01f41c6632..a80a8a2c9c046 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -106,9 +106,7 @@ piextUSMEnqueuePrefetch piextUSMFree piextUSMGetMemAllocInfo piextUSMHostAlloc -<<<<<<< HEAD:sycl/test/abi/pi_level_zero_symbol_check.dump piextUSMSharedAlloc -======= piextPlatformGetNativeHandle piextPlatformCreateWithNativeHandle ->>>>>>> [SYCL] Implement Device and Platform cache in L0:sycl/test/abi/pi_level0_symbol_check.dump + From c87a32e5719b3be8a17cf6d35375cd1c4278acee Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Thu, 30 Jul 2020 21:19:18 -0700 Subject: [PATCH 09/21] clean up merge conflict Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index be2b09bb4300e..3e2cd4590e783 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -452,13 +452,9 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, return PI_INVALID_VALUE; } -<<<<<<< HEAD:sycl/plugins/level_zero/pi_level_zero.cpp static std::vector piPlatformsCache; // This is a good time to initialize Level Zero. -======= - // This is a good time to initialize L0. ->>>>>>> [SYCL] Implement Device and Platform cache in L0:sycl/plugins/level_zero/pi_level0.cpp // TODO: We can still safely recover if something goes wrong during the init. // Implement handling segfault using sigaction. // TODO: We should not call zeInit multiples times ever, so From d386e8bf6249d9e9830f6ac9a0c291b325dedef0 Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Thu, 30 Jul 2020 21:27:39 -0700 Subject: [PATCH 10/21] revert merge conflict Signed-off-by: Byoungro So --- sycl/test/abi/pi_level_zero_symbol_check.dump | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index a80a8a2c9c046..7c8c74b8cc77d 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -107,6 +107,3 @@ piextUSMFree piextUSMGetMemAllocInfo piextUSMHostAlloc piextUSMSharedAlloc -piextPlatformGetNativeHandle -piextPlatformCreateWithNativeHandle - From 4dd593c2a903d2a619c42023e192574d64ab036e Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Tue, 4 Aug 2020 21:03:11 -0700 Subject: [PATCH 11/21] changed to range based loop Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 3e2cd4590e783..ee8a27fbd5ae2 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -141,7 +141,6 @@ class ReturnHelper { // save discovered pi_devices & pi_platforms for quick return static std::vector piDevicesCache; -static std::vector piPlatformsCache; } // anonymous namespace @@ -488,9 +487,10 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, assert(ZeDriverCount == 1); ZE_CALL(zeDriverGet(&ZeDriverCount, &ZeDriver)); - for (uint32_t i = 0; i < piPlatformsCache.size(); i++) { - if (piPlatformsCache[i]->ZeDriver == ZeDriver) { - Platforms[0] = piPlatformsCache[i]; + for (const pi_platform CachedPlatform : piPlatformsCache) { + if (CachedPlatform->ZeDriver == ZeDriver) { + Platforms[0] = CachedPlatform; + // if the caller sent a valid NumPlatforms pointer, set it here if (NumPlatforms) *NumPlatforms = 1; @@ -623,8 +623,8 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT); if (piDevicesCache.size() != 0) { - for (uint32_t i = 0; i < piDevicesCache.size(); i++) { - if (piDevicesCache[i]->Platform == Platform) { + for (const pi_device CachedDevice : piDevicesCache) { + if (CachedDevice->Platform == Platform) { ZeDeviceCount++; } } @@ -650,9 +650,9 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, // if devices are already captured in cache, return them from the cache. uint32_t count = 0; - for (uint32_t i = 0; i < piDevicesCache.size(); i++) { - if (piDevicesCache[i]->Platform == Platform) { - Devices[count++] = piDevicesCache[i]; + for (const pi_device CachedDevice : piDevicesCache) { + if (CachedDevice->Platform == Platform) { + Devices[count++] = CachedDevice; } } if (count == ZeDeviceCount) { From a34e6483dc603458b0453890fc56e831dde701b0 Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Tue, 4 Aug 2020 21:07:06 -0700 Subject: [PATCH 12/21] fixed clang-format Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 1f9bbdb3d33b9..1f454984d2da1 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -504,7 +504,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, for (const pi_platform CachedPlatform : piPlatformsCache) { if (CachedPlatform->ZeDriver == ZeDriver) { Platforms[0] = CachedPlatform; - // if the caller sent a valid NumPlatforms pointer, set it here + // if the caller sent a valid NumPlatforms pointer, set it here if (NumPlatforms) *NumPlatforms = 1; From 5d43bf5258768f98ada8eb7465a257f26ad09dff Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Wed, 5 Aug 2020 15:40:05 -0700 Subject: [PATCH 13/21] Moved PiDevicesCache into _pi_platform Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 31 ++++++----------------- sycl/plugins/level_zero/pi_level_zero.hpp | 4 +++ 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 192d722d45f38..1412f22b1690c 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include @@ -153,9 +152,6 @@ class ReturnHelper { size_t *param_value_size_ret; }; -// save discovered pi_devices & pi_platforms for quick return -static std::vector piDevicesCache; - } // anonymous namespace // TODO:: In the following 4 methods we may want to distinguish read access vs. @@ -632,17 +628,10 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, ze_driver_handle_t ZeDriver = Platform->ZeDriver; // Get number of devices supporting Level Zero - uint32_t ZeDeviceCount = 0; + uint32_t ZeDeviceCount = Platform->PiDevicesCache.size(); const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU); const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT); - if (piDevicesCache.size() != 0) { - for (const pi_device CachedDevice : piDevicesCache) { - if (CachedDevice->Platform == Platform) { - ZeDeviceCount++; - } - } - } if (ZeDeviceCount == 0) { ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, nullptr)); } @@ -663,13 +652,8 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, } // if devices are already captured in cache, return them from the cache. - uint32_t count = 0; - for (const pi_device CachedDevice : piDevicesCache) { - if (CachedDevice->Platform == Platform) { - Devices[count++] = CachedDevice; - } - } - if (count == ZeDeviceCount) { + for (const pi_device CachedDevice : Platform->PiDevicesCache) { + *(Devices++) = CachedDevice; return PI_SUCCESS; } @@ -685,7 +669,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, return Result; } // save a copy in the cache for future uses. - piDevicesCache.push_back(Devices[I]); + Platform->PiDevicesCache.push_back(Devices[I]); } } } catch (const std::bad_alloc &) { @@ -715,9 +699,10 @@ pi_result piDeviceRelease(pi_device Device) { // Destroy the command list used for initializations ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); // invalidate piDeviceCache entry - for (uint32_t i = 0; i < piDevicesCache.size(); i++) { - if (Device == piDevicesCache[i]) { - piDevicesCache.erase(piDevicesCache.begin() + i); + pi_platform Platform = Device->Platform; + for (uint32_t i = 0; i < Platform->PiDevicesCache.size(); i++) { + if (Device == Platform->PiDevicesCache[i]) { + Platform->PiDevicesCache.erase(Platform->PiDevicesCache.begin() + i); break; } } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index a3db143a55a48..7748da53de273 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -69,6 +70,9 @@ struct _pi_platform { // Cache versions info from zeDriverGetProperties. std::string ZeDriverVersion; std::string ZeDriverApiVersion; + + // Cache pi_devices for reuse + std::vector PiDevicesCache; }; struct _pi_device : _pi_object { From 550374ebf7e6a6df56906b7b60a044926edbd488 Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Wed, 5 Aug 2020 17:02:44 -0700 Subject: [PATCH 14/21] take out return error Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 1412f22b1690c..6abc7be641561 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -654,6 +654,8 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, // if devices are already captured in cache, return them from the cache. for (const pi_device CachedDevice : Platform->PiDevicesCache) { *(Devices++) = CachedDevice; + } + if (!Platform->PiDevicesCache.empty()) { return PI_SUCCESS; } From 8007bc54a301ad5c8782e269d0c51a8cd93a898e Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Wed, 5 Aug 2020 23:55:02 -0700 Subject: [PATCH 15/21] moved device cache into _pi_platform struct also, reverted device_imple destructor change because it caused bugs. Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 7 ++++--- sycl/plugins/level_zero/pi_level_zero.hpp | 1 + sycl/source/detail/device_impl.cpp | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 6abc7be641561..1ce053e11483a 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -625,6 +625,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 *NumDevices) { assert(Platform); + std::lock_guard Lock(Platform->DeviceCacheMutex); ze_driver_handle_t ZeDriver = Platform->ZeDriver; // Get number of devices supporting Level Zero @@ -653,7 +654,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, // if devices are already captured in cache, return them from the cache. for (const pi_device CachedDevice : Platform->PiDevicesCache) { - *(Devices++) = CachedDevice; + *Devices++ = CachedDevice; } if (!Platform->PiDevicesCache.empty()) { return PI_SUCCESS; @@ -694,14 +695,14 @@ pi_result piDeviceRetain(pi_device Device) { pi_result piDeviceRelease(pi_device Device) { assert(Device); - + pi_platform Platform = Device->Platform; + std::lock_guard Lock(Platform->DeviceCacheMutex); // TODO: OpenCL says root-device ref-count remains unchanged (1), // but when would we free the device's data? if (--(Device->RefCount) == 0) { // Destroy the command list used for initializations ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); // invalidate piDeviceCache entry - pi_platform Platform = Device->Platform; for (uint32_t i = 0; i < Platform->PiDevicesCache.size(); i++) { if (Device == Platform->PiDevicesCache[i]) { Platform->PiDevicesCache.erase(Platform->PiDevicesCache.begin() + i); diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 7748da53de273..47c51fca30b8c 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -73,6 +73,7 @@ struct _pi_platform { // Cache pi_devices for reuse std::vector PiDevicesCache; + std::mutex DeviceCacheMutex; }; struct _pi_device : _pi_object { diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index ba7b21c91d87d..207cb1dc9e800 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -77,7 +77,7 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle, } device_impl::~device_impl() { - if (!MIsHostDevice) { + if (!MIsRootDevice && !MIsHostDevice) { // TODO catch an exception and put it to list of asynchronous exceptions const detail::plugin &Plugin = getPlugin(); RT::PiResult Err = Plugin.call_nocheck(MDevice); From 20a07ad67b8beb326b39ec8613bee6fa0f47ef46 Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Thu, 6 Aug 2020 13:10:01 -0700 Subject: [PATCH 16/21] added a flag to invalidate the cache. Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 35 ++++++++++++----------- sycl/plugins/level_zero/pi_level_zero.hpp | 2 ++ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 1ce053e11483a..aaab6305d6562 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -461,7 +461,8 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, return PI_INVALID_VALUE; } - static std::vector piPlatformsCache; + static std::vector PiPlatformsCache; + static std::mutex PlatformsCacheMutex; // This is a good time to initialize Level Zero. // TODO: We can still safely recover if something goes wrong during the init. @@ -497,7 +498,8 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, assert(ZeDriverCount == 1); ZE_CALL(zeDriverGet(&ZeDriverCount, &ZeDriver)); - for (const pi_platform CachedPlatform : piPlatformsCache) { + std::lock_guard Lock(PlatformsCacheMutex); + for (const pi_platform CachedPlatform : PiPlatformsCache) { if (CachedPlatform->ZeDriver == ZeDriver) { Platforms[0] = CachedPlatform; // if the caller sent a valid NumPlatforms pointer, set it here @@ -535,7 +537,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, std::to_string(ZE_MINOR_VERSION(ZeApiVersion)); // save a copy in the cache for future uses - piPlatformsCache.push_back(Platforms[0]); + PiPlatformsCache.push_back(Platforms[0]); } catch (const std::bad_alloc &) { return PI_OUT_OF_HOST_MEMORY; } catch (...) { @@ -625,11 +627,19 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 *NumDevices) { assert(Platform); - std::lock_guard Lock(Platform->DeviceCacheMutex); ze_driver_handle_t ZeDriver = Platform->ZeDriver; // Get number of devices supporting Level Zero - uint32_t ZeDeviceCount = Platform->PiDevicesCache.size(); + uint32_t ZeDeviceCount = 0; + std::lock_guard Lock(Platform->DeviceCacheMutex); + if (Platform->CacheInvalidated) { + for (const pi_device CachedDevice : Platform->PiDevicesCache) { + CachedDevice->initialize(); + } + Platform->CacheInvalidated = false; + } + ZeDeviceCount = Platform->PiDevicesCache.size(); + const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU); const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT); @@ -695,21 +705,12 @@ pi_result piDeviceRetain(pi_device Device) { pi_result piDeviceRelease(pi_device Device) { assert(Device); - pi_platform Platform = Device->Platform; - std::lock_guard Lock(Platform->DeviceCacheMutex); // TODO: OpenCL says root-device ref-count remains unchanged (1), // but when would we free the device's data? if (--(Device->RefCount) == 0) { - // Destroy the command list used for initializations - ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); - // invalidate piDeviceCache entry - for (uint32_t i = 0; i < Platform->PiDevicesCache.size(); i++) { - if (Device == Platform->PiDevicesCache[i]) { - Platform->PiDevicesCache.erase(Platform->PiDevicesCache.begin() + i); - break; - } - } - delete Device; + pi_platform Platform = Device->Platform; + std::lock_guard Lock(Platform->DeviceCacheMutex); + Platform->CacheInvalidated = true; } return PI_SUCCESS; diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 47c51fca30b8c..f477320593742 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -74,6 +74,8 @@ struct _pi_platform { // Cache pi_devices for reuse std::vector PiDevicesCache; std::mutex DeviceCacheMutex; + // Flag to indicate PiDevicesCache is invalidated + bool CacheInvalidated = false; }; struct _pi_device : _pi_object { From fabf4f2e0157ad77c81c45fd5d3fb0dd77c13a31 Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Fri, 7 Aug 2020 13:04:44 -0700 Subject: [PATCH 17/21] removed special handling for root devices Signed-off-by: Byoungro So --- sycl/source/detail/device_impl.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 207cb1dc9e800..b2d9fa201923b 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -58,7 +58,7 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle, nullptr); MIsRootDevice = (nullptr == parent); - if (!MIsRootDevice && !InteroperabilityConstructor) { + if (!InteroperabilityConstructor) { // TODO catch an exception and put it to list of asynchronous exceptions // Interoperability Constructor already calls DeviceRetain in // piextDeviceFromNative. @@ -77,7 +77,7 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle, } device_impl::~device_impl() { - if (!MIsRootDevice && !MIsHostDevice) { + if (!MIsHostDevice) { // TODO catch an exception and put it to list of asynchronous exceptions const detail::plugin &Plugin = getPlugin(); RT::PiResult Err = Plugin.call_nocheck(MDevice); @@ -98,10 +98,9 @@ cl_device_id device_impl::get() const { PI_INVALID_DEVICE); const detail::plugin &Plugin = getPlugin(); - if (!MIsRootDevice) { - // TODO catch an exception and put it to list of asynchronous exceptions - Plugin.call(MDevice); - } + + // TODO catch an exception and put it to list of asynchronous exceptions + Plugin.call(MDevice); return pi::cast(getNative()); } From b04b1c0bf21f27917e00b1eecd3151cbddbb65e0 Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Mon, 10 Aug 2020 10:20:41 -0700 Subject: [PATCH 18/21] renamed mutexes to match with cache name Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 8 ++++---- sycl/plugins/level_zero/pi_level_zero.hpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index aaab6305d6562..5fd142725d277 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -462,7 +462,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, } static std::vector PiPlatformsCache; - static std::mutex PlatformsCacheMutex; + static std::mutex PiPlatformsCacheMutex; // This is a good time to initialize Level Zero. // TODO: We can still safely recover if something goes wrong during the init. @@ -498,7 +498,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, assert(ZeDriverCount == 1); ZE_CALL(zeDriverGet(&ZeDriverCount, &ZeDriver)); - std::lock_guard Lock(PlatformsCacheMutex); + std::lock_guard Lock(PiPlatformsCacheMutex); for (const pi_platform CachedPlatform : PiPlatformsCache) { if (CachedPlatform->ZeDriver == ZeDriver) { Platforms[0] = CachedPlatform; @@ -631,7 +631,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, // Get number of devices supporting Level Zero uint32_t ZeDeviceCount = 0; - std::lock_guard Lock(Platform->DeviceCacheMutex); + std::lock_guard Lock(Platform->PiDevicesCacheMutex); if (Platform->CacheInvalidated) { for (const pi_device CachedDevice : Platform->PiDevicesCache) { CachedDevice->initialize(); @@ -709,7 +709,7 @@ pi_result piDeviceRelease(pi_device Device) { // but when would we free the device's data? if (--(Device->RefCount) == 0) { pi_platform Platform = Device->Platform; - std::lock_guard Lock(Platform->DeviceCacheMutex); + std::lock_guard Lock(Platform->PiDevicesCacheMutex); Platform->CacheInvalidated = true; } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index f477320593742..4bfef4268469a 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -73,7 +73,7 @@ struct _pi_platform { // Cache pi_devices for reuse std::vector PiDevicesCache; - std::mutex DeviceCacheMutex; + std::mutex PiDevicesCacheMutex; // Flag to indicate PiDevicesCache is invalidated bool CacheInvalidated = false; }; From e2db14d2b3148e263f2b448d037f34981a5931ef Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Mon, 10 Aug 2020 11:24:18 -0700 Subject: [PATCH 19/21] added more comments about cache invalidation logic Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 11 +++++++++++ sycl/plugins/level_zero/pi_level_zero.hpp | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 5fd142725d277..24da24fdeb18c 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -632,6 +632,9 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, // Get number of devices supporting Level Zero uint32_t ZeDeviceCount = 0; std::lock_guard Lock(Platform->PiDevicesCacheMutex); + // As soon as there was a call to piDeviceRelease(), the entire cache is + // invalidated by setting the flag CacheInvalidated. We just need to + // re-initialize cached pi_devices to reuse them. if (Platform->CacheInvalidated) { for (const pi_device CachedDevice : Platform->PiDevicesCache) { CachedDevice->initialize(); @@ -708,6 +711,14 @@ pi_result piDeviceRelease(pi_device Device) { // TODO: OpenCL says root-device ref-count remains unchanged (1), // but when would we free the device's data? if (--(Device->RefCount) == 0) { + // We invalidate the entire cache as soon as any device is released. + // The saved pi_devices in cache is still intact but flag CacheInvalided + // will not allow the entire cached devices to be reused without + // re-initializing them. + // TODO: This means the cached pi_device live until the program ends. + // If L0 RT does not do its own cleanup for Ze_Device_Handle upon tear-down, + // we need to figure out a way to call + // ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); pi_platform Platform = Device->Platform; std::lock_guard Lock(Platform->PiDevicesCacheMutex); Platform->CacheInvalidated = true; diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 4bfef4268469a..ee2da433c2186 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -75,6 +75,10 @@ struct _pi_platform { std::vector PiDevicesCache; std::mutex PiDevicesCacheMutex; // Flag to indicate PiDevicesCache is invalidated + // This flag is used in piDeviceRelease to invalidate the entire cache + // whenever there is a call to piDeviceRelease for any cached device. + // This flag is used in piDevicesGet to reuse the cache + // without expensive calls to L0 RT. bool CacheInvalidated = false; }; From 2e0dbc16c4e4feecc3b367a963eb56651d3844c9 Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Mon, 10 Aug 2020 12:25:17 -0700 Subject: [PATCH 20/21] removed invalidation logic Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 28 +++++------------------ sycl/plugins/level_zero/pi_level_zero.hpp | 6 ----- 2 files changed, 6 insertions(+), 28 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 24da24fdeb18c..81506acf42485 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -632,15 +632,6 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, // Get number of devices supporting Level Zero uint32_t ZeDeviceCount = 0; std::lock_guard Lock(Platform->PiDevicesCacheMutex); - // As soon as there was a call to piDeviceRelease(), the entire cache is - // invalidated by setting the flag CacheInvalidated. We just need to - // re-initialize cached pi_devices to reuse them. - if (Platform->CacheInvalidated) { - for (const pi_device CachedDevice : Platform->PiDevicesCache) { - CachedDevice->initialize(); - } - Platform->CacheInvalidated = false; - } ZeDeviceCount = Platform->PiDevicesCache.size(); const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU); @@ -710,19 +701,12 @@ pi_result piDeviceRelease(pi_device Device) { assert(Device); // TODO: OpenCL says root-device ref-count remains unchanged (1), // but when would we free the device's data? - if (--(Device->RefCount) == 0) { - // We invalidate the entire cache as soon as any device is released. - // The saved pi_devices in cache is still intact but flag CacheInvalided - // will not allow the entire cached devices to be reused without - // re-initializing them. - // TODO: This means the cached pi_device live until the program ends. - // If L0 RT does not do its own cleanup for Ze_Device_Handle upon tear-down, - // we need to figure out a way to call - // ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); - pi_platform Platform = Device->Platform; - std::lock_guard Lock(Platform->PiDevicesCacheMutex); - Platform->CacheInvalidated = true; - } + --(Device->RefCount); + // TODO: All cached pi_devices live until the program ends. + // If L0 RT does not do its own cleanup for Ze_Device_Handle upon tear-down, + // we need to figure out a way to call here + // ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); and, + // in piDevicesGet(), we need to call initialize for each cached pi_device. return PI_SUCCESS; } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index ee2da433c2186..fa274c7bd1d64 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -74,12 +74,6 @@ struct _pi_platform { // Cache pi_devices for reuse std::vector PiDevicesCache; std::mutex PiDevicesCacheMutex; - // Flag to indicate PiDevicesCache is invalidated - // This flag is used in piDeviceRelease to invalidate the entire cache - // whenever there is a call to piDeviceRelease for any cached device. - // This flag is used in piDevicesGet to reuse the cache - // without expensive calls to L0 RT. - bool CacheInvalidated = false; }; struct _pi_device : _pi_object { From e455095b25a571a0ae66ecba59cfc0a73040065c Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Tue, 11 Aug 2020 16:24:14 -0700 Subject: [PATCH 21/21] added comment and fixed ref count issue Signed-off-by: Byoungro So --- sycl/plugins/level_zero/pi_level_zero.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 81506acf42485..4820eddb8cf0c 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -461,6 +461,10 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, return PI_INVALID_VALUE; } + // Cache pi_platforms for reuse in the future + // It solves two problems; + // 1. sycl::device equality issue; we always return the same pi_device. + // 2. performance; we can save time by immediately return from cache. static std::vector PiPlatformsCache; static std::mutex PiPlatformsCacheMutex; @@ -689,7 +693,6 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_result piDeviceRetain(pi_device Device) { assert(Device); - // The root-device ref-count remains unchanged (always 1). if (Device->IsSubDevice) { ++(Device->RefCount); @@ -699,9 +702,11 @@ pi_result piDeviceRetain(pi_device Device) { pi_result piDeviceRelease(pi_device Device) { assert(Device); + assert(Device->RefCount > 0 && "Device is already released."); // TODO: OpenCL says root-device ref-count remains unchanged (1), // but when would we free the device's data? - --(Device->RefCount); + if (Device->IsSubDevice) + --(Device->RefCount); // TODO: All cached pi_devices live until the program ends. // If L0 RT does not do its own cleanup for Ze_Device_Handle upon tear-down, // we need to figure out a way to call here