diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index e697413ff38b6..4820eddb8cf0c 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include @@ -462,6 +461,13 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, return PI_INVALID_VALUE; } + // Cache pi_platforms for reuse in the future + // It solves two problems; + // 1. sycl::device equality issue; we always return the same pi_device. + // 2. performance; we can save time by immediately return from cache. + static std::vector PiPlatformsCache; + static std::mutex PiPlatformsCacheMutex; + // This is a good time to initialize Level Zero. // TODO: We can still safely recover if something goes wrong during the init. // Implement handling segfault using sigaction. @@ -496,6 +502,18 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, assert(ZeDriverCount == 1); ZE_CALL(zeDriverGet(&ZeDriverCount, &ZeDriver)); + std::lock_guard Lock(PiPlatformsCacheMutex); + for (const pi_platform CachedPlatform : PiPlatformsCache) { + if (CachedPlatform->ZeDriver == ZeDriver) { + Platforms[0] = CachedPlatform; + // if the caller sent a valid NumPlatforms pointer, set it here + if (NumPlatforms) + *NumPlatforms = 1; + + return PI_SUCCESS; + } + } + try { // TODO: figure out how/when to release this memory *Platforms = new _pi_platform(ZeDriver); @@ -521,6 +539,9 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, Platforms[0]->ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + std::string(".") + std::to_string(ZE_MINOR_VERSION(ZeApiVersion)); + + // save a copy in the cache for future uses + PiPlatformsCache.push_back(Platforms[0]); } catch (const std::bad_alloc &) { return PI_OUT_OF_HOST_MEMORY; } catch (...) { @@ -614,9 +635,16 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, // Get number of devices supporting Level Zero uint32_t ZeDeviceCount = 0; + std::lock_guard Lock(Platform->PiDevicesCacheMutex); + ZeDeviceCount = Platform->PiDevicesCache.size(); + const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU); const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT); - ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, nullptr)); + + if (ZeDeviceCount == 0) { + ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, nullptr)); + } + if (ZeDeviceCount == 0 || !(AskingForGPU || AskingForDefault)) { if (NumDevices) *NumDevices = 0; @@ -632,6 +660,14 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, return PI_SUCCESS; } + // if devices are already captured in cache, return them from the cache. + for (const pi_device CachedDevice : Platform->PiDevicesCache) { + *Devices++ = CachedDevice; + } + if (!Platform->PiDevicesCache.empty()) { + return PI_SUCCESS; + } + try { std::vector ZeDevices(ZeDeviceCount); ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, ZeDevices.data())); @@ -643,6 +679,8 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, if (Result != PI_SUCCESS) { return Result; } + // save a copy in the cache for future uses. + Platform->PiDevicesCache.push_back(Devices[I]); } } } catch (const std::bad_alloc &) { @@ -655,7 +693,6 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_result piDeviceRetain(pi_device Device) { assert(Device); - // The root-device ref-count remains unchanged (always 1). if (Device->IsSubDevice) { ++(Device->RefCount); @@ -665,14 +702,16 @@ pi_result piDeviceRetain(pi_device Device) { pi_result piDeviceRelease(pi_device Device) { assert(Device); - + assert(Device->RefCount > 0 && "Device is already released."); // TODO: OpenCL says root-device ref-count remains unchanged (1), // but when would we free the device's data? - if (--(Device->RefCount) == 0) { - // Destroy the command list used for initializations - ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); - delete Device; - } + if (Device->IsSubDevice) + --(Device->RefCount); + // TODO: All cached pi_devices live until the program ends. + // If L0 RT does not do its own cleanup for Ze_Device_Handle upon tear-down, + // we need to figure out a way to call here + // ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); and, + // in piDevicesGet(), we need to call initialize for each cached pi_device. return PI_SUCCESS; } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index a3db143a55a48..fa274c7bd1d64 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -69,6 +70,10 @@ struct _pi_platform { // Cache versions info from zeDriverGetProperties. std::string ZeDriverVersion; std::string ZeDriverApiVersion; + + // Cache pi_devices for reuse + std::vector PiDevicesCache; + std::mutex PiDevicesCacheMutex; }; struct _pi_device : _pi_object { diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index ba7b21c91d87d..b2d9fa201923b 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -58,7 +58,7 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle, nullptr); MIsRootDevice = (nullptr == parent); - if (!MIsRootDevice && !InteroperabilityConstructor) { + if (!InteroperabilityConstructor) { // TODO catch an exception and put it to list of asynchronous exceptions // Interoperability Constructor already calls DeviceRetain in // piextDeviceFromNative. @@ -98,10 +98,9 @@ cl_device_id device_impl::get() const { PI_INVALID_DEVICE); const detail::plugin &Plugin = getPlugin(); - if (!MIsRootDevice) { - // TODO catch an exception and put it to list of asynchronous exceptions - Plugin.call(MDevice); - } + + // TODO catch an exception and put it to list of asynchronous exceptions + Plugin.call(MDevice); return pi::cast(getNative()); }