Skip to content

[SYCL][L0] Implement pi_device and pi_platform cache #2227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Aug 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d4dffb4
[SYCL] Implemented Device and Platform cache in L0
bso-intel Jul 20, 2020
81b1bbd
[SYCL] Implement Device and Platform cache in L0
bso-intel Jul 25, 2020
1cc004d
fixed platform cache lifesycle
bso-intel Jul 29, 2020
3491603
revert
bso-intel Jul 31, 2020
fcb58ce
[SYCL] Implemented Device and Platform cache in L0
bso-intel Jul 20, 2020
de69459
[SYCL] Implement Device and Platform cache in L0
bso-intel Jul 25, 2020
9fd5d0b
fixed platform cache lifesycle
bso-intel Jul 29, 2020
ced7e61
revert
bso-intel Jul 31, 2020
c87a32e
clean up merge conflict
bso-intel Jul 31, 2020
3d81d27
Merge branch 'device-cache' of https://github.com/bso-intel/llvm into…
bso-intel Jul 31, 2020
d386e8b
revert merge conflict
bso-intel Jul 31, 2020
4dd593c
changed to range based loop
bso-intel Aug 5, 2020
ee31f0a
Merge remote-tracking branch 'upstream/sycl' into device-cache
bso-intel Aug 5, 2020
a34e648
fixed clang-format
bso-intel Aug 5, 2020
0a804b3
Merge remote-tracking branch 'upstream/sycl' into device-cache
bso-intel Aug 5, 2020
5d43bf5
Moved PiDevicesCache into _pi_platform
bso-intel Aug 5, 2020
550374e
take out return error
bso-intel Aug 6, 2020
493cd5a
Merge remote-tracking branch 'upstream/sycl' into device-cache
bso-intel Aug 6, 2020
8007bc5
moved device cache into _pi_platform struct
bso-intel Aug 6, 2020
20a07ad
added a flag to invalidate the cache.
bso-intel Aug 6, 2020
fabf4f2
removed special handling for root devices
bso-intel Aug 7, 2020
b04b1c0
renamed mutexes to match with cache name
bso-intel Aug 10, 2020
e2db14d
added more comments about cache invalidation logic
bso-intel Aug 10, 2020
2e0dbc1
removed invalidation logic
bso-intel Aug 10, 2020
e455095
added comment and fixed ref count issue
bso-intel Aug 11, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 48 additions & 9 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include <level_zero/zet_api.h>

Expand Down Expand Up @@ -462,6 +461,13 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
return PI_INVALID_VALUE;
}

// Cache pi_platforms for reuse in the future
// It solves two problems;
// 1. sycl::device equality issue; we always return the same pi_device.
// 2. performance; we can save time by immediately return from cache.
static std::vector<pi_platform> PiPlatformsCache;
static std::mutex PiPlatformsCacheMutex;

// This is a good time to initialize Level Zero.
// TODO: We can still safely recover if something goes wrong during the init.
// Implement handling segfault using sigaction.
Expand Down Expand Up @@ -496,6 +502,18 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
assert(ZeDriverCount == 1);
ZE_CALL(zeDriverGet(&ZeDriverCount, &ZeDriver));

std::lock_guard<std::mutex> Lock(PiPlatformsCacheMutex);
for (const pi_platform CachedPlatform : PiPlatformsCache) {
if (CachedPlatform->ZeDriver == ZeDriver) {
Platforms[0] = CachedPlatform;
// if the caller sent a valid NumPlatforms pointer, set it here
if (NumPlatforms)
*NumPlatforms = 1;

return PI_SUCCESS;
}
}

try {
// TODO: figure out how/when to release this memory
*Platforms = new _pi_platform(ZeDriver);
Expand All @@ -521,6 +539,9 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
Platforms[0]->ZeDriverApiVersion =
std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + std::string(".") +
std::to_string(ZE_MINOR_VERSION(ZeApiVersion));

// save a copy in the cache for future uses
PiPlatformsCache.push_back(Platforms[0]);
} catch (const std::bad_alloc &) {
return PI_OUT_OF_HOST_MEMORY;
} catch (...) {
Expand Down Expand Up @@ -614,9 +635,16 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,

// Get number of devices supporting Level Zero
uint32_t ZeDeviceCount = 0;
std::lock_guard<std::mutex> Lock(Platform->PiDevicesCacheMutex);
ZeDeviceCount = Platform->PiDevicesCache.size();

const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU);
const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT);
ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, nullptr));

if (ZeDeviceCount == 0) {
ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, nullptr));
}

if (ZeDeviceCount == 0 || !(AskingForGPU || AskingForDefault)) {
if (NumDevices)
*NumDevices = 0;
Expand All @@ -632,6 +660,14 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
return PI_SUCCESS;
}

// if devices are already captured in cache, return them from the cache.
for (const pi_device CachedDevice : Platform->PiDevicesCache) {
*Devices++ = CachedDevice;
}
if (!Platform->PiDevicesCache.empty()) {
return PI_SUCCESS;
}

try {
std::vector<ze_device_handle_t> ZeDevices(ZeDeviceCount);
ZE_CALL(zeDeviceGet(ZeDriver, &ZeDeviceCount, ZeDevices.data()));
Expand All @@ -643,6 +679,8 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
if (Result != PI_SUCCESS) {
return Result;
}
// save a copy in the cache for future uses.
Platform->PiDevicesCache.push_back(Devices[I]);
}
}
} catch (const std::bad_alloc &) {
Expand All @@ -655,7 +693,6 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,

pi_result piDeviceRetain(pi_device Device) {
assert(Device);

// The root-device ref-count remains unchanged (always 1).
if (Device->IsSubDevice) {
++(Device->RefCount);
Expand All @@ -665,14 +702,16 @@ pi_result piDeviceRetain(pi_device Device) {

pi_result piDeviceRelease(pi_device Device) {
assert(Device);

assert(Device->RefCount > 0 && "Device is already released.");
// TODO: OpenCL says root-device ref-count remains unchanged (1),
// but when would we free the device's data?
if (--(Device->RefCount) == 0) {
// Destroy the command list used for initializations
ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit));
delete Device;
}
if (Device->IsSubDevice)
--(Device->RefCount);
// TODO: All cached pi_devices live until the program ends.
// If L0 RT does not do its own cleanup for Ze_Device_Handle upon tear-down,
// we need to figure out a way to call here
// ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); and,
// in piDevicesGet(), we need to call initialize for each cached pi_device.

return PI_SUCCESS;
}
Expand Down
5 changes: 5 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <iostream>
#include <mutex>
#include <unordered_map>
#include <vector>

#include <level_zero/ze_api.h>

Expand Down Expand Up @@ -69,6 +70,10 @@ struct _pi_platform {
// Cache versions info from zeDriverGetProperties.
std::string ZeDriverVersion;
std::string ZeDriverApiVersion;

// Cache pi_devices for reuse
std::vector<pi_device> PiDevicesCache;
std::mutex PiDevicesCacheMutex;
};

struct _pi_device : _pi_object {
Expand Down
9 changes: 4 additions & 5 deletions sycl/source/detail/device_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle,
nullptr);

MIsRootDevice = (nullptr == parent);
if (!MIsRootDevice && !InteroperabilityConstructor) {
if (!InteroperabilityConstructor) {
// TODO catch an exception and put it to list of asynchronous exceptions
// Interoperability Constructor already calls DeviceRetain in
// piextDeviceFromNative.
Expand Down Expand Up @@ -98,10 +98,9 @@ cl_device_id device_impl::get() const {
PI_INVALID_DEVICE);

const detail::plugin &Plugin = getPlugin();
if (!MIsRootDevice) {
// TODO catch an exception and put it to list of asynchronous exceptions
Plugin.call<PiApiKind::piDeviceRetain>(MDevice);
}

// TODO catch an exception and put it to list of asynchronous exceptions
Plugin.call<PiApiKind::piDeviceRetain>(MDevice);
return pi::cast<cl_device_id>(getNative());
}

Expand Down