20
20
#include < string>
21
21
#include < thread>
22
22
#include < utility>
23
- #include < vector>
24
23
25
24
#include < level_zero/zet_api.h>
26
25
@@ -487,6 +486,13 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
487
486
return PI_INVALID_VALUE;
488
487
}
489
488
489
+ // Cache pi_platforms for reuse in the future
490
+ // It solves two problems:
491
+ // 1. sycl::device equality issue; we always return the same pi_device.
492
+ // 2. performance; we can save time by immediately returning from the cache.
493
+ static std::vector<pi_platform> PiPlatformsCache;
494
+ static std::mutex PiPlatformsCacheMutex;
495
+
490
496
// This is a good time to initialize Level Zero.
491
497
// TODO: We can still safely recover if something goes wrong during the init.
492
498
// Implement handling segfault using sigaction.
@@ -521,6 +527,18 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
521
527
assert (ZeDriverCount == 1 );
522
528
ZE_CALL (zeDriverGet (&ZeDriverCount, &ZeDriver));
523
529
530
+ std::lock_guard<std::mutex> Lock (PiPlatformsCacheMutex);
531
+ for (const pi_platform CachedPlatform : PiPlatformsCache) {
532
+ if (CachedPlatform->ZeDriver == ZeDriver) {
533
+ Platforms[0 ] = CachedPlatform;
534
+ // if the caller sent a valid NumPlatforms pointer, set it here
535
+ if (NumPlatforms)
536
+ *NumPlatforms = 1 ;
537
+
538
+ return PI_SUCCESS;
539
+ }
540
+ }
541
+
524
542
try {
525
543
// TODO: figure out how/when to release this memory
526
544
*Platforms = new _pi_platform (ZeDriver);
@@ -546,6 +564,9 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
546
564
Platforms[0 ]->ZeDriverApiVersion =
547
565
std::to_string (ZE_MAJOR_VERSION (ZeApiVersion)) + std::string (" ." ) +
548
566
std::to_string (ZE_MINOR_VERSION (ZeApiVersion));
567
+
568
+ // save a copy in the cache for future uses
569
+ PiPlatformsCache.push_back (Platforms[0 ]);
549
570
} catch (const std::bad_alloc &) {
550
571
return PI_OUT_OF_HOST_MEMORY;
551
572
} catch (...) {
@@ -639,9 +660,16 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
639
660
640
661
// Get number of devices supporting Level Zero
641
662
uint32_t ZeDeviceCount = 0 ;
663
+ std::lock_guard<std::mutex> Lock (Platform->PiDevicesCacheMutex );
664
+ ZeDeviceCount = Platform->PiDevicesCache .size ();
665
+
642
666
const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU);
643
667
const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT);
644
- ZE_CALL (zeDeviceGet (ZeDriver, &ZeDeviceCount, nullptr ));
668
+
669
+ if (ZeDeviceCount == 0 ) {
670
+ ZE_CALL (zeDeviceGet (ZeDriver, &ZeDeviceCount, nullptr ));
671
+ }
672
+
645
673
if (ZeDeviceCount == 0 || !(AskingForGPU || AskingForDefault)) {
646
674
if (NumDevices)
647
675
*NumDevices = 0 ;
@@ -657,6 +685,14 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
657
685
return PI_SUCCESS;
658
686
}
659
687
688
+ // if devices are already captured in cache, return them from the cache.
689
+ for (const pi_device CachedDevice : Platform->PiDevicesCache ) {
690
+ *Devices++ = CachedDevice;
691
+ }
692
+ if (!Platform->PiDevicesCache .empty ()) {
693
+ return PI_SUCCESS;
694
+ }
695
+
660
696
try {
661
697
std::vector<ze_device_handle_t > ZeDevices (ZeDeviceCount);
662
698
ZE_CALL (zeDeviceGet (ZeDriver, &ZeDeviceCount, ZeDevices.data ()));
@@ -668,6 +704,8 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
668
704
if (Result != PI_SUCCESS) {
669
705
return Result;
670
706
}
707
+ // save a copy in the cache for future uses.
708
+ Platform->PiDevicesCache .push_back (Devices[I]);
671
709
}
672
710
}
673
711
} catch (const std::bad_alloc &) {
@@ -680,7 +718,6 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
680
718
681
719
pi_result piDeviceRetain (pi_device Device) {
682
720
assert (Device);
683
-
684
721
// The root-device ref-count remains unchanged (always 1).
685
722
if (Device->IsSubDevice ) {
686
723
++(Device->RefCount );
@@ -690,14 +727,16 @@ pi_result piDeviceRetain(pi_device Device) {
690
727
691
728
pi_result piDeviceRelease (pi_device Device) {
692
729
assert (Device);
693
-
730
+ assert (Device-> RefCount > 0 && " Device is already released. " );
694
731
// TODO: OpenCL says root-device ref-count remains unchanged (1),
695
732
// but when would we free the device's data?
696
- if (--(Device->RefCount ) == 0 ) {
697
- // Destroy the command list used for initializations
698
- ZE_CALL (zeCommandListDestroy (Device->ZeCommandListInit ));
699
- delete Device;
700
- }
733
+ if (Device->IsSubDevice )
734
+ --(Device->RefCount );
735
+ // TODO: All cached pi_devices live until the program ends.
736
+ // If the L0 runtime does not clean up its ze_device_handle_t handles upon tear-down,
737
+ // we need to figure out a way to call here
738
+ // ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); and,
739
+ // in piDevicesGet(), we need to call initialize for each cached pi_device.
701
740
702
741
return PI_SUCCESS;
703
742
}
0 commit comments